本文共 2058 字,大约阅读时间需要 6 分钟。
scala> val lines = sc.textFile("/root/helloSpark.txt")
lines: org.apache.spark.rdd.RDD[String] = /root/helloSpark.txt MapPartitionsRDD[1] at textFile at <console>:24
scala> lines.count()
res0: Long = 3
scala> val rdd = sc.parallelize(Array(1,2,2,4), 4)
rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[2] at parallelize at <console>:24

scala> rdd.count()
res2: Long = 4

scala> rdd.foreach(println)
1
2
2
4

scala> rdd.foreach(println)
1
2
2
4
scala> val lines = sc.textFile("/root/helloSpark.txt")
scala> lines = sc.textFile("/root/test.txt")
<console>:26: error: reassignment to val
       lines = sc.textFile("/root/test.txt")
             ^

scala> var lines2 = sc.textFile("/root/test.log")
lines2: org.apache.spark.rdd.RDD[String] = /root/test.log MapPartitionsRDD[8] at textFile at <console>:24

scala> lines2 = sc.textFile("/root/test.txt")
lines2: org.apache.spark.rdd.RDD[String] = /root/test.txt MapPartitionsRDD[10] at textFile at <console>:26
scala> val lines2 = lines.filter(line => line.contains("hello"))
lines2: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[13] at filter at <console>:26

scala> lines2.collect()
res7: Array[String] = Array(go to home hello java, so many to hello word kafka java)