// Word count over a text file, meant to be pasted into the Spark shell
// (`sc` is the SparkContext the shell provides).

// Load the input file as an RDD of lines.
val rdd1 = sc.textFile("/spark_data/spark1-test.txt")
// Break every line into words. Splitting on runs of whitespace and dropping
// empty tokens keeps repeated spaces, tabs, leading blanks and blank lines
// from being counted as an empty "word".
val rdd2 = rdd1.flatMap(line => line.split("\\s+")).filter(word => word.nonEmpty)
// Pair each word with an initial count of 1.
val rdd3 = rdd2.map(word => (word, 1))
// Sum the counts of identical words.
val rdd4 = rdd3.reduceByKey((x, y) => x + y)
// Action: runs the job and returns the (word, count) pairs as an Array.
rdd4.collect()
// Equivalent shorthand for the reduce step:
//val rdd4 = rdd3.reduceByKey(_ + _)
scala> val rdd1 = sc.textFile("/spark_data/spark1-test.txt")
rdd1: org.apache.spark.rdd.RDD[String] = /spark_data/spark1-test.txt MapPartitionsRDD[6] at textFile at <console>:27
scala> val rdd2 = rdd1.flatMap(x=> x.split(" "))
rdd2: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[7] at flatMap at <console>:29
scala> val rdd3 = rdd2.map(word => (word,1))
rdd3: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[8] at map at <console>:31
scala> val rdd4 = rdd3.reduceByKey((x,y)=> {x+y})
rdd4: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[9] at reduceByKey at <console>:33
scala> rdd4.collect
res2: Array[(String, Int)] = Array((hive,1), (am,1), (training,1), (attending,1), (hello,1), (pig,1), (spark,1), (I,1), (Basan,1))
scala>
// Word-count steps applied to `rdd1`, the RDD of input lines created with sc.textFile.

// Tokenise each line on runs of whitespace; empty tokens (from leading blanks
// or blank lines) are discarded so they are not counted as a word.
val rdd2 = rdd1.flatMap(line => line.split("\\s+")).filter(word => word.nonEmpty)
// Emit a (word, 1) pair for every token.
val rdd3 = rdd2.map(word => (word, 1))
// Add together the 1s that share the same word.
val rdd4 = rdd3.reduceByKey((x, y) => x + y)
// Action: triggers the computation and returns the (word, count) pairs as an Array.
rdd4.collect()
// Equivalent shorthand for the reduce step:
//val rdd4 = rdd3.reduceByKey(_ + _)
scala> val rdd1 = sc.textFile("/spark_data/spark1-test.txt")
rdd1: org.apache.spark.rdd.RDD[String] = /spark_data/spark1-test.txt MapPartitionsRDD[6] at textFile at <console>:27
scala> val rdd2 = rdd1.flatMap(x=> x.split(" "))
rdd2: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[7] at flatMap at <console>:29
scala> val rdd3 = rdd2.map(word => (word,1))
rdd3: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[8] at map at <console>:31
scala> val rdd4 = rdd3.reduceByKey((x,y)=> {x+y})
rdd4: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[9] at reduceByKey at <console>:33
scala> rdd4.collect
res2: Array[(String, Int)] = Array((hive,1), (am,1), (training,1), (attending,1), (hello,1), (pig,1), (spark,1), (I,1), (Basan,1))
scala>
No comments:
Post a Comment