Powered By Blogger

Saturday, October 5, 2019

Message to RDD to DataFrame example

// Brings into scope the implicit conversions that add `toDF` to RDDs and local Seqs.
import spark.implicits._

// Column names applied to the DataFrames built below.
val columns = Seq("language", "users_count")
// Sample (language, users_count) pairs; both fields are kept as strings.
val data = Seq(("Java", "20000"), ("Python", "100000"), ("Scala", "3000"))
// Distribute the local sample as an RDD of tuples.
val rdd = spark.sparkContext.parallelize(data)

// toDF() with no arguments: Spark picks default column names
// (for tuples these are `_1`, `_2`); printSchema() shows the result.
val dfFromRDD1 = rdd.toDF()
dfFromRDD1.printSchema()

// toDF with explicit column names. This is bound to its own identifier:
// defining `dfFromRDD1` a second time only works through REPL shadowing and is
// a duplicate-definition compile error in a compiled source file.
val dfFromRDD1Named = rdd.toDF("language", "users_count")
dfFromRDD1Named.printSchema()

// Same conversion via SparkSession.createDataFrame, then rename all columns at once.
val dfFromRDD2 = spark.createDataFrame(rdd).toDF(columns: _*)

import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.Row
// Explicit schema: one StringType field per column name, in `columns` order.
val schema = StructType(columns.map(StructField(_, StringType)))
// Wrap each (language, users_count) tuple in a generic Row so it can be paired with `schema`.
val rowRDD = rdd.map { case (language, usersCount) => Row(language, usersCount) }
// Build the DataFrame from the Row RDD using the explicit schema.
val dfFromRDD3 = spark.createDataFrame(rowRDD, schema)

// Provides `toDF` on local Seqs (harmless if already imported earlier in the session).
import spark.implicits._
// Build a DataFrame directly from the local Seq of tuples, without going through an RDD.
val dfFromData1 = data.toDF()


// Explicit converters (`.asJava`) replace the implicit `JavaConversions`, which is
// deprecated and was removed in Scala 2.13. On 2.13+ the preferred equivalent import
// is `scala.jdk.CollectionConverters._`.
import scala.collection.JavaConverters._

// Turn each tuple into a generic Row matching the two-field `schema`.
val rowData = data.map(attributes => Row(attributes._1, attributes._2))
// This createDataFrame overload takes a java.util.List[Row], so the Scala Seq is
// converted explicitly rather than through a hidden implicit conversion.
val dfFromData4 = spark.createDataFrame(rowData.asJava, schema)

No comments:

Post a Comment