import spark.implicits._
val columns = Seq("language", "users_count")
val data = Seq(("Java", "20000"), ("Python", "100000"), ("Scala", "3000"))

// Create a DataFrame from an RDD
val rdd = spark.sparkContext.parallelize(data)

// toDF() without arguments falls back to the default tuple column names _1 and _2
val dfFromRDD1 = rdd.toDF()
dfFromRDD1.printSchema()
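For a two-column tuple RDD, printSchema() should print the default names:

root
 |-- _1: string (nullable = true)
 |-- _2: string (nullable = true)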
val dfFromRDD1 = rdd.toDF("language","users_count")
dfFromRDD1.printSchema()
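This time the schema carries the assigned column names:

root
 |-- language: string (nullable = true)
 |-- users_count: string (nullable = true)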
// createDataFrame() on the RDD, then toDF() to assign the column names
val dfFromRDD2 = spark.createDataFrame(rdd).toDF(columns: _*)
// createDataFrame() with an explicit schema gives full control over
// column names, types, and nullability
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.Row
val schema = StructType(columns
  .map(fieldName => StructField(fieldName, StringType)))
val rowRDD = rdd.map(attributes => Row(attributes._1, attributes._2))
val dfFromRDD3 = spark.createDataFrame(rowRDD, schema)
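The explicit schema is also the natural place to use real types instead of strings everywhere. A minimal sketch, assuming you want users_count stored as an integer (the names typedSchema, typedRowRDD, and dfTyped are just illustrative):

import org.apache.spark.sql.types.IntegerType

val typedSchema = StructType(Seq(
  StructField("language", StringType),
  StructField("users_count", IntegerType)))
// Parse the count to Int while building the rows so it matches the schema
val typedRowRDD = rdd.map(attributes => Row(attributes._1, attributes._2.toInt))
val dfTyped = spark.createDataFrame(typedRowRDD, typedSchema)
dfTyped.printSchema()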
// Create a DataFrame directly from the local Seq; spark.implicits._ is
// already in scope from the import at the top
val dfFromData1 = data.toDF()
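toDF() on a local Seq behaves like the RDD variant, so it accepts column names too (dfFromData1Named is just an illustrative name):

val dfFromData1Named = data.toDF(columns: _*)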
// createDataFrame() with Row data and the schema from above; this overload
// expects a java.util.List[Row], so convert the Scala Seq explicitly
// (on Scala 2.12, import scala.collection.JavaConverters._ instead of the
// deprecated scala.collection.JavaConversions._)
import scala.jdk.CollectionConverters._

val rowData = data.map(attributes => Row(attributes._1, attributes._2))
val dfFromData4 = spark.createDataFrame(rowData.asJava, schema)
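A quick show() confirms the data landed as expected; the output should look roughly like this:

dfFromData4.show()

+--------+-----------+
|language|users_count|
+--------+-----------+
|    Java|      20000|
|  Python|     100000|
|   Scala|       3000|
+--------+-----------+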