Powered By Blogger

Thursday, December 8, 2022

unionByName with an empty DataFrame

import spark.implicits._

import org.apache.spark.sql.types._
import org.apache.spark.sql._

// Sample rows as (name, dept, age) tuples.
val data = Seq(
  ("James", "Sales", 34),
  ("Michael", "Sales", 56),
  ("Robert", "Sales", 30),
  ("Maria", "Finance", 24)
)

// Turn the tuples into a DataFrame with explicit column names, then
// print the resulting schema.
val df1 = data.toDF("name", "dept", "age")
df1.printSchema()



// Explicit schema for the empty DataFrame: same column names as the
// populated one ("name", "dept", "age"). Only "name" is declared nullable.
val schema = StructType(
  Seq(
    StructField("name", StringType, nullable = true),
    StructField("dept", StringType, nullable = false),
    StructField("age", IntegerType, nullable = false)
  )
)

// A DataFrame with the schema above and no rows, backed by an empty RDD[Row].
val df = {
  val noRows = spark.sparkContext.emptyRDD[Row]
  spark.createDataFrame(noRows, schema)
}


// Populated DataFrame on the left, empty DataFrame on the right.
val merged_df = df1.unionByName(df)
merged_df.show(truncate = false)

// Same union with the operands swapped: empty on the left, populated on the right.
val merged_df2 = df.unionByName(df1)
merged_df2.show(truncate = false)

No comments:

Post a Comment