scala> import spark.implicits._
import spark.implicits._
scala> import org.apache.spark.sql.types._
import org.apache.spark.sql.types._
scala> import org.apache.spark.sql._
import org.apache.spark.sql._
scala> val arrayStructData = Seq(
| Row("James",List(Row("Java","XX",120),Row("Scala","XA",300))),
| Row("Michael",List(Row("Java","XY",200),Row("Scala","XB",500))),
| Row("Robert",List(Row("Java","XZ",400),Row("Scala","XC",250))),
| Row("Washington",null)
| )
arrayStructData: Seq[org.apache.spark.sql.Row] = List([James,List([Java,XX,120], [Scala,XA,300])], [Michael,List([Java,XY,200], [Scala,XB,500])], [Robert,List([Java,XZ,400], [Scala,XC,250])], [Washington,null])
scala> val arrayStructSchema = new StructType().add("name",StringType)
| .add("booksIntersted",ArrayType(new StructType()
| .add("name",StringType)
| .add("author",StringType)
| .add("pages",IntegerType)))
arrayStructSchema: org.apache.spark.sql.types.StructType = StructType(StructField(name,StringType,true), StructField(booksIntersted,ArrayType(StructType(StructField(name,StringType,true), StructField(author,StringType,true), StructField(pages,IntegerType,true)),true),true))
scala> val df = spark.createDataFrame(spark.sparkContext
| .parallelize(arrayStructData),arrayStructSchema)
df: org.apache.spark.sql.DataFrame = [name: string, booksIntersted: array<struct<name:string,author:string,pages:int>>]
scala> df.printSchema()
root
|-- name: string (nullable = true)
|-- booksIntersted: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- name: string (nullable = true)
| | |-- author: string (nullable = true)
| | |-- pages: integer (nullable = true)
scala> df.show(false)
+----------+-----------------------------------+
|name |booksIntersted |
+----------+-----------------------------------+
|James |[[Java, XX, 120], [Scala, XA, 300]]|
|Michael |[[Java, XY, 200], [Scala, XB, 500]]|
|Robert |[[Java, XZ, 400], [Scala, XC, 250]]|
|Washington|null |
+----------+-----------------------------------+