import spark.implicits._
// Sample order lines. The `hour` and `item` columns contain nulls on purpose
// so that the effect of the null fill below is visible in the output.
val sourceDF = {
  val columnNames = Seq("date", "hour", "order", "line", "item", "time")
  val rows = Seq(
    ("2019-12-09", "11", 1, "L1", "I11", "2018"),
    ("2019-12-09", "11", 2, "L2", "I10", "2018"),
    ("2019-12-09", "11", 3, "L3", "I4", "2018"),
    ("2019-12-09", "11", 4, "L4", "I4", "2015"),
    ("2019-12-09", "11", 5, "L5", "I4", "2019"),
    ("2019-12-09", "11", 6, "L6", null, "2014"),
    ("2019-12-09", null, 7, "L7", null, "2013")
  )
  rows.toDF(columnNames: _*)
}
// Print every cell in full (no truncation of long values).
sourceDF.show(truncate = false)

// Replace nulls with a placeholder string, but only in the listed columns;
// all other columns are passed through unchanged.
val filledDF = {
  val placeholder = "-99999"
  val columnsToFill = Seq("hour", "item")
  sourceDF.na.fill(placeholder, columnsToFill)
}
filledDF.show(truncate = false)
scala> sourceDF.show(false)
+----------+----+-----+----+----+----+
|date      |hour|order|line|item|time|
+----------+----+-----+----+----+----+
|2019-12-09|11  |1    |L1  |I11 |2018|
|2019-12-09|11  |2    |L2  |I10 |2018|
|2019-12-09|11  |3    |L3  |I4  |2018|
|2019-12-09|11  |4    |L4  |I4  |2015|
|2019-12-09|11  |5    |L5  |I4  |2019|
|2019-12-09|11  |6    |L6  |null|2014|
|2019-12-09|null|7    |L7  |null|2013|
+----------+----+-----+----+----+----+
scala> val filledDF = sourceDF.na.fill("-99999",Seq("hour","item"))
filledDF: org.apache.spark.sql.DataFrame = [date: string, hour: string ... 4 more fields]
scala> filledDF.show(false)
+----------+------+-----+----+------+----+
|date      |hour  |order|line|item  |time|
+----------+------+-----+----+------+----+
|2019-12-09|11    |1    |L1  |I11   |2018|
|2019-12-09|11    |2    |L2  |I10   |2018|
|2019-12-09|11    |3    |L3  |I4    |2018|
|2019-12-09|11    |4    |L4  |I4    |2015|
|2019-12-09|11    |5    |L5  |I4    |2019|
|2019-12-09|11    |6    |L6  |-99999|2014|
|2019-12-09|-99999|7    |L7  |-99999|2013|
+----------+------+-----+----+------+----+
scala>
No comments:
Post a Comment