Powered By Blogger

Monday, February 24, 2020

Adding column in dataframe with user defined value if the column is null

Add another column by checking the previous column value


scala> val df1 = Seq(
     |            (
     |              "track1",
     |              "2019-11-18T16:11:28",
     |              "linekey1",
     |              "item1",
     |              "123"
     |            ),
     |            (
     |              "track2",
     |              "2019-11-18T16:11:28",
     |              "linekey2",
     |              "item2",
     |              null
     |            )).toDF(
     |            "number",
     |            "source_modified_timestamp",
     |            "line_key",
     |            "item_no",
     |            "tracking_number"
     |          )
df1: org.apache.spark.sql.DataFrame = [number: string, source_modified_timestamp: string ... 3 more fields]

scala> df1.show
+------+-------------------------+--------+-------+---------------+
|number|source_modified_timestamp|line_key|item_no|tracking_number|
+------+-------------------------+--------+-------+---------------+
|track1|      2019-11-18T16:11:28|linekey1|  item1|            123|
|track2|      2019-11-18T16:11:28|linekey2|  item2|           null|
+------+-------------------------+--------+-------+---------------+


scala>     df1.withColumn("tracking_number_rm", when($"tracking_number".isNull, lit("-999999")).otherwise(lit($"tracking_number"))).show
+------+-------------------------+--------+-------+---------------+------------------+
|number|source_modified_timestamp|line_key|item_no|tracking_number|tracking_number_rm|
+------+-------------------------+--------+-------+---------------+------------------+
|track1|      2019-11-18T16:11:28|linekey1|  item1|            123|               123|
|track2|      2019-11-18T16:11:28|linekey2|  item2|           null|           -999999|
+------+-------------------------+--------+-------+---------------+------------------+


scala>         df1.withColumn("tracking_number_rm", when($"tracking_number".isNull, "-1111").otherwise($"tracking_number")).show
+------+-------------------------+--------+-------+---------------+------------------+
|number|source_modified_timestamp|line_key|item_no|tracking_number|tracking_number_rm|
+------+-------------------------+--------+-------+---------------+------------------+
|track1|      2019-11-18T16:11:28|linekey1|  item1|            123|               123|
|track2|      2019-11-18T16:11:28|linekey2|  item2|           null|             -1111|
+------+-------------------------+--------+-------+---------------+------------------+

No comments:

Post a Comment