
Monday, October 7, 2019

Add a fixed-value column to a DataFrame

The spark-shell session below builds a small test DataFrame and then uses withColumn together with lit to append a column that holds the same constant value ("204") in every row.

scala> val dfLocal = Seq(
     |               ("DEPARTMENT_ID1","DEPARTMENT_NAME1","GROUP_NAME1"),
     |               ("1","PRESCHOOL","asdasf"),
     |               ("2","COSMETICS","COSMETICS"),
     |               ("3","FURNITURE","HOME")
     |             ).toDF("DEPARTMENT_ID","DEPARTMENT_NAME","GROUP_NAME")
dfLocal: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 1 more field]

scala> dfLocal.printSchema()
root
 |-- DEPARTMENT_ID: string (nullable = true)
 |-- DEPARTMENT_NAME: string (nullable = true)
 |-- GROUP_NAME: string (nullable = true)


scala> import org.apache.spark.sql.functions.{lit, typedLit}
import org.apache.spark.sql.functions.{lit, typedLit}

scala> val successDataSet2 = dfLocal.withColumn("newcolumnadded", lit("204"))
successDataSet2: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 2 more fields]

scala> successDataSet2.show(2)
+--------------+----------------+-----------+--------------+
| DEPARTMENT_ID| DEPARTMENT_NAME| GROUP_NAME|newcolumnadded|
+--------------+----------------+-----------+--------------+
|DEPARTMENT_ID1|DEPARTMENT_NAME1|GROUP_NAME1|           204|
|             1|       PRESCHOOL|     asdasf|           204|
+--------------+----------------+-----------+--------------+
only showing top 2 rows


scala> successDataSet2.printSchema()
root
 |-- DEPARTMENT_ID: string (nullable = true)
 |-- DEPARTMENT_NAME: string (nullable = true)
 |-- GROUP_NAME: string (nullable = true)
 |-- newcolumnadded: string (nullable = false)


scala>
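Note that the new column comes back as nullable = false, because a literal value can never be null.

The lit function covers simple scalar constants such as the string "204" above. The typedLit variant (imported alongside lit) is useful when the constant is a parameterized Scala type such as a Seq, Map, or Option, where the element types need to be carried over into the Spark schema. Below is a minimal sketch, reusing dfLocal from the session above; the column names tags and lookup are just illustrative choices:

import org.apache.spark.sql.functions.{lit, typedLit}

// Simple scalar constant, same as the example above
val withCode = dfLocal.withColumn("newcolumnadded", lit("204"))

// typedLit also handles collection literals, producing
// array<string> and map<string,string> columns respectively
val withTags   = dfLocal.withColumn("tags",   typedLit(Seq("a", "b")))
val withLookup = dfLocal.withColumn("lookup", typedLit(Map("dept" -> "204")))

Calling withTags.printSchema() should show the tags column as array<string>, and withLookup.printSchema() should show lookup as map<string,string>.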
