scala> val dfLocal = Seq(
| ("DEPARTMENT_ID1","DEPARTMENT_NAME1","GROUP_NAME1"),
|
| ("1","PRESCHOOL", "asdasf"),
| ("2","COSMETICS", "COSMETICS"),
| ("3","FURNITURE", "HOME")
| ).toDF("DEPARTMENT_ID","DEPARTMENT_NAME","GROUP_NAME")
dfLocal: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 1 more field]
scala> dfLocal.printSchema()
root
|-- DEPARTMENT_ID: string (nullable = true)
|-- DEPARTMENT_NAME: string (nullable = true)
|-- GROUP_NAME: string (nullable = true)
scala> import org.apache.spark.sql.functions.{lit, typedLit}
import org.apache.spark.sql.functions.{lit, typedLit}
scala> val successDataSet2 = dfLocal.withColumn("newcolumnadded", lit("204"))
successDataSet2: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 2 more fields]
scala> successDataSet2.show(2)
+--------------+----------------+-----------+--------------+
| DEPARTMENT_ID| DEPARTMENT_NAME| GROUP_NAME|newcolumnadded|
+--------------+----------------+-----------+--------------+
|DEPARTMENT_ID1|DEPARTMENT_NAME1|GROUP_NAME1| 204|
| 1| PRESCHOOL| asdasf| 204|
+--------------+----------------+-----------+--------------+
only showing top 2 rows
scala> successDataSet2.printSchema()
root
|-- DEPARTMENT_ID: string (nullable = true)
|-- DEPARTMENT_NAME: string (nullable = true)
|-- GROUP_NAME: string (nullable = true)
|-- newcolumnadded: string (nullable = false)
scala> val dfLocal = Seq(
| ("DEPARTMENT_ID1","DEPARTMENT_NAME1","GROUP_NAME1"),
|
| ("1","PRESCHOOL", "asdasf"),
| ("2","COSMETICS", "COSMETICS"),
| ("3","FURNITURE", "HOME")
| ).toDF("DEPARTMENT_ID","DEPARTMENT_NAME","GROUP_NAME")
dfLocal: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 1 more field]
scala> dfLocal.printSchema()
root
|-- DEPARTMENT_ID: string (nullable = true)
|-- DEPARTMENT_NAME: string (nullable = true)
|-- GROUP_NAME: string (nullable = true)
scala> import org.apache.spark.sql.functions.{lit, typedLit}
import org.apache.spark.sql.functions.{lit, typedLit}
scala> val successDataSet2 = dfLocal.withColumn("newcolumnadded", lit("204"))
successDataSet2: org.apache.spark.sql.DataFrame = [DEPARTMENT_ID: string, DEPARTMENT_NAME: string ... 2 more fields]
scala> successDataSet2.show(2)
+--------------+----------------+-----------+--------------+
| DEPARTMENT_ID| DEPARTMENT_NAME| GROUP_NAME|newcolumnadded|
+--------------+----------------+-----------+--------------+
|DEPARTMENT_ID1|DEPARTMENT_NAME1|GROUP_NAME1| 204|
| 1| PRESCHOOL| asdasf| 204|
+--------------+----------------+-----------+--------------+
only showing top 2 rows
scala> successDataSet2.printSchema()
root
|-- DEPARTMENT_ID: string (nullable = true)
|-- DEPARTMENT_NAME: string (nullable = true)
|-- GROUP_NAME: string (nullable = true)
|-- newcolumnadded: string (nullable = false)
scala>
No comments:
Post a Comment