scala> import org.apache.spark.sql.functions.{lit, to_date}
import org.apache.spark.sql.functions.{lit, to_date}
scala> val dateFormat = "yyyy-dd-MM"
dateFormat: String = yyyy-dd-MM
scala> val cleanDateDF = spark.range(1).select(
| to_date(lit("2017-12-11"), dateFormat).alias("date"),
| to_date(lit("2017-20-12"), dateFormat).alias("date2"))
cleanDateDF: org.apache.spark.sql.DataFrame = [date: date, date2: date]
scala> cleanDateDF.show(2)
+----------+----------+
| date| date2|
+----------+----------+
|2017-11-12|2017-12-20|
+----------+----------+
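The same pattern string also works with to_timestamp, which always requires a format to be specified. A minimal sketch of a follow-up (not from the transcript above, assuming the same spark-shell session is still open):

import org.apache.spark.sql.functions.{lit, to_timestamp}

// Parse a string literal straight into a timestamp column using the
// same day-before-month pattern; the time-of-day defaults to midnight,
// so "2017-20-12" should come back as 2017-12-20 00:00:00.
spark.range(1)
  .select(to_timestamp(lit("2017-20-12"), "yyyy-dd-MM").alias("ts"))
  .show()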
Spark: The Definitive Guide has good details about this kind of data manipulation in the Hadoop/Spark ecosystem.