import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
// In spark-shell, the SparkSession implicits needed below for .toDF are
// already in scope; in a standalone application, import spark.implicits._
// from your SparkSession first.
val dfDate = Seq(
  "2021-01-25T13:33:44.343Z",
  "2019-02-05T14:06:31.556+0100",
  "2021-01-25T13:33:44.343+1:00"  // non-standard offset (+1:00); strict parsers may return null here
).toDF("input_timestamp")
dfDate.show(false)
println("==== apply the to_timestamp method, loosing sub seconds=====")
val resultdf = dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))
resultdf.printSchema
resultdf.show(false)
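No single pattern matches every offset style in the sample data (Z, +0100, and the non-standard +1:00), so some rows come back null. A common workaround, sketched here under the assumption of Spark 3.x pattern letters, is to try several patterns and keep the first successful parse:

// Sketch: try several offset styles and keep the first parse that succeeds.
// With spark.conf.set("spark.sql.legacy.timeParserPolicy", "CORRECTED")
// unparseable rows yield null instead of raising, so coalesce can move on.
val parsedDf = dfDate.withColumn("parsed_ts",
  coalesce(
    to_timestamp(col("input_timestamp"), "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), // matches 'Z' or '+01:00'
    to_timestamp(col("input_timestamp"), "yyyy-MM-dd'T'HH:mm:ss.SSSZ")))  // matches '+0100'
parsedDf.show(false)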
println("==== explicit cast to timestamp... loosing sub seconds=====")
val resultdf = dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ").cast(TimestampType))
resultdf.printSchema
resultdf.show(false)
println("==== cast to timestamp... retains sub seconds =====")
val resultdf = dfDate.withColumn("datetype_timestamp",col("input_timestamp").cast(TimestampType))
resultdf.printSchema
resultdf.show(false)
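As a quick check (a sketch, not part of the original walkthrough), you can format the casted column back out with a fractional-seconds pattern to confirm the milliseconds survived the cast:

// Sketch: render the milliseconds of the casted TimestampType column;
// "SSS" is the standard fractional-seconds pattern letter.
resultdf3
  .withColumn("millis", date_format(col("datetype_timestamp"), "SSS"))
  .show(false)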
println("==== cast to date... retains sub seconds =====")
val resultdf = dfDate.withColumn("datetype_timestamp",to_date(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))
resultdf.printSchema
resultdf.show(false)
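Because the result is a DateType, only calendar fields remain; a small sketch pulling them out:

// Sketch: DateType carries no time of day, only calendar fields.
resultdf4
  .select(
    col("datetype_timestamp"),
    year(col("datetype_timestamp")).as("year"),
    month(col("datetype_timestamp")).as("month"),
    dayofmonth(col("datetype_timestamp")).as("day"))
  .show(false)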
println("==== parse as String =====")
val resultdf = dfDate.withColumn("datetype_timestamp1",date_format(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))
resultdf.printSchema
resultdf.show(false)
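The same function can emit any other pattern; a sketch with an arbitrary human-readable format (the pattern below is chosen purely for illustration):

// Sketch: date_format implicitly casts the input string to a timestamp
// and renders it with whatever pattern you supply.
dfDate
  .withColumn("pretty", date_format(col("input_timestamp"), "dd MMM yyyy HH:mm:ss"))
  .show(false)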
println("==== parsed String cast it =====")
val resultdf3 = resultdf.withColumn("datetype_timestamp2",col("datetype_timestamp1").cast(TimestampType))
resultdf3.printSchema
resultdf3.show(false)
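Since some of the sample inputs use a non-standard offset, a few rows may come back null after these conversions. A quick sketch (not from the original post) to flag them:

// Sketch: rows where the round trip produced null, i.e. inputs the
// chosen pattern could not parse.
resultdf6.filter(col("datetype_timestamp2").isNull).show(false)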