Powered By Blogger

Wednesday, October 26, 2022

ISO 8601 format to Date

 import org.apache.spark.sql.types._

import org.apache.spark.sql.functions._

import org.apache.spark.sql.Column;

import org.apache.spark.sql.Dataset;

import org.apache.spark.sql.Row;

import org.apache.spark.sql.SparkSession;

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;

import org.apache.spark.sql.types.DataType;

import org.apache.spark.sql.types.DataTypes;


    val dfDate = Seq(("2021-01-25T13:33:44.343Z"),

    ("2019-02-05T14:06:31.556+0100"),

    ("2021-01-25T13:33:44.343+1:00")).toDF("input_timestamp")


    dfDate.show(false)


println("====   apply the to_timestamp method, loosing sub seconds=====")


  val resultdf = dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))

  resultdf.printSchema

  resultdf.show(false)


println("==== explicit cast to timestamp... loosing sub seconds=====")

   val resultdf = dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ").cast(TimestampType))

  resultdf.printSchema

  resultdf.show(false)



println("====  cast to timestamp... retains sub seconds  =====")


  val resultdf = dfDate.withColumn("datetype_timestamp",col("input_timestamp").cast(TimestampType))

   resultdf.printSchema

  resultdf.show(false)



println("====  cast to date... retains sub seconds  =====")


  val resultdf = dfDate.withColumn("datetype_timestamp",to_date(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))

   resultdf.printSchema

  resultdf.show(false)



println("====  parse as String  =====")


  val resultdf = dfDate.withColumn("datetype_timestamp1",date_format(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ"))

 resultdf.printSchema

  resultdf.show(false)



println("====  parsed  String cast it  =====")


  val resultdf3 = resultdf.withColumn("datetype_timestamp2",col("datetype_timestamp1").cast(TimestampType))

 resultdf3.printSchema

  resultdf3.show(false)

Monday, October 24, 2022

Parsing ISO 8601 format dates in Spark

 

import org.apache.spark.sql.functions._

import org.apache.spark.sql.Column;

import org.apache.spark.sql.Dataset;

import org.apache.spark.sql.Row;

import org.apache.spark.sql.SparkSession;

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;

import org.apache.spark.sql.types.DataType;

import org.apache.spark.sql.types.DataTypes;


    val dfDate = Seq(("2021-01-25T13:33:44.343Z"),

    ("2019-02-05T14:06:31.556+0100")).toDF("input_timestamp")

  dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ")).show(false)


  

+----------------------------+-------------------+

|input_timestamp             |datetype_timestamp |

+----------------------------+-------------------+

|2021-01-25T13:33:44.343Z    |null               |

|2019-02-05T14:06:31.556+0100|2019-02-05 18:36:31|

+----------------------------+-------------------+