Powered By Blogger

Monday, October 24, 2022

Parsing ISO 8601 format dates in Spark

 

import org.apache.spark.sql.functions._

import org.apache.spark.sql.Column;

import org.apache.spark.sql.Dataset;

import org.apache.spark.sql.Row;

import org.apache.spark.sql.SparkSession;

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;

import org.apache.spark.sql.types.DataType;

import org.apache.spark.sql.types.DataTypes;


    val dfDate = Seq(("2021-01-25T13:33:44.343Z"),

    ("2019-02-05T14:06:31.556+0100")).toDF("input_timestamp")

  dfDate.withColumn("datetype_timestamp",to_timestamp(col("input_timestamp"),"yyyy-MM-dd'T'HH:mm:ss.SSSZ")).show(false)


  

+----------------------------+-------------------+

|input_timestamp             |datetype_timestamp |

+----------------------------+-------------------+

|2021-01-25T13:33:44.343Z    |null               |

|2019-02-05T14:06:31.556+0100|2019-02-05 18:36:31|

+----------------------------+-------------------+



1 comment:

  1. https://stackoverflow.com/questions/54601917/spark-2-4-0-unable-to-parse-iso8601-string-into-timestamptype-preserving-ms it has good details on parsing

    ReplyDelete