Powered By Blogger

Monday, May 3, 2021

Write a JSON file to HDFS with Spark

 

Pass a valid HDFS path. The method creates a single file, overwriting any existing file at that path.

/**
 * Writes `fileBody` to a single file at `fileLocation` on HDFS, overwriting
 * any existing file at that path (`FileSystem.create` overwrites by default).
 *
 * @param fileLocation absolute HDFS path of the target file
 * @param fileBody     full contents to write (encoded with the platform default
 *                     charset via `String.getBytes` — NOTE(review): consider an
 *                     explicit UTF-8 charset; confirm with callers)
 * @param sparkSession session whose Hadoop configuration locates the FileSystem
 * @throws WriterException if writing fails for any non-fatal reason
 */
@throws[WriterException]
def writeFileToHDFS(fileLocation: String, fileBody : String)(sparkSession: SparkSession): Unit = {
  logInfo(s"Write the contents to the file location $fileLocation")
  var fsDataOutputStream : FSDataOutputStream = null
  try {
    val hadoopConf: Configuration = sparkSession.sparkContext.hadoopConfiguration
    val hdfs: FileSystem = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
    val exportToFile = new Path(fileLocation)

    fsDataOutputStream = hdfs.create(exportToFile)
    fsDataOutputStream.write(fileBody.getBytes)
    // hflush guarantees the data is visible to other readers of the file.
    fsDataOutputStream.hflush()
  } catch {
    // NonFatal (not bare Exception) so fatal errors and control-flow
    // throwables (e.g. InterruptedException) are not masked as WriterException.
    case scala.util.control.NonFatal(ex) =>
      logError(s"Error in writing to the file $fileLocation", ex)
      throw WriterException(s"Error in writing to the file $fileLocation")
  } finally {
    // Always release the HDFS stream, even on failure.
    logInfo("Closing the writer object")
    if (fsDataOutputStream != null) {
      fsDataOutputStream.close()
    }
  }
}



If we write via a DataFrame, Spark will create a folder at the given location and write the output file inside it.

/**
 * Writes `fileBody` to `fileLocation` via the DataFrame writer API.
 * Spark creates a directory at `fileLocation` containing a single part file
 * (the DataFrame is coalesced to one partition), using overwrite mode.
 *
 * @param fileLocation target HDFS directory for the text output
 * @param fileBody     full contents to write as a single row
 * @param sparkSession session used to build the one-row DataFrame
 */
def writeFileToHDFSUsingDataFrame(fileLocation: String, fileBody : String)(sparkSession: SparkSession): Unit ={
  import sparkSession.implicits._
  // One row holding the entire body; coalesce(1) forces a single output file.
  val singleRowFrame = Seq(fileBody).toDF()
  val writer = singleRowFrame.coalesce(1).write.mode("overwrite")
  writer.text(fileLocation)
}

No comments:

Post a Comment