import scala.util.{Failure, Success, Try}

// Sample RDD of (language, count) pairs.
val rdd = spark.sparkContext.parallelize(Seq(("Java", 20000), ("Python", 100000), ("Scala", 3000)))

// Wrap the per-record work in Try and convert the result to Either:
// records processed successfully become Right, failures become Left(exception).
val goodBadRecords = rdd.map { line =>
  Try {
    print(line)
  } match {
    case Success(value) => Right(value)
    case Failure(e)     => Left(e)
  }
}

// Split the RDD into good records and errors.
val records = goodBadRecords.filter(_.isRight)
val errors = goodBadRecords.filter(_.isLeft)

records.count()
errors.count()
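Note that print never throws, so in the snippet above every record lands on the Right side and errors.count() is always 0. A minimal sketch of the same Try/Either pattern with a step that can actually fail (the sample strings and the toInt parse are illustrative assumptions, not part of the original):

// Illustrative input: the last record has a non-numeric count and will fail to parse.
val raw = spark.sparkContext.parallelize(Seq("Java,20000", "Python,100000", "Scala,oops"))

val parsed = raw.map { line =>
  Try {
    val Array(lang, count) = line.split(",")
    (lang, count.toInt) // throws NumberFormatException for "oops"
  } match {
    case Success(pair) => Right(pair)
    case Failure(e)    => Left(e)
  }
}

parsed.filter(_.isRight).count() // 2 good records
parsed.filter(_.isLeft).count()  // 1 bad record

On Scala 2.12 and later, the match can be shortened: Try { ... }.toEither produces the same Right/Left split directly.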