import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

object DataFrameExample {

  // One row of the CSV file: id, name, age, number of friends.
  case class Person(ID: Int, name: String, age: Int, numFriends: Int)

  // Parse a comma-separated line into a Person.
  def mapper(line: String): Person = {
    val fields = line.split(",")
    Person(fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
  }
  def main(args: Array[String]): Unit = {
    // Keep the console output readable by logging errors only.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession.builder
      .appName("Data frame test")
      .master("local[*]")
      // On Windows you may need to set an explicit warehouse directory:
      // .config("spark.sql.warehouse.dir", "/Users/basan/Documents/Spark/tempwarehouse")
      .getOrCreate()

    val lines = spark.sparkContext.textFile("/Users/basan/workspace-spark/sparkDemo1/spark-data/friends-data.csv")
    // Convert the RDD[Person] to a Dataset; cache it because it is queried several times.
    import spark.implicits._
    val people = lines.map(mapper).toDS().cache()

    println("Inferred schema:")
    people.printSchema()

    println("Select the name column:")
    people.select("name").show()

    println("Keep only people under 21:")
    people.filter(people("age") < 21).show()

    println("Group by age:")
    people.groupBy("age").count().show()

    println("Make everyone 10 years older:")
    people.select(people("name"), people("age") + 10).show()
    people.select(people("name"), (people("age") + 10).alias("newcolumn")).show()

    spark.stop()
  }
}
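
The Dataset above is built by parsing each line by hand; the same result can come from Spark's built-in CSV reader. A minimal sketch, assuming the same headerless file layout and that it runs inside main where spark.implicits._ is in scope; the column names below are supplied manually, they are not in the file:

    // Alternative sketch: let Spark infer column types, name the columns to
    // match the Person case class, and convert to a typed Dataset.
    // Assumes the CSV has no header row.
    val peopleDF = spark.read
      .option("inferSchema", "true")
      .csv("/Users/basan/workspace-spark/sparkDemo1/spark-data/friends-data.csv")
      .toDF("ID", "name", "age", "numFriends")
    val typedPeople = peopleDF.as[Person]
    typedPeople.printSchema()

This skips the explicit mapper function, at the cost of trusting schema inference for the column types.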
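The DataFrame DSL calls above also have direct SQL equivalents. A short sketch of the group-by-age query expressed through Spark SQL, assuming the people Dataset from the example is in scope:

    // Register the Dataset as a temporary view so it can be queried with SQL.
    people.createOrReplaceTempView("people")
    spark.sql("SELECT age, COUNT(*) AS count FROM people GROUP BY age").show()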