An example that shows how to access a Hive table from Spark SQL (using HiveContext)
sparkexamples > c20 > a40 > a20
hive> select * from hivedemodb.subjects;
OK
math	80
science	75
history	78
science	80
Time taken: 0.368 seconds, Fetched: 4 row(s)
scala> // Create Hive Context
scala> val hc = new org.apache.spark.sql.hive.HiveContext(sc)
hc: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@3f193141
scala> // Query a Hive table
scala> val resultDf = hc.sql("select * from hivedemodb.subjects where subject='science'")
resultDf: org.apache.spark.sql.DataFrame = [subject: string, mark: string]
scala> resultDf.collect()
res4: Array[org.apache.spark.sql.Row] = Array([science,75], [science,80])
scala> // Change the subject name
scala> val modifiedDf = resultDf.map(x => ("science-n-environment", x(1)))
modifiedDf: org.apache.spark.rdd.RDD[(String, Any)] = MapPartitionsRDD[21] at map at <console>:31
scala> modifiedDf.collect()
res5: Array[(String, Any)] = Array((science-n-environment,75), (science-n-environment,80))
No comments:
Post a Comment