An example that shows how to access a Hive table from Spark SQL (using HiveContext)
sparkexamples > c20 > a40 > a20
hive> select * from hivedemodb.subjects; OK math 80 science 75 history 78 science 80 Time taken: 0.368 seconds, Fetched: 4 row(s)
scala> // Create Hive Context scala> val hc = new org.apache.spark.sql.hive.HiveContext(sc) hc: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@3f193141 scala> // Query a Hive table scala> val resultDf = hc.sql("select * from hivedemodb.subjects where subject='science'") resultDf: org.apache.spark.sql.DataFrame = [subject: string, mark: string] scala> resultDf.collect() res4: Array[org.apache.spark.sql.Row] = Array([science,75], [science,80]) scala> // Change the subject name scala> val modifiedDf = resultDf.map(x => ("science-n-environment", x(1))) modifiedDf: org.apache.spark.rdd.RDD[(String, Any)] = MapPartitionsRDD[21] at map at <console>:31 scala> modifiedDf.collect() res5: Array[(String, Any)] = Array((science-n-environment,75), (science-n-environment,80))
No comments:
Post a Comment