CREATE EXTERNAL TABLE IF NOT EXISTS finance.json_serde_optd_table (
  retCode string,
  retMsg string,
  data array<struct<
    secid:string,
    tradedate:date,
    optid:string,
    ticker:string,
    secshortname:string,
    exchangecd:string,
    presettleprice:double,
    precloseprice:double,
    openprice:double,
    highestprice:double,
    lowestprice:double,
    closeprice:double,
    settlprice:double,
    turnovervol:double,
    turnovervalue:double,
    openint:int
  >>)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 'hdfs://wdp.xxxxx.cn:8020/nifi/finance1/optd/';

CREATE TABLE IF NOT EXISTS finance.tb_optd AS
SELECT
  b.data.secID,
  b.data.tradeDate,
  b.data.optID,
  b.data.ticker,
  b.data.secShortName,
  b.data.exchangeCD,
  b.data.preSettlePrice,
  b.data.preClosePrice,
  b.data.openPrice,
  b.data.highestPrice,
  b.data.lowestPrice,
  b.data.closePrice,
  b.data.settlPrice,
  b.data.turnoverVol,
  b.data.turnoverValue,
  b.data.openInt
FROM finance.json_serde_optd_table
LATERAL VIEW explode(json_serde_optd_table.data) b AS data;
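For context: this JsonSerDe maps each line of the files under the LOCATION path to one row of the staging table, so the NiFi flow must write one JSON object per line whose keys match the declared columns. A hypothetical input line (all field values invented for illustration):

{"retCode":1,"retMsg":"Success","data":[{"secid":"10001313","tradedate":"2016-06-01","optid":"OPT001","ticker":"510050","secshortname":"50ETF C JUN","exchangecd":"XSHG","presettleprice":0.10,"precloseprice":0.10,"openprice":0.10,"highestprice":0.12,"lowestprice":0.09,"closeprice":0.11,"settlprice":0.11,"turnovervol":1000,"turnovervalue":110000,"openint":5000}]}

The CTAS with LATERAL VIEW explode then turns each element of the data array into its own row of finance.tb_optd.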
First load the JAR that provides the JsonSerDe into Zeppelin (a %dep paragraph must run before the Spark interpreter is first used; restart the interpreter if it has already started):

%dep
z.load("/usr/hdp/2.4.2.0-258/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar");
// Define the collection of Hive models to import
case class HiveConfig(database: String, modelName: String, hdfsPath: String, schema: String, schema_tb: String);
var hiveConfigList = List[HiveConfig]();
// Create the equd data structure
// Define the JSON schema
val schema_json_equd_serde = """retCode string,
retMsg string,
data array<struct<
  secid:string,
  tradedate:date,
  ticker:string,
  secshortname:string,
  exchangecd:string,
  precloseprice:double,
  actprecloseprice:double,
  openprice:double,
  highestprice:double,
  lowestprice:double,
  closeprice:double,
  turnovervol:double,
  turnovervalue:double,
  dealamount:int,
  turnoverrate:double,
  accumadjfactor:double,
  negmarketvalue:double,
  marketvalue:double,
  pe:double,
  pe1:double,
  pb:double,
  isopen:int
>>""";
var schema_equd = """b.data.secID, b.data.ticker, b.data.secShortName, b.data.exchangeCD, b.data.tradeDate, b.data.preClosePrice, b.data.actPreClosePrice, b.data.openPrice, b.data.highestPrice, b.data.lowestPrice, b.data.closePrice, b.data.turnoverVol, b.data.turnoverValue, b.data.dealAmount, b.data.turnoverRate, b.data.accumAdjFactor, b.data.negMarketValue, b.data.marketValue, b.data.PE, b.data.PE1, b.data.PB, b.data.isOpen""";
hiveConfigList = hiveConfigList :+ HiveConfig("finance", "equd", "hdfs://wdp.xxxxx.cn:8020/nifi/finance1/", schema_json_equd_serde, schema_equd);
// Create the idxd data structure
// Define the JSON schema
val schema_json_idxd_serde = """retCode string,
retMsg string,
data array<struct<
  indexid:string,
  tradedate:date,
  ticker:string,
  porgfullname:string,
  secshortname:string,
  exchangecd:string,
  precloseindex:double,
  openindex:double,
  lowestindex:double,
  highestindex:double,
  closeindex:double,
  turnovervol:double,
  turnovervalue:double,
  chg:double,
  chgpct:double
>>""";
var schema_idxd = """b.data.indexID, b.data.tradeDate, b.data.ticker, b.data.porgFullName, b.data.secShortName, b.data.exchangeCD, b.data.preCloseIndex, b.data.openIndex, b.data.lowestIndex, b.data.highestIndex, b.data.closeIndex, b.data.turnoverVol, b.data.turnoverValue, b.data.CHG, b.data.CHGPct""";
hiveConfigList = hiveConfigList :+ HiveConfig("finance", "idxd", "hdfs://wdp.xxxxx.cn:8020/nifi/finance1/", schema_json_idxd_serde, schema_idxd);
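Any further model follows the same pattern. For instance, the optd model from the manual example at the top of this section could be registered too; a sketch reusing the column list from that DDL:

// Register optd (options daily) with the same struct fields as the manual DDL
val schema_json_optd_serde = """retCode string,
retMsg string,
data array<struct<secid:string, tradedate:date, optid:string, ticker:string, secshortname:string, exchangecd:string, presettleprice:double, precloseprice:double, openprice:double, highestprice:double, lowestprice:double, closeprice:double, settlprice:double, turnovervol:double, turnovervalue:double, openint:int>>""";
var schema_optd = """b.data.secID, b.data.tradeDate, b.data.optID, b.data.ticker, b.data.secShortName, b.data.exchangeCD, b.data.preSettlePrice, b.data.preClosePrice, b.data.openPrice, b.data.highestPrice, b.data.lowestPrice, b.data.closePrice, b.data.settlPrice, b.data.turnoverVol, b.data.turnoverValue, b.data.openInt""";
hiveConfigList = hiveConfigList :+ HiveConfig("finance", "optd", "hdfs://wdp.xxxxx.cn:8020/nifi/finance1/", schema_json_optd_serde, schema_optd);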
// Loop over the configured models and load each one into Hive
def loadDataToHive(args: HiveConfig) {
    val loadPath = args.hdfsPath + args.modelName;
    val tb_json_serde = "json_serde_" + args.modelName + "_table";
    val tb = "tb_" + args.modelName;
    val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
    if (args.database != "" && args.schema != "") {
        print("Creating project..." + args.modelName)
        hiveContext.sql("CREATE DATABASE IF NOT EXISTS " + args.database);
        print("Building the expanded model...");
        // EXTERNAL (as in the manual DDL above) keeps Hive from deleting the
        // NiFi output if the staging table is ever dropped
        hiveContext.sql("CREATE EXTERNAL TABLE IF NOT EXISTS " + args.database + "." + tb_json_serde + "(" + args.schema + ") ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' LOCATION '" + loadPath + "/'");
        // Print the CTAS statement for debugging, then run it
        println("CREATE TABLE IF NOT EXISTS " + args.database + "." + tb + " AS SELECT " + args.schema_tb + " FROM " + args.database + "." + tb_json_serde + " LATERAL VIEW explode(" + tb_json_serde + ".data) b AS data");
        hiveContext.sql("CREATE TABLE IF NOT EXISTS " + args.database + "." + tb + " AS SELECT " + args.schema_tb + " FROM " + args.database + "." + tb_json_serde + " LATERAL VIEW explode(" + tb_json_serde + ".data) b AS data");
        println(args.modelName + " expanded model load complete!");
    }
}
hiveConfigList.foreach { x => loadDataToHive(x) };
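After the loop finishes it is worth confirming that each flattened table actually contains rows. A minimal check, assuming the same sc available in the Zeppelin notebook and the tb_<model> naming convention used above:

// Count rows in every flattened table produced by loadDataToHive
val checkContext = new org.apache.spark.sql.hive.HiveContext(sc)
hiveConfigList.foreach { cfg =>
  val tb = cfg.database + ".tb_" + cfg.modelName
  // COUNT(*) comes back as a single-row DataFrame; pull the long out of row 0
  val rows = checkContext.sql("SELECT COUNT(*) FROM " + tb).collect()(0).getLong(0)
  println(tb + ": " + rows + " rows")
}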
Because data is an array nested inside the JSON response, the conversion above takes two steps. An alternative is to extract the data array from the JSON before it lands in HDFS, and then expose the flattened records to Hive directly with the statement further below.
Use SplitJson (a NiFi processor) to extract and split the data array.
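The same split can also be reproduced in Spark if you would rather keep everything in the notebook. This is a sketch of that alternative, not the post's NiFi flow; it reuses the HDFS paths from the examples above:

// Read the raw NiFi output: one {retCode, retMsg, data:[...]} object per line
import org.apache.spark.sql.functions.explode
val sqlCtx = new org.apache.spark.sql.hive.HiveContext(sc)
val raw = sqlCtx.read.json("hdfs://wdp.xxxxx.cn:8020/nifi/finance1/optd/")
// Flatten the data array so each option record becomes its own JSON object,
// then write the records where the SplitJson flow would have put them
raw.select(explode(raw("data")).as("d"))
   .selectExpr("d.*")
   .write.json("hdfs://wdp.xxxx.cn:8020/nifi/finance2/optd/")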
CREATE EXTERNAL TABLE IF NOT EXISTS finance.awen_optd (
  secid string,
  tradedate date,
  optid string,
  ticker string,
  secshortname string,
  exchangecd string,
  presettleprice double,
  precloseprice double,
  openprice double,
  highestprice double,
  lowestprice double,
  closeprice double,
  settlprice double,
  turnovervol double,
  turnovervalue double,
  openint int
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 'hdfs://wdp.xxxx.cn:8020/nifi/finance2/optd/';
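Since each line under finance2/optd now holds a single flat option record, this table is queryable without any LATERAL VIEW. A quick check from the same Zeppelin notebook (a sketch):

// No explode needed: the data array was already split upstream
val sqlCtx2 = new org.apache.spark.sql.hive.HiveContext(sc)
sqlCtx2.sql("SELECT ticker, tradedate, closeprice FROM finance.awen_optd LIMIT 10").show()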
This article was reposted from the 疯吻IT blog on cnblogs; original link: http://www.cnblogs.com/fengwenit/p/5631455.html. Please contact the original author for reprint permission.