通过 SQL 语句,以 '\t'(制表符)为分隔符进行划分
代码如下:
"""Load rows derived from an HBase table into a Hive table and print them.

Pipeline:
1. Connect to HiveServer2 and to the HBase Thrift server.
2. Scan the HBase table ``final_movie2``; each row key is split on ``,`` and
   rows whose second key field contains a space are skipped.
3. Optionally dump the collected rows to ``word.txt`` as tab-separated text.
4. Run ``LOAD DATA LOCAL INPATH`` for ``word.txt`` into ``final_word``, then
   select and print the whole table.
"""
import happybase
import numpy as np
import pandas as pd
from pyhive import hive

# The original script had the "write word.txt" step commented out, so it is
# disabled by default. Set to True to regenerate the file from the HBase scan.
WRITE_WORD_FILE = False

# Not executed by this script; kept for reference.
d_sql = 'drop table word'

# Not executed by this script; kept for reference.
# Fixed: the Hive keyword is TERMINATED (not TERMINATER) and the delimiter is
# the tab escape '\t' (the backslash is doubled so Hive receives the two
# characters ``\t`` and interprets the escape itself).
c_sqr = (
    "create table if not exists movie.word(id int,name string,age int,tel string) "
    "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' STORED AS TEXTFILE"
)

# NOTE(review): with HiveServer2, LOCAL paths are presumably resolved on the
# server host, not on this client - confirm word.txt is visible to Hive.
s_sql = 'load data local inpath "word.txt" into table final_word'

conn = hive.connect(
    host='192.168.43.148',
    port=10000,
    auth='NOSASL',
    username='沙粒',
    database='movie',
)
try:
    cursor = conn.cursor()

    connection = happybase.Connection('192.168.43.148', 9090)
    try:
        table = connection.table('final_movie2')

        # Each entry is [key field 0, key field 1, integer value of the first cell].
        word_count = []
        for key, data in table.scan():
            t = key.decode('utf-8').split(",")
            # Skip rows whose second key field contains a space.
            if " " in t[1]:
                continue
            temp2 = list(data.values())
            t2 = int(temp2[0].decode('utf-8'))
            word_count.append([t[0], t[1], t2])
    finally:
        # Release the Thrift connection even if the scan fails.
        connection.close()

    if WRITE_WORD_FILE:
        # One row per line, fields separated by a real tab so the file matches
        # a table declared with FIELDS TERMINATED BY '\t'.
        with open("word.txt", encoding="utf-8", mode="w") as f:
            for w in word_count:
                f.write("\t".join(str(m) for m in w) + "\n")

    cursor.execute(s_sql)
    cursor.execute('select * from final_word')
    result = cursor.fetchall()
    print(result)
finally:
    # Closing the connection also ends the Hive session opened above.
    conn.close()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)