forked from p5e6vibhr/python_bigData
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
35 lines
985 B
35 lines
985 B
import happybase
|
|
from pyspark.sql import SparkSession, Row
|
|
import os
|
|
|
|
def HBaseDataLoading(column):
    """Load selected HBase columns from ``mytest_table`` into a Spark DataFrame.

    Sets the ``PYSPARK_PYTHON`` environment variable, creates (or reuses) a
    local Spark session, scans the HBase table ``mytest_table`` on host
    ``192.168.142.144`` for the requested columns, and converts the result
    into a DataFrame.

    Args:
        column: Iterable of column names as strings (e.g. ``'info:name'``).
            Each name is UTF-8 encoded before being sent to HBase.

    Returns:
        A Spark DataFrame with one string field per requested column (named
        after the column with every ``'info:'`` occurrence removed) plus a
        ``'序号'`` field holding the decoded row key.

    Raises:
        KeyError: If a scanned row does not contain one of the requested
            columns.
    """
    # Select the Python interpreter PySpark should use. A raw string keeps the
    # backslashes of the Windows path literal without invalid-escape warnings.
    os.environ['PYSPARK_PYTHON'] = r'D:\Program Files\Python37\python.exe'

    # Create the Spark session.
    spark = SparkSession.builder.appName("HBaseDataLoading").master('local').getOrCreate()

    # Materialise the column names once so that a one-shot iterable also works,
    # and hoist the per-column encoding / renaming out of the row loop.
    columns = list(column)
    qualifiers = [col.encode('utf-8') for col in columns]
    field_names = [col.replace('info:', '') for col in columns]

    # Connect to HBase.
    connection = happybase.Connection('192.168.142.144')
    try:
        # Get the table.
        table = connection.table('mytest_table')

        # Query the data: one dict per row, values decoded to str.
        data = []
        for key, row in table.scan(columns=qualifiers):
            row_data = {
                name: row[qualifier].decode()
                for name, qualifier in zip(field_names, qualifiers)
            }
            row_data['序号'] = key.decode()
            data.append(row_data)
    finally:
        # Close the connection even when the scan or decoding fails.
        connection.close()

    # Convert the data to a Spark DataFrame. Every value is already a str
    # (the result of bytes.decode()), so no further conversion is needed.
    spark_df = spark.createDataFrame([Row(**record) for record in data])

    return spark_df
|