You can not select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

35 lines
985 B

11 months ago
import happybase
from pyspark.sql import SparkSession, Row
import os
def HBaseDataLoading(column):
    """Load the requested HBase columns into a Spark DataFrame of strings.

    Scans the HBase table ``mytest_table`` on host ``192.168.142.144``,
    decodes every returned cell and row key as UTF-8 text, and builds one
    DataFrame row per HBase row.

    Args:
        column: Iterable of fully qualified column names (for example
            ``'info:name'``). A single string is treated as one column name
            rather than being iterated character by character.

    Returns:
        A Spark DataFrame with one string field per requested column (the
        text ``'info:'`` is removed from the field name) plus a ``'序号'``
        field holding the row key. Cells missing from a row are returned as
        the empty string. If the scan yields no rows, an empty DataFrame with
        the same string fields is returned.

    Side effects:
        Overwrites the ``PYSPARK_PYTHON`` environment variable and creates (or
        reuses) a local Spark session named ``HBaseDataLoading``.
    """
    # Imported locally: only needed to describe the schema of an empty result.
    from pyspark.sql.types import StringType, StructField, StructType

    # Select the Python interpreter used by PySpark. A raw string keeps the
    # backslashes literal instead of relying on invalid escape sequences
    # ("\P", "\p"), which newer Python versions warn about.
    os.environ['PYSPARK_PYTHON'] = r'D:\Program Files\Python37\python.exe'

    # Create the Spark session.
    spark = SparkSession.builder.appName("HBaseDataLoading").master('local').getOrCreate()

    # Normalise the requested columns once: (output field name, HBase column
    # as bytes). Doing this up front avoids re-encoding the names per row.
    columns = [column] if isinstance(column, str) else list(column)
    qualifiers = [(col.replace('info:', ''), col.encode('utf-8')) for col in columns]

    # Connect to HBase; the connection is always closed, even if the scan or
    # the decoding fails part-way through.
    connection = happybase.Connection('192.168.142.144')
    try:
        table = connection.table('mytest_table')
        data = []
        for key, row in table.scan(columns=[raw for _, raw in qualifiers]):
            # A scan only returns the cells that exist, so a row may lack
            # some of the requested columns; fall back to an empty value
            # instead of raising KeyError.
            row_data = {name: row.get(raw, b'').decode() for name, raw in qualifiers}
            row_data['序号'] = key.decode()
            data.append(row_data)
    finally:
        connection.close()

    if not data:
        # Spark cannot infer a schema from an empty list, so spell it out.
        # dict.fromkeys de-duplicates field names while keeping their order.
        field_names = list(dict.fromkeys([name for name, _ in qualifiers] + ['序号']))
        schema = StructType([StructField(name, StringType(), True) for name in field_names])
        return spark.createDataFrame([], schema)

    # Convert the collected rows (all values are already str) to a DataFrame.
    return spark.createDataFrame([Row(**row_data) for row_data in data])