121 lines
4.5 KiB
Python
121 lines
4.5 KiB
Python
import pandas as pd
|
||
import sqlalchemy
|
||
from sqlalchemy import create_engine, text
|
||
import configparser
|
||
import os
|
||
from datetime import datetime
|
||
from urllib.parse import quote_plus
|
||
|
||
def get_db_connection_string(config, section):
    """Build a SQLAlchemy MySQL (PyMySQL driver) URL from one config section.

    :param config: mapping of section name to a mapping of settings, for
        example a ``configparser.ConfigParser``. The section must provide
        ``user``, ``password``, ``host``, ``port`` and ``database``.
    :param section: name of the section to read.
    :return: URL of the form
        ``mysql+pymysql://user:password@host:port/database``.
    :raises KeyError: if the section or one of the required keys is missing.
    """
    settings = config[section]
    # Percent-encode both credentials: an unescaped '@', ':' or '/' in either
    # of them would otherwise be read as a URL delimiter.
    user = quote_plus(settings['user'])
    password = quote_plus(settings['password'])
    return (
        f"mysql+pymysql://{user}:{password}@"
        f"{settings['host']}:{settings['port']}/{settings['database']}"
    )
|
||
|
||
|
||
def _build_target_frame(source_df):
    """Return a new frame holding the source columns under their target names."""
    # Source column -> target column; dict order is the output column order.
    field_mapping = {
        'code': 'qr_code',
        'ma_model': 'type_id',
        'vender': 'supplier_id',
        'is_bind': 'is_bind',
        'gen_month': 'create_time',
        'task_id': 'task_id',
    }
    return source_df[list(field_mapping)].rename(columns=field_mapping)


def _append_new_records(conn, target_df):
    """Append to bm_qrcode_info the rows whose qr_code is not stored there yet."""
    # text() keeps the raw SQL executable on SQLAlchemy 2.x connections.
    existing_codes = pd.read_sql(
        text("SELECT qr_code FROM bm_qrcode_info"),
        conn,
    )['qr_code'].tolist()

    new_records = target_df[~target_df['qr_code'].isin(existing_codes)]
    dup_count = len(target_df) - len(new_records)
    if dup_count > 0:
        print(f"发现{dup_count}条重复qr_code记录,将自动跳过")

    if new_records.empty:
        print("没有新数据需要写入")
        return

    new_records.to_sql(
        'bm_qrcode_info',
        conn,
        if_exists='append',
        index=False,
        dtype={
            'qr_code': sqlalchemy.types.VARCHAR(length=100),
            'type_id': sqlalchemy.types.INTEGER(),
            'supplier_id': sqlalchemy.types.INTEGER(),
            'is_bind': sqlalchemy.types.SmallInteger(),
            'create_time': sqlalchemy.types.DateTime(),
            'task_id': sqlalchemy.types.VARCHAR(length=50),
        },
    )
    print(f"成功写入{len(new_records)}条新数据")


def transform_and_load_qrcode(config_file_path):
    """Copy the COMPANY_ID = 1 rows of bm_qrcode into bm_qrcode_info.

    Connection settings are read from the ``source_db`` and ``target_db``
    sections of the configuration file. Rows are read from the source table,
    their columns are renamed to the target layout, and rows whose
    ``qr_code`` is already present in the target table are skipped. The
    insert runs inside a single ``target_engine.begin()`` transaction block.
    Progress is reported with ``print``.

    :param config_file_path: path of the configuration file.
    :raises FileNotFoundError: if the configuration file does not exist.
    :raises Exception: any error raised while connecting, reading or writing
        is printed and then re-raised unchanged.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"配置文件不存在: {config_file_path}")

    config = configparser.ConfigParser()
    config.read(config_file_path)

    # Bound up front so the cleanup in ``finally`` can tell which engines
    # were actually created, without inspecting locals().
    source_engine = None
    target_engine = None
    try:
        source_conn_str = get_db_connection_string(config, 'source_db')
        target_conn_str = get_db_connection_string(config, 'target_db')
        source_engine = create_engine(source_conn_str)
        target_engine = create_engine(target_conn_str)

        # Extract: only rows with COMPANY_ID = 1; an empty task_id becomes NULL.
        print("正在从源表qr_code读取数据...")
        source_query = text(
            "SELECT code, ma_model, vender, is_bind, gen_month, "
            "nullif(task_id, '') as task_id "
            "FROM bm_qrcode "
            "WHERE COMPANY_ID = 1"
        )
        source_df = pd.read_sql(source_query, source_engine)

        if source_df.empty:
            print("没有符合条件的数据需要转换")
            return
        print(f"读取到{len(source_df)}条待转换数据")

        # Transform: rename the source columns to the target layout.
        print("正在进行数据转换...")
        target_df = _build_target_frame(source_df)

        # Report (but do not filter) rows lacking a qr_code or a type_id.
        print("\n数据质量检查:")
        print(f"- 空qr_code记录: {target_df['qr_code'].isna().sum()}")
        print(f"- 空type_id记录: {target_df['type_id'].isna().sum()}")

        # Load: the duplicate check and the insert share one connection.
        print("\n正在写入目标表bm_qrcode_info...")
        with target_engine.begin() as conn:
            _append_new_records(conn, target_df)

    except Exception as e:
        print(f"\n处理过程中发生错误: {str(e)}")
        raise
    finally:
        if source_engine is not None:
            source_engine.dispose()
        if target_engine is not None:
            target_engine.dispose()
|
||
|
||
|
||
if __name__ == "__main__":
    # Configuration file path; being relative, it is resolved against the
    # current working directory.
    config_file = "config.ini"

    try:
        transform_and_load_qrcode(config_file)
    except Exception as e:
        print(f"程序执行失败: {str(e)}")
        # Exit with a non-zero status so that a shell, cron job or scheduler
        # can detect the failure instead of seeing a successful run.
        raise SystemExit(1)