150 lines
5.6 KiB
Python
150 lines
5.6 KiB
Python
|
|
import pandas as pd
|
|||
|
|
import sqlalchemy
|
|||
|
|
from sqlalchemy import create_engine
|
|||
|
|
import configparser
|
|||
|
|
import os
|
|||
|
|
from urllib.parse import quote_plus
|
|||
|
|
|
|||
|
|
def get_db_connection_string(config, section):
    """Build a SQLAlchemy connection URL from one section of the configuration.

    The URL always targets MySQL through the PyMySQL driver and has the form
    ``mysql+pymysql://user:password@host:port/database``.

    :param config: Mapping of section names to option mappings, for example
        a ``configparser.ConfigParser``. The section must provide the options
        ``user``, ``password``, ``host``, ``port`` and ``database``.
    :param section: Name of the section to read (``source_db`` or
        ``target_db``).
    :return: The connection URL as a string.
    :raises KeyError: If the mapping lookup for the section or for one of the
        options fails.
    """
    db_type = 'mysql'  # the database is assumed to be MySQL
    driver = 'pymysql'  # MySQL driver

    settings = config[section]

    # Percent-encode both credentials: characters such as "@", ":" or "/" in
    # either of them would otherwise be parsed as URL delimiters.
    user = quote_plus(settings['user'])
    password = quote_plus(settings['password'])

    return (
        f"{db_type}+{driver}://{user}:{password}@"
        f"{settings['host']}:{settings['port']}/{settings['database']}"
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Replacement table: source COMPANY_ID -> target imp_unit.
_IMP_UNIT_MAPPING = {
    1: 327,   # Power Transmission Branch No. 1
    2: 102,   # Power Transmission Branch No. 2
    3: 309,   # Hongyuan Substation Engineering Office
    5: 338,   # Civil Construction Branch
    8: 309,   # Hongyuan Power Transmission Engineering Office
    9: 100,   # Substation Branch
    10: 101,  # Machinery (Logistics) Branch
    11: 345,  # External partner units
    12: 344,  # Mechanization Branch
    13: 346,  # Operation and Inspection Branch
    15: 340,  # Anhui Shunquan Electric Power Engineering Co., Ltd.
    16: 337,  # Maintenance and Testing Branch
    17: 339,  # Anhui Shun'an Power Grid Construction Co., Ltd.
    18: 342,  # Company headquarters
    21: 341,  # Team Management Center
}

# Replacement table: source TYPE_ID -> target pro_type_id.
_PRO_TYPE_MAPPING = {
    1: 0,  # line project
    2: 1,  # substation project
    3: 2,  # business project
    4: 3,  # other project
}


def _build_target_frame(source_df):
    """Return the target-table frame derived from the rows read from the source."""
    target_df = pd.DataFrame()

    # Columns copied unchanged under their target names.
    target_df['pro_id'] = source_df['ID']
    target_df['pro_name'] = source_df['NAME']
    target_df['pro_code'] = source_df['NUM']
    target_df['external_id'] = source_df['PRO_ID']
    target_df['contract_part'] = source_df['HTZT']
    target_df['create_time'] = source_df['time']

    # Columns translated through the replacement tables; values without an
    # entry in the table become NaN.
    target_df['imp_unit'] = source_df['COMPANY_ID'].map(_IMP_UNIT_MAPPING)
    target_df['pro_type_id'] = source_df['TYPE_ID'].map(_PRO_TYPE_MAPPING)
    return target_df


def _warn_unmapped(source_df, target_df, source_col, target_col):
    """Print a warning listing the source values that produced no mapped value."""
    missing = target_df[target_col].isna()
    if missing.any():
        unmapped = source_df[missing][source_col].unique()
        print(f"警告: 发现未映射的{source_col}值: {unmapped}")


def transform_and_load_bm_project(config_file_path):
    """Extract bm_project rows from the source database, transform them and
    append them to the bm_project table of the target database.

    Only rows with ``COMPANY = 1 AND IS_ACTIVE = 1`` are read. Rows whose
    COMPANY_ID or TYPE_ID has no entry in the replacement tables are reported
    with a printed warning and are still written, with a missing value in the
    mapped column. Progress is reported with ``print``.

    :param config_file_path: Path of the configuration file; its
        ``source_db`` and ``target_db`` sections describe the two databases.
    :raises FileNotFoundError: If the configuration file does not exist.
    :raises Exception: Any error raised while connecting, reading,
        transforming or writing is printed and then re-raised unchanged.
    """
    # Read the configuration file.
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"配置文件不存在: {config_file_path}")

    config = configparser.ConfigParser()
    config.read(config_file_path)

    # Bind both names before the try block so the cleanup in ``finally`` can
    # test them directly instead of introspecting ``locals()``.
    source_engine = None
    target_engine = None

    try:
        # Build the connection strings.
        source_conn_str = get_db_connection_string(config, 'source_db')
        target_conn_str = get_db_connection_string(config, 'target_db')

        # Create the database engines.
        source_engine = create_engine(source_conn_str)
        target_engine = create_engine(target_conn_str)

        # Read the rows from the source database.
        print("正在从源数据库读取bm_project表数据...")
        # The trailing backslash only joins the last line to the closing
        # quotes; it is not part of the SQL text.
        source_query = """
                       SELECT ID, NAME, NUM, PRO_ID, HTZT, time, COMPANY_ID, TYPE_ID, COMPANY, IS_ACTIVE
                       FROM bm_project
                       WHERE COMPANY = 1 AND IS_ACTIVE = 1 \
                       """
        source_df = pd.read_sql(source_query, source_engine)

        if source_df.empty:
            print("没有符合条件的数据需要转换")
            return

        print(f"读取到{len(source_df)}条待转换数据")

        # Transform the data.
        print("正在进行数据转换...")
        target_df = _build_target_frame(source_df)

        # Report source values that have no entry in the replacement tables.
        _warn_unmapped(source_df, target_df, 'COMPANY_ID', 'imp_unit')
        _warn_unmapped(source_df, target_df, 'TYPE_ID', 'pro_type_id')

        # Write to the target database.
        print("正在将数据写入目标数据库...")
        target_df.to_sql(
            'bm_project',
            target_engine,
            if_exists='append',
            index=False,
            dtype={
                'pro_id': sqlalchemy.types.INTEGER(),
                'pro_name': sqlalchemy.types.VARCHAR(length=255),
                'pro_code': sqlalchemy.types.VARCHAR(length=50),
                'external_id': sqlalchemy.types.VARCHAR(length=50),
                'contract_part': sqlalchemy.types.VARCHAR(length=50),
                'create_time': sqlalchemy.types.DateTime(),
                'imp_unit': sqlalchemy.types.INTEGER(),
                'pro_type_id': sqlalchemy.types.INTEGER(),
            },
        )

        print(f"成功写入{len(target_df)}条数据到目标数据库")

    except Exception as e:
        print(f"处理过程中发生错误: {str(e)}")
        raise
    finally:
        # Release the connection pools of whichever engines were created.
        if source_engine is not None:
            source_engine.dispose()
        if target_engine is not None:
            target_engine.dispose()
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Usage example
if __name__ == "__main__":
    # Path of the configuration file; assumed to be in the current directory.
    config_file = "config.ini"

    try:
        # Run the conversion.
        transform_and_load_bm_project(config_file)
    except Exception as e:
        print(f"程序执行失败: {str(e)}")
        # Exit with a non-zero status so shells and schedulers can detect the
        # failure; falling through here would report success.
        raise SystemExit(1) from e
|