268 lines
9.2 KiB
Python
268 lines
9.2 KiB
Python
import configparser
|
||
import pandas as pd
|
||
from sqlalchemy import create_engine, text
|
||
from urllib.parse import quote_plus
|
||
import logging
|
||
|
||
# Configure logging for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Read the configuration file.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means config.ini was not loaded.
# Log that here; the lookups below will still raise configparser's own error.
if not config.read('config.ini'):
    logger.error("未找到或无法读取配置文件: config.ini")


def _load_db_config(section):
    """Return the connection settings stored under ``section`` of ``config``.

    Args:
        section: Name of the config.ini section to read.

    Returns:
        dict: ``host``, ``user``, ``password`` and ``database`` as strings and
        ``port`` as an int.

    Raises:
        configparser.NoSectionError: If ``section`` is missing.
        configparser.NoOptionError: If one of the five options is missing.
        ValueError: If ``port`` is not an integer.
    """
    return {
        'host': config.get(section, 'host'),
        'user': config.get(section, 'user'),
        'password': config.get(section, 'password'),
        'database': config.get(section, 'database'),
        'port': config.getint(section, 'port'),
    }


def _build_mysql_url(db_config):
    """Build the ``mysql+pymysql`` connection URL for ``db_config``.

    Both the user name and the password are URL-escaped so that characters
    such as ``@``, ``:`` or ``/`` in either of them cannot be mistaken for
    URL delimiters.

    Args:
        db_config: Mapping with ``host``, ``user``, ``password``,
            ``database`` and ``port`` keys.

    Returns:
        str: The connection URL.
    """
    credentials = f"{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    location = f"{db_config['host']}:{db_config['port']}/{db_config['database']}"
    return f"mysql+pymysql://{credentials}@{location}"


# Database connection settings.
source_config = _load_db_config('source_db')
target_config = _load_db_config('target_db')

# Database engines.
source_engine = create_engine(_build_mysql_url(source_config))
target_engine = create_engine(_build_mysql_url(target_config))
|
||
|
||
|
||
class ClzBmAgreementInfoUpdater:
    """Compare clz_bm_agreement_info rows of two databases and sync project_id.

    Rows read from the target and the source database are paired on
    ``external_id``. For every pair, the ``project_id`` of the row in the
    *source* database is overwritten with the ``project_id`` found in the
    *target* database.
    """

    # The same query is run against both databases. The WHERE clause keeps
    # only agreements whose joined project has an external_id.
    _AGREEMENT_QUERY = """
        SELECT
            ca.agreement_id,
            ca.agreement_code,
            ca.project_id,
            bu.unit_id,
            bu.unit_name,
            bp.pro_id,
            bp.pro_name,
            bp.external_id
        FROM
            clz_bm_agreement_info ca
            LEFT JOIN bm_unit bu ON ca.unit_id = bu.unit_id
            LEFT JOIN bm_project bp ON ca.project_id = bp.pro_id
        WHERE
            bp.external_id is not null
    """

    def __init__(self):
        """Bind the updater to the module-level source and target engines."""
        self.source_engine = source_engine
        self.target_engine = target_engine

    def _query_agreement_data(self, engine, label):
        """Run the shared agreement query on ``engine``.

        Args:
            engine: Engine to query.
            label: Database name inserted into the log messages
                ("目标" or "源").

        Returns:
            pandas.DataFrame: The query result, or an empty DataFrame when
            the query raised (the error is logged, not propagated).
        """
        try:
            logger.info(f"执行{label}数据库查询...")
            df = pd.read_sql(self._AGREEMENT_QUERY, engine)
            logger.info(f"{label}数据库查询到 {len(df)} 条记录")
            return df
        except Exception as e:
            logger.error(f"查询{label}数据库时发生错误: {str(e)}")
            return pd.DataFrame()

    def query_target_data(self):
        """Query clz_bm_agreement_info data from the target database.

        Returns:
            pandas.DataFrame: Query result; empty if the query failed.
        """
        return self._query_agreement_data(self.target_engine, "目标")

    def query_source_data(self):
        """Query clz_bm_agreement_info data from the source database.

        Returns:
            pandas.DataFrame: Query result; empty if the query failed.
        """
        return self._query_agreement_data(self.source_engine, "源")

    @staticmethod
    def _merge_on_external_id(target_df, source_df):
        """Inner-join both frames on ``external_id``.

        Columns present in both frames get the ``_target`` / ``_source``
        suffix in the result.
        """
        return pd.merge(
            target_df,
            source_df,
            on=['external_id'],
            how='inner',
            suffixes=('_target', '_source'),
        )

    @staticmethod
    def _to_native(value):
        """Return ``value`` as a built-in Python object.

        Values taken out of a DataFrame may be NumPy scalars; those expose
        ``item()``, which yields the equivalent built-in value that can be
        bound as a statement parameter. Anything else is returned unchanged.
        """
        item = getattr(value, 'item', None)
        return item() if callable(item) else value

    def _compare_and_update(self, target_df, source_df):
        """Pair the rows and write the updates; exceptions propagate.

        Args:
            target_df: Data from the target database.
            source_df: Data from the source database.

        Returns:
            int: Sum of the row counts reported by the UPDATE statements.
        """
        update_count = 0

        # Rows are matched when their external_id is equal.
        logger.info("开始数据对比...")
        merged_df = self._merge_on_external_id(target_df, source_df)
        logger.info(f"找到 {len(merged_df)} 条匹配记录")

        if merged_df.empty:
            logger.warning("没有找到匹配的数据")
            return 0

        # Bound parameters instead of string interpolation: the driver does
        # the quoting, so the statement is valid for any id type.
        update_stmt = text(
            "UPDATE clz_bm_agreement_info "
            "SET project_id = :project_id "
            "WHERE agreement_id = :agreement_id"
        )

        # begin() commits when the block finishes and rolls back if it raises,
        # so either all updates are applied or none.
        with self.source_engine.begin() as conn:
            for _, row in merged_df.iterrows():
                target_project_id = row.get('project_id_target')
                source_agreement_id = row.get('agreement_id_source')

                # Skip pairs where either side of the update is missing.
                if pd.isna(target_project_id) or pd.isna(source_agreement_id):
                    continue

                params = {
                    'project_id': self._to_native(target_project_id),
                    'agreement_id': self._to_native(source_agreement_id),
                }
                logger.debug("执行更新SQL: %s 参数: %s", update_stmt, params)
                result = conn.execute(update_stmt, params)
                update_count += result.rowcount

        logger.info(f"成功更新 {update_count} 条记录")
        return update_count

    def compare_and_update_data(self, target_df, source_df):
        """Compare the data and apply the updates to the source database.

        Args:
            target_df: Data from the target database.
            source_df: Data from the source database.

        Returns:
            int: Number of updated records; 0 when nothing matched or when an
            error occurred (the error is logged, not propagated).
        """
        try:
            return self._compare_and_update(target_df, source_df)
        except Exception as e:
            logger.error(f"数据对比和更新时发生错误: {str(e)}")
            return 0

    def execute_update(self):
        """Run the complete compare-and-update workflow.

        Returns:
            bool: True when the workflow ran to completion (even if no row
            was changed); False when a query returned no rows or any step
            raised.
        """
        try:
            logger.info("=== 开始clz_bm_agreement_info表数据对比和更新 ===")

            # 1. Query the target database.
            target_df = self.query_target_data()
            if target_df.empty:
                logger.error("目标数据库查询结果为空")
                return False

            # 2. Query the source database.
            source_df = self.query_source_data()
            if source_df.empty:
                logger.error("源数据库查询结果为空")
                return False

            # 3. Compare and update. The raising helper is used here so that a
            #    failed update is reported as a failure instead of being
            #    mistaken for "nothing to update".
            try:
                update_count = self._compare_and_update(target_df, source_df)
            except Exception as e:
                logger.error(f"数据对比和更新时发生错误: {str(e)}")
                return False

            if update_count > 0:
                logger.info(f"✅ 数据对比和更新完成,共更新 {update_count} 条记录")
            else:
                logger.warning("⚠️ 没有找到需要更新的数据")
            return True

        except Exception as e:
            logger.error(f"执行更新流程时发生错误: {str(e)}")
            return False

    def preview_changes(self):
        """Log the changes that would be made, without executing any update.

        Only the first 10 matched records are listed; the number of remaining
        records is logged afterwards. Errors are logged, not propagated.
        """
        try:
            logger.info("=== 预览数据对比结果 ===")

            # Query both sides.
            target_df = self.query_target_data()
            source_df = self.query_source_data()

            if target_df.empty or source_df.empty:
                logger.warning("查询结果为空,无法预览")
                return

            # Pair the rows exactly as the update step does.
            merged_df = self._merge_on_external_id(target_df, source_df)
            logger.info(f"找到 {len(merged_df)} 条匹配记录")

            if merged_df.empty:
                return

            logger.info("预览将要更新的记录:")
            for _, row in merged_df.head(10).iterrows():  # first 10 only
                logger.info(f"agreement_id: {row['agreement_id_source']}, "
                            f"external_id: {row['external_id']}, "
                            f"原project_id: {row.get('project_id_source', 'NULL')}, "
                            f"新project_id: {row.get('project_id_target', 'NULL')}")

            if len(merged_df) > 10:
                logger.info(f"... 还有 {len(merged_df) - 10} 条记录")

        except Exception as e:
            logger.error(f"预览时发生错误: {str(e)}")
|
||
|
||
|
||
def main():
    """Run the interactive preview/update workflow on the console.

    Asks whether to preview the changes, then whether to execute the update,
    and prints the outcome. Only an answer of ``y`` (any case, surrounding
    whitespace ignored) counts as yes; everything else is treated as no.
    """
    updater = ClzBmAgreementInfoUpdater()

    # Ask whether to preview the changes first.
    preview = input("是否要预览将要进行的更改?(y/N): ")
    # strip() so that an answer such as "y " is not silently read as "no".
    if preview.strip().lower() == 'y':
        updater.preview_changes()

    # Ask whether to actually run the update.
    confirm = input("是否要执行数据更新?(y/N): ")
    if confirm.strip().lower() != 'y':
        print("操作已取消")
        return

    if updater.execute_update():
        print("✅ 更新操作完成")
    else:
        print("❌ 更新操作失败")
|
||
|
||
|
||
# Start the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()