"""Sync ``clz_lease_apply_info.project_id`` in the source database.

Lease-application rows are read from both the target and the source
database, matched on ``bm_project.external_id``, and each matched source row
gets its ``project_id`` overwritten with the target database's value.
"""
import configparser
import logging
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text

# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Read the configuration file. ConfigParser.read() silently skips a missing
# file, so a missing config.ini surfaces below as a NoSectionError.
config = configparser.ConfigParser()
config.read('config.ini')


def _load_db_config(section):
    """Return the connection settings stored in *section* of config.ini."""
    return {
        'host': config.get(section, 'host'),
        'user': config.get(section, 'user'),
        'password': config.get(section, 'password'),
        'database': config.get(section, 'database'),
        'port': config.getint(section, 'port'),
    }


def _build_engine(db_config):
    """Create a mysql+pymysql engine from a settings dict.

    Both the user name and the password are URL-escaped so that characters
    such as ``@``, ``:`` or ``/`` cannot corrupt the connection URL.
    """
    return create_engine(
        f"mysql+pymysql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
        f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
    )


# Database connection settings
source_config = _load_db_config('source_db')
target_config = _load_db_config('target_db')

# Database engines
source_engine = _build_engine(source_config)
target_engine = _build_engine(target_config)

# The same query is run against both databases: lease applications joined to
# their unit and project, restricted to projects that have an external_id.
_LEASE_QUERY = """
    SELECT
        cli.id,
        cli.`code`,
        cli.project_id,
        bu.unit_id,
        bu.unit_name,
        bp.pro_id,
        bp.pro_name,
        bp.external_id
    FROM clz_lease_apply_info cli
    LEFT JOIN bm_unit bu on cli.team_id = bu.unit_id
    LEFT JOIN bm_project bp on cli.project_id = bp.pro_id
    WHERE bp.external_id is not null
"""

# Bound parameters instead of string interpolation: values are escaped by the
# driver and non-numeric ids cannot break (or inject into) the statement.
_UPDATE_STATEMENT = text(
    "UPDATE clz_lease_apply_info SET project_id = :project_id WHERE id = :lease_id"
)

# Maximum number of rows listed by preview_changes().
_PREVIEW_LIMIT = 10


class ClzLeaseApplyInfoUpdater:
    """Compares clz_lease_apply_info between two databases and updates the source."""

    def __init__(self):
        self.source_engine = source_engine
        self.target_engine = target_engine

    def _query_lease_data(self, engine, label):
        """Run the shared lease query on *engine*.

        Args:
            engine: SQLAlchemy engine to query.
            label: Database name fragment used in the log messages.

        Returns:
            The query result as a DataFrame, or an empty DataFrame when the
            query fails (the error is logged, not raised).
        """
        try:
            logger.info(f"执行{label}数据库查询...")
            df = pd.read_sql(_LEASE_QUERY, engine)
            logger.info(f"{label}数据库查询到 {len(df)} 条记录")
            return df
        except Exception as e:
            logger.error(f"查询{label}数据库时发生错误: {str(e)}")
            return pd.DataFrame()

    def query_target_data(self):
        """Query clz_lease_apply_info data from the target database.

        Returns:
            DataFrame of lease rows; empty if the query failed.
        """
        return self._query_lease_data(self.target_engine, "目标")

    def query_source_data(self):
        """Query clz_lease_apply_info data from the source database.

        Returns:
            DataFrame of lease rows; empty if the query failed.
        """
        return self._query_lease_data(self.source_engine, "源")

    def _plan_updates(self, target_df, source_df):
        """Match rows on external_id and return the distinct updates to apply.

        The merge is many-to-many (several lease rows can share one
        external_id on each side), so the raw result repeats the same
        (source id, target project_id) pair. Rows lacking either value are
        dropped and duplicates are collapsed; ``keep='last'`` preserves the
        value the last matching row would have written.
        """
        merged_df = pd.merge(
            target_df,
            source_df,
            on=['external_id'],
            how='inner',
            suffixes=('_target', '_source'),
        )
        logger.info(f"找到 {len(merged_df)} 条匹配记录")
        if merged_df.empty:
            return merged_df

        key_columns = ['id_source', 'project_id_target']
        return merged_df.dropna(subset=key_columns).drop_duplicates(
            subset=key_columns, keep='last'
        )

    def _apply_updates(self, plan):
        """Execute one UPDATE per planned row inside a single transaction.

        ``engine.begin()`` commits when the block exits normally and rolls
        back if any statement raises, so a failure never leaves a partial
        update behind.

        Returns:
            Sum of the rowcounts reported by the executed statements.
        """
        update_count = 0
        # Series.tolist() yields native Python scalars, which the DB driver
        # can bind directly.
        pairs = zip(plan['id_source'].tolist(), plan['project_id_target'].tolist())
        with self.source_engine.begin() as conn:
            for lease_id, project_id in pairs:
                params = {'project_id': project_id, 'lease_id': lease_id}
                logger.debug("执行更新SQL: %s %s", _UPDATE_STATEMENT.text, params)
                result = conn.execute(_UPDATE_STATEMENT, params)
                update_count += result.rowcount
        return update_count

    def _merge_and_update(self, target_df, source_df):
        """Plan and apply the updates, letting any exception propagate."""
        logger.info("开始数据对比...")
        plan = self._plan_updates(target_df, source_df)
        if plan.empty:
            logger.warning("没有找到匹配的数据")
            return 0

        update_count = self._apply_updates(plan)
        logger.info(f"成功更新 {update_count} 条记录")
        return update_count

    def compare_and_update_data(self, target_df, source_df):
        """Compare the two data sets and update the source database.

        Args:
            target_df: Rows queried from the target database.
            source_df: Rows queried from the source database.

        Returns:
            int: Number of updated records; 0 when nothing matched or when
            an error occurred (the error is logged, not raised).
        """
        try:
            return self._merge_and_update(target_df, source_df)
        except Exception as e:
            logger.error(f"数据对比和更新时发生错误: {str(e)}")
            return 0

    def execute_update(self):
        """Run the full query / compare / update workflow.

        Returns:
            bool: True when the workflow completed (even if no row needed an
            update); False when a query returned nothing or any step failed.
        """
        try:
            logger.info("=== 开始clz_lease_apply_info表数据对比和更新 ===")

            # 1. Query the target database
            target_df = self.query_target_data()
            if target_df.empty:
                logger.error("目标数据库查询结果为空")
                return False

            # 2. Query the source database
            source_df = self.query_source_data()
            if source_df.empty:
                logger.error("源数据库查询结果为空")
                return False

            # 3. Compare and update. The raising helper is used on purpose:
            # going through compare_and_update_data() would turn a failed
            # update into 0 and report it as a successful no-op.
            update_count = self._merge_and_update(target_df, source_df)
        except Exception as e:
            logger.error(f"执行更新流程时发生错误: {str(e)}")
            return False

        if update_count > 0:
            logger.info(f"✅ 数据对比和更新完成,共更新 {update_count} 条记录")
        else:
            logger.warning("⚠️ 没有找到需要更新的数据")
        return True

    def preview_changes(self):
        """Log the pending changes without executing any update.

        Lists at most ``_PREVIEW_LIMIT`` of the distinct updates that
        execute_update() would apply, followed by a count of the remainder.
        """
        try:
            logger.info("=== 预览数据对比结果 ===")

            target_df = self.query_target_data()
            source_df = self.query_source_data()
            if target_df.empty or source_df.empty:
                logger.warning("查询结果为空,无法预览")
                return

            plan = self._plan_updates(target_df, source_df)
            if plan.empty:
                return

            logger.info("预览将要更新的记录:")
            for _, row in plan.head(_PREVIEW_LIMIT).iterrows():
                logger.info(
                    f"ID: {row['id_source']}, "
                    f"code: {row.get('code_source', 'NULL')}, "
                    f"external_id: {row['external_id']}, "
                    f"原project_id: {row.get('project_id_source', 'NULL')}, "
                    f"新project_id: {row.get('project_id_target', 'NULL')}"
                )

            remaining = len(plan) - _PREVIEW_LIMIT
            if remaining > 0:
                logger.info(f"... 还有 {remaining} 条记录")
        except Exception as e:
            logger.error(f"预览时发生错误: {str(e)}")


def main():
    """Interactive entry point: optionally preview, then optionally update."""
    updater = ClzLeaseApplyInfoUpdater()

    # Ask whether to preview the pending changes
    preview = input("是否要预览将要进行的更改?(y/N): ")
    if preview.strip().lower() == 'y':
        updater.preview_changes()

    # Ask whether to actually run the update
    confirm = input("是否要执行数据更新?(y/N): ")
    if confirm.strip().lower() != 'y':
        print("操作已取消")
        return

    if updater.execute_update():
        print("✅ 更新操作完成")
    else:
        print("❌ 更新操作失败")


if __name__ == "__main__":
    main()