import configparser import pandas as pd from sqlalchemy import create_engine from urllib.parse import quote_plus # @author: 阮世耀 # 描述:处理ba_ma_input_check表数据到repair_audit_details表的迁移 # 读取配置文件 config = configparser.ConfigParser() config.read('config.ini') # 获取数据库连接配置 source_config = { 'host': config.get('source_db', 'host'), 'user': config.get('source_db', 'user'), 'password': config.get('source_db', 'password'), 'database': config.get('source_db', 'database'), 'port': config.getint('source_db', 'port') } target_config = { 'host': config.get('target_db', 'host'), 'user': config.get('target_db', 'user'), 'password': config.get('target_db', 'password'), 'database': config.get('target_db', 'database'), 'port': config.getint('target_db', 'port') } # 创建数据库引擎 source_engine = create_engine( f"mysql+pymysql://{source_config['user']}:{quote_plus(source_config['password'])}@{source_config['host']}:{source_config['port']}/{source_config['database']}" ) target_engine = create_engine( f"mysql+pymysql://{target_config['user']}:{quote_plus(target_config['password'])}@{target_config['host']}:{target_config['port']}/{target_config['database']}" ) def process_repair_audit_details(): """处理ba_ma_input_check表数据到repair_audit_details表的迁移""" try: # 源表查询SQL(包含UNION的复杂查询) sql = """ SELECT bic.ID as task_id, bic.REPAIR_ID as repair_id, mm.TYPE AS type_id, ttm.MA_ID as ma_id, 1 as repair_num, IF(ttm.MA_ID = tmsr.MA_ID, 1, 0) as scrap_num, 1 - IF(ttm.MA_ID = tmsr.MA_ID, 1, 0) as repaired_num, bic.IS_SURE as status, bic.CREATE_TIME, bic.CREATOR as create_by FROM ba_ma_input_check bic LEFT JOIN ba_ma_scarp bms on bic.REPAIR_ID = bms.REPAIR_ID LEFT JOIN tm_ma_scarp_reason tmsr on bms.ID = tmsr.TASK_ID and tmsr.IS_COUNT = 0 LEFT JOIN tm_task_ma ttm on ttm.task_id = bic.id LEFT JOIN ma_machines mm on ttm.ma_id = mm.id WHERE mm.id is not null UNION SELECT bic.ID as task_id, bic.REPAIR_ID as repair_id, ttmt.MA_TYPE_ID as type_id, null as ma_id, ttmt.MACHINES_NUM as repair_num, ttmt2.MACHINES_NUM as scrap_num, ttmt.MACHINES_NUM - ttmt2.MACHINES_NUM as repaired_num, bic.IS_SURE as status, bic.CREATE_TIME, bic.CREATOR as create_by FROM ba_ma_input_check bic LEFT JOIN tm_task_ma_type ttmt on bic.ID = ttmt.TASK_ID LEFT JOIN ba_ma_scarp bms on bic.REPAIR_ID = bms.REPAIR_ID LEFT JOIN tm_task_ma_type ttmt2 on bms.ID = ttmt2.TASK_ID and ttmt.MA_TYPE_ID = ttmt2.MA_TYPE_ID WHERE ttmt.IS_COUNT = 1 """ print("正在执行复杂查询,请稍候...") # 执行查询获取源数据 df = pd.read_sql(sql, source_engine) print(f"查询完成,获取到 {len(df)} 条原始记录") # 数据转换和字段映射 result = pd.DataFrame() result['task_id'] = df['task_id'] result['repair_id'] = df['repair_id'] result['ma_id'] = df['ma_id'] # 可能为null result['type_id'] = df['type_id'] result['repair_num'] = df['repair_num'] result['repaired_num'] = df['repaired_num'] result['scrap_num'] = df['scrap_num'] result['create_by'] = df['create_by'] result['create_time'] = df['CREATE_TIME'] result['status'] = df['status'] # 数据清洗 # 移除关键字段为空的记录 before_clean = len(result) result = result.dropna(subset=['task_id', 'repair_id', 'type_id']) after_clean = len(result) if before_clean != after_clean: print(f"数据清洗:移除了 {before_clean - after_clean} 条关键字段为空的记录") # 处理数值字段的空值,设为0 numeric_fields = ['repair_num', 'repaired_num', 'scrap_num'] for field in numeric_fields: result[field] = result[field].fillna(0) # 写入目标表 result.to_sql('repair_audit_details', target_engine, if_exists='append', index=False) print(f"成功转换并导入 {len(result)} 条记录到 repair_audit_details") # 生成统计报告 generate_statistics_report(result) return True except Exception as e: print(f"处理 repair_audit_details 时发生错误: {str(e)}") import traceback print("详细错误信息:") traceback.print_exc() return False def generate_statistics_report(result): """生成详细的统计报告""" print("\n=== 数据统计报告 ===") # 基本统计 print(f"总记录数: {len(result)}") # 按状态分组统计 print(f"\n审核状态分布:") status_counts = result['status'].value_counts() for status, count in status_counts.items(): status_desc = "已审核" if status == 1 else "待审核" print(f" - status = {status} ({status_desc}): {count} 条") # 有ma_id和无ma_id的记录分布 print(f"\n设备记录类型分布:") ma_id_not_null = result['ma_id'].notna().sum() ma_id_null = result['ma_id'].isna().sum() print(f" - 具体设备记录 (ma_id不为空): {ma_id_not_null} 条") print(f" - 设备类型记录 (ma_id为空): {ma_id_null} 条") # 数量统计 print(f"\n维修数量统计:") print(f" - 总维修数量: {result['repair_num'].sum()}") print(f" - 总修复数量: {result['repaired_num'].sum()}") print(f" - 总报废数量: {result['scrap_num'].sum()}") # 按repair_id分组统计 repair_groups = result.groupby('repair_id').size() print(f"\n维修单分布:") print(f" - 涉及维修单数量: {len(repair_groups)}") print(f" - 平均每个维修单记录数: {repair_groups.mean():.2f}") def validate_data(): """验证迁移数据的完整性和逻辑一致性""" try: print("\n=== 开始数据验证 ===") # 检查目标表记录数 target_count_sql = "SELECT COUNT(*) as total_count FROM repair_audit_details" target_count = pd.read_sql(target_count_sql, target_engine)['total_count'].iloc[0] print(f"目标表总记录数: {target_count}") # 检查数据逻辑一致性 validation_sql = """ SELECT COUNT(*) as total_records, COUNT(CASE WHEN ma_id IS NOT NULL THEN 1 END) as with_ma_id, COUNT(CASE WHEN ma_id IS NULL THEN 1 END) as without_ma_id, SUM(repair_num) as total_repair, SUM(repaired_num) as total_repaired, SUM(scrap_num) as total_scrapped FROM repair_audit_details """ validation_result = pd.read_sql(validation_sql, target_engine) print(f"\n逻辑验证结果:") print(f" - 总记录数: {validation_result['total_records'].iloc[0]}") print(f" - 有设备ID记录: {validation_result['with_ma_id'].iloc[0]}") print(f" - 无设备ID记录: {validation_result['without_ma_id'].iloc[0]}") print(f" - 总维修数: {validation_result['total_repair'].iloc[0]}") print(f" - 总修复数: {validation_result['total_repaired'].iloc[0]}") print(f" - 总报废数: {validation_result['total_scrapped'].iloc[0]}") # 检查数据完整性 print(f"\n✅ 数据验证完成") return True except Exception as e: print(f"数据验证时发生错误: {str(e)}") return False def clear_existing_data(): """清理目标表中的现有数据(可选)""" try: confirmation = input("是否要清理目标表 repair_audit_details 中的现有数据?(y/N): ") if confirmation.lower() == 'y': with target_engine.connect() as conn: result = conn.execute("DELETE FROM repair_audit_details") print(f"已清理 {result.rowcount} 条现有数据") return True else: print("跳过数据清理") return True except Exception as e: print(f"清理数据时发生错误: {str(e)}") return False if __name__ == "__main__": print("=== repair_audit_details 审核明细数据迁移 ===") print("注意:此脚本将执行复杂的UNION查询,可能需要较长时间") print("开始执行数据迁移...") # 可选:清理现有数据 if clear_existing_data(): # 执行数据迁移 if process_repair_audit_details(): # 验证数据完整性 validate_data() print("\n=== 迁移完成 ===") else: print("\n=== 迁移失败 ===") else: print("\n=== 迁移中止 ===")