Bonus-Transfer-Machines/materialSite/clz_lease_apply_info_update.py

267 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import configparser
import logging
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
# Configure logging once for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def _load_db_config(parser, section):
    """Return the connection settings stored under *section* of *parser*.

    Args:
        parser: A ``configparser.ConfigParser`` that has already read the file.
        section: Name of the section holding host/user/password/database/port.

    Returns:
        dict with the keys ``host``, ``user``, ``password``, ``database``
        (strings) and ``port`` (int).

    Raises:
        configparser.NoSectionError / configparser.NoOptionError: when the
            section or one of its options is missing.
        ValueError: when ``port`` is not an integer.
    """
    return {
        'host': parser.get(section, 'host'),
        'user': parser.get(section, 'user'),
        'password': parser.get(section, 'password'),
        'database': parser.get(section, 'database'),
        'port': parser.getint(section, 'port'),
    }


def _build_engine(db_config):
    """Create a ``mysql+pymysql`` engine from a settings dict.

    The URL is assembled with ``URL.create`` instead of string formatting:
    each component is passed verbatim, so special characters in the user
    name or password need no manual escaping.
    """
    url = URL.create(
        drivername='mysql+pymysql',
        username=db_config['user'],
        password=db_config['password'],
        host=db_config['host'],
        port=db_config['port'],
        database=db_config['database'],
    )
    return create_engine(url)


# Read the configuration file (resolved relative to the working directory).
config = configparser.ConfigParser()
if not config.read('config.ini'):
    # ConfigParser.read() skips files it cannot open, so say so explicitly
    # before the lookups below fail with a less obvious NoSectionError.
    logger.warning("未找到或无法打开配置文件 config.ini")

# Connection settings for both databases.
source_config = _load_db_config(config, 'source_db')
target_config = _load_db_config(config, 'target_db')

# Database engines shared by the rest of the module.
source_engine = _build_engine(source_config)
target_engine = _build_engine(target_config)
class ClzLeaseApplyInfoUpdater:
    """Compare ``clz_lease_apply_info`` between two databases and update one.

    Rows read from the target and the source database are paired through
    ``external_id``; for every pair, the source row's ``project_id`` is
    overwritten with the ``project_id`` read from the target database.
    """

    # The same query is executed against both databases.
    _LEASE_QUERY = """
        SELECT
            cli.id,
            cli.`code`,
            cli.project_id,
            bu.unit_id,
            bu.unit_name,
            bp.pro_id,
            bp.pro_name,
            bp.external_id
        FROM
            clz_lease_apply_info cli
            LEFT JOIN bm_unit bu on cli.team_id = bu.unit_id
            LEFT JOIN bm_project bp on cli.project_id = bp.pro_id
        WHERE bp.external_id is not null
    """

    def __init__(self):
        self.source_engine = source_engine
        self.target_engine = target_engine

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------
    def _read_lease_rows(self, engine):
        """Run the shared lease query on *engine* and return the DataFrame."""
        return pd.read_sql(self._LEASE_QUERY, engine)

    def query_target_data(self):
        """Fetch the lease rows from the target database.

        Returns:
            The query result as a DataFrame, or an empty DataFrame when the
            query fails (the error is logged, not raised).
        """
        try:
            logger.info("执行目标数据库查询...")
            df = self._read_lease_rows(self.target_engine)
            logger.info(f"目标数据库查询到 {len(df)} 条记录")
            return df
        except Exception as e:
            logger.exception(f"查询目标数据库时发生错误: {str(e)}")
            return pd.DataFrame()

    def query_source_data(self):
        """Fetch the lease rows from the source database.

        Returns:
            The query result as a DataFrame, or an empty DataFrame when the
            query fails (the error is logged, not raised).
        """
        try:
            logger.info("执行源数据库查询...")
            df = self._read_lease_rows(self.source_engine)
            logger.info(f"源数据库查询到 {len(df)} 条记录")
            return df
        except Exception as e:
            logger.exception(f"查询源数据库时发生错误: {str(e)}")
            return pd.DataFrame()

    # ------------------------------------------------------------------
    # Pure helpers (no database access, no logging)
    # ------------------------------------------------------------------
    @staticmethod
    def _merge_on_external_id(target_df, source_df):
        """Inner-join both frames on ``external_id``.

        Columns present in both frames get the ``_target`` / ``_source``
        suffix in the result.
        """
        return pd.merge(
            target_df,
            source_df,
            on=['external_id'],
            how='inner',
            suffixes=('_target', '_source'),
        )

    @staticmethod
    def _to_native(value):
        """Convert *value* to a plain Python object usable as a bind value.

        NumPy scalars are unwrapped, and floats holding a whole number
        (integer columns are upcast to float once they contain NaN) are
        turned back into ``int``.
        """
        if hasattr(value, 'item'):
            value = value.item()
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    @staticmethod
    def _build_update_plan(merged_df):
        """Turn the merged frame into a list of UPDATE parameter dicts.

        Rows where either ``id_source`` or ``project_id_target`` is missing
        are skipped. The merge is many-to-many, so the same pair can occur
        repeatedly; only the last occurrence of each pair is kept, which
        leaves the final database state identical while avoiding redundant
        statements and an inflated row count.

        Returns:
            list of ``{'row_id': ..., 'project_id': ...}`` dicts, in the
            order in which they should be executed.
        """
        pairs = (
            merged_df[['id_source', 'project_id_target']]
            .dropna()
            .drop_duplicates(keep='last')
        )
        to_native = ClzLeaseApplyInfoUpdater._to_native
        return [
            {'row_id': to_native(row_id), 'project_id': to_native(project_id)}
            for row_id, project_id in zip(
                pairs['id_source'].tolist(),
                pairs['project_id_target'].tolist(),
            )
        ]

    # ------------------------------------------------------------------
    # Update
    # ------------------------------------------------------------------
    def _apply_updates(self, plan):
        """Execute one UPDATE per entry of *plan* on the source database.

        All statements run in a single transaction: ``Engine.begin()``
        commits when the block exits normally and rolls back if it raises.

        Returns:
            Sum of the ``rowcount`` values reported for each statement.
        """
        if not plan:
            return 0
        # Bound parameters instead of string interpolation: values are
        # escaped by the driver regardless of their type.
        statement = text(
            "UPDATE clz_lease_apply_info "
            "SET project_id = :project_id "
            "WHERE id = :row_id"
        )
        updated = 0
        with self.source_engine.begin() as conn:
            for params in plan:
                logger.debug("执行更新SQL: %s 参数: %s", statement, params)
                updated += conn.execute(statement, params).rowcount
        return updated

    def _compare_and_update(self, target_df, source_df):
        """Merge, plan and apply the updates; errors propagate to the caller."""
        logger.info("开始数据对比...")
        merged_df = self._merge_on_external_id(target_df, source_df)
        logger.info(f"找到 {len(merged_df)} 条匹配记录")
        if merged_df.empty:
            logger.warning("没有找到匹配的数据")
            return 0
        update_count = self._apply_updates(self._build_update_plan(merged_df))
        logger.info(f"成功更新 {update_count} 条记录")
        return update_count

    def compare_and_update_data(self, target_df, source_df):
        """Compare both frames and update the source database.

        Args:
            target_df: Rows read from the target database.
            source_df: Rows read from the source database.

        Returns:
            int: Number of updated rows; 0 when nothing matched or when an
            error occurred (the error is logged, not raised).
        """
        try:
            return self._compare_and_update(target_df, source_df)
        except Exception as e:
            logger.exception(f"数据对比和更新时发生错误: {str(e)}")
            return 0

    def execute_update(self):
        """Run the full query / compare / update workflow.

        Returns:
            bool: True when the workflow finished (even if no row needed an
            update); False when a query returned nothing or any step failed.
        """
        try:
            logger.info("=== 开始clz_lease_apply_info表数据对比和更新 ===")

            # 1. Read the target database.
            target_df = self.query_target_data()
            if target_df.empty:
                logger.error("目标数据库查询结果为空")
                return False

            # 2. Read the source database.
            source_df = self.query_source_data()
            if source_df.empty:
                logger.error("源数据库查询结果为空")
                return False

            # 3. Compare and update. The raising variant is used so that a
            #    failed update is reported as a failure instead of being
            #    mistaken for "nothing to update".
            try:
                update_count = self._compare_and_update(target_df, source_df)
            except Exception as e:
                logger.exception(f"数据对比和更新时发生错误: {str(e)}")
                return False

            if update_count > 0:
                logger.info(f"✅ 数据对比和更新完成,共更新 {update_count} 条记录")
            else:
                logger.warning("⚠️ 没有找到需要更新的数据")
            return True
        except Exception as e:
            logger.exception(f"执行更新流程时发生错误: {str(e)}")
            return False

    def preview_changes(self):
        """Log the matched rows without modifying any data.

        At most the first 10 matches are listed; the number of remaining
        matches is logged afterwards.
        """
        try:
            logger.info("=== 预览数据对比结果 ===")

            target_df = self.query_target_data()
            source_df = self.query_source_data()
            if target_df.empty or source_df.empty:
                logger.warning("查询结果为空,无法预览")
                return

            merged_df = self._merge_on_external_id(target_df, source_df)
            logger.info(f"找到 {len(merged_df)} 条匹配记录")
            if merged_df.empty:
                return

            logger.info("预览将要更新的记录:")
            for _, row in merged_df.head(10).iterrows():  # first 10 only
                logger.info(f"ID: {row['id_source']}, "
                            f"code: {row.get('code_source', 'NULL')}, "
                            f"external_id: {row['external_id']}, "
                            f"原project_id: {row.get('project_id_source', 'NULL')}, "
                            f"新project_id: {row.get('project_id_target', 'NULL')}")
            if len(merged_df) > 10:
                logger.info(f"... 还有 {len(merged_df) - 10} 条记录")
        except Exception as e:
            logger.exception(f"预览时发生错误: {str(e)}")
def main():
    """Interactive entry point.

    Optionally previews the pending changes, then asks for confirmation
    before running the update and prints the outcome.
    """
    job = ClzLeaseApplyInfoUpdater()

    # Optional dry run: only an explicit "y" triggers the preview.
    wants_preview = input("是否要预览将要进行的更改?(y/N): ").lower() == 'y'
    if wants_preview:
        job.preview_changes()

    # Anything other than "y" cancels the update.
    answer = input("是否要执行数据更新?(y/N): ")
    if answer.lower() != 'y':
        print("操作已取消")
        return

    if job.execute_update():
        print("✅ 更新操作完成")
    else:
        print("❌ 更新操作失败")


if __name__ == "__main__":
    main()