Bonus-Transfer-Machines/机具/ma_supplier_info.py

import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, text
import configparser
import os
from urllib.parse import quote_plus

def get_db_connection_string(config, section):
    """从配置文件中构建数据库连接字符串"""
    return f"mysql+pymysql://{config[section]['user']}:{quote_plus(config[section]['password'])}@" \
           f"{config[section]['host']}:{config[section]['port']}/{config[section]['database']}"


def transform_and_load_supplier(config_file_path):
    """
    从源数据库提取ma_vender数据，转换后加载到目标数据库ma_supplier_info
    :param config_file_path: 配置文件路径
    """
    # 读取配置文件
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"配置文件不存在: {config_file_path}")

    config = configparser.ConfigParser()
    config.read(config_file_path)

    try:
        # 获取数据库连接
        source_conn_str = get_db_connection_string(config, 'source_db')
        target_conn_str = get_db_connection_string(config, 'target_db')
        source_engine = create_engine(source_conn_str)
        target_engine = create_engine(target_conn_str)

        # 从源表读取数据（过滤company_id=1且IS_ACTIVE=1的记录）
        print("正在从源表ma_vender读取数据...")
        source_query = """
                       SELECT id, NAME, ADDRESS, COMPANY_MAN, MAIN_PERSON, PHONE, SCOPE_BUSINESS
                       FROM ma_vender
                       WHERE company_id = 1 \
                         AND IS_ACTIVE = 1 \
                       """
        source_df = pd.read_sql(source_query, source_engine)

        if source_df.empty:
            print("没有符合条件的数据需要转换")
            return
        print(f"读取到{len(source_df)}条待转换数据")

        # 数据转换
        print("正在进行数据转换...")
        target_df = pd.DataFrame()

        # 字段映射（源字段 → 目标字段）
        field_mapping = {
            'id': 'supplier_id',
            'NAME': 'supplier',
            'ADDRESS': 'address',
            'COMPANY_MAN': 'legal_person',
            'MAIN_PERSON': 'primary_contact',
            'PHONE': 'phone',
            'SCOPE_BUSINESS': 'business_scope'
        }

        # 复制字段
        for source_field, target_field in field_mapping.items():
            target_df[target_field] = source_df[source_field]

        # 检查数据质量
        print("\n数据质量检查:")
        print(f"- 空供应商ID记录: {target_df['supplier_id'].isna().sum()}")
        print(f"- 空供应商名称记录: {target_df['supplier'].isna().sum()}")
        print(f"- 无效电话号码记录: {target_df['phone'].str.contains('[^0-9-]').sum()}")

        # 写入目标表（使用事务确保原子性）
        print("\n正在写入目标表ma_supplier_info...")
        with target_engine.begin() as conn:
            # 检查并处理可能的主键冲突
            existing_ids = pd.read_sql(
                "SELECT supplier_id FROM ma_supplier_info",
                conn
            )['supplier_id'].tolist()

            new_records = target_df[~target_df['supplier_id'].isin(existing_ids)]
            dup_count = len(target_df) - len(new_records)
            if dup_count > 0:
                print(f"发现{dup_count}条重复供应商记录，将自动跳过")
                print("重复ID示例:",
                      target_df[target_df['supplier_id'].isin(existing_ids)]['supplier_id'].head(3).tolist())

            if not new_records.empty:
                new_records.to_sql(
                    'ma_supplier_info',
                    conn,
                    if_exists='append',
                    index=False,
                    dtype={
                        'supplier_id': sqlalchemy.types.INTEGER(),
                        'supplier': sqlalchemy.types.VARCHAR(length=100),
                        'address': sqlalchemy.types.VARCHAR(length=200),
                        'legal_person': sqlalchemy.types.VARCHAR(length=50),
                        'primary_contact': sqlalchemy.types.VARCHAR(length=50),
                        'phone': sqlalchemy.types.VARCHAR(length=20),
                        'business_scope': sqlalchemy.types.TEXT()
                    }
                )
                print(f"成功写入{len(new_records)}条新供应商数据")
            else:
                print("没有新供应商数据需要写入")

    except Exception as e:
        print(f"\n处理过程中发生错误: {str(e)}")
        raise
    finally:
        if 'source_engine' in locals():
            source_engine.dispose()
        if 'target_engine' in locals():
            target_engine.dispose()


if __name__ == "__main__":
    # 配置文件路径
    config_file = "config.ini"

    try:
        transform_and_load_supplier(config_file)
    except Exception as e:
        print(f"程序执行失败: {str(e)}")