import pandas as pd
import re
import json



import re
import json
import sys
from openpyxl import load_workbook


def main():
    try:
        # 指定文件路径
        input_file = r'C:\Users\Administrator\Desktop\杂\9.4.xlsx'
        output_file = 'extracted_codes.json'

        # 结果字典
        result_dict = {}

        # 加载Excel文件
        print(f"正在读取Excel文件: {input_file}")
        wb = load_workbook(filename=input_file)
        sheet = wb['sheet1']

        # 查找规格编码所在列
        header_row = sheet[1]  # 假设第一行是表头
        col_idx = None
        for idx, cell in enumerate(header_row):
            if cell.value == "规格编码":
                col_idx = idx
                break

        if col_idx is None:
            raise ValueError("在Excel中找不到'规格编码'列")

        print(f"找到'规格编码'列，位于第 {col_idx + 1} 列")

        # 处理每一行数据
        processed_count = 0
        for row_idx, row in enumerate(sheet.iter_rows(min_row=2, values_only=True), start=2):  # 从第二行开始
            if row[col_idx] is None:
                continue  # 跳过空值

            try:
                spec_code = str(row[col_idx])
                parts = spec_code.split('-')

                if len(parts) < 3:
                    print(f"行 {row_idx} 格式不正确: {spec_code}")
                    continue

                merchant_code = parts[0]
                target_part = parts[2]

                # 清理目标编码
                cleaned_target = re.sub(r'[^a-zA-Z0-9]', '', target_part).lower()

                # 添加到结果字典
                if merchant_code not in result_dict:
                    result_dict[merchant_code] = []
                result_dict[merchant_code].append(cleaned_target)

                processed_count += 1

            except Exception as e:
                print(f"行 {row_idx} 处理错误: {str(e)}")

        # 保存为JSON文件
        print(f"成功处理 {processed_count} 行数据")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(result_dict, f, ensure_ascii=False, indent=2)

        print(f"结果已保存到 {output_file}")

    except Exception as e:
        print(f"\n发生严重错误: {str(e)}")
        print("\n** 解决方案 ​**")
        print("1. 请确保已安装openpyxl: pip install openpyxl")
        print("2. 检查Excel文件路径是否正确")
        print("3. 确认Excel中确实有名为'sheet1'的工作表")
        print("4. 确保工作表中包含'规格编码'列")
        sys.exit(1)


if __name__ == "__main__":
    main()