# -*- coding: utf-8 -*-
"""Command-line entry point that OCRs ID-card images and prints extracted fields.

Invocation modes (see the ``__main__`` section at the bottom):

* ``script.py <type> <arg1> <arg2>`` -- both inputs are passed on the command
  line. ``type`` 0 means the inputs are image file paths, ``type`` 1 means
  they are base64-encoded image payloads.
* ``script.py <type>`` -- the two inputs are read from stdin, one per line
  (the second line is optional).

The result of the extraction is printed to stdout as a JSON string; an empty
string is printed when extraction fails.
"""
import base64
import io
import json
import os
import sys
import tempfile
import time

from extractor.identitycard_extractor import IdentityCardExtractor
from document_loader.imgloader import RapidOCRLoader
from configs.basic_config import logger

# Force UTF-8 on stdout so non-ASCII JSON (ensure_ascii=False) is emitted
# correctly regardless of the console's default encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch an extraction request according to the input kind.

    Args:
        type: 0 when the two inputs are image file paths, 1 when they are
            base64-encoded image payloads. (The name shadows the builtin but
            is kept because it is part of the existing call signature.)
        filePath1: First image path or base64 payload; may be empty.
        filePath2: Second image path or base64 payload; may be empty.

    Returns:
        The JSON string produced by the selected extractor, or ``""`` when
        ``type`` is not a supported value.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Previously this fell through and returned None despite the ``-> str``
    # annotation, which made the script print the literal text "None".
    logger.error(f"extractIdCardInfo: unsupported type {type}")
    return ""


def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """OCR up to two image files and extract ID-card fields from the text.

    Each non-empty path that exists on disk is loaded with ``RapidOCRLoader``;
    the recognised text of all loaded images is joined with newlines and fed
    to ``IdentityCardExtractor.extract_text``.

    Args:
        filePath1: Path of the first image; skipped when empty or missing.
        filePath2: Path of the second image; skipped when empty or missing.

    Returns:
        The extractor's result serialised with ``json.dumps`` (non-ASCII
        characters kept as-is), or ``""`` if any step raised an exception.
    """
    start_time = time.time()  # start timestamp, used for the elapsed-time log
    jsonString = ""
    try:
        texts = []
        for path in (filePath1, filePath2):
            if path and os.path.exists(path):
                docs = RapidOCRLoader(path).load()
                texts.append("\n".join(doc.page_content for doc in docs))
        # Join per-image text with a newline so the last line of the first
        # image is not fused with the first line of the second one.
        context = "\n".join(texts)

        extractor = IdentityCardExtractor()
        fields = extractor.extract_text(context)
        # The original assigned to a differently named variable here, so the
        # function always returned the empty string.
        jsonString = json.dumps(fields, ensure_ascii=False)
    except Exception as e:
        # Best-effort contract: log and return "" instead of propagating.
        logger.error(e)

    execution_time = time.time() - start_time
    logger.info(f"extractIdCardInfo 耗时{execution_time}秒")
    return jsonString


def _write_base64_image(directory: str, name: str, payload: str) -> str:
    """Decode ``payload`` from base64, write it to ``directory/name``, return the path."""
    path = os.path.join(directory, name)
    with open(path, "wb") as file:
        file.write(base64.b64decode(payload))
    return path


def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Extract ID-card fields from up to two base64-encoded images.

    Each non-empty payload is decoded and written to a ``.png`` file inside a
    private temporary directory, then the files are handed to
    ``extractIdCardInfoByPath``. The temporary directory is removed when the
    function finishes, whether or not extraction succeeded.

    Args:
        base64data1: Base64 text of the first image; may be empty.
        base64Data2: Base64 text of the second image; may be empty.

    Returns:
        The JSON string returned by ``extractIdCardInfoByPath``, or ``""``
        when both payloads are empty or an exception occurred.
    """
    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # start timestamp, used for the elapsed-time log
    jsonString = ""
    try:
        # A per-call temporary directory replaces the fixed "file1.png" /
        # "file2.png" in the working directory: stale files from an earlier
        # run can no longer be mistaken for input, concurrent runs cannot
        # clobber each other, and cleanup happens even on failure.
        with tempfile.TemporaryDirectory() as workdir:
            path1 = ""
            path2 = ""
            if base64data1:
                logger.info("not base64data1.empty()")
                path1 = _write_base64_image(workdir, "file1.png", base64data1)
            if base64Data2:
                logger.info("not base64Data2.empty()")
                path2 = _write_base64_image(workdir, "file2.png", base64Data2)

            if path1 and path2:
                logger.info("file1.png and file2.png exist")
            elif path1:
                logger.info("file1.png exist")
            elif path2:
                logger.info("file2.png exist")

            if path1 or path2:
                # extractIdCardInfoByPath skips empty paths, so a single
                # image in either slot is handled by the same call.
                jsonString = extractIdCardInfoByPath(path1, path2)
    except Exception as e:
        # Covers invalid base64 as well as file-system errors.
        logger.error(e)

    execution_time = time.time() - start_time
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
    return jsonString


def _split_stdin_payload(input_data: str):
    """Split stdin text into the (first, second) input strings.

    ``splitlines`` is used instead of splitting on ``os.linesep`` because
    text-mode stdin already normalises line endings to ``"\\n"`` and because
    a trailing newline must not produce a spurious third element.
    """
    lines = input_data.splitlines()
    if len(lines) > 2:
        logger.error(f"expected at most 2 input lines, got {len(lines)}; extra lines ignored")
    data1 = lines[0] if len(lines) > 0 else ""
    data2 = lines[1] if len(lines) > 1 else ""
    return data1, data2


def main() -> None:
    """Parse the command line, run the extraction and print the JSON result."""
    logger.info(f"main.py len of parameter: {len(sys.argv)}")
    jsonString = ""
    if len(sys.argv) > 3:
        # Mode 1: type and both inputs are given as command-line arguments.
        logger.info(f"{sys.argv[1]}")
        logger.info(f"{sys.argv[2]}")
        logger.info(f"{sys.argv[3]}")
        jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
    elif len(sys.argv) > 1:
        # Mode 2: only the type is on the command line; the inputs come from
        # stdin (used when the payload is too long for an argument list).
        logger.info("python 脚本里的接收到的参数是:")
        logger.info(f"{sys.argv[1]}")
        logger.info("开始执行sys.stdin.read")
        input_data = sys.stdin.read()
        logger.info("")
        logger.info(f"{len(input_data)}")
        data1, data2 = _split_stdin_payload(input_data)
        logger.info(f"{len(data1)}")
        logger.info(f"{len(data2)}")
        jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
    print(jsonString)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")
    except IndexError:
        # Too few arguments were supplied.
        logger.error("参数过长,未指定足够的参数")
    except OverflowError:
        # Command-line arguments were too long.
        logger.error("命令行参数过长,导致溢出错误")
    except EOFError:
        # End of the input stream was reached unexpectedly.
        logger.error("已经到达输入流的末尾")
    except Exception as e:
        # Any other failure: include the exception text in the message
        # itself; passing it as a bare extra argument dropped it.
        logger.error(f"发生了异常:{e}")