enhance log

parent 6ed7002758
commit cc706ce7ef
@@ -3,7 +3,7 @@ import os
import langchain
import tempfile
import shutil

from logging.handlers import RotatingFileHandler

# 是否显示详细日志
log_verbose = False

@@ -14,9 +14,12 @@ langchain.verbose = False
# 日志格式
LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)

LOG_BACKUP_COUNT = 10 # 保留的归档文件数量
LOG_MAX_FILE_SIZE = 1024 * 1024 # 每个日志文件的最大大小(以字节为单位)

# 创建日志记录器并设置日志级别
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# 日志存储路径
LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")

@@ -32,20 +35,14 @@ except Exception:
os.makedirs(BASE_TEMP_DIR, exist_ok=True)

#added by weiweiwang for log

# 创建日志记录器并设置日志级别
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# 创建文件处理程序,并设置日志级别和文件名
#appLogPath = os.path.join(LOG_PATH, "app.log")
file_handler = logging.FileHandler(LOG_PATH +'/app.log')
file_handler = RotatingFileHandler(LOG_PATH +'/app.log', maxBytes=LOG_MAX_FILE_SIZE, backupCount=LOG_BACKUP_COUNT)
file_handler.setLevel(logging.INFO)

# 设置日志记录格式
# # 设置日志记录格式
formatter = logging.Formatter(LOG_FORMAT)
file_handler.setFormatter(formatter)

# 获取日志记录器并添加文件处理程序
appLogger = logging.getLogger(__name__)
appLogger.addHandler(file_handler)
logger.addHandler(file_handler)
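Taken together, the config hunks above swap the plain FileHandler for a size-based RotatingFileHandler attached to the root logger, which the rest of this commit imports as `from configs import logger`. The following is a minimal sketch of the resulting setup, assuming the constants keep the values shown above; it is an illustration, not the literal config file.

import logging
import os
from logging.handlers import RotatingFileHandler

LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
LOG_MAX_FILE_SIZE = 1024 * 1024   # rotate once a log file exceeds ~1 MB
LOG_BACKUP_COUNT = 10             # keep at most 10 archived files

os.makedirs(LOG_PATH, exist_ok=True)  # assumption: the real config creates LOG_PATH elsewhere

# console output plus a rotating file handler on the root logger
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger()

# app.log rolls over to app.log.1 ... app.log.10 once it exceeds LOG_MAX_FILE_SIZE
file_handler = RotatingFileHandler(os.path.join(LOG_PATH, "app.log"),
                                   maxBytes=LOG_MAX_FILE_SIZE,
                                   backupCount=LOG_BACKUP_COUNT)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
logger.addHandler(file_handler)

Because the handler hangs off the root logger, every module that does `from configs import logger` ends up writing to the same rotating app.log.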
@@ -1,6 +1,6 @@
from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from configs import PDF_OCR_THRESHOLD
from configs import PDF_OCR_THRESHOLD,logger
from document_loaders.ocr import get_ocr
#PDF_OCR_THRESHOLD= (0.6,0.6)
#from ocr import get_ocr

@@ -23,7 +23,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
print(f"****page:{i+1}****")
text = page.get_text("")
text_lines = text.strip().split("\n")
#print(f"文字内容:{text_lines}")
logger.debug(f"文字内容:{text_lines}")

img_list = page.get_image_info(xrefs=True)
ocr_result = []

@@ -39,7 +39,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
result, _ = ocr(img_array)
if result:
ocr_result = [line[1] for line in result]
#print(f"图片内容:{ocr_result}")
logger.debug(f"图片内容:{ocr_result}")
#resp += "\n".join(ocr_result)

if (len(ocr_result)>0):

@@ -49,7 +49,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
# 假设页码在最后一行
if text_lines[-1].isdigit():
text = "\n".join(text_lines[:-1])
print(f"******去除了页码")
logger.debug(f"******去除了页码")
resp += text + "\n"

# 更新进度
@@ -1,5 +1,5 @@
from fastapi import Body
from configs import logger, log_verbose
from configs import logger, log_verbose, logger
from server.utils import BaseResponse
from server.db.repository import feedback_message_to_db
@@ -11,7 +11,8 @@ from configs import (LLM_MODELS,
RERANKER_MODEL,
RERANKER_MAX_LENGTH,
MODEL_PATH,
DOWNLOAD_BASE_URL)
DOWNLOAD_BASE_URL,
logger)
from server.utils import wrap_done, get_ChatOpenAI
from server.utils import BaseResponse, get_prompt_template
from langchain.chains import LLMChain

@@ -26,6 +27,8 @@ from urllib.parse import urlencode
from server.knowledge_base.kb_doc_api import search_docs
from server.reranker.reranker import LangchainReranker
from server.utils import embedding_device
import time

async def knowledge_base_chat(query: str = Body(..., description="用户输入", examples=["你好"]),
knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
top_k: int = Body(VECTOR_SEARCH_TOP_K, description="匹配向量数"),

@@ -81,6 +84,7 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
max_tokens=max_tokens,
callbacks=[callback],
)
start_time = time.time() # 记录开始时间
docs = search_docs(query, knowledge_base_name, top_k, score_threshold)
# docs = await run_in_threadpool(search_docs,
# query=query,

@@ -88,7 +92,14 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
# top_k=top_k,
# score_threshold=score_threshold)

end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
logger.info(f"search_docs 耗时{execution_time}秒")

# 加入reranker
logger.info(f"use_reranker:{USE_RERANKER}")

start_time = time.time() # 记录开始时间
if USE_RERANKER:
reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large")
print("-----------------model path------------------")

@@ -102,6 +113,9 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
query=query)
print("---------after rerank------------------")
print(docs)
end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
logger.info(f"reranker 耗时{execution_time}秒")

context = "\n".join([doc.page_content for doc in docs])
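The hunks above time search_docs and the reranker with repeated start_time / end_time pairs followed by a logger.info of the elapsed seconds. As a sketch only, the same measurement could be factored into a small context manager; the name log_elapsed and its placement are assumptions, not part of this commit.

import time
from contextlib import contextmanager

from configs import logger  # the same logger this commit adds to the configs imports


@contextmanager
def log_elapsed(label: str):
    """Log how long the wrapped block took, mirroring the manual timing blocks above."""
    start = time.time()
    try:
        yield
    finally:
        logger.info(f"{label} took {time.time() - start:.3f}s")


# roughly equivalent to the hand-written timing around search_docs:
# with log_elapsed("search_docs"):
#     docs = search_docs(query, knowledge_base_name, top_k, score_threshold)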
@@ -19,7 +19,7 @@ from server.knowledge_base.model.kb_document_model import DocumentWithVSId
from typing import List, Dict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from configs import USE_RANKING, appLogger
from configs import USE_RANKING, logger
import jieba
from typing import List, Dict,Tuple

@@ -39,7 +39,7 @@ def search_docs(
data = []
if kb is not None:
if query:
print(f"search_docs, query:{query}")
logger.info(f"search_docs, query:{query},top_k:{top_k},score_threshold:{score_threshold}, use_ranking:{USE_RANKING}")
docs = kb.search_docs(query, FIRST_VECTOR_SEARCH_TOP_K, score_threshold)
#print(f"search_docs,len of docs {len(docs)}, docs:{docs}")

@@ -355,7 +355,7 @@ def update_docs(
failed_files = {}
kb_files = []

appLogger.info(f"111111 kb_doc_api update_docs file_names:{file_names},更新的doc 长度:{len(docs)}")
logger.info(f"111111 kb_doc_api update_docs file_names:{file_names},更新的doc 长度:{len(docs)}")
# 生成需要加载docs的文件列表
for file_name in file_names:
file_detail = get_file_detail(kb_name=knowledge_base_name, filename=file_name)

@@ -364,7 +364,7 @@ def update_docs(
continue
if file_name not in docs:
try:
appLogger.info(f"****kb_doc_api update_docs file_name not in docs,filename:{file_name}")
logger.info(f"****kb_doc_api update_docs file_name not in docs,filename:{file_name}")
kb_files.append(KnowledgeFile(filename=file_name, knowledge_base_name=knowledge_base_name))
except Exception as e:
msg = f"加载文档 {file_name} 时出错:{e}"

@@ -391,7 +391,7 @@ def update_docs(
# 将自定义的docs进行向量化
for file_name, v in docs.items():
try:
appLogger.info(f"222222 kb_doc_api update_docs file_name:{file_name},更新的doc 长度:{len(docs)}")
logger.info(f"222222 kb_doc_api update_docs file_name:{file_name},更新的doc 长度:{len(docs)}")
v = [x if isinstance(x, Document) else Document(**x) for x in v]
kb_file = KnowledgeFile(filename=file_name, knowledge_base_name=knowledge_base_name)
kb.update_doc(kb_file, docs=v, not_refresh_vs_cache=True)
@@ -28,7 +28,7 @@ from typing import List, Union, Dict, Optional, Tuple

from server.embeddings_api import embed_texts, aembed_texts, embed_documents
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
from configs import logger,appLogger
from configs import logger
import time

@@ -115,15 +115,15 @@ class KBService(ABC):
custom_docs = True
for doc in docs:
doc.metadata.setdefault("source", kb_file.filename)
appLogger.info(f"kb_doc_api add_doc docs 不为空,len(docs):{len(docs)},文件名称:{kb_file.filename}")
logger.info(f"kb_doc_api add_doc docs 不为空,len(docs):{len(docs)},文件名称:{kb_file.filename}")
else:
docs = kb_file.file2text()
custom_docs = False
appLogger.info(f"kb_doc_api add_doc docs 为空,len(docs):{len(docs)},文件名称:{kb_file.filename}")
logger.info(f"kb_doc_api add_doc docs 为空,len(docs):{len(docs)},文件名称:{kb_file.filename}")

end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
appLogger.info(f"add_doc: 加载文件或分块耗时{execution_time}秒")
logger.info(f"add_doc: 加载文件或分块耗时{execution_time}秒")

start_time = time.time() # 记录开始时间
if docs:

@@ -138,11 +138,11 @@ class KBService(ABC):
rel_path = Path(source).relative_to(self.doc_path)
doc.metadata["source"] = str(rel_path.as_posix().strip("/"))
except Exception as e:
appLogger.info(f"cannot convert absolute path ({source}) to relative path. error is : {e}")
logger.info(f"cannot convert absolute path ({source}) to relative path. error is : {e}")
self.delete_doc(kb_file)
#appLogger.info(f"add_doc filepath:{kb_file.filepath},将要执行do_add_doc")
#logger.info(f"add_doc filepath:{kb_file.filepath},将要执行do_add_doc")
doc_infos = self.do_add_doc(docs, **kwargs)
#appLogger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db")
#logger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db")
status = add_file_to_db(kb_file,
custom_docs=custom_docs,
docs_count=len(docs),

@@ -150,7 +150,7 @@ class KBService(ABC):

end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
appLogger.info(f"add_doc: 入库耗时:{execution_time}秒")
logger.info(f"add_doc: 入库耗时:{execution_time}秒")
else:
status = False
return status

@@ -251,7 +251,7 @@ class KBService(ABC):
通过file_name或metadata检索Document
'''
doc_infos = list_docs_from_db(kb_name=self.kb_name, file_name=file_name, metadata=metadata)
#appLogger.info(f"kb_doc_api list_docs_from_db: {doc_infos}")
#logger.info(f"kb_doc_api list_docs_from_db: {doc_infos}")
docs = []
for x in doc_infos:
doc_info = self.get_doc_by_ids([x["id"]])
@@ -9,7 +9,7 @@ from server.knowledge_base.kb_service.base import KBService, SupportedVSType
from server.knowledge_base.utils import KnowledgeFile
from server.utils import load_local_embeddings
from elasticsearch import Elasticsearch,BadRequestError
from configs import logger,appLogger
from configs import logger
from configs import kbs_config
from server.knowledge_base.model.kb_document_model import DocumentWithVSId

@@ -30,13 +30,13 @@ class ESKBService(KBService):
self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}",
basic_auth=(self.user,self.password))
else:
appLogger.warning("ES未配置用户名和密码")
logger.warning("ES未配置用户名和密码")
self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}")
except ConnectionError:
appLogger.error("连接到 Elasticsearch 失败!")
logger.error("连接到 Elasticsearch 失败!")
raise ConnectionError
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
raise e
try:
# 首先尝试通过es_client_python创建

@@ -51,8 +51,8 @@ class ESKBService(KBService):
}
self.es_client_python.indices.create(index=self.index_name, mappings=mappings)
except BadRequestError as e:
appLogger.error("创建索引失败,重新")
appLogger.error(e)
logger.error("创建索引失败,重新")
logger.error(e)

try:
# langchain ES 连接、创建索引

@@ -67,7 +67,7 @@ class ESKBService(KBService):
es_password=self.password
)
else:
appLogger.warning("ES未配置用户名和密码")
logger.warning("ES未配置用户名和密码")
self.db_init = ElasticsearchStore(
es_url=f"http://{self.IP}:{self.PORT}",
index_name=self.index_name,

@@ -77,10 +77,10 @@ class ESKBService(KBService):
)
except ConnectionError:
print("### 初始化 Elasticsearch 失败!")
appLogger.error("### 初始化 Elasticsearch 失败!")
logger.error("### 初始化 Elasticsearch 失败!")
raise ConnectionError
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
raise e
try:
# 尝试通过db_init创建索引

@@ -89,8 +89,8 @@ class ESKBService(KBService):
dims_length=self.dims_length
)
except Exception as e:
appLogger.error("创建索引失败...")
appLogger.error(e)
logger.error("创建索引失败...")
logger.error(e)
# raise e

@@ -156,23 +156,22 @@ class ESKBService(KBService):
except ConnectionError as ce:
print(ce)
print("连接到 Elasticsearch 失败!")
appLogger.error("连接到 Elasticsearch 失败!")
logger.error("连接到 Elasticsearch 失败!")
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
print(e)

def do_search(self, query:str, top_k: int, score_threshold: float):
# 文本相似性检索
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
docs = self.db_init.similarity_search_with_score(query=query,
k=top_k)
return docs

def searchbyContent(self, query:str, top_k: int = 2):
if self.es_client_python.indices.exists(index=self.index_name):
appLogger.info(f"******ESKBService searchByContent {self.index_name},query:{query}")
logger.info(f"******ESKBService searchByContent {self.index_name},query:{query}")
tem_query = {
"query": {"match": {
"context": "*" + query + "*"

@@ -199,7 +198,7 @@ class ESKBService(KBService):

def searchbyContentInternal(self, query:str, top_k: int = 2):
if self.es_client_python.indices.exists(index=self.index_name):
appLogger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}")
logger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}")
tem_query = {
"query": {"match": {
"context": "*" + query + "*"

@@ -231,19 +230,19 @@ class ESKBService(KBService):
metadata=result["_source"]["metadata"],
))
except Exception as e:
appLogger.error(f"ES Docs Get Error! {e}")
logger.error(f"ES Docs Get Error! {e}")
return result_list

def del_doc_by_ids(self,ids: List[str]) -> bool:
appLogger.info(f"es_kb_service del_doc_by_ids")
logger.info(f"es_kb_service del_doc_by_ids")
for doc_id in ids:
try:
self.es_client_python.delete(index=self.index_name,
id=doc_id,
refresh=True)
except Exception as e:
appLogger.error(f"ES Docs Delete Error! {e}")
logger.error(f"ES Docs Delete Error! {e}")

def do_delete_doc(self, kb_file, **kwargs):

@@ -272,7 +271,7 @@ class ESKBService(KBService):
id=doc_id,
refresh=True)
except Exception as e:
appLogger.error(f"ES Docs Delete Error! {e}")
logger.error(f"ES Docs Delete Error! {e}")

# self.db_init.delete(ids=delete_list)
#self.es_client_python.indices.refresh(index=self.index_name)
@@ -1,7 +1,7 @@
from fastapi import Body
from configs import (DEFAULT_VS_TYPE, EMBEDDING_MODEL,
OVERLAP_SIZE,
logger, log_verbose, )
logger, log_verbose )
from server.knowledge_base.utils import (list_files_from_folder)
from sse_starlette import EventSourceResponse
import json

@@ -10,7 +10,7 @@ from typing import List, Optional
from server.knowledge_base.kb_summary.base import KBSummaryService
from server.knowledge_base.kb_summary.summary_chunk import SummaryAdapter
from server.utils import wrap_done, get_ChatOpenAI, BaseResponse
from configs import LLM_MODELS, TEMPERATURE
from configs import LLM_MODELS, TEMPERATURE, logger
from server.knowledge_base.model.kb_document_model import DocumentWithVSId

def recreate_summary_vector_store(
@@ -65,7 +65,7 @@ def import_from_db(
con.close()
return True
except Exception as e:
print(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}")
logger.error(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}")
return False
@@ -5,7 +5,6 @@ from configs import (
OVERLAP_SIZE,
ZH_TITLE_ENHANCE,
logger,
appLogger,
log_verbose,
text_splitter_dict,
LLM_MODELS,

@@ -95,7 +94,7 @@ def list_files_from_folder(kb_name: str):
process_entry(entry)

except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")

return result

@@ -176,7 +175,7 @@ def get_loader(loader_name: str, file_path: str, loader_kwargs: Dict = None):
DocumentLoader = getattr(document_loaders_module, loader_name)
except Exception as e:
msg = f"为文件{file_path}查找加载器{loader_name}时出错:{e}"
appLogger.error(f'{e.__class__.__name__}: {msg}',
logger.error(f'{e.__class__.__name__}: {msg}',
exc_info=e if log_verbose else None)
document_loaders_module = importlib.import_module('langchain.document_loaders')
DocumentLoader = getattr(document_loaders_module, "UnstructuredFileLoader")

@@ -315,14 +314,15 @@ class KnowledgeFile:

def file2docs(self, refresh: bool = False):
if self.docs is None or refresh:
appLogger.info(f"{self.document_loader_name} used for {self.filepath}")
logger.info(f"{self.document_loader_name} used for {self.filepath}")
loader = get_loader(loader_name=self.document_loader_name,
file_path=self.filepath,
loader_kwargs=self.loader_kwargs)
self.docs = loader.load()
logger.info(f"{self.filepath}加载完成")
return self.docs

print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}")
#print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}")

def docs2texts(
self,

@@ -347,7 +347,7 @@ class KnowledgeFile:
if doc.page_content.strip()!="":
doc.page_content = re.sub(r"\n{2,}", "\n", doc.page_content.strip())
file_name_without_extension, file_extension = os.path.splitext(self.filepath)
print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
logger.info(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
if not docs:
return []
if self.ext not in [".csv"]:

@@ -437,10 +437,11 @@ def files2docs_in_thread(

def file2docs(*, file: KnowledgeFile, **kwargs) -> Tuple[bool, Tuple[str, str, List[Document]]]:
try:
logger.info(f"file2docs 从文件 {file.kb_name}/{file.filename}")
return True, (file.kb_name, file.filename, file.file2text(**kwargs))
except Exception as e:
msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
appLogger.error(f'{e.__class__.__name__}: {msg}',
msg = f"file2docs 从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
logger.error(f'{e.__class__.__name__}: {msg}',
exc_info=e if log_verbose else None)
return False, (file.kb_name, file.filename, msg)
@@ -1,5 +1,5 @@
from fastapi import Body
from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT
from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT, logger
from server.utils import (BaseResponse, fschat_controller_address, list_config_llm_models,
get_httpx_client, get_model_worker_config)
from typing import List
startup.py

@@ -566,36 +566,36 @@ def dump_server_info(after_start=False, args=None):
import fastchat
from server.utils import api_address, webui_address

print("\n")
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
print(f"操作系统:{platform.platform()}.")
print(f"python版本:{sys.version}")
print(f"项目版本:{VERSION}")
print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}")
print("\n")
logger.info("\n")
logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
logger.info(f"操作系统:{platform.platform()}")
logger.info(f"python版本:{sys.version}")
logger.info(f"项目版本:{VERSION}")
logger.info(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}")
logger.info("\n")

models = LLM_MODELS
if args and args.model_name:
models = args.model_name

print(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
print(f"当前启动的LLM模型:{models} @ {llm_device()}")
logger.info(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
logger.info(f"当前启动的LLM模型:{models} @ {llm_device()}")

for model in models:
pprint(get_model_worker_config(model))
print(f"当前Embbedings模型: {EMBEDDING_MODEL} @ {embedding_device()}")
logger.info(f"当前Embbedings模型: {EMBEDDING_MODEL} @ {embedding_device()}")

if after_start:
print("\n")
print(f"服务端运行信息:")
logger.info("\n")
logger.info(f"服务端运行信息:")
if args.openai_api:
print(f"    OpenAI API Server: {fschat_openai_api_address()}")
logger.info(f"    OpenAI API Server: {fschat_openai_api_address()}")
if args.api:
print(f"    Chatchat API Server: {api_address()}")
logger.info(f"    Chatchat API Server: {api_address()}")
if args.webui:
print(f"    Chatchat WEBUI Server: {webui_address()}")
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
print("\n")
logger.info(f"    Chatchat WEBUI Server: {webui_address()}")
logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
logger.info("\n")


async def start_main_server():
@@ -1,5 +1,6 @@
from langchain.docstore.document import Document
import re
from configs import logger

def get_fist_level_title(
text: str,

@@ -181,11 +182,11 @@ def zh_second_title_enhance(docs: Document) -> Document:
title = None
if len(docs) > 0:
for doc in docs:
#print(f"zh_second_title_enhance: {doc}")
logger.debug(f"zh_second_title_enhance: {doc}")
second_title = get_second_level_title(doc.page_content)
if second_title:
title = second_title
#print(f"title: {title}")
logger.debug(f"title: {title}")
elif title:
#print(f"title is not none")
temp_third_content = is_third_level_content(doc.page_content)

@@ -194,7 +195,7 @@ def zh_second_title_enhance(docs: Document) -> Document:
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
print(f"final title: {title}")
logger.debug(f"final title: {title}")
return docs
else:
print("zh_second_title_enhance 文件不存在")

@@ -204,19 +205,19 @@ def zh_first_title_enhance(docs: Document) -> Document:
title = None
if len(docs) > 0:
for doc in docs:
#print(f"zh_first_title_enhance: {doc}")
logger.debug(f"zh_first_title_enhance: {doc}")
first_title = get_fist_level_title(doc.page_content)
if first_title:
title = first_title
#print(f"title: {title}")
logger.debug(f"title: {title}")
elif title:
temp_second_content = is_second_level_content(doc.page_content)
if temp_second_content:
#print(f"is_second_level_content : {temp_second_content}")
logger.debug(f"is_second_level_content : {temp_second_content}")
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
#print(f"final title: {title}")
logger.debug(f"final title: {title}")
return docs
else:
print("zh_first_title_enhance 文件不存在")
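One thing to keep in mind about the print-to-logger.debug conversions in this file and in the PDF loader: the config hunks set both the root logger and the file handler to INFO, so the new logger.debug messages are dropped by default. A caller that wants to see them would have to lower the levels, roughly as in this illustrative snippet (not part of the commit):

import logging

from configs import logger

# surface the new logger.debug output from zh_title_enhance and the PDF loader
logger.setLevel(logging.DEBUG)
for handler in logger.handlers:
    handler.setLevel(logging.DEBUG)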