Merge branch 'master' into pre-release

2023-09-12 15:51:53 +08:00 · 2023-09-12 15:51:53 +08:00 · 1d0c6a9184
parent 9ed43ab2ab 8064586374
commit 1d0c6a9184
7 changed files with 31 additions and 8 deletions
--- a/.github/workflows/close-issue.yml
+++ b/.github/workflows/close-issue.yml
@ -0,0 +1,22 @@
 # Workflow that labels inactive issues as stale and later closes them,
 # using the actions/stale action. Pull requests are left alone (see below).
 name: Close inactive issues
 on:
  schedule:
    # Cron fields: minute=30, hour=1, every day of every month.
    # NOTE(review): GitHub evaluates schedule crons in UTC — confirm the intended local time.
    - cron: "30 1 * * *"
 jobs:
  close-issues:
    runs-on: ubuntu-latest
    # Token scopes granted to this job's GITHUB_TOKEN.
    permissions:
      issues: write
      # NOTE(review): PR handling is disabled below, so this scope may be unnecessary — confirm.
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          # An issue with no activity for 30 days gets the stale label and message.
          days-before-issue-stale: 30
          # A stale issue with no further activity for 14 days is closed.
          days-before-issue-close: 14
          stale-issue-label: "stale"
          stale-issue-message: "This issue is stale because it has been open for 30 days with no activity."
          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
          # Per the actions/stale documentation, -1 turns off stale-marking/closing for pull requests.
          days-before-pr-stale: -1
          days-before-pr-close: -1
          repo-token: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -4,6 +4,6 @@ logs
 .idea/
 __pycache__/
 /knowledge_base/
-configs/*.py
+/configs/*.py
 .vscode/
 .pytest_cache/
--- a/README.md
+++ b/README.md
@ -485,6 +485,6 @@ $ streamlit run webui.py --server.port 666
 ## 项目交流群
-<img src="img/qr_code_59.jpg" alt="二维码" width="300" height="300" />
+<img src="img/qr_code_60.jpg" alt="二维码" width="300" height="300" />
 🎉 langchain-ChatGLM 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
--- a/document_loaders/mypdfloader.py
+++ b/document_loaders/mypdfloader.py
@ -5,7 +5,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
 class RapidOCRPDFLoader(UnstructuredFileLoader):
    def _get_elements(self) -> List:
        def pdf2text(filepath):
-            import fitz
+            import fitz # pyMuPDF里面的fitz包，不要与pip install fitz混淆
            from rapidocr_onnxruntime import RapidOCR
            import numpy as np
            ocr = RapidOCR()
--- a/img/qr_code_60.jpg
+++ b/img/qr_code_60.jpg
--- a/server/knowledge_base/utils.py
+++ b/server/knowledge_base/utils.py
@ -91,8 +91,8 @@ LOADER_DICT = {"UnstructuredHTMLLoader": ['.html'],
               "RapidOCRLoader": ['.png', '.jpg', '.jpeg', '.bmp'],
               "UnstructuredFileLoader": ['.eml', '.msg', '.rst',
                                          '.rtf', '.txt', '.xml',
-                                          '.doc', '.docx', '.epub', '.odt',
+                                          '.docx', '.epub', '.odt',
-                                          '.ppt', '.pptx', '.tsv'],  # '.xlsx'
+                                          '.ppt', '.pptx', '.tsv'],
               }
 SUPPORTED_EXTS = [ext for sublist in LOADER_DICT.values() for ext in sublist]
--- a/webui_pages/dialogue/dialogue.py
+++ b/webui_pages/dialogue/dialogue.py
@ -153,6 +153,7 @@ def dialogue_page(api: ApiRequest):
            for d in api.knowledge_base_chat(prompt, selected_kb, kb_top_k, score_threshold, history, model=llm_model):
                if error_msg := check_error_msg(d): # check whether error occurred
                    st.error(error_msg)
                else:
                    text += d["answer"]
                    chat_box.update_msg(text, 0)
                    chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False)