Spaces:
Runtime error
Runtime error
remove OmegaConf dependency (#27)
Browse files- buster/chatbot.py +41 -42
buster/chatbot.py
CHANGED
|
@@ -4,7 +4,6 @@ from dataclasses import dataclass, field
|
|
| 4 |
import numpy as np
|
| 5 |
import openai
|
| 6 |
import pandas as pd
|
| 7 |
-
from omegaconf import OmegaConf
|
| 8 |
from openai.embeddings_utils import cosine_similarity, get_embedding
|
| 9 |
|
| 10 |
from buster.docparser import EMBEDDING_MODEL, read_documents
|
|
@@ -13,8 +12,48 @@ logger = logging.getLogger(__name__)
|
|
| 13 |
logging.basicConfig(level=logging.INFO)
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
class Chatbot:
|
| 17 |
-
def __init__(self, cfg:
|
| 18 |
# TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
|
| 19 |
self.cfg = cfg
|
| 20 |
self._init_documents()
|
|
@@ -176,43 +215,3 @@ class Chatbot:
|
|
| 176 |
formatted_output = self.format_response(response, matched_documents)
|
| 177 |
|
| 178 |
return formatted_output
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
@dataclass
|
| 182 |
-
class ChatbotConfig:
|
| 183 |
-
"""Configuration object for a chatbot.
|
| 184 |
-
|
| 185 |
-
documents_csv: Path to the csv file containing the documents and their embeddings.
|
| 186 |
-
embedding_model: OpenAI model to use to get embeddings.
|
| 187 |
-
top_k: Max number of documents to retrieve, ordered by cosine similarity
|
| 188 |
-
thresh: threshold for cosine similarity to be considered
|
| 189 |
-
max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
|
| 190 |
-
completion_kwargs: kwargs for the OpenAI.Completion() method
|
| 191 |
-
separator: the separator to use, can be either "\n" or <p> depending on rendering.
|
| 192 |
-
link_format: the type of format to render links with, e.g. slack or markdown
|
| 193 |
-
unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
|
| 194 |
-
text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
|
| 195 |
-
text_after_response: Generic response to add to the chatbot's reply.
|
| 196 |
-
"""
|
| 197 |
-
|
| 198 |
-
documents_file: str = "buster/data/document_embeddings.csv"
|
| 199 |
-
embedding_model: str = "text-embedding-ada-002"
|
| 200 |
-
top_k: int = 3
|
| 201 |
-
thresh: float = 0.7
|
| 202 |
-
max_chars: int = 3000
|
| 203 |
-
|
| 204 |
-
completion_kwargs: dict = field(
|
| 205 |
-
default_factory=lambda: {
|
| 206 |
-
"engine": "text-davinci-003",
|
| 207 |
-
"max_tokens": 200,
|
| 208 |
-
"temperature": None,
|
| 209 |
-
"top_p": None,
|
| 210 |
-
"frequency_penalty": 1,
|
| 211 |
-
"presence_penalty": 1,
|
| 212 |
-
}
|
| 213 |
-
)
|
| 214 |
-
separator: str = "\n"
|
| 215 |
-
link_format: str = "slack"
|
| 216 |
-
unknown_prompt: str = "I Don't know how to answer your question."
|
| 217 |
-
text_before_prompt: str = "I'm a chatbot, bleep bloop."
|
| 218 |
-
text_after_response: str = "Answer the following question:\n"
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import openai
|
| 6 |
import pandas as pd
|
|
|
|
| 7 |
from openai.embeddings_utils import cosine_similarity, get_embedding
|
| 8 |
|
| 9 |
from buster.docparser import EMBEDDING_MODEL, read_documents
|
|
|
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
class ChatbotConfig:
    """Configuration object for a chatbot.

    documents_file: Path to the csv file containing the documents and their embeddings.
    embedding_model: OpenAI model to use to get embeddings.
    top_k: Max number of documents to retrieve, ordered by cosine similarity
    thresh: threshold for cosine similarity to be considered
    max_chars: maximum number of characters the retrieved documents can be. Will truncate otherwise.
    completion_kwargs: kwargs for the OpenAI.Completion() method
    separator: the separator to use, can be either "\\n" or <p> depending on rendering.
    link_format: the type of format to render links with, e.g. slack or markdown
    unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
    text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
    text_after_response: Generic response to add to the chatbot's reply.
    """

    documents_file: str = "buster/data/document_embeddings.csv"
    embedding_model: str = "text-embedding-ada-002"
    top_k: int = 3
    thresh: float = 0.7
    max_chars: int = 3000

    # default_factory avoids the shared-mutable-default pitfall for dict fields.
    completion_kwargs: dict = field(
        default_factory=lambda: {
            "engine": "text-davinci-003",
            "max_tokens": 200,
            "temperature": None,
            "top_p": None,
            "frequency_penalty": 1,
            "presence_penalty": 1,
        }
    )
    separator: str = "\n"
    link_format: str = "slack"
    unknown_prompt: str = "I Don't know how to answer your question."
    text_before_prompt: str = "I'm a chatbot, bleep bloop."
    text_after_response: str = "Answer the following question:\n"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
class Chatbot:
|
| 56 |
+
def __init__(self, cfg: ChatbotConfig):
|
| 57 |
# TODO: right now, the cfg is being passed as an omegaconf, is this what we want?
|
| 58 |
self.cfg = cfg
|
| 59 |
self._init_documents()
|
|
|
|
| 215 |
formatted_output = self.format_response(response, matched_documents)
|
| 216 |
|
| 217 |
return formatted_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|