Spaces:

jerpint
/

buster-dev

Runtime error

App Files Files Community

buster-dev / buster /apps /gradio_app.py

jerpint

update cfgs for new refactor

ab096a6 over 2 years ago

raw

history blame contribute delete

4.06 kB

	import logging
	import os

	import gradio as gr
	import pandas as pd
	from huggingface_hub import hf_hub_download

	from buster.apps.bot_configs import available_configs
	from buster.busterbot import Buster, BusterConfig
	from buster.retriever import Retriever
	from buster.utils import get_retriever_from_extension

	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)

	# Key into `available_configs` used for the initial bot configuration.
	DEFAULT_CONFIG = "huggingface"

	# DOWNLOAD FROM HF HUB
	# os.getenv returns None when HUB_TOKEN is not set in the environment.
	HUB_TOKEN = os.getenv("HUB_TOKEN")
	REPO_ID = "jerpint/buster-data"
	HUB_DB_FILE = "documents.db"
	# Plain %-style arguments: no f-string is needed, and formatting is deferred to the logger.
	logger.info("Downloading %s from hub...", HUB_DB_FILE)
	hf_hub_download(
	    repo_id=REPO_ID,
	    repo_type="dataset",
	    filename=HUB_DB_FILE,
	    token=HUB_TOKEN,
	    # Download into the current working directory; the retriever below is
	    # handed the bare file name, so it is resolved relative to the same place.
	    local_dir=".",
	)
	logger.info("Downloaded.")
	# get_retriever_from_extension(...) returns a callable, which is then invoked
	# with the same file name to build the retriever.
	retriever: Retriever = get_retriever_from_extension(HUB_DB_FILE)(HUB_DB_FILE)

	# Initialize the module-level bot with the default config.
	# NOTE(review): the lookup uses `.get`; if `available_configs` is a plain dict
	# (TODO confirm), a missing DEFAULT_CONFIG key yields None here instead of raising.
	default_cfg: BusterConfig = available_configs.get(DEFAULT_CONFIG)
	# Single shared instance: `chat` calls `buster.update_cfg(...)` on this same object.
	buster = Buster(cfg=default_cfg, retriever=retriever)


	def format_sources(matched_documents: pd.DataFrame) -> str:
	    """Render the matched documents as a block of source links.

	    Args:
	        matched_documents: Frame with ``title``, ``url`` and ``similarity``
	            columns. ``similarity`` is multiplied by 100 for display only;
	            the frame itself is left untouched.

	    Returns:
	        An empty string when the frame has no rows. Otherwise an intro line,
	        one ``[🔗 title](url), relevance: NN.N %`` entry per row joined with
	        ``<br>``, and a closing footnote.
	    """
	    if len(matched_documents) == 0:
	        return ""

	    answer_template = "📝 Here are the sources I used to answer your question:<br>{sources}<br><br>{footnote}"
	    source_template = "[🔗 {title}]({url}), relevance: {relevance:2.1f} %"
	    footnote = "I'm a bot 🤖 and not always perfect."

	    # Scale each similarity at format time. Writing the scaled column back onto
	    # the frame would mutate the caller's data, and formatting the same frame
	    # twice would then report relevance multiplied by 100 a second time.
	    rows = zip(
	        matched_documents["title"],
	        matched_documents["url"],
	        matched_documents["similarity"],
	    )
	    sources = "<br>".join(
	        source_template.format(title=title, url=url, relevance=similarity * 100)
	        for title, url, similarity in rows
	    )

	    return answer_template.format(sources=sources, footnote=footnote)


	def chat(question, history, bot_source):
	    """Answer one question and append the exchange to the conversation.

	    Args:
	        question: Text of the user's question.
	        history: List of ``(question, response)`` tuples from earlier turns;
	            any falsy value (e.g. ``None``) starts a new list.
	        bot_source: Key looked up in ``available_configs`` to pick the config
	            passed to ``buster.update_cfg`` before answering.

	    Returns:
	        The updated conversation list, twice. The click/submit handlers route
	        the two values to the ``chatbot`` and ``state`` outputs.
	    """
	    conversation = history if history else []

	    # Point the shared bot at the selected documentation source.
	    buster.update_cfg(available_configs.get(bot_source))

	    response = buster.process_input(question)
	    sources_block = format_sources(response.matched_documents)
	    answer = f"{response.completion.text}<br><br>" + sources_block

	    conversation.append((question, answer))
	    return conversation, conversation


	# Gradio UI definition: layout, event wiring, and launch.
	# NOTE(review): the CSS selector targets `#chatbot`, but no component below sets
	# elem_id="chatbot"; confirm the rule still matches anything.
	block = gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}")

	with block:
	with gr.Row():
	# Page title.
	gr.Markdown("<h3><center>Buster 🤖: A Question-Answering Bot for open-source libraries </center></h3>")

	# Dropdown over the available config names, sorted, defaulting to DEFAULT_CONFIG.
	# Its value is passed to `chat` as `bot_source`.
	doc_source = gr.Dropdown(
	choices=sorted(list(available_configs.keys())),
	value=DEFAULT_CONFIG,
	interactive=True,
	multiselect=False,
	label="Source of Documentation",
	info="The source of documentation to select from",
	)

	# Receives the first value returned by `chat`.
	chatbot = gr.Chatbot()

	with gr.Row():
	# Question input; pressing enter here and clicking the button both call `chat`.
	message = gr.Textbox(
	label="What's your question?",
	placeholder="What kind of model should I use for sentiment analysis?",
	lines=1,
	)
	# NOTE(review): `.style(...)` on components was deprecated in later Gradio
	# releases — confirm against the pinned gradio version.
	submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

	# Example questions, wired to the `message` textbox via `inputs=message`.
	# NOTE(review): the second example contains a typo ("form scratch" -> "from scratch").
	examples = gr.Examples(
	# TODO: seems not possible (for now) to update examples on change...
	examples=[
	"What kind of models should I use for images and text?",
	"When should I finetune a model vs. training it form scratch?",
	"Can you give me some python code to quickly finetune a model on my sentiment analysis dataset?",
	],
	inputs=message,
	)

	# Static description; the text is specific to the huggingface transformers docs
	# regardless of which source is selected in the dropdown.
	gr.Markdown(
	"""This simple application uses GPT to search the huggingface 🤗 transformers docs and answer questions.
	For more info on huggingface transformers view the [full documentation.](https://huggingface.co/docs/transformers/index)."""
	)

	# NOTE(review): the <center> tag in this footer is never closed.
	gr.HTML("️<center> Created with ❤️ by @jerpint and @hadrienbertrand")

	# `state` carries the conversation history into and out of `chat`.
	state = gr.State()
	# NOTE(review): `agent_state` is not referenced by any handler in this block.
	agent_state = gr.State()

	# Button click and textbox submit share the same handler, inputs and outputs.
	submit.click(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])
	message.submit(chat, inputs=[message, state, doc_source], outputs=[chatbot, state])


	# Start the app with debug mode enabled.
	block.launch(debug=True)