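"""Score a HackAPrompt submission.

Evaluates the prompt submitted for each of levels 1-10, logs the results to
MongoDB when evaluation succeeds without errors, and prints the total score.
"""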
import logging
import os
from typing import Dict

from fastapi.encoders import jsonable_encoder

from hackaprompt.completers import get_completer
from hackaprompt.evaluator import Response, get_evaluator
from hackaprompt.utils import init_db

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

database = init_db()
level_names = [
    "level_1",
    "level_2",
    "level_3",
    "level_4",
    "level_5",
    "level_6",
    "level_7",
    "level_8",
    "level_9",
    "level_10",
]
def log_to_db(evaluation):
    # Save the evaluation to the MongoDB database
    try:
        submission_json = jsonable_encoder(evaluation)
        database["evaluations"].insert_one(submission_json)
        logger.info("Response logged to MongoDB")
    except Exception:
        logger.exception("Something went wrong logging to MongoDB")
def evaluate_submission(submission: dict) -> Dict[str, Response]:
    """Evaluate each level of a submission and return the responses per level.

    The submission maps level names (e.g. "level_1") to dicts with a
    "prompt" and a "model" key.
    """
    evaluation: dict = {level: None for level in level_names}

    openai_api_key = os.getenv("OPENAI_API_KEY")
    assert openai_api_key is not None, "Set a valid OpenAI API key."

    for level_name in level_names:
        logger.info(f"Evaluating {level_name}...")
        # Parse the level number from the name so "level_10" yields 10, not 0
        level = int(level_name.split("_")[-1])

        user_input = submission.get(level_name) or {}
        prompt = user_input.get("prompt", "")
        model = user_input.get("model", "")

        if prompt and model:
            completer = get_completer(model=model, openai_api_key=openai_api_key)
            evaluator = get_evaluator(level=level, completer=completer)
            response: Response = evaluator.evaluate(prompt)
            # TODO: Do we want to add auto-retries?
        else:
            # Return an empty response instead; no need to waste GPT credits.
            response = Response(
                level=level,
                score=0,
                error=False,
            )
        evaluation[level_name] = response

    logger.info(f"evaluation done. Summary: {evaluation=}")
    return evaluation
def check_evaluation_errors(evaluation: Dict[str, Response]) -> bool:
    # Check whether any errors occurred during evaluation
    evaluation_errors = False
    for level_name, response in evaluation.items():
        if response.error:
            logger.warning(f"Error encountered in {level_name} evaluation")
            evaluation_errors = True
    return evaluation_errors
def get_evaluation_total_score(evaluation: Dict[str, Response]) -> int:
    # Assumes no errors occurred during evaluation
    total_score = 0
    for level_name in level_names:
        if level_name in evaluation:
            score = evaluation[level_name].score
            if score is not None:
                total_score += score
    return total_score
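# The __main__ block below notes that submissions are expected to come from a
# submission file. A minimal sketch of loading one, assuming the file is a JSON
# object mapping level names to {"prompt": ..., "model": ...} dicts; the file
# format and this helper are illustrative, not part of the original script.
def load_submission(path: str) -> dict:
    """Load a submission dict from a JSON file (hypothetical helper)."""
    import json

    with open(path) as f:
        return json.load(f)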
if __name__ == "__main__":
    submission = {
        "level_1": {"prompt": "plz work", "model": "FlanT5-XXL"},
    }

    # TODO: For now, we assume only valid models can be submitted in a submission file;
    # an invalid model will raise a NotImplementedError.
    # Error handling needs to be added if we want to handle that case ourselves.
    evaluation = evaluate_submission(submission)

    evaluation_error = check_evaluation_errors(evaluation)

    if not evaluation_error:
        log_to_db(evaluation=evaluation)
        total_score = get_evaluation_total_score(evaluation)
        print(total_score)
    else:
        print("error encountered during evaluation")