import io

import requests
from openai import OpenAI
from smolagents import (
    CodeAgent,
    FinalAnswerTool,
    GoogleSearchTool,
    OpenAIServerModel,
    PythonInterpreterTool,
    Tool,
    VisitWebpageTool,
    WikipediaSearchTool,
    tool,
)
| |
|
| |
|
def get_prompt() -> str:
    """Load the agent's system prompt from ``prompt.txt`` in the working directory.

    Returns:
        str: The full contents of ``prompt.txt``.

    Raises:
        FileNotFoundError: If ``prompt.txt`` does not exist.
    """
    # Pin the encoding so the prompt reads identically on every platform
    # (without it, Windows falls back to the locale codec).
    with open("prompt.txt", "r", encoding="utf-8") as f:
        return f.read()
| |
|
| |
|
@tool
def visual_qa(image_url: str, question: str) -> str:
    """
    Provides functionality to perform visual question answering (VQA) by processing an image and a natural language question.

    Args:
        image_url (str): A URL pointing to the location of the image to be analyzed. The URL should be accessible and point to a valid image file.
        question (str): A natural language string containing the question to be answered based on the provided image.

    Returns:
        str: The model-generated answer to the provided question based on the analysis of the image.
    """
    # OpenAI is already imported at module level; no need to re-import here.
    # The client reads OPENAI_API_KEY from the environment.
    client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": image_url,
                        # "low" detail keeps token usage (and cost) down.
                        "detail": "low"
                    },
                },
            ],
        }],
    )
    return response.choices[0].message.content
| |
|
| |
|
@tool
def transcribe_audio(audio_url: str) -> str:
    """
    Provides functionality to perform audio transcription.

    Args:
        audio_url (str): A URL pointing to the location of the audio to be analyzed.

    Returns:
        str: Audio transcription.

    Raises:
        requests.HTTPError: If the audio file cannot be downloaded.
    """
    # Download up front and fail fast on HTTP errors instead of sending a
    # broken payload to the transcription endpoint.
    resp = requests.get(audio_url, timeout=60)
    resp.raise_for_status()

    # The SDK expects a named file-like object (the name's extension drives
    # format detection); raw bytes from `.content` are rejected.
    audio_file = io.BytesIO(resp.content)
    audio_file.name = audio_url.split("?")[0].rsplit("/", 1)[-1] or "audio.mp3"

    client = OpenAI()
    transcription = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=audio_file,
        response_format="text",
    )
    # With response_format="text" the SDK returns a plain str, not an object
    # with a `.text` attribute — the original `r.text` raised AttributeError.
    return transcription if isinstance(transcription, str) else transcription.text
| |
|
| |
|
class GAIAAgent:
    """A CodeAgent wired with web-search, browsing, Wikipedia, Python-execution,
    and the custom media tools (visual QA, audio transcription) defined above."""

    def __init__(self):
        # Assemble the toolbox: stock smolagents tools plus the two
        # @tool-decorated helpers from this module.
        toolbox = [
            GoogleSearchTool(provider="serper"),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            PythonInterpreterTool(),
            FinalAnswerTool(),
            visual_qa,
            transcribe_audio,
        ]
        # temperature=0 keeps runs as deterministic as the backend allows.
        llm = OpenAIServerModel(model_id='gpt-4.1-mini', max_tokens=4096, temperature=0)
        self.agent = CodeAgent(
            tools=toolbox,
            model=llm,
            add_base_tools=False,
            max_steps=15,
            additional_authorized_imports=["pandas"],
        )
        # System prompt loaded once from prompt.txt at construction time.
        self.prompt = get_prompt()

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer string."""
        return self.agent.run(self.prompt, additional_args={"question": question})
| |
|
| |
|
if __name__ == '__main__':
    # Script entry point: constructing the agent exercises tool/model wiring
    # and loads prompt.txt; no question is actually run here.
    agent = GAIAAgent()
| |
|