Spaces:

DesertWolf
/

test3

Paused

App Files Files Community

test3 / tests /llm_translation /test_gpt4o_audio.py

DesertWolf

Upload folder using huggingface_hub

447ebeb verified 6 months ago

raw

history blame contribute delete

4.69 kB

	import json
	import os
	import sys
	from datetime import datetime
	from unittest.mock import AsyncMock

	# Prepend the directory two levels above the current working directory to
	# the import search path.
	sys.path.insert(
	0, os.path.abspath("../..")
	) # "../.." is resolved against the working directory, not this file's location


	import httpx
	import pytest
	from respx import MockRouter

	import litellm
	from litellm import Choices, Message, ModelResponse
	from litellm.types.utils import StreamingChoices, ChatCompletionAudioResponse
	import base64
	import requests


	def check_non_streaming_response(completion):
	    """Assert that a non-streaming completion carries usable audio.

	    Looks at ``message.audio`` on the first choice and checks, in order,
	    that it is present, that it is a ``ChatCompletionAudioResponse``, and
	    that its ``data`` attribute is non-empty.

	    Raises:
	        AssertionError: if any of the three checks fails.
	    """
	    audio = completion.choices[0].message.audio

	    assert audio is not None, "Audio response is missing"
	    assert isinstance(audio, ChatCompletionAudioResponse), "Invalid audio response type"
	    assert len(audio.data) > 0, "Audio data is empty"


	async def check_streaming_response(completion):
	    """Drain a streamed completion and assert that audio fields showed up.

	    Every chunk is printed. For each chunk whose first choice has a
	    non-None ``delta.audio``, the latest non-None ``data``, ``transcript``
	    and ``id`` entries are remembered.

	    Raises:
	        AssertionError: if, once the stream is exhausted, any of the three
	            fields was never seen.
	    """
	    latest = {"data": None, "transcript": None, "id": None}

	    async for chunk in completion:
	        print(chunk)
	        audio_delta = chunk.choices[0].delta.audio
	        if audio_delta is None:
	            continue
	        for field in latest:
	            value = audio_delta.get(field)
	            if value is not None:
	                latest[field] = value

	    # At least one chunk must have supplied each of the three fields.
	    assert latest["data"] is not None
	    assert latest["transcript"] is not None
	    assert latest["id"] is not None


	@pytest.mark.asyncio
	# @pytest.mark.flaky(retries=3, delay=1)
	@pytest.mark.parametrize("stream", [True, False])
	async def test_audio_output_from_model(stream):
	    """Request text+audio output from ``gpt-4o-audio-preview`` and validate it.

	    Runs once with ``stream=True`` and once with ``stream=False``. The
	    requested audio format is ``pcm16`` when streaming and ``wav`` otherwise.
	    The streamed result is checked with ``check_streaming_response``; the
	    non-streamed result is checked with ``check_non_streaming_response`` and
	    its decoded audio is written to ``dog.wav`` in the working directory.

	    The test is skipped on a litellm timeout or when the error text
	    contains "openai-internal"; any other exception fails the test.
	    """
	    audio_format = "pcm16" if stream else "wav"
	    litellm.set_verbose = False
	    try:
	        completion = await litellm.acompletion(
	            model="gpt-4o-audio-preview",
	            modalities=["text", "audio"],
	            audio={"voice": "alloy", "format": audio_format},
	            messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
	            stream=stream,
	        )
	    except litellm.Timeout as e:
	        print(e)
	        pytest.skip("Skipping test due to timeout")
	    except Exception as e:
	        if "openai-internal" in str(e):
	            pytest.skip("Skipping test due to openai-internal error")
	        # Anything else is a real failure; swallowing it would leave
	        # `completion` unbound and surface as a misleading NameError below.
	        raise

	    if stream is True:
	        await check_streaming_response(completion)
	    else:
	        print("response= ", completion)
	        check_non_streaming_response(completion)
	        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
	        with open("dog.wav", "wb") as f:
	            f.write(wav_bytes)


	@pytest.mark.asyncio
	@pytest.mark.parametrize("stream", [True, False])
	@pytest.mark.parametrize("model", ["gpt-4o-audio-preview"])
	async def test_audio_input_to_model(stream, model):
	    """Send a WAV recording to an audio model and validate the audio reply.

	    Downloads a sample WAV file, base64-encodes it, and sends it as an
	    ``input_audio`` content part alongside a text question. The requested
	    output format is ``pcm16`` when streaming and ``wav`` otherwise. The
	    streamed result is checked with ``check_streaming_response``; the
	    non-streamed result is checked with ``check_non_streaming_response`` and
	    its decoded audio is written to ``dog.wav`` in the working directory.

	    The test is skipped on a litellm timeout or when the error text
	    contains "openai-internal"; any other exception fails the test.
	    """
	    audio_format = "pcm16" if stream else "wav"
	    litellm._turn_on_debug()
	    litellm.drop_params = True

	    # Fetch the audio file and convert it to a base64 encoded string.
	    url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
	    # Bound the download so a stalled connection fails the test instead of
	    # hanging the whole run.
	    response = requests.get(url, timeout=30)
	    response.raise_for_status()
	    wav_data = response.content
	    encoded_string = base64.b64encode(wav_data).decode("utf-8")

	    try:
	        completion = await litellm.acompletion(
	            model=model,
	            modalities=["text", "audio"],
	            audio={"voice": "alloy", "format": audio_format},
	            stream=stream,
	            messages=[
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": "What is in this recording?"},
	                        {
	                            "type": "input_audio",
	                            "input_audio": {"data": encoded_string, "format": "wav"},
	                        },
	                    ],
	                },
	            ],
	        )
	    except litellm.Timeout as e:
	        print(e)
	        pytest.skip("Skipping test due to timeout")
	    except Exception as e:
	        if "openai-internal" in str(e):
	            pytest.skip("Skipping test due to openai-internal error")
	        # Bare raise keeps the original traceback intact.
	        raise

	    if stream is True:
	        await check_streaming_response(completion)
	    else:
	        print("response= ", completion)

	        check_non_streaming_response(completion)
	        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
	        with open("dog.wav", "wb") as f:
	            f.write(wav_bytes)