BaseAudioModel

class BaseAudioModel(ABC):

Base class for audio models providing Text-to-Speech (TTS) and Speech-to-Text (STT) functionality.

__init__

def __init__(
    self,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    timeout: Optional[float] = None
):

Initialize an instance of BaseAudioModel.

Parameters:

  • api_key (Optional[str]): API key for the audio service. If not provided, the implementation looks for an implementation-specific environment variable.
  • url (Optional[str]): Base URL for the audio API. If not provided, the implementation uses a default URL or looks for an implementation-specific environment variable.
  • timeout (Optional[float], optional): The timeout value in seconds for API calls. If not provided, the value falls back to the MODEL_TIMEOUT environment variable, or to 180 seconds if that variable is not set. (default: None)

text_to_speech

def text_to_speech(self, input: str, **kwargs: Any):

Convert text to speech.

Parameters:

  • input (str): The text to be converted to speech.
  • storage_path (str): The local path to store the generated speech file.
  • **kwargs (Any): Extra keyword arguments passed to the TTS API.

Returns:

Any: The response from the TTS API, which may vary by implementation.

speech_to_text

def speech_to_text(self, audio_file_path: str, **kwargs: Any):

Convert speech audio to text.

Parameters:

  • audio_file_path (str): The audio file path to transcribe.
  • **kwargs (Any): Extra keyword arguments passed to the Speech-to-Text (STT) API.

Returns:

str: The transcribed text.

_ensure_directory_exists

def _ensure_directory_exists(self, file_path: str):

Ensure the directory for the given file path exists.

Parameters:

  • file_path (str): The file path for which to ensure the directory exists.