diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 397cca7..5e402fe 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -1,30 +1,55 @@ # Installation -### Requirements -- portaudio by running `sudo apt-get install portaudio19-dev ` -- [torch](https://pytorch.org/get-started/locally/) -- [torchaudio](https://pytorch.org/get-started/locally/) - - -### Model specific requirements -- [llama-cpp-python](https://llama-cpp-python.readthedocs.io/en/latest/) -Make sure to install it using the correct CMAKE flag(s). -- [onnxruntime-gpu](https://onnxruntime.ai/docs/install/) - - ### pip installation ```shell pip install openvoicechat ``` -### To install base and functionality specific packages -```shell -pip install openvoicechat[piper,openai,transformers] -``` - -similarly "piper" and "openai" can be replaced by any of the following install options: +### Other Requirements +- portaudio +- [torch](https://pytorch.org/get-started/locally/) +- [torchaudio](https://pytorch.org/get-started/locally/) -- piper ([link](https://github.com/rhasspy/piper)) (does not work on windows) -- openai ([link](https://github.com/openai/openai-python)) -- xtts ([link](https://github.com/coqui-ai/TTS)) -- transformers ([link](https://github.com/huggingface/transformers)) \ No newline at end of file +### Install model specific packages + +| Category | Model Name | Required Packages | +|----------|----------------------|-------------------------| +| TTS | [Piper](https://github.com/rhasspy/piper.git) | ```pip install piper-tts piper-phonemize``` | +| TTS | [xtts - Coqui](https://github.com/coqui-ai/TTS) | `pip install TTS phonemizer` | +| ALL | [transformers - HuggingFace](https://huggingface.co/docs/transformers/index) | `pip install transformers` | +| LLM | [Ollama](https://ollama.com/) | `pip install ollama` | +| LLM | [OpenAI](https://github.com/openai/openai-python) | `pip install openai` | + + +Below you 
can select the required packages, and the `pip install` command will be generated automatically: + +
+

Select Required Packages

+
+ +
+ +
+ +
+ +
+ +
+
+
pip install 
+
+ + diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index b67e68e..397a2c1 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -3,32 +3,41 @@ Talk to an apple sales agent. ```py -import os -from openvoicechat.tts.tts_elevenlabs import Mouth_elevenlabs -from openvoicechat.llm.llm_gpt import Chatbot_gpt +from openvoicechat.tts.tts_xtts import Mouth_xtts +from openvoicechat.llm.llm_ollama import Chatbot_ollama from openvoicechat.stt.stt_hf import Ear_hf from openvoicechat.utils import run_chat from openvoicechat.llm.prompts import llama_sales +import torch from dotenv import load_dotenv +import os if __name__ == "__main__": - device = 'cuda' - - print('loading models... ', device) - + if torch.backends.mps.is_available(): + device = "mps" + elif torch.cuda.is_available(): + device = "cuda" + else: + device = "cpu" + + print("loading models... ", device) load_dotenv() - elevenlabs_api_key = os.getenv('ELEVENLABS_API_KEY') - gpt_api_key = os.getenv('OPENAI_API_KEY') - - ear = Ear_hf(silence_seconds=2, device=device) + ear = Ear_hf( + model_id="openai/whisper-tiny.en", + silence_seconds=1.5, + device=device, + listen_interruptions=False, + ) + + chatbot = Chatbot_ollama(sys_prompt=llama_sales, model="qwen2:0.5b") - chatbot = Chatbot_gpt(sys_prompt=llama_sales, api_key=gpt_api_key) + mouth = Mouth_xtts(device=device) - mouth = Mouth_elevenlabs(api_key=elevenlabs_api_key) + run_chat( + mouth, ear, chatbot, verbose=True, stopping_criteria=lambda x: "[END]" in x + ) - run_chat(mouth, ear, chatbot, verbose=True, - stopping_criteria=lambda x: '[END]' in x) ``` diff --git a/openvoicechat/stt/base.py b/openvoicechat/stt/base.py index 8c426f0..12f665e 100644 --- a/openvoicechat/stt/base.py +++ b/openvoicechat/stt/base.py @@ -25,6 +25,17 @@ def __init__( timing_path=TIMING_PATH, listen_interruptions=True, ): + """ + Initializes the BaseEar class. 
+ Args: + silence_seconds (float, optional): Number of seconds of silence to detect. Defaults to 2. + not_interrupt_words (list, optional): List of words that should not be considered as interruptions. + listener (object, optional): Listener object to receive the audio from. Defaults to None. + stream (bool, optional): Flag indicating whether to stream the audio or process it as a whole. Defaults to False. + timing_path (str, optional): Path to the timing file. Defaults to TIMING_PATH. + listen_interruptions (bool, optional): Flag indicating whether to listen for interruptions. Defaults to True. + """ + if not_interrupt_words is None: not_interrupt_words = [ "you", diff --git a/openvoicechat/tts/base.py b/openvoicechat/tts/base.py index 422894c..25d047b 100644 --- a/openvoicechat/tts/base.py +++ b/openvoicechat/tts/base.py @@ -29,6 +29,14 @@ def remove_words_in_brackets_and_spaces(text): class BaseMouth: def __init__(self, sample_rate: int, player=sd, timing_path=TIMING_PATH, wait=True): + """ + Initializes the BaseMouth class. + Args: + sample_rate (int): The sample rate of the audio. + player (object, optional): The audio player object. Defaults to sounddevice. + timing_path (str, optional): The path to the timing file. Defaults to TIMING_PATH. + wait (bool, optional): Whether to wait for the audio to finish playing. Defaults to True. + """ self.sample_rate = sample_rate self.interrupted = "" self.player = player diff --git a/setup.py b/setup.py index 293f87f..13bc694 100644 --- a/setup.py +++ b/setup.py @@ -1,35 +1,35 @@ from setuptools import setup, find_packages setup( - author='Fakhir Ali', - author_email='fakhir.ali@finityalpha.com', - description='OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with ' - 'your LLM agent.', - long_description='If you plan on making an LLM agent and want to have your users be able to talk to it like a ' - 'person (low latency, handles interruptions), this library is for you. 
It aims to be the ' - 'opensource, highly extensible and easy to use alternative to the proprietary solutions.', - url='https://www.finityalpha.com/OpenVoiceChat/', - name='openvoicechat', - version='0.2.0', + author="Fakhir Ali", + author_email="fakhir.ali@finityalpha.com", + description="OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with " + "your LLM agent.", + long_description="If you plan on making an LLM agent and want to have your users be able to talk to it like a " + "person (low latency, handles interruptions), this library is for you. It aims to be the " + "opensource, highly extensible and easy to use alternative to the proprietary solutions.", + url="https://www.finityalpha.com/OpenVoiceChat/", + name="openvoicechat", + version="0.2.0", packages=find_packages(), install_requires=[ - 'sounddevice', - 'pyaudio', - 'librosa', - 'pydub', - 'python-dotenv', - 'websockets', - 'fastapi', - 'pandas', - 'pysbd' + "sounddevice", + "pyaudio", + "librosa", + "pydub", + "python-dotenv", + "websockets", + "fastapi", + "pandas", + "pysbd", ], extras_require={ - 'transformers': ['transformers'], - 'piper': ['piper-tts', 'piper-phonemize'], - 'vosk': ['vosk'], - 'openai': ['openai'], - 'tortoise': ['tortoise-tts'], - 'xtts': ['TTS', 'phonemizer'], + "transformers": ["transformers"], + "piper": ["piper-tts", "piper-phonemize"], + "vosk": ["vosk"], + "openai": ["openai"], + "tortoise": ["tortoise-tts"], + "xtts": ["TTS", "phonemizer"], }, dependency_links=[], )