diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index 397cca7..5e402fe 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -1,30 +1,55 @@
# Installation
-### Requirements
-- portaudio by running `sudo apt-get install portaudio19-dev `
-- [torch](https://pytorch.org/get-started/locally/)
-- [torchaudio](https://pytorch.org/get-started/locally/)
-
-
-### Model specific requirements
-- [llama-cpp-python](https://llama-cpp-python.readthedocs.io/en/latest/)
-Make sure to install it using the correct CMAKE flag(s).
-- [onnxruntime-gpu](https://onnxruntime.ai/docs/install/)
-
-
### pip installation
```shell
pip install openvoicechat
```
-### To install base and functionality specific packages
-```shell
-pip install openvoicechat[piper,openai,transformers]
-```
-
-similarly "piper" and "openai" can be replaced by any of the following install options:
+### Other Requirements
+- portaudio (e.g. `sudo apt-get install portaudio19-dev` on Debian/Ubuntu)
+- [torch](https://pytorch.org/get-started/locally/)
+- [torchaudio](https://pytorch.org/get-started/locally/)
-- piper ([link](https://github.com/rhasspy/piper)) (does not work on windows)
-- openai ([link](https://github.com/openai/openai-python))
-- xtts ([link](https://github.com/coqui-ai/TTS))
-- transformers ([link](https://github.com/huggingface/transformers))
\ No newline at end of file
+### Install model specific packages
+
+| Category | Model Name | Required Packages |
+|----------|----------------------|-------------------------|
+| TTS | [Piper](https://github.com/rhasspy/piper) | `pip install piper-tts piper-phonemize` |
+| TTS | [xtts - Coqui](https://github.com/coqui-ai/TTS) | `pip install TTS phonemizer` |
+| ALL | [transformers - HuggingFace](https://huggingface.co/docs/transformers/index) | `pip install transformers` |
+| LLM | [Ollama](https://ollama.com/) | `pip install ollama` |
+| LLM | [OpenAI](https://github.com/openai/openai-python) | `pip install openai` |
+
+
+Below you can select the required packages, and the `pip install` command will be generated automatically:
+
+
+
+
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index b67e68e..397a2c1 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -3,32 +3,41 @@
Talk to an apple sales agent.
```py
-import os
-from openvoicechat.tts.tts_elevenlabs import Mouth_elevenlabs
-from openvoicechat.llm.llm_gpt import Chatbot_gpt
+from openvoicechat.tts.tts_xtts import Mouth_xtts
+from openvoicechat.llm.llm_ollama import Chatbot_ollama
from openvoicechat.stt.stt_hf import Ear_hf
from openvoicechat.utils import run_chat
from openvoicechat.llm.prompts import llama_sales
+import torch
from dotenv import load_dotenv
+import os
if __name__ == "__main__":
- device = 'cuda'
-
- print('loading models... ', device)
-
+ if torch.backends.mps.is_available():
+ device = "mps"
+ elif torch.cuda.is_available():
+ device = "cuda"
+ else:
+ device = "cpu"
+
+ print("loading models... ", device)
load_dotenv()
- elevenlabs_api_key = os.getenv('ELEVENLABS_API_KEY')
- gpt_api_key = os.getenv('OPENAI_API_KEY')
-
- ear = Ear_hf(silence_seconds=2, device=device)
+ ear = Ear_hf(
+ model_id="openai/whisper-tiny.en",
+ silence_seconds=1.5,
+ device=device,
+ listen_interruptions=False,
+ )
+
+ chatbot = Chatbot_ollama(sys_prompt=llama_sales, model="qwen2:0.5b")
- chatbot = Chatbot_gpt(sys_prompt=llama_sales, api_key=gpt_api_key)
+ mouth = Mouth_xtts(device=device)
- mouth = Mouth_elevenlabs(api_key=elevenlabs_api_key)
+ run_chat(
+ mouth, ear, chatbot, verbose=True, stopping_criteria=lambda x: "[END]" in x
+ )
- run_chat(mouth, ear, chatbot, verbose=True,
- stopping_criteria=lambda x: '[END]' in x)
```
diff --git a/openvoicechat/stt/base.py b/openvoicechat/stt/base.py
index 8c426f0..12f665e 100644
--- a/openvoicechat/stt/base.py
+++ b/openvoicechat/stt/base.py
@@ -25,6 +25,17 @@ def __init__(
timing_path=TIMING_PATH,
listen_interruptions=True,
):
+ """
+ Initializes the BaseEar class.
+ Args:
+ silence_seconds (float, optional): Number of seconds of silence to detect. Defaults to 2.
+ not_interrupt_words (list, optional): List of words that should not be considered as interruptions. Defaults to None.
+ listener (object, optional): Listener object to receive the audio from. Defaults to None.
+ stream (bool, optional): Flag indicating whether to stream the audio or process it as a whole. Defaults to False.
+ timing_path (str, optional): Path to the timing file. Defaults to TIMING_PATH.
+ listen_interruptions (bool, optional): Flag indicating whether to listen for interruptions. Defaults to True.
+ """
+
if not_interrupt_words is None:
not_interrupt_words = [
"you",
diff --git a/openvoicechat/tts/base.py b/openvoicechat/tts/base.py
index 422894c..25d047b 100644
--- a/openvoicechat/tts/base.py
+++ b/openvoicechat/tts/base.py
@@ -29,6 +29,14 @@ def remove_words_in_brackets_and_spaces(text):
class BaseMouth:
def __init__(self, sample_rate: int, player=sd, timing_path=TIMING_PATH, wait=True):
+ """
+ Initializes the BaseMouth class.
+ Args:
+ sample_rate (int): The sample rate of the audio.
+ player (object, optional): The audio player object. Defaults to sounddevice.
+ timing_path (str, optional): The path to the timing file. Defaults to TIMING_PATH.
+ wait (bool, optional): Whether to wait for the audio to finish playing. Defaults to True.
+ """
self.sample_rate = sample_rate
self.interrupted = ""
self.player = player
diff --git a/setup.py b/setup.py
index 293f87f..13bc694 100644
--- a/setup.py
+++ b/setup.py
@@ -1,35 +1,35 @@
from setuptools import setup, find_packages
setup(
- author='Fakhir Ali',
- author_email='fakhir.ali@finityalpha.com',
- description='OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with '
- 'your LLM agent.',
- long_description='If you plan on making an LLM agent and want to have your users be able to talk to it like a '
- 'person (low latency, handles interruptions), this library is for you. It aims to be the '
- 'opensource, highly extensible and easy to use alternative to the proprietary solutions.',
- url='https://www.finityalpha.com/OpenVoiceChat/',
- name='openvoicechat',
- version='0.2.0',
+ author="Fakhir Ali",
+ author_email="fakhir.ali@finityalpha.com",
+ description="OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with "
+ "your LLM agent.",
+ long_description="If you plan on making an LLM agent and want to have your users be able to talk to it like a "
+ "person (low latency, handles interruptions), this library is for you. It aims to be the "
+ "opensource, highly extensible and easy to use alternative to the proprietary solutions.",
+ url="https://www.finityalpha.com/OpenVoiceChat/",
+ name="openvoicechat",
+ version="0.2.0",
packages=find_packages(),
install_requires=[
- 'sounddevice',
- 'pyaudio',
- 'librosa',
- 'pydub',
- 'python-dotenv',
- 'websockets',
- 'fastapi',
- 'pandas',
- 'pysbd'
+ "sounddevice",
+ "pyaudio",
+ "librosa",
+ "pydub",
+ "python-dotenv",
+ "websockets",
+ "fastapi",
+ "pandas",
+ "pysbd",
],
extras_require={
- 'transformers': ['transformers'],
- 'piper': ['piper-tts', 'piper-phonemize'],
- 'vosk': ['vosk'],
- 'openai': ['openai'],
- 'tortoise': ['tortoise-tts'],
- 'xtts': ['TTS', 'phonemizer'],
+ "transformers": ["transformers"],
+ "piper": ["piper-tts", "piper-phonemize"],
+ "vosk": ["vosk"],
+ "openai": ["openai"],
+ "tortoise": ["tortoise-tts"],
+ "xtts": ["TTS", "phonemizer"],
},
dependency_links=[],
)