Merge pull request #48 from Finity-Alpha/update_docs

Update docs slightly
Finity-Alpha · Sep 27, 2024 · ed062e5 · ed062e5
2 parents 115f9bf + f181e4b
commit ed062e5
Show file tree

Hide file tree

Showing 5 changed files with 115 additions and 62 deletions.
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
@@ -1,30 +1,55 @@
 # Installation
 
-### Requirements
-- portaudio by running `sudo apt-get install portaudio19-dev `
-- [torch](https://pytorch.org/get-started/locally/)
-- [torchaudio](https://pytorch.org/get-started/locally/)
-
-
-### Model specific requirements
-- [llama-cpp-python](https://llama-cpp-python.readthedocs.io/en/latest/)
-Make sure to install it using the correct CMAKE flag(s).
-- [onnxruntime-gpu](https://onnxruntime.ai/docs/install/)
-
-
 ### pip installation
 ```shell
 pip install openvoicechat
 ```
 
-### To install base and functionality specific packages
-```shell
-pip install openvoicechat[piper,openai,transformers]
-```
-
-similarly "piper" and "openai" can be replaced by any of the following install options:
+### Other Requirements
+- portaudio
+- [torch](https://pytorch.org/get-started/locally/)
+- [torchaudio](https://pytorch.org/get-started/locally/)
 
-- piper ([link](https://github.com/rhasspy/piper)) (does not work on windows)
-- openai ([link](https://github.com/openai/openai-python))
-- xtts ([link](https://github.com/coqui-ai/TTS))
-- transformers ([link](https://github.com/huggingface/transformers))
+### Install model specific packages
+
+| Category | Model Name           | Required Packages       |
+|----------|----------------------|-------------------------|
+| TTS      | [Piper](https://github.com/rhasspy/piper.git)                | `pip install piper-tts piper-phonemize`                 |
+| TTS      | [xtts - Coqui](https://github.com/coqui-ai/TTS)                 | `pip install TTS phonemizer`                  |
+| ALL      | [transformers - HuggingFace](https://huggingface.co/docs/transformers/index)     | `pip install transformers`          |
+| LLM      | [Ollama](https://ollama.com/)               | `pip install ollama`                |
+| LLM      | [OpenAI](https://github.com/openai/openai-python)               | `pip install openai`                |
+
+
+Below you can select the required packages, and the `pip install` command will be generated automatically:
+
+<div id="pip-install-generator">
+    <h2>Select Required Packages</h2>
+    <div class="package-selection">
+        <input type="checkbox" id="transformers" value="transformers" onchange="generateCommand()">
+        <label for="transformers">HuggingFace - transformers</label><br>
+        <input type="checkbox" id="ollama" value="ollama" onchange="generateCommand()">
+        <label for="ollama">Ollama</label><br>
+        <input type="checkbox" id="openai" value="openai" onchange="generateCommand()">
+        <label for="openai">OpenAI</label><br>
+        <input type="checkbox" id="piper" value="piper-tts piper-phonemize" onchange="generateCommand()">
+        <label for="piper">Piper-tts</label><br>
+        <input type="checkbox" id="xtts" value="TTS phonemizer" onchange="generateCommand()">
+        <label for="xtts">xtts</label><br>
+    </div>
+    <pre class="result"><code id="result">pip install &lt;package_name&gt;</code></pre>
+</div>
+
+<script>
+    // Rebuild the displayed `pip install` command from the ticked package boxes.
+    function generateCommand() {
+        // Query only the checkboxes inside the generator widget. A page-wide
+        // query also matches any checked checkbox elsewhere on the page, which
+        // previously had to be compensated for with a blind shift() that throws
+        // away a real selection whenever no such extra checkbox exists.
+        const checked = document.querySelectorAll(
+            '#pip-install-generator input[type="checkbox"]:checked'
+        );
+        // Each checkbox value holds one or more space-separated package names.
+        const selectedPackages = Array.from(checked, (checkbox) => checkbox.value);
+        // Fall back to the placeholder when nothing is selected.
+        const packages = selectedPackages.length > 0 ? selectedPackages.join(" ") : "<package_name>";
+        document.getElementById("result").innerText = "pip install " + packages;
+    }
+</script>
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
@@ -3,32 +3,41 @@
 Talk to an apple sales agent.
 
 ```py
-import os
-from openvoicechat.tts.tts_elevenlabs import Mouth_elevenlabs
-from openvoicechat.llm.llm_gpt import Chatbot_gpt
+from openvoicechat.tts.tts_xtts import Mouth_xtts
+from openvoicechat.llm.llm_ollama import Chatbot_ollama
 from openvoicechat.stt.stt_hf import Ear_hf
 from openvoicechat.utils import run_chat
 from openvoicechat.llm.prompts import llama_sales
+import torch
 from dotenv import load_dotenv
+import os
 
 
 if __name__ == "__main__":
-    device = 'cuda'
-
-    print('loading models... ', device)
-
+    if torch.backends.mps.is_available():
+        device = "mps"
+    elif torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+
+    print("loading models... ", device)
     load_dotenv()
-    elevenlabs_api_key = os.getenv('ELEVENLABS_API_KEY')
-    gpt_api_key = os.getenv('OPENAI_API_KEY')
-
-    ear = Ear_hf(silence_seconds=2, device=device)
+    ear = Ear_hf(
+        model_id="openai/whisper-tiny.en",
+        silence_seconds=1.5,
+        device=device,
+        listen_interruptions=False,
+    )
+
+    chatbot = Chatbot_ollama(sys_prompt=llama_sales, model="qwen2:0.5b")
 
-    chatbot = Chatbot_gpt(sys_prompt=llama_sales, api_key=gpt_api_key)
+    mouth = Mouth_xtts(device=device)
 
-    mouth = Mouth_elevenlabs(api_key=elevenlabs_api_key)
+    run_chat(
+        mouth, ear, chatbot, verbose=True, stopping_criteria=lambda x: "[END]" in x
+    )
 
-    run_chat(mouth, ear, chatbot, verbose=True,
-             stopping_criteria=lambda x: '[END]' in x)
 ```
 
 

diff --git a/openvoicechat/stt/base.py b/openvoicechat/stt/base.py
@@ -25,6 +25,17 @@ def __init__(
         timing_path=TIMING_PATH,
         listen_interruptions=True,
     ):
+        """
+        Initializes the BaseEar class.
+        Args:
+            silence_seconds (float, optional): Number of seconds of silence to detect. Defaults to 2.
+            not_interrupt_words (list, optional): List of words that should not be considered as interruptions.
+            listener (object, optional): Listener object to receive the audio from. Defaults to None.
+            stream (bool, optional): Flag indicating whether to stream the audio or process it as a whole. Defaults to False.
+            timing_path (str, optional): Path to the timing file. Defaults to TIMING_PATH.
+            listen_interruptions (bool, optional): Flag indicating whether to listen for interruptions. Defaults to True.
+        """
+
         if not_interrupt_words is None:
             not_interrupt_words = [
                 "you",

diff --git a/openvoicechat/tts/base.py b/openvoicechat/tts/base.py
@@ -29,6 +29,14 @@ def remove_words_in_brackets_and_spaces(text):
 
 class BaseMouth:
     def __init__(self, sample_rate: int, player=sd, timing_path=TIMING_PATH, wait=True):
+        """
+        Initializes the BaseMouth class.
+        Args:
+            sample_rate (int): The sample rate of the audio.
+            player (object, optional): The audio player object. Defaults to sounddevice.
+            timing_path (str, optional): The path to the timing file. Defaults to TIMING_PATH.
+            wait (bool, optional): Whether to wait for the audio to finish playing. Defaults to True.
+        """
         self.sample_rate = sample_rate
         self.interrupted = ""
         self.player = player

diff --git a/setup.py b/setup.py
@@ -1,35 +1,35 @@
 from setuptools import setup, find_packages
 
 setup(
-    author='Fakhir Ali',
-    author_email='[email protected]',
-    description='OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with '
-                'your LLM agent.',
-    long_description='If you plan on making an LLM agent and want to have your users be able to talk to it like a '
-                     'person (low latency, handles interruptions), this library is for you. It aims to be the '
-                     'opensource, highly extensible and easy to use alternative to the proprietary solutions.',
-    url='https://www.finityalpha.com/OpenVoiceChat/',
-    name='openvoicechat',
-    version='0.2.0',
+    author="Fakhir Ali",
+    author_email="[email protected]",
+    description="OpenVoiceChat is an opensource library that allows you to have a natural voice conversation with "
+    "your LLM agent.",
+    long_description="If you plan on making an LLM agent and want to have your users be able to talk to it like a "
+    "person (low latency, handles interruptions), this library is for you. It aims to be the "
+    "opensource, highly extensible and easy to use alternative to the proprietary solutions.",
+    url="https://www.finityalpha.com/OpenVoiceChat/",
+    name="openvoicechat",
+    version="0.2.0",
     packages=find_packages(),
     install_requires=[
-        'sounddevice',
-        'pyaudio',
-        'librosa',
-        'pydub',
-        'python-dotenv',
-        'websockets',
-        'fastapi',
-        'pandas',
-        'pysbd'
+        "sounddevice",
+        "pyaudio",
+        "librosa",
+        "pydub",
+        "python-dotenv",
+        "websockets",
+        "fastapi",
+        "pandas",
+        "pysbd",
     ],
     extras_require={
-        'transformers': ['transformers'],
-        'piper': ['piper-tts', 'piper-phonemize'],
-        'vosk': ['vosk'],
-        'openai': ['openai'],
-        'tortoise': ['tortoise-tts'],
-        'xtts': ['TTS', 'phonemizer'],
+        "transformers": ["transformers"],
+        "piper": ["piper-tts", "piper-phonemize"],
+        "vosk": ["vosk"],
+        "openai": ["openai"],
+        "tortoise": ["tortoise-tts"],
+        "xtts": ["TTS", "phonemizer"],
     },
     dependency_links=[],
 )