Skip to content

Commit

Permalink
Merge pull request #58 from Finity-Alpha/new_logging
Browse files Browse the repository at this point in the history
New logging
  • Loading branch information
fakhirali authored Nov 8, 2024
2 parents e530d9c + a18b582 commit be0b1c6
Show file tree
Hide file tree
Showing 16 changed files with 168 additions and 20 deletions.
Binary file removed media/abs.wav
Binary file not shown.
Binary file removed media/my_voice.wav
Binary file not shown.
11 changes: 10 additions & 1 deletion openvoicechat/llm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@


class BaseChatbot:
def __init__(self):
def __init__(self, logger=None):
"""
Initialize the model and other things here
"""
self.logger = logger

def run(self, input_text: str):
"""
Expand All @@ -32,6 +33,10 @@ def generate_response(self, input_text: str) -> str:
response = self.post_process(response_text)
return response

def _log_event(self, event: str, details: str, further: str):
if self.logger:
self.logger.info(event, extra={"details": details, "further": further})

def generate_response_stream(
self, input_text: str, output_queue: queue.Queue, interrupt_queue: queue.Queue
) -> str:
Expand All @@ -41,13 +46,17 @@ def generate_response_stream(
:param interrupt_queue: The interrupt queue which stores the transcription if interruption occurred. Used to stop generating.
:return: The chatbot's response after running self.post_process
"""
self._log_event("llm request sent", "LLM", "")
out = self.run(input_text)
response_text = ""
for text in out:
if not interrupt_queue.empty():
self._log_event("interruption detected", "LLM", "")
break
self._log_event("llm token received", "LLM", f'"{text}"')
output_queue.put(text)
response_text += text
output_queue.put(None)
self._log_event("llm post processing", "LLM", "")
response = self.post_process(response_text)
return response
2 changes: 2 additions & 0 deletions openvoicechat/llm/llm_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def __init__(
tool_choice=NOT_GIVEN,
tool_utterances=None,
functions=None,
logger=None,
):
super().__init__(logger=logger)

if tools is None:
tools = NOT_GIVEN
Expand Down
4 changes: 3 additions & 1 deletion openvoicechat/llm/llm_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@


class Chatbot_ollama(BaseChatbot):
def __init__(self, sys_prompt="", model="llama3"):
def __init__(self, sys_prompt="", model="llama3", logger=None):
super().__init__(logger=logger)

import ollama

ollama.pull(model)
Expand Down
52 changes: 52 additions & 0 deletions openvoicechat/logging_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import logging
from datetime import datetime
import os


def make_logger(
    log_dir: str = "logs", log_name: str = "ovc", console_log: bool = False
):
    """Create (or reconfigure) a named logger that writes comma-separated rows.

    Each row has the columns
    ``asctime,name,levelname,message,details,further``. ``details`` and
    ``further`` are normally supplied through ``extra={...}`` on the logging
    call; when a caller omits them they are written as empty columns.

    :param log_dir: Directory for the log file; created if it does not exist.
    :param log_name: Logger name, also used as the log file name prefix.
    :param console_log: When True, rows are additionally written to the
        console through a ``logging.StreamHandler``.
    :return: The configured ``logging.Logger``.
    """
    os.makedirs(log_dir, exist_ok=True)

    # One log file per call, named after the moment the logger was created.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    logger = logging.getLogger(log_name)
    logger.setLevel(logging.INFO)

    # logging.getLogger returns the same object for the same name, so calling
    # this function twice would otherwise stack handlers and write every row
    # twice. Only handlers installed by an earlier make_logger call are
    # dropped; handlers attached by anyone else are left alone.
    for stale in [
        h for h in logger.handlers if getattr(h, "_from_make_logger", False)
    ]:
        logger.removeHandler(stale)
        stale.close()

    def fill_missing_fields(record):
        # The row format references %(details)s and %(further)s; a record
        # logged without those extras would fail to format and be lost.
        if not hasattr(record, "details"):
            record.details = ""
        if not hasattr(record, "further"):
            record.further = ""
        return True

    row_format = (
        "%(asctime)s,%(name)s,%(levelname)s,%(message)s,%(details)s,%(further)s"
    )

    # File handler
    fh = logging.FileHandler(os.path.join(log_dir, f"{log_name}_{timestamp}.log"))
    fh.setLevel(logging.INFO)
    # Time format: YYYY-MM-DD HH:MM:SS.mmm (e.g. 2024-11-07 04:08:55.383).
    # Overriding default_msec_format puts the milliseconds after a period
    # instead of the default comma, which would split the timestamp into two
    # columns of the comma-separated row.
    file_formatter = logging.Formatter(row_format)
    file_formatter.default_msec_format = "%s.%03d"
    fh.setFormatter(file_formatter)
    fh.addFilter(fill_missing_fields)
    fh._from_make_logger = True
    logger.addHandler(fh)

    if console_log:
        # Console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        # NOTE(review): the console keeps logging's default millisecond
        # separator (a comma), exactly as before; confirm whether it should
        # share the file handler's period-separated format.
        ch.setFormatter(logging.Formatter(row_format))
        ch.addFilter(fill_missing_fields)
        ch._from_make_logger = True
        logger.addHandler(ch)

    return logger


# Manual smoke test: when this module is run as a script, build a logger that
# also writes to the console and emit one sample row.
if __name__ == "__main__":
    sample_fields = {"details": "hi", "further": "more"}
    logger = make_logger(console_log=True)
    logger.info("message", extra=sample_fields)
37 changes: 34 additions & 3 deletions openvoicechat/stt/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(
listener=None,
stream=False,
listen_interruptions=True,
logger=None,
):
"""
Initializes the BaseEar class.
Expand Down Expand Up @@ -47,6 +48,7 @@ def __init__(
self.listener = listener
self.stream = stream
self.listen_interruptions = listen_interruptions
self.logger = logger

def transcribe(self, input_audio: np.ndarray) -> str:
"""
Expand Down Expand Up @@ -88,29 +90,53 @@ def _sim_transcribe_stream(self, input_audio: np.ndarray) -> str:
text += _ + " "
return text

def _log_event(self, event: str, details: str, further: str = ""):
if self.logger:
self.logger.info(
event, extra={"details": details, "further": f'"{further}"'}
)

def _listen(self) -> str:
"""
records audio using record_user and returns its transcription
:return: transcription
"""
import pysbd

seg = pysbd.Segmenter(language="en", clean=False)

sentence_finished = False
first = True
audio = np.zeros(0, dtype=np.float32)
n = 2 # number of times to see if the sentence ends
while not sentence_finished and n > 0:

new_audio = record_user(
self.silence_seconds, self.vad, self.listener, started=not first
self.silence_seconds,
self.vad,
self.listener,
started=not first,
logger=self.logger,
)

audio = np.concatenate((audio, new_audio), 0)

self._log_event("transcribing", "STT")
text = self.transcribe(audio)
self._log_event("transcribed", "STT", text)

self._log_event("segmenting", "STT", text)
first = False
seg = pysbd.Segmenter(language="en", clean=False)
if len(seg.segment(text + " .")) > 1:
sentence_finished = True
self._log_event("sentence boundary detected", "STT", text)
else:
n -= 1
self._log_event(
"no sentence boundary detected",
"STT",
text + ". tries left: " + str(n),
)
return text

def _listen_stream(self) -> str:
Expand Down Expand Up @@ -166,22 +192,27 @@ def interrupt_listen(self, record_seconds=100) -> str:
return False
while record_seconds > 0:
interruption_audio = record_interruption(
self.vad, record_seconds, streamer=self.listener
self.vad, record_seconds, streamer=self.listener, logger=self.logger
)
# duration of interruption audio
if interruption_audio is None:
return ""
else:
duration = len(interruption_audio) / 16_000
self._log_event(
"transcribing interruption", "STT", f"{duration} seconds"
)
if self.stream:
text = self._sim_transcribe_stream(interruption_audio)
else:
text = self.transcribe(interruption_audio)
self._log_event("interruption transcribed", "STT", text)
# remove any punctuation using re
text = re.sub(r"[^\w\s]", "", text)
text = text.lower()
text = text.strip()
if text in self.not_interrupt_words:
self._log_event("not interruption", "STT", text)
record_seconds -= duration
else:
return text
4 changes: 2 additions & 2 deletions openvoicechat/stt/stt_deepgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@


class Ear_deepgram(BaseEar):
def __init__(self, silence_seconds=2, api_key="", listener=None):
super().__init__(silence_seconds, stream=True, listener=listener)
def __init__(self, silence_seconds=2, api_key="", listener=None, logger=None):
super().__init__(silence_seconds, stream=True, listener=listener, logger=logger)
self.api_key = api_key

def transcribe_stream(self, audio_queue, transcription_queue):
Expand Down
2 changes: 2 additions & 0 deletions openvoicechat/stt/stt_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ def __init__(
generate_kwargs=None,
listener=None,
listen_interruptions=True,
logger=None,
):
super().__init__(
silence_seconds,
listener=listener,
listen_interruptions=listen_interruptions,
logger=logger,
)
from transformers import pipeline

Expand Down
32 changes: 25 additions & 7 deletions openvoicechat/stt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,15 @@ def record_interruption_parallel(vad, listen_queue):
return None


def record_interruption(vad, record_seconds=100, streamer=None):
print("* recording for interruption")
def record_interruption(vad, record_seconds=100, streamer=None, logger=None):
if logger:
logger.info(
"recording for interruption",
extra={
"details": "record_interruption",
"further": f"{record_seconds} seconds",
},
)
frames = []
if streamer is None:
stream = make_stream()
Expand All @@ -62,7 +69,7 @@ def record_interruption(vad, record_seconds=100, streamer=None):
return None


def record_user(silence_seconds, vad, streamer=None, started=False):
def record_user(silence_seconds, vad, streamer=None, started=False, logger=None):
frames = []

if streamer is None:
Expand All @@ -74,7 +81,11 @@ def record_user(silence_seconds, vad, streamer=None, started=False):
CHUNK = streamer.CHUNK
RATE = streamer.RATE
one_second_iters = int(RATE / CHUNK)
print("* recording")
if logger:
logger.info(
"user recording started",
extra={"details": "record_user", "further": f"{silence_seconds} seconds"},
)

while True:
data = stream.read(CHUNK)
Expand All @@ -87,12 +98,19 @@ def record_user(silence_seconds, vad, streamer=None, started=False):
)
if not started and contains_speech:
started = True
print("*listening to speech*")
if logger:
logger.info(
"speech detected",
extra={"details": "record_user", "further": ""},
)
if started and contains_speech is False:
break
stream.close()

print("* done recording")
if logger:
logger.info(
"user recording ended",
extra={"details": "record_user", "further": ""},
)

# creating a np array from buffer
frames = np.frombuffer(b"".join(frames), dtype=np.int16)
Expand Down
Loading

0 comments on commit be0b1c6

Please sign in to comment.