ChatGPT Voice Assistant on Raspberry Pi using custom data, ChatGPT, Whisper API, SpeechRecognition and pyttsx3
A Raspberry Pi can be used to build a voice assistant with ChatGPT and the Whisper API, as shown here. This article shows how to customize that voice assistant application with your own data, e.g., PDF documents, financial data, etc.
To train and create an AI chatbot based on a custom knowledge base, we first need an API key from OpenAI. The key lets you use OpenAI's models as the LLM that studies your custom data and draws inferences.
The OpenAI library must be configured with your account's secret key, which is available on the OpenAI website. Set it in the OPENAI_API_KEY environment variable:
os.environ["OPENAI_API_KEY"] = 'YOUR API KEY'
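Hardcoding the key works for a quick test, but a safer pattern is to export the key in the shell before launching the script and read it at startup. A minimal sketch, assuming OPENAI_API_KEY has already been exported in the environment:
import os
import sys
import openai

# read the key from the environment instead of hardcoding it in the script
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    sys.exit("OPENAI_API_KEY is not set; export it before running the assistant")
openai.api_key = api_key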
Copy your custom data documents to a specific directory on the Raspberry Pi, e.g., /home/pi/Documents.
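Before building the index, it is worth a quick sanity check that the directory actually contains the files you expect. A small sketch using the example path above:
import os

doc_dir = "/home/pi/Documents"  # the example directory used throughout this article
files = os.listdir(doc_dir)
print(f"Found {len(files)} document(s) in {doc_dir}: {files}")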
Use the Recognizer class from the SpeechRecognition library to recognize spoken words and phrases.
if __name__ == "__main__":
    # create a recognizer and a microphone instance
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    # start the bot
    voice_bot(microphone, recognizer)
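On a Raspberry Pi the USB microphone is often not the default audio device, so sr.Microphone() may pick the wrong input. The SpeechRecognition library can list the available devices so you can select one explicitly; a short sketch (the device_index value is just an example for your setup):
import speech_recognition as sr

# print every audio device SpeechRecognition can see
for index, name in enumerate(sr.Microphone.list_microphone_names()):
    print(f"{index}: {name}")

# pick the USB microphone explicitly by its index
microphone = sr.Microphone(device_index=1)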
The recognizer's listen() call returns the captured audio as a SpeechRecognition AudioData object, which can be exported in WAV format.
while True:
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)
        print("Say something!")
        audio = recognizer.listen(source)
    try:
        # convert audio to text using the Whisper API
        whisperresponse: str = getWhisperResponse(audio)
        # check for the wake-up word
        if "hello" in whisperresponse.lower():
Save the resulting audio file in a folder, e.g., /home/pi/Downloads.
def getWhisperResponse(audio):
    # save the captured audio as a WAV file
    with open("/home/pi/Downloads/microphone.wav", "wb") as f:
        f.write(audio.get_wav_data())
    # send the WAV file to the Whisper API for transcription
    with open("/home/pi/Downloads/microphone.wav", "rb") as file:
        response = openai.Audio.transcribe(model="whisper-1", file=file)
    # clean up the temporary file and return the transcribed text
    os.remove("/home/pi/Downloads/microphone.wav")
    return response.text
This WAV file is used as input to the Whisper API, which converts the audio into a text response:
# convert audio to text using Whisper API
whisperresponse: str = getWhisperResponse(audio)
LlamaIndex converts your document data into a vectorized index for efficient querying. This index is then used to find the most relevant context for a given query.
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
The context retrieved from LlamaIndex is sent to GPT along with the question (the Whisper API transcription) in the prompt, so GPT has the context it needs to provide a response.
documents = SimpleDirectoryReader("/home/pi/Documents").load_data()
index = GPTSimpleVectorIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
index.save_to_disk('index.json')
query_engine = GPTSimpleVectorIndex.load_from_disk('index.json')
chatgpt_response = query_engine.query(whisperresponse, response_mode="compact")
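Since save_to_disk persists the index as index.json, the index does not have to be rebuilt from the documents on every run. A sketch of reusing a previously persisted index, assuming the prompt_helper and llm_predictor defined above are in scope and that index.json was created by an earlier run:
import os

if os.path.exists('index.json'):
    # reuse the index persisted by an earlier run
    query_engine = GPTSimpleVectorIndex.load_from_disk('index.json')
else:
    # build the index from the custom documents and persist it
    documents = SimpleDirectoryReader("/home/pi/Documents").load_data()
    query_engine = GPTSimpleVectorIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    query_engine.save_to_disk('index.json')
chatgpt_response = query_engine.query(whisperresponse, response_mode="compact")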
Here is the complete code:
import os
import speech_recognition as sr
import pyttsx3
import openai
from gpt_index import SimpleDirectoryReader, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from langchain.chat_models import ChatOpenAI

os.environ["OPENAI_API_KEY"] = 'YOUR API KEY'

def getWhisperResponse(audio):
    # save the captured audio as a WAV file
    with open("/home/pi/Downloads/microphone.wav", "wb") as f:
        f.write(audio.get_wav_data())
    # send the WAV file to the Whisper API for transcription
    with open("/home/pi/Downloads/microphone.wav", "rb") as file:
        response = openai.Audio.transcribe(model="whisper-1", file=file)
    # clean up the temporary file and return the transcribed text
    os.remove("/home/pi/Downloads/microphone.wav")
    return response.text

def voice_bot(microphone: sr.Microphone, recognizer: sr.Recognizer):
    openai.api_key = os.environ["OPENAI_API_KEY"]
    # instantiate the speaker and set the speaking rate
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    # select a voice (espeak English female variant)
    engine.setProperty('voice', 'english+f4')
    # start a loop for input
    while True:
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            print("Say something!")
            audio = recognizer.listen(source)
        try:
            # convert audio to text using the Whisper API
            whisperresponse: str = getWhisperResponse(audio)
            # check for the wake-up word
            if "hello" in whisperresponse.lower():
                # greet the user
                engine.say("Hi, Welcome")
                # indexing parameters
                max_input_size = 4096
                num_outputs = 256
                max_chunk_overlap = 20
                chunk_size_limit = 600
                prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
                llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
                # build the index from the custom documents and persist it
                documents = SimpleDirectoryReader("/home/pi/Documents").load_data()
                index = GPTSimpleVectorIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
                index.save_to_disk('index.json')
                query_engine = GPTSimpleVectorIndex.load_from_disk('index.json')
                # query the index with the transcribed question
                chatgpt_response = query_engine.query(whisperresponse, response_mode="compact")
                # speak the response
                engine.say(str(chatgpt_response))
                engine.runAndWait()
        except sr.UnknownValueError:
            print("Recognizer unknown error")
        except sr.RequestError as e:
            print(f"Request Error Speech Recognizer {e}")

if __name__ == "__main__":
    # create a recognizer and a microphone instance
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    # start the bot
    voice_bot(microphone, recognizer)
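One caveat about the code above: it rebuilds the vector index from the documents on every wake-word hit, which is slow on a Raspberry Pi. Moving the PromptHelper, LLMPredictor and index construction above the while loop, or loading the persisted index.json as sketched earlier, makes the assistant respond much faster after the first question. To try it out, save the script (as, say, voice_assistant.py, a name chosen here for illustration), run it with python3, and say "hello" followed by your question.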