OpenAI provides APIs for its ChatGPT and Whisper models that enable developers to access cutting-edge language and speech-to-text capabilities.
This article shows how to use the ChatGPT and Whisper APIs from OpenAI, along with speech recognition and text-to-speech libraries, to build a voice-enabled chatbot. Installation instructions for the required ChatGPT, Whisper, and other libraries are provided here.
First, connect a USB microphone and a USB speaker to the Raspberry Pi.
The OpenAI library needs to be configured with your account's secret key, which is available on the OpenAI website. Assign the key to OPENAI_API_KEY and pass it to the library.
# Placeholder: replace with your account's secret key.
OPENAI_API_KEY = 'Your API Key Here'
# Hand the key to the openai module so its API calls can use it.
openai.api_key = OPENAI_API_KEY
Use the Recognizer class from the Speech Recognition library to recognize spoken words and phrases.
if __name__ == "__main__":
    # Build the speech recognizer first, then the microphone audio source.
    speech_recognizer = sr.Recognizer()
    mic = sr.Microphone()
    # Hand both to the bot, which runs the listening loop.
    voice_bot(microphone=mic, recognizer=speech_recognizer)
Speech Recognition's Microphone interface is used as the audio source: the recognizer listens on it and returns the captured audio, which can later be exported, e.g., in WAV format.
while True:
with microphone as source:
recognizer.adjust_for_ambient_noise(source)
print("Say something!")
audio = recognizer.listen(source)
try:
# convert audio to text using Whisper API
whisperresponse: str = getWhisperResponse(audio)
# check for wake up word
if "hello" in whisperresponse.lower():
Save the captured audio as a WAV file in a folder, e.g., /home/pi/Downloads.
def getWhisperResponse(audio):
    """Transcribe captured audio to text with the OpenAI Whisper API.

    The audio is written to a temporary WAV file, uploaded for transcription
    with the ``whisper-1`` model, and the file is deleted afterwards.

    Args:
        audio: Captured audio object exposing ``get_wav_data()``.

    Returns:
        The transcribed text.
    """
    wav_path = "/home/pi/Downloads/microphone.wav"
    with open(wav_path, "wb") as f:
        f.write(audio.get_wav_data())
    try:
        # The context manager closes the upload handle even if the API call fails.
        with open(wav_path, "rb") as wav_file:
            response = openai.Audio.transcribe(model="whisper-1", file=wav_file)
    finally:
        # Delete the same file that was written above.
        os.remove(wav_path)
    # Callers use the result as a string (e.g. ``.lower()``), so return only the text.
    return response["text"]
This file is then used as input to the Whisper API, which converts the audio into text.
# Convert the captured audio to text using the Whisper API; the result is
# treated as text by the code that follows.
whisperresponse: str = getWhisperResponse(audio)
Send a ChatGPT API request using a model, e.g., gpt-3.5-turbo, with the Whisper API's transcription as input, to get a response.
# The transcribed speech is the user's request, so send it with the "user" role
# rather than as a "system" instruction.
messages = [{"role": "user", "content": whisperresponse}]
chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
# The reply text is taken from the first returned choice.
chatgpt_response = chat.choices[0].message.content
Convert ChatGPT's response into speech using the text-to-speech library pyttsx3.
# Create the text-to-speech engine and set its speaking rate to 150.
tts_engine = pyttsx3.init()
tts_engine.setProperty('rate', 150)
# Query the engine's voices, then choose the voice by its identifier.
available_voices = tts_engine.getProperty('voices')
tts_engine.setProperty('voice', 'english+f4')
# Queue the ChatGPT reply and run the engine so it is spoken.
tts_engine.say(chatgpt_response)
tts_engine.runAndWait()
Here is the complete code:
#!/usr/bin/env python3
import os
import speech_recognition as sr
import requests
import pyttsx3
import openai
from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from langchain.chat_models import ChatOpenAI
import sys
def getWhisperResponse(audio):
    """Transcribe captured audio to text with the OpenAI Whisper API.

    The audio is written to a temporary WAV file, uploaded for transcription
    with the ``whisper-1`` model, and the file is deleted afterwards.

    Args:
        audio: Captured audio object exposing ``get_wav_data()``.

    Returns:
        The transcribed text.
    """
    wav_path = "/home/pi/Downloads/microphone.wav"
    with open(wav_path, "wb") as f:
        f.write(audio.get_wav_data())
    try:
        # The context manager closes the upload handle even if the API call fails.
        with open(wav_path, "rb") as wav_file:
            response = openai.Audio.transcribe(model="whisper-1", file=wav_file)
    finally:
        # Delete the same file that was written above.
        os.remove(wav_path)
    # Callers use the result as a string (e.g. ``.lower()``), so return only the text.
    return response["text"]
def voice_bot(microphone: sr.Microphone,recognizer: sr.Recognizer):
    """Run the voice chatbot loop: listen, transcribe, ask ChatGPT, speak.

    Each iteration records one utterance from the microphone and transcribes
    it with the Whisper API. If the transcript contains the wake word
    "hello", the bot speaks a greeting, sends the whole transcript to the
    ``gpt-3.5-turbo`` chat model and speaks the reply. The loop never
    returns.

    Args:
        microphone: Audio source to record from.
        recognizer: Recognizer used to calibrate for ambient noise and to
            capture the utterance.
    """
    OPENAI_API_KEY = 'Your API Key Here'
    openai.api_key = OPENAI_API_KEY

    # Instantiate the speaker, then set its rate and voice.
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    engine.setProperty('voice', 'english+f4')

    # Start a loop for input.
    while True:
        # Hold the microphone only while capturing the utterance.
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source)
            print("Say something!")
            audio = recognizer.listen(source)
        try:
            # Convert audio to text using the Whisper API.
            transcript = getWhisperResponse(audio)
            # Accept either plain text or the raw API response object, so the
            # string operations below always work on text.
            if not isinstance(transcript, str):
                transcript = transcript["text"]
            # Ignore anything that does not contain the wake-up word.
            if "hello" not in transcript.lower():
                continue
            engine.say("Hi, Welcome")
            engine.runAndWait()
            # The utterance is the user's request, so it is sent with the
            # "user" role rather than as a "system" instruction.
            messages = [{"role": "user", "content": transcript}]
            chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
            chatgpt_response = chat.choices[0].message.content
            engine.say(chatgpt_response)
            engine.runAndWait()
        except sr.UnknownValueError:
            print("Recognizer unknown error")
        except sr.RequestError as e:
            print(f"Request Error Speeck Recognizer {e}")
        except openai.error.OpenAIError as e:
            # A failed Whisper/ChatGPT request should not end the bot; report
            # it and wait for the next utterance.
            print(f"OpenAI API error: {e}")
if __name__ == "__main__":
    # Build the speech recognizer first, then the microphone audio source.
    speech_recognizer = sr.Recognizer()
    mic = sr.Microphone()
    # Hand both to the bot, which runs the listening loop.
    voice_bot(microphone=mic, recognizer=speech_recognizer)
Comments
Post a Comment