1818############################################################################
1919
2020import os
21- import time
22- from sys import byteorder
2321from array import array
24- from struct import pack
22+ import time
2523import logging
26-
27- import pyaudio
2824import wave
2925import audioop
30- import logging
26+ import pyaudio
3127
3228try :
3329 from pocketsphinx .pocketsphinx import Decoder
3430 from sphinxbase .sphinxbase import *
35- except :
31+ except Exception :
3632 logging .info ("pocketsphinx not available" )
3733
38- ## GOOGLE Speech API ##
39- # [START import_libraries]
40- #from __future__ import division
41-
42- import re
43- import sys
44-
45- #from google.cloud import speech
46- #from google.cloud.speech import enums
47- #from google.cloud.speech import types
48- import pyaudio
4934from six .moves import queue
5035# [END import_libraries]
5136
@@ -76,7 +61,7 @@ def __init__(self):
7661 # frames_per_buffer=CHUNK)
7762 #self.stream_in.start_stream()
7863 self .stream_in = self .MicrophoneStream (FORMAT , RATE , CHUNK )
79- except Exception as e :
64+ except Exception :
8065 logging .info ("Audio: input stream not available" )
8166
8267 #self._google_speech_client = speech.SpeechClient()
@@ -89,36 +74,32 @@ def exit(self):
8974
def say(self, what, locale='en'):
    """Speak ``what`` aloud, or play a sound file when it contains ``$``.

    When ``what`` contains a ``$`` character, its first character is
    dropped and the remainder is played from the ``sounds/`` directory with
    ``omxplayer``. Otherwise the text is synthesized with ``espeak`` using
    the voice named by ``locale`` and piped to ``aplay`` on device
    ``hw:1,0``. Empty or ``None`` input is a no-op.

    Args:
        what: Text to speak, or a ``$``-marked sound file name.
        locale: Voice passed to ``espeak -v``; defaults to ``'en'``.
    """
    if not what:
        return

    # Imported locally so the block does not depend on a file-level import.
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2 fallback

    if "$" in what:
        # NOTE(review): the test is "contains $" while the slice drops the
        # first character; presumably callers always put "$" first - confirm.
        # Quoting keeps spaces and shell metacharacters in the name literal.
        os.system('omxplayer ' + quote('sounds/' + what[1:]))
    else:
        # Quote the text and locale: the previous hand-written double quotes
        # broke on embedded '"' and let the shell expand `...` and $(...).
        os.system('espeak --stdout -v' + quote(locale)
                  + ' -p 90 -a 200 -s 150 -g 10 ' + quote(what)
                  + ' 2>>/dev/null | aplay -D hw:1,0')
9580
def normalize(self, snd_data):
    """Scale the samples so the loudest one reaches a fixed peak.

    Every sample is multiplied by ``MAXIMUM / max(abs(sample))`` and
    truncated to an int, so the largest-magnitude sample ends up at
    +/-16384.

    Args:
        snd_data: Re-iterable sequence of integer samples (it is traversed
            more than once) whose values fit a signed 16-bit ``array('h')``.

    Returns:
        A new ``array('h')`` with the scaled samples. Empty input yields an
        empty array, and all-zero input is returned unscaled, since there
        is no peak to scale against.
    """
    MAXIMUM = 16384
    # default=0 makes empty input behave like silence instead of raising.
    peak = max((abs(sample) for sample in snd_data), default=0)
    if peak == 0:
        # Nothing to amplify; also avoids dividing by zero below.
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(sample * times) for sample in snd_data))
10792
def record(self, elapse):
    """Collect audio from the input stream for roughly ``elapse`` seconds.

    Opens ``self.stream_in`` as a context manager and appends every chunk
    its generator yields to one buffer. The elapsed time is checked after
    each chunk, so capture stops with the first chunk that arrives at or
    after the deadline, or earlier if the generator runs dry.

    Args:
        elapse: Recording duration in seconds.

    Returns:
        A ``bytearray`` holding all chunks received, in order.
    """
    started = time.time()
    captured = bytearray()
    with self.stream_in as stream:
        for chunk in stream.generator():
            captured.extend(chunk)
            if time.time() - started < elapse:
                continue
            # Deadline reached: stop pulling chunks and let the stream close.
            break
    return captured
122103
123104 def record_to_file (self , filename , elapse ):
124105 data = self .record (elapse )
@@ -132,7 +113,7 @@ def record_to_file(self, filename, elapse):
132113 wf .close ()
133114
134115 def play (self , filename ):
135- os .system ('omxplayer sounds/' + filename )
116+ os .system ('omxplayer sounds/' + filename )
136117
137118 """
138119 # open the file for reading.
@@ -160,8 +141,6 @@ def play(self, filename):
160141 """
161142
162143 def hear (self , level , elapse = 1.0 ):
163- ts_total = time .time ()
164-
165144 t = time .time ()
166145 with self .stream_in as stream :
167146 audio_generator = stream .generator ()
@@ -171,9 +150,9 @@ def hear(self, level, elapse=1.0):
171150 return True
172151 if time .time () - t >= elapse :
173152 return False
153+ return False
174154
175155 def speech_recog (self , model ):
176-
177156 # Create a decoder with certain model
178157 config = Decoder .default_config ()
179158 config .set_string ('-hmm' , '/usr/local/share/pocketsphinx/model/en-us/en-us' )
@@ -186,7 +165,6 @@ def speech_recog(self, model):
186165 decoder = Decoder (config )
187166
188167 decoder .start_utt ()
189- tstamp = time .time ()
190168 recog_text = ''
191169
192170 with self .stream_in as stream :
@@ -195,38 +173,11 @@ def speech_recog(self, model):
195173 decoder .process_raw (content , False , False )
196174 if decoder .hyp () and decoder .hyp ().hypstr != '' :
197175 recog_text += decoder .hyp ().hypstr
198- tstamp = time .time ()
199176 if len (recog_text ) > 1 :
200177 decoder .end_utt ()
201- logging .info ("recog text: " + recog_text )
178+ logging .info ("recog text: %s" , recog_text )
202179 return recog_text
203-
204- # def speech_recog_google(self, locale):
205- # config = types.RecognitionConfig(
206- # encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
207- # sample_rate_hertz=RATE,
208- # language_code=locale)
209- # streaming_config = types.StreamingRecognitionConfig(
210- # config=config,
211- # interim_results=False,
212- # single_utterance=True)
213- #
214- # t1 = time.time()
215- # with self.stream_in as stream:
216- # audio_generator = stream.generator()
217- # requests = (types.StreamingRecognizeRequest(audio_content=content)
218- # for content in audio_generator)
219- #
220- # responses = self._google_speech_client.streaming_recognize(streaming_config, requests)
221-
222- # Now, put the transcription responses to use.
223- # for response in responses:
224- # if time.time() - t1 > 10:
225- # return ""
226- # if response.results:
227- # result = response.results[0]
228- # if result.is_final:
229- # return result.alternatives[0].transcript
180+ return recog_text
230181
231182 class MicrophoneStream (object ):
232183 """Opens a recording stream as a generator yielding the audio chunks."""
@@ -244,21 +195,21 @@ def __enter__(self):
244195 self ._audio_interface = pyaudio .PyAudio ()
245196 self ._buff = queue .Queue ()
246197 self ._audio_stream = self ._audio_interface .open (
247- format = self ._format ,
248- # The API currently only supports 1-channel (mono) audio
249- # https://goo.gl/z757pE
250- channels = 1 , rate = self ._rate ,
251- input = True , frames_per_buffer = self ._chunk ,
252- # Run the audio stream asynchronously to fill the buffer object.
253- # This is necessary so that the input device's buffer doesn't
254- # overflow while the calling thread makes network requests, etc.
255- stream_callback = self ._fill_buffer ,
198+ format = self ._format ,
199+ # The API currently only supports 1-channel (mono) audio
200+ # https://goo.gl/z757pE
201+ channels = 1 , rate = self ._rate ,
202+ input = True , frames_per_buffer = self ._chunk ,
203+ # Run the audio stream asynchronously to fill the buffer object.
204+ # This is necessary so that the input device's buffer doesn't
205+ # overflow while the calling thread makes network requests, etc.
206+ stream_callback = self ._fill_buffer ,
256207 )
257208 self .closed = False
258209
259210 return self
260211
261- def __exit__ (self , type , value , traceback ):
212+ def __exit__ (self , atype , value , traceback ):
262213 self ._audio_stream .stop_stream ()
263214 self ._audio_stream .close ()
264215 self ._audio_interface .terminate ()
0 commit comments