max){
@@ -268,8 +354,9 @@ private double[] bytesToDoubleArray(byte[] bufferData){
final int bytesRecorded = bufferData.length;
final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;
final double amplification = 100.0; // choose a number as you like
- double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
- for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
+ int micBufferLength = bytesRecorded; // bytesRecorded - bytesPerSample +1
+ double[] micBufferData = new double[micBufferLength];
+ for (int index = 0, floatIndex = 0; index < micBufferLength; index += bytesPerSample, floatIndex++) {
double sample = 0;
for (int b = 0; b < bytesPerSample; b++) {
int v = bufferData[index + b];
@@ -284,5 +371,4 @@ private double[] bytesToDoubleArray(byte[] bufferData){
}
return micBufferData;
}
-
}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/RecognitionResult.java b/src/main/java/com/darkprograms/speech/recognizer/RecognitionResult.java
new file mode 100644
index 0000000..134aaae
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/RecognitionResult.java
@@ -0,0 +1,7 @@
+package com.darkprograms.speech.recognizer;
+
+public interface RecognitionResult {
+ /** @return String representation of what was said */
+ String getResponse();
+ boolean isFinalResponse();
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/SpeechRecognizer.java b/src/main/java/com/darkprograms/speech/recognizer/SpeechRecognizer.java
new file mode 100644
index 0000000..67483f3
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/SpeechRecognizer.java
@@ -0,0 +1,4 @@
+package com.darkprograms.speech.recognizer;
+
+public interface SpeechRecognizer {
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/awslex/LexRecognizer.java b/src/main/java/com/darkprograms/speech/recognizer/awslex/LexRecognizer.java
new file mode 100644
index 0000000..dd2fc14
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/awslex/LexRecognizer.java
@@ -0,0 +1,148 @@
+package com.darkprograms.speech.recognizer.awslex;
+
+import com.amazonaws.services.lexruntime.AmazonLexRuntime;
+import com.amazonaws.services.lexruntime.model.PostContentRequest;
+import com.amazonaws.services.lexruntime.model.PostContentResult;
+import com.darkprograms.speech.recognizer.RecognitionResult;
+import org.json.JSONObject;
+
+import javax.sound.sampled.AudioInputStream;
+import java.io.IOException;
+import java.util.Base64;
+import java.util.Map;
+
+/**
+ * example:
+ *
+ LexRecognizer lex = new LexRecognizer(AmazonLexRuntimeClientBuilder.defaultClient(), "MyLexBot", "PROD", "auser");
+ MicrophoneAnalyzer mic = new MicrophoneAnalyzer(null);
+ VoiceActivityDetector vad = new VoiceActivityDetector();
+
+ vad.detectVoiceActivity(mic, audioInputStream -> {
+ PostContentResult result = lex.getRecognizedDataForStream(audioInputStream, myApp.getSessionAttributes()).getResult();
+ System.out.println(result.message);
+ });
+ *
+ */
+public class LexRecognizer {
+ private AmazonLexRuntime lex;
+ private String botName;
+ private String botAlias;
+ private String userId;
+
+ public LexRecognizer(AmazonLexRuntime lex, String botName, String botAlias, String userId) {
+ this.lex = lex;
+ this.botName = botName;
+ this.botAlias = botAlias;
+ this.userId = userId;
+ }
+
+ public void setUserId(String userId) {
+ this.userId = userId;
+ }
+
+ public LexResponse getRecognizedDataForStream(AudioInputStream stream) {
+ return getRecognizedDataForStream(stream, (String)null);
+ }
+
+ /**
+ * @see #getRecognizedDataForStreamWithObjects(AudioInputStream, Map)
+ * @param stream
+ * @param sessionAttributes simple key:value attributes
+ * @return
+ */
+ public RecognitionResult getRecognizedDataForStream(AudioInputStream stream, Map sessionAttributes) {
+ String json;
+ if (sessionAttributes == null || sessionAttributes.isEmpty()) {
+ json = null;
+ } else {
+ StringBuilder str = null;
+
+ for (Map.Entry entry : sessionAttributes.entrySet()) {
+ String key = entry.getKey();
+ String value = entry.getValue();
+
+ if (str == null) {
+ str = new StringBuilder("{");
+ } else {
+ str.append(",");
+ }
+
+ str.append("\"").append(key).append("\":");
+ if (value == null) {
+ str.append("null");
+ } else {
+ str.append(JSONObject.quote(value));
+ }
+ }
+
+ json = str.append("}").toString();
+ }
+
+ return getRecognizedDataForStream(stream, new String(Base64.getEncoder().encode(json.getBytes())));
+ }
+
+ /**
+     * Each value of sessionAttributes will be converted to a String containing JSON
+ * @param stream
+ * @param sessionAttributes
+ * @return
+ */
+ public RecognitionResult getRecognizedDataForStreamWithObjects(AudioInputStream stream, Map sessionAttributes) {
+ String json;
+ if (sessionAttributes == null || sessionAttributes.isEmpty()) {
+ json = null;
+ } else {
+ StringBuilder str = null;
+
+ for (Map.Entry entry : sessionAttributes.entrySet()) {
+ String key = entry.getKey();
+ Object value = entry.getValue();
+
+ if (str == null) {
+ str = new StringBuilder("{");
+ } else {
+ str.append(",");
+ }
+
+ str.append("\"").append(key).append("\":");
+// if (value == null) {
+// str.append("null");
+// } else if (value instanceof Number) {
+// str.append(JSONObject.numberToString((Number)value));
+// } else if (value instanceof Boolean) {
+// str.append(((Boolean)value).toString());
+// } else if (value instanceof String) {
+// str.append("\"").append((String)value).append("\"");
+// } else {
+ str.append(JSONObject.valueToString(value));
+// }
+ }
+
+ json = str.append("}").toString();
+ }
+
+ return getRecognizedDataForStream(stream, new String(Base64.getEncoder().encode(json.getBytes())));
+ }
+
+ /**
+ * @param stream
+     * @param sessionAttributes The value must be a map (keys and values must be strings) that is JSON-serialized and then base64-encoded
+ * @return
+ */
+ public LexResponse getRecognizedDataForStream(AudioInputStream stream, String sessionAttributes) {
+ PostContentRequest request = new PostContentRequest()
+ .withBotName(botName)
+ .withBotAlias(botAlias)
+ .withUserId(userId)
+ .withInputStream(stream)
+ .withContentType("audio/l16; rate=16000; channels=1")
+ .withSessionAttributes(sessionAttributes);
+
+// System.out.println("sending request to Lex: " + request);
+// try {System.out.println(">> " + stream.available());} catch (IOException e) {}
+
+ PostContentResult result = lex.postContent(request);
+ return new LexResponse(result);
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/awslex/LexResponse.java b/src/main/java/com/darkprograms/speech/recognizer/awslex/LexResponse.java
new file mode 100644
index 0000000..f987095
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/awslex/LexResponse.java
@@ -0,0 +1,38 @@
+package com.darkprograms.speech.recognizer.awslex;
+
+import com.amazonaws.services.lexruntime.model.PostContentResult;
+import com.darkprograms.speech.recognizer.RecognitionResult;
+
+public class LexResponse implements RecognitionResult {
+ private PostContentResult result;
+// private String response;
+
+ public LexResponse(PostContentResult result) {
+ this.result = result;
+// this.response = result.getInputTranscript();
+
+ // Close - Fulfilled or Failed (ReadyForFulfillment?)
+ // Incomplete - ElicitIntent, ConfirmIntent, ElicitSlot
+// result.getDialogState();
+// result.getIntentName();
+// result.getMessage();
+// result.getSessionAttributes();
+// result.getSlots();
+// result.getSlotToElicit();
+// No card?!!!
+ }
+
+ public String getResponse() {
+ return result.getInputTranscript();
+// return null;
+ }
+
+ public boolean isFinalResponse() {
+ String state = result.getDialogState();
+ return "Fulfilled".equals(state) || "Failed".equals(state);
+ }
+
+ public PostContentResult getResult() {
+ return result;
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java b/src/main/java/com/darkprograms/speech/recognizer/google/GSpeechDuplex.java
similarity index 99%
rename from src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java
rename to src/main/java/com/darkprograms/speech/recognizer/google/GSpeechDuplex.java
index a66e844..ed4421d 100644
--- a/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java
+++ b/src/main/java/com/darkprograms/speech/recognizer/google/GSpeechDuplex.java
@@ -1,4 +1,4 @@
-package com.darkprograms.speech.recognizer;
+package com.darkprograms.speech.recognizer.google;
import java.io.File;
import java.io.IOException;
@@ -184,7 +184,7 @@ public void recognize(TargetDataLine tl, AudioFormat af) throws IOException, Lin
/**
* This code opens a new Thread that connects to the downstream URL. Due to threading,
* the best way to handle this is through the use of listeners.
- * @param The URL you want to connect to.
+ * @param urlStr The URL you want to connect to.
*/
private Thread downChannel(String urlStr) {
final String url = urlStr;
diff --git a/src/main/java/com/darkprograms/speech/recognizer/GSpeechResponseListener.java b/src/main/java/com/darkprograms/speech/recognizer/google/GSpeechResponseListener.java
similarity index 75%
rename from src/main/java/com/darkprograms/speech/recognizer/GSpeechResponseListener.java
rename to src/main/java/com/darkprograms/speech/recognizer/google/GSpeechResponseListener.java
index dcbbf2a..aca0228 100644
--- a/src/main/java/com/darkprograms/speech/recognizer/GSpeechResponseListener.java
+++ b/src/main/java/com/darkprograms/speech/recognizer/google/GSpeechResponseListener.java
@@ -1,4 +1,5 @@
-package com.darkprograms.speech.recognizer;
+package com.darkprograms.speech.recognizer.google;
+
/**
* Response listeners for URL connections.
diff --git a/src/main/java/com/darkprograms/speech/recognizer/GoogleResponse.java b/src/main/java/com/darkprograms/speech/recognizer/google/GoogleResponse.java
similarity index 92%
rename from src/main/java/com/darkprograms/speech/recognizer/GoogleResponse.java
rename to src/main/java/com/darkprograms/speech/recognizer/google/GoogleResponse.java
index 73a86f4..666dadc 100644
--- a/src/main/java/com/darkprograms/speech/recognizer/GoogleResponse.java
+++ b/src/main/java/com/darkprograms/speech/recognizer/google/GoogleResponse.java
@@ -1,4 +1,6 @@
-package com.darkprograms.speech.recognizer;
+package com.darkprograms.speech.recognizer.google;
+
+import com.darkprograms.speech.recognizer.RecognitionResult;
import java.util.ArrayList;
import java.util.List;
@@ -8,7 +10,7 @@
*
* @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan
******************************************************************************/
-public class GoogleResponse {
+public class GoogleResponse implements RecognitionResult {
/**
* Variable that holds the response
diff --git a/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java b/src/main/java/com/darkprograms/speech/recognizer/google/Recognizer.java
similarity index 98%
rename from src/main/java/com/darkprograms/speech/recognizer/Recognizer.java
rename to src/main/java/com/darkprograms/speech/recognizer/google/Recognizer.java
index cac98a3..431b724 100644
--- a/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java
+++ b/src/main/java/com/darkprograms/speech/recognizer/google/Recognizer.java
@@ -1,4 +1,4 @@
-package com.darkprograms.speech.recognizer;
+package com.darkprograms.speech.recognizer.google;
import java.util.*;
import java.io.*;
@@ -6,14 +6,17 @@
import java.net.URLConnection;
import java.nio.charset.Charset;
+import com.darkprograms.speech.recognizer.SpeechRecognizer;
import org.json.*;
+import com.darkprograms.speech.encoding.FlacEncoder;
+
/***************************************************************
* Class that submits FLAC audio and retrieves recognized text
*
* @author Luke Kuza, Duncan Jauncey, Aaron Gokaslan
**************************************************************/
-public class Recognizer {
+public class Recognizer implements SpeechRecognizer {
public enum Languages{
AUTO_DETECT("auto"),//tells Google to auto-detect the language
diff --git a/src/main/java/com/darkprograms/speech/recognizer/RecognizerChunked.java b/src/main/java/com/darkprograms/speech/recognizer/google/RecognizerChunked.java
similarity index 99%
rename from src/main/java/com/darkprograms/speech/recognizer/RecognizerChunked.java
rename to src/main/java/com/darkprograms/speech/recognizer/google/RecognizerChunked.java
index 160b395..62614df 100644
--- a/src/main/java/com/darkprograms/speech/recognizer/RecognizerChunked.java
+++ b/src/main/java/com/darkprograms/speech/recognizer/google/RecognizerChunked.java
@@ -1,4 +1,4 @@
-package com.darkprograms.speech.recognizer;
+package com.darkprograms.speech.recognizer.google;
import java.io.BufferedReader;
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/AbstractVAD.java b/src/main/java/com/darkprograms/speech/recognizer/vad/AbstractVAD.java
new file mode 100644
index 0000000..f2ddb2a
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/AbstractVAD.java
@@ -0,0 +1,177 @@
+package com.darkprograms.speech.recognizer.vad;
+
+import com.darkprograms.speech.microphone.Microphone;
+import com.darkprograms.speech.microphone.MicrophoneAnalyzer;
+
+import javax.sound.sampled.AudioInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+
+public abstract class AbstractVAD implements VoiceActivityDetector, Runnable {
+ private static final int WINDOW_MILLIS = 16;
+ private static final int IGNORE_SILENCE_WINDOWS = 10;
+ private static final int IGNORE_SPEECH_WINDOWS = 5;
+    /** maximum ms between words — NOTE(review): 4 ms with 16 ms windows makes MAX_SILENCE_WINDOWS == 0; was 400 intended? */
+ private static final int MAX_SILENCE_MILLIS = 4;
+ /** minimum duration of speech to recognise */
+ private static final int MIN_SPEECH_MILLIS = 200;
+ private static final double WINDOW_SECONDS = (double)WINDOW_MILLIS / 1000;
+ /** Google does not allow recordings over 1 minute, but 10 seconds should be ample */
+ private static final int MAX_SPEECH_MILLIS = 10_000;
+ private static final int MAX_SILENCE_WINDOWS = MAX_SILENCE_MILLIS / WINDOW_MILLIS;
+ private static final int MIN_SPEECH_WINDOWS = MIN_SPEECH_MILLIS / WINDOW_MILLIS;
+
+ protected AudioInputStream audio;
+ MicrophoneAnalyzer mic;
+ private VoiceActivityListener listener;
+ private VadState state;
+ private Thread thread;
+
+ private int maxSpeechMs;
+ private int maxSpeechWindows;
+ int silenceCount;
+ private int speechCount;
+
+ private int offset;
+ private int bufferSize;
+ private ByteArrayOutputStream outBuffer;
+
+    // TODO: optionally provide PipedInputStream to support streaming recognition on Google
+ public void detectVoiceActivity(MicrophoneAnalyzer mic, VoiceActivityListener listener) {
+ detectVoiceActivity(mic, MAX_SPEECH_MILLIS, listener);
+ }
+
+ /** Initialise the VAD and start a thread */
+ @Override
+ public void detectVoiceActivity(MicrophoneAnalyzer mic, int maxSpeechMs, VoiceActivityListener listener) {
+ this.listener = listener;
+ this.maxSpeechMs = maxSpeechMs;
+ maxSpeechWindows = maxSpeechMs / WINDOW_MILLIS;
+
+ if (this.mic != null) {
+ if (this.mic == mic) {
+ // re-open the same mic
+ if (mic.getState() == Microphone.CaptureState.CLOSED) {
+ mic.open();
+ }
+ return;
+ } else {
+ // swap mics
+ this.audio = mic.captureAudioToStream();
+ this.mic.close();
+ }
+ } else {
+ this.audio = mic.captureAudioToStream();
+ }
+
+ this.mic = mic;
+ }
+
+ @Override
+ public void setVoiceActivityListener(VoiceActivityListener listener) {
+ this.listener = listener;
+ }
+
+ @Override
+ public void start() {
+ thread = new Thread(this, "JARVIS-VAD");
+ thread.start();
+ }
+
+ @Override
+ public void terminate() {
+// state = VadState.CLOSED;
+ thread.interrupt();
+ }
+
+ /**
+ * Continuously reads "windows" of audio into a buffer and delegates to {@link #sampleForSpeech(byte[])}
+ * and {@link #incrementSpeechCounter(boolean, int, byte[])}.
+ * {@link #emitVoiceActivity(ByteArrayOutputStream)} will be called when an utterance has been captured.
+ */
+ @Override
+ public void run() {
+ int bytesToRead = mic.getNumOfBytes(WINDOW_SECONDS);
+ byte[] audioData = new byte[bytesToRead];
+ bufferSize = maxSpeechMs * this.mic.getNumOfBytes(0.001);
+ silenceCount = 0;
+ speechCount = 0;
+ offset = 0;
+ outBuffer = new ByteArrayOutputStream(bufferSize);
+
+ state = VoiceActivityDetector.VadState.LISTENING;
+
+ while (state != VadState.CLOSED) {
+ try {
+ int bytesRead = this.audio.read(audioData, 0, bytesToRead);
+ boolean speechDetected = sampleForSpeech(audioData);
+ incrementSpeechCounter(speechDetected, bytesRead, audioData);
+ } catch (Exception e) {
+ e.printStackTrace();
+ state = VadState.CLOSED;
+ return;
+ }
+ }
+ }
+
+ /**
+ * Executed from within the VAD thread
+ * @param audioData
+ * @return
+ */
+ protected abstract boolean sampleForSpeech(byte[] audioData);
+
+ protected void incrementSpeechCounter(boolean speechDetected, int bytesRead, byte[] audioData) {
+ if (speechDetected) {
+ speechCount++;
+ // Ignore speech runs less than 5 successive frames.
+ if (state != VoiceActivityDetector.VadState.DETECTED_SPEECH && speechCount >= IGNORE_SPEECH_WINDOWS) {
+ state = VoiceActivityDetector.VadState.DETECTED_SPEECH;
+ silenceCount = 0;
+ }
+
+ if (offset + bytesRead < bufferSize) {
+ outBuffer.write(audioData, 0, bytesRead);
+ offset += bytesRead;
+
+ if (speechCount >= maxSpeechWindows) {
+ System.out.println("in theory, this should be handled by the following end of buffer handler");
+ emitVoiceActivity(outBuffer);
+ }
+ } else {
+ System.out.println("Reached the end of the buffer! Send what we've captured so far");
+ bytesRead = bufferSize - offset;
+ outBuffer.write(audioData, 0, bytesRead);
+ emitVoiceActivity(outBuffer);
+ }
+ } else {
+ // silence
+ silenceCount++;
+
+ // Ignore silence runs less than 10 successive frames.
+ if (state == VoiceActivityDetector.VadState.DETECTED_SPEECH && silenceCount >= IGNORE_SILENCE_WINDOWS) {
+ if (silenceCount >= MAX_SILENCE_WINDOWS && speechCount >= MIN_SPEECH_WINDOWS) {
+ System.out.println("We have silence after a chunk of speech worth processing");
+ emitVoiceActivity(outBuffer);
+ } else {
+ state = VoiceActivityDetector.VadState.DETECTED_SILENCE_AFTER_SPEECH;
+ }
+
+ speechCount = 0;
+ }
+ }
+ }
+
+ protected void emitVoiceActivity(ByteArrayOutputStream outBuffer) {
+ listener.onVoiceActivity(createVoiceActivityStream(outBuffer));
+ outBuffer.reset();
+ offset = 0;
+ state = VadState.LISTENING;
+ }
+
+ protected AudioInputStream createVoiceActivityStream(ByteArrayOutputStream outBuffer) {
+ System.out.println("speech: " + mic.getAudioFormat().getFrameSize() * mic.getNumOfFrames(outBuffer.size()));
+ return new AudioInputStream(new ByteArrayInputStream(outBuffer.toByteArray()), audio.getFormat(), mic.getNumOfFrames(outBuffer.size()));
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/MoattarHomayounpourVAD.java b/src/main/java/com/darkprograms/speech/recognizer/vad/MoattarHomayounpourVAD.java
new file mode 100644
index 0000000..1f6a3bd
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/MoattarHomayounpourVAD.java
@@ -0,0 +1,59 @@
+package com.darkprograms.speech.recognizer.vad;
+
+/**
+ * Implementation of [https://www.researchgate.net/publication/255667085_A_simple_but_efficient_real-time_voice_activity_detection_algorithm]
+ *
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * !! WARNING - this is not working correctly !!
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ *
+ * TODO: need to calculate Spectral Flatness Measure
+ */
+public class MoattarHomayounpourVAD extends AbstractVAD {
+ private static final int ENERGY_PRIMARY_THRESHOLD = 40;
+ private static final int FREQUENCY_PRIMARY_THRESHOLD = 185;
+ private static final int SPECTRAL_FLATNESS_PRIMARY_THRESHOLD = 5;
+
+ private int minEnergy = Integer.MAX_VALUE;
+ private int minFrequency = Integer.MAX_VALUE;
+ private int minSpectralFlatness = Integer.MAX_VALUE;
+
+ @Override
+ public void run() {
+ minEnergy = Integer.MAX_VALUE;
+ minFrequency = Integer.MAX_VALUE;
+ minSpectralFlatness = Integer.MAX_VALUE;
+ super.run();
+ }
+
+ @Override
+ protected boolean sampleForSpeech(byte[] audioData) {
+ int counter = 0;
+ int energy = mic.calculateRMSLevel(audioData);
+ int frequency = mic.getFrequency(audioData);
+
+ // ignore frequencies above 400hz (and below 50Hz?)
+ if (frequency < 400) {
+ // 3-2-2- Compute the abstract value of Spectral Flatness Measure SFM(i)
+// TODO https://github.com/filipeuva/SoundBites/blob/master/src/uk/co/biogen/SoundBites/analysis/AnalysisInterface.java#L264
+
+ // 3-3- Supposing that some of the first 30 frames are silence, find the minimum value for E, F & SF
+ minEnergy = Math.min(minEnergy, energy);
+ minFrequency = Math.min(minFrequency, frequency);
+// minSpectralFlatness = Math.min(minSpectralFlatness, energy);
+
+ double energyThreshold = ENERGY_PRIMARY_THRESHOLD * Math.log(minEnergy);
+ System.out.println("energy: " + energy + "\tfrequency:" + frequency);
+ if (energy - minEnergy >= energyThreshold) counter++;
+ if (frequency - minFrequency >= FREQUENCY_PRIMARY_THRESHOLD) counter++;
+// if (sfm - minSpectralFlatness) >= SPECTRAL_FLATNESS_PRIMARY_THRESHOLD) counter++;
+ }
+
+ if(counter > 1) {
+ return true;
+ } else {
+ minEnergy = ((silenceCount * minEnergy) + energy) / (silenceCount + 1);
+ return false;
+ }
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/RecordingListener.java b/src/main/java/com/darkprograms/speech/recognizer/vad/RecordingListener.java
new file mode 100644
index 0000000..5eccaf5
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/RecordingListener.java
@@ -0,0 +1,41 @@
+package com.darkprograms.speech.recognizer.vad;
+
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Useful for debugging & testing microphone
+ */
+public class RecordingListener implements VoiceActivityListener {
+ private VoiceActivityListener nextListener;
+
+ @Override
+ public void onVoiceActivity(AudioInputStream audioInputStream) {
+ String fileName = new Date().toString() + ".wav";
+ File out = new File("/tmp", fileName);
+
+ try {
+ System.out.println("Saving recoring to " + out.getAbsolutePath());
+ AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, out);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ if (nextListener != null) {
+ nextListener.onVoiceActivity(audioInputStream);
+ }
+ }
+
+ public RecordingListener withNextListener(VoiceActivityListener nextListener) {
+ this.nextListener = nextListener;
+ return this;
+ }
+
+ public void setNextListener(VoiceActivityListener nextListener) {
+ this.nextListener = nextListener;
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/SimpleVAD.java b/src/main/java/com/darkprograms/speech/recognizer/vad/SimpleVAD.java
new file mode 100644
index 0000000..299f53a
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/SimpleVAD.java
@@ -0,0 +1,37 @@
+package com.darkprograms.speech.recognizer.vad;
+
+/**
+ * Adapted from https://stackoverflow.com/questions/18815235/can-i-use-google-speech-recognition-api-in-my-desktop-application
+ */
+public class SimpleVAD extends AbstractVAD {
+ private int threshold = 10;
+ private int ambientVolume;
+ private int speakingVolume;
+ private boolean speaking;
+
+ public void setThreshold(int threshold) {
+ this.threshold = threshold;
+ }
+
+ @Override
+ public void run() {
+ speakingVolume = -2;
+ speaking = false;
+ ambientVolume = mic.getAudioVolume();
+ super.run();
+ }
+
+ @Override
+ protected boolean sampleForSpeech(byte[] audioData) {
+ int volume = mic.calculateRMSLevel(audioData);
+//System.out.println(volume);
+ if (volume > ambientVolume + threshold) {
+ speakingVolume = volume;
+ speaking = true;
+ }
+ if (speaking && volume + threshold < speakingVolume) {
+ speaking = false;
+ }
+ return speaking;
+ }
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityDetector.java b/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityDetector.java
new file mode 100644
index 0000000..6496307
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityDetector.java
@@ -0,0 +1,29 @@
+package com.darkprograms.speech.recognizer.vad;
+
+import com.darkprograms.speech.microphone.MicrophoneAnalyzer;
+import com.darkprograms.speech.util.FFT;
+
+import javax.sound.sampled.AudioInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+/**
+ * @see [https://github.com/Sciss/SpeechRecognitionHMM/blob/master/src/main/java/org/ioe/tprsa/audio/preProcessings/EndPointDetection.java]
+ */
+public interface VoiceActivityDetector {
+ enum VadState {
+ LISTENING,
+ DETECTED_SPEECH,
+ DETECTED_SILENCE_AFTER_SPEECH,
+ CLOSED
+ }
+
+ void start();
+ void terminate();
+
+ // TODO: optionally provide PipedInputStream to support streaming recognition on Google
+ void detectVoiceActivity(MicrophoneAnalyzer mic, VoiceActivityListener listener);
+ void detectVoiceActivity(MicrophoneAnalyzer mic, int maxSpeechMs, VoiceActivityListener listener);
+ void setVoiceActivityListener(VoiceActivityListener listener);
+}
diff --git a/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityListener.java b/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityListener.java
new file mode 100644
index 0000000..ebd1fe9
--- /dev/null
+++ b/src/main/java/com/darkprograms/speech/recognizer/vad/VoiceActivityListener.java
@@ -0,0 +1,7 @@
+package com.darkprograms.speech.recognizer.vad;
+
+import javax.sound.sampled.AudioInputStream;
+
+public interface VoiceActivityListener {
+ void onVoiceActivity(AudioInputStream audioInputStream);
+}