ESP32 | INMP441 | Tutorial - [Part.5] Speech-to-Text Powered by Google Cloud machine learning

This commit is contained in:
Eric
2020-03-25 01:54:11 -07:00
parent a5b975ac8a
commit dca18e76a9
5 changed files with 379 additions and 0 deletions

View File

@@ -0,0 +1,284 @@
#include <driver/i2s.h>
#include <SPIFFS.h>
#include <WiFi.h>
#include <HTTPClient.h>
// GPIO numbers passed to i2s_set_pin() in i2sInit():
// word-select (ws_io_num), serial data in (data_in_num) and bit clock (bck_io_num).
#define I2S_WS 15
#define I2S_SD 13
#define I2S_SCK 2
// I2S peripheral instance used for every driver call in this sketch.
#define I2S_PORT I2S_NUM_0
// Capture format: sample rate in Hz and bits per sample (fed to i2s_config_t).
#define I2S_SAMPLE_RATE (16000)
#define I2S_SAMPLE_BITS (16)
// Size in bytes of each buffer requested from i2s_read() and written to the file.
#define I2S_READ_LEN (16 * 1024)
#define RECORD_TIME (20) //Seconds
#define I2S_CHANNEL_NUM (1)
// Number of audio payload bytes for RECORD_TIME seconds:
// channels * samples-per-second * bytes-per-sample * seconds.
#define FLASH_RECORD_SIZE (I2S_CHANNEL_NUM * I2S_SAMPLE_RATE * I2S_SAMPLE_BITS / 8 * RECORD_TIME)
// Shared handle for the recording: opened for writing in SPIFFSInit(), filled and
// closed by i2s_adc(), then reopened for reading in uploadFile().
File file;
// SPIFFS path of the recording.
const char filename[] = "/recording.wav";
// Size in bytes of the WAV header produced by wavHeader().
const int headerSize = 44;
// Set to true by wifiConnect() once WiFi.status() reports WL_CONNECTED;
// read by i2s_adc() to decide whether to upload.
// NOTE(review): written and read from different tasks without synchronization.
bool isWIFIConnected;
void setup() {
// put your setup code here, to run once:
Serial.begin(115200);
SPIFFSInit();
i2sInit();
xTaskCreate(i2s_adc, "i2s_adc", 1024 * 2, NULL, 1, NULL);
delay(500);
xTaskCreate(wifiConnect, "wifi_Connect", 4096, NULL, 0, NULL);
}
// Arduino main loop. Left empty: recording/upload and the Wi-Fi connection
// are handled by the tasks created in setup().
void loop() {
// put your main code here, to run repeatedly:
}
void SPIFFSInit(){
if(!SPIFFS.begin(true)){
Serial.println("SPIFFS initialisation failed!");
while(1) yield();
}
SPIFFS.remove(filename);
file = SPIFFS.open(filename, FILE_WRITE);
if(!file){
Serial.println("File is not available!");
}
byte header[headerSize];
wavHeader(header, FLASH_RECORD_SIZE);
file.write(header, headerSize);
listSPIFFS();
}
// Installs the I2S driver on I2S_PORT and routes it to the microphone pins.
// NOTE(review): the return codes of i2s_driver_install() and i2s_set_pin()
// are not checked, so a failed setup goes unnoticed here.
void i2sInit(){
// Fields are listed in the struct's declaration order (designated initializers).
i2s_config_t i2s_config = {
// Master mode, receive only.
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = I2S_SAMPLE_RATE,
.bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
// Only the left channel is captured.
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
.intr_alloc_flags = 0,
// DMA buffering: 64 buffers of length 1024.
// NOTE(review): units of dma_buf_len are defined by the driver - confirm.
.dma_buf_count = 64,
.dma_buf_len = 1024,
.use_apll = 1
};
// NOTE(review): the trailing 0 / NULL are presumably "no event queue" - confirm
// against the driver documentation.
i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
const i2s_pin_config_t pin_config = {
.bck_io_num = I2S_SCK,
.ws_io_num = I2S_WS,
// No data-out pin is assigned; this sketch only receives.
.data_out_num = -1,
.data_in_num = I2S_SD
};
i2s_set_pin(I2S_PORT, &pin_config);
}
// Rescales raw 2-byte little-endian samples from s_buff into d_buff.
//
// For every complete 2-byte input sample the low 12 bits are taken
// (low byte plus the low nibble of the high byte), multiplied by 256 / 2048
// and truncated to 8 bits. The output sample is written as two bytes: a zero
// low byte followed by the scaled value.
//
// d_buff  destination buffer, must hold at least `len` rounded down to even bytes
// s_buff  source buffer of `len` bytes
// len     number of source bytes; a trailing odd byte is ignored so that
//         neither buffer is accessed past `len`
void i2s_adc_data_scale(uint8_t * d_buff, uint8_t* s_buff, uint32_t len)
{
  // Only whole samples are processed; this also keeps the index unsigned like `len`.
  const uint32_t usable = len & ~static_cast<uint32_t>(1);

  for (uint32_t i = 0; i < usable; i += 2) {
    const uint32_t sample =
        (static_cast<uint32_t>(s_buff[i + 1] & 0xf) << 8) | s_buff[i];
    d_buff[i] = 0;
    // Truncation to 8 bits is intentional and matches the original scaling.
    d_buff[i + 1] = static_cast<uint8_t>(sample * 256 / 2048);
  }
}
// FreeRTOS task: records FLASH_RECORD_SIZE bytes of audio from I2S into the
// already-open global `file`, then uploads it if Wi-Fi is connected.
//
// Steps:
//   1. allocate a read buffer and a scaled-output buffer of I2S_READ_LEN bytes;
//   2. discard two initial reads;
//   3. read, rescale and append chunks until exactly FLASH_RECORD_SIZE bytes
//      have been written (the last chunk is trimmed so the payload matches the
//      size announced in the WAV header);
//   4. close the file, release the buffers, list SPIFFS, upload, delete the task.
//
// arg  unused task parameter.
void i2s_adc(void *arg)
{
  const size_t i2s_read_len = I2S_READ_LEN;
  const size_t record_size = FLASH_RECORD_SIZE;
  size_t flash_wr_size = 0;
  size_t bytes_read = 0;

  char* i2s_read_buff = static_cast<char*>(calloc(i2s_read_len, sizeof(char)));
  uint8_t* flash_write_buff = static_cast<uint8_t*>(calloc(i2s_read_len, sizeof(char)));
  if (i2s_read_buff == NULL || flash_write_buff == NULL) {
    // Without both buffers nothing can be recorded; clean up and end the task.
    Serial.println("Failed to allocate recording buffers!");
    free(i2s_read_buff);
    free(flash_write_buff);
    file.close();
    vTaskDelete(NULL);
    return;
  }

  // The first two buffers are read and thrown away before recording starts.
  i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
  i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);

  Serial.println(" *** Recording Start *** ");
  while (flash_wr_size < record_size) {
    //read data from I2S bus, in this case, from ADC.
    bytes_read = 0;
    i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
    if (bytes_read == 0) {
      // Nothing was delivered; bail out instead of spinning forever.
      Serial.println("I2S read returned no data, recording stopped!");
      break;
    }

    // Never write more than what is left, so the data chunk matches the header.
    const size_t remaining = record_size - flash_wr_size;
    const size_t chunk = (bytes_read < remaining) ? bytes_read : remaining;

    //save original data from I2S(ADC) into flash.
    i2s_adc_data_scale(flash_write_buff, (uint8_t*) i2s_read_buff, chunk);
    if (file.write(flash_write_buff, chunk) != chunk) {
      Serial.println("Failed to write audio data, recording stopped!");
      break;
    }
    flash_wr_size += chunk;

    // 64-bit intermediate keeps the percentage correct for long recordings.
    ets_printf("Sound recording %u%%\n",
               static_cast<unsigned int>(static_cast<uint64_t>(flash_wr_size) * 100 / record_size));
    ets_printf("Never Used Stack Size: %u\n", uxTaskGetStackHighWaterMark(NULL));
  }
  file.close();

  free(i2s_read_buff);
  i2s_read_buff = NULL;
  free(flash_write_buff);
  flash_write_buff = NULL;

  listSPIFFS();

  if(isWIFIConnected){
    uploadFile();
  }

  vTaskDelete(NULL);
}
// Debug helper: hex-dumps `length` bytes of `buf` to stdout, eight bytes per
// line, framed by "======" separator lines.
void example_disp_buf(uint8_t* buf, int length)
{
  printf("======\n");

  int idx = 0;
  while (idx < length) {
    printf("%02x ", buf[idx]);
    ++idx;
    // Break the line after every eighth byte.
    const bool rowComplete = (idx % 8 == 0);
    if (rowComplete) {
      printf("\n");
    }
  }

  printf("======\n");
}
// Fills `header` with a 44-byte PCM WAV (RIFF) header.
//
// The format fields (channel count, sample rate, bits per sample, byte rate
// and block align) are derived from I2S_CHANNEL_NUM, I2S_SAMPLE_RATE and
// I2S_SAMPLE_BITS, so the header always describes the configured capture
// format. With the current settings (1 channel, 16000 Hz, 16 bit) the bytes
// are identical to the previously hard-coded values.
//
// header   output buffer of at least headerSize (44) bytes
// wavSize  size in bytes of the audio payload that follows the header
void wavHeader(byte* header, int wavSize){
  // All multi-byte fields in a WAV header are little-endian.
  auto put16 = [header](int offset, unsigned int value) {
    header[offset]     = static_cast<byte>(value & 0xFF);
    header[offset + 1] = static_cast<byte>((value >> 8) & 0xFF);
  };
  auto put32 = [header](int offset, unsigned int value) {
    header[offset]     = static_cast<byte>(value & 0xFF);
    header[offset + 1] = static_cast<byte>((value >> 8) & 0xFF);
    header[offset + 2] = static_cast<byte>((value >> 16) & 0xFF);
    header[offset + 3] = static_cast<byte>((value >> 24) & 0xFF);
  };
  // Copies a four-character chunk identifier.
  auto putTag = [header](int offset, const char* tag) {
    for (int k = 0; k < 4; ++k) {
      header[offset + k] = static_cast<byte>(tag[k]);
    }
  };

  const unsigned int channels = I2S_CHANNEL_NUM;
  const unsigned int sampleRate = I2S_SAMPLE_RATE;
  const unsigned int bitsPerSample = I2S_SAMPLE_BITS;
  const unsigned int blockAlign = channels * bitsPerSample / 8;  // bytes per sample frame
  const unsigned int byteRate = sampleRate * blockAlign;         // bytes per second

  // RIFF chunk descriptor; its size excludes the 8 bytes of "RIFF" + size field.
  putTag(0, "RIFF");
  put32(4, wavSize + headerSize - 8);
  putTag(8, "WAVE");

  // "fmt " sub-chunk: 16 bytes of uncompressed PCM format description.
  putTag(12, "fmt ");
  put32(16, 16);
  put16(20, 1);              // audio format 1 = PCM
  put16(22, channels);
  put32(24, sampleRate);
  put32(28, byteRate);
  put16(32, blockAlign);
  put16(34, bitsPerSample);

  // "data" sub-chunk header; the payload follows immediately after.
  putTag(36, "data");
  put32(40, wavSize);
}
void listSPIFFS(void) {
Serial.println(F("\r\nListing SPIFFS files:"));
static const char line[] PROGMEM = "=================================================";
Serial.println(FPSTR(line));
Serial.println(F(" File name Size"));
Serial.println(FPSTR(line));
fs::File root = SPIFFS.open("/");
if (!root) {
Serial.println(F("Failed to open directory"));
return;
}
if (!root.isDirectory()) {
Serial.println(F("Not a directory"));
return;
}
fs::File file = root.openNextFile();
while (file) {
if (file.isDirectory()) {
Serial.print("DIR : ");
String fileName = file.name();
Serial.print(fileName);
} else {
String fileName = file.name();
Serial.print(" " + fileName);
// File path can be 31 characters maximum in SPIFFS
int spaces = 33 - fileName.length(); // Tabulate nicely
if (spaces < 1) spaces = 1;
while (spaces--) Serial.print(" ");
String fileSize = (String) file.size();
spaces = 10 - fileSize.length(); // Tabulate nicely
if (spaces < 1) spaces = 1;
while (spaces--) Serial.print(" ");
Serial.println(fileSize + " bytes");
}
file = root.openNextFile();
}
Serial.println(FPSTR(line));
Serial.println();
delay(1000);
}
// FreeRTOS task: connects to the configured Wi-Fi network.
// Clears isWIFIConnected, starts the connection and polls WiFi.status() every
// 500 ticks (printing a dot each time) until WL_CONNECTED is reported, then
// sets isWIFIConnected and idles forever.
//
// pvParameters  unused task parameter.
void wifiConnect(void *pvParameters){
  isWIFIConnected = false;

  // String literals are immutable, so they must be bound to const pointers.
  const char* ssid = "<YOUR_WIFI_SSID>";
  const char* password = "<YOUR_WIFI_PW>";

  WiFi.begin(ssid, password);
  while(WiFi.status() != WL_CONNECTED){
    vTaskDelay(500);
    Serial.print(".");
  }
  isWIFIConnected = true;

  // Keep the task alive without doing any further work.
  while(true){
    vTaskDelay(1000);
  }
}
void uploadFile(){
file = SPIFFS.open(filename, FILE_READ);
if(!file){
Serial.println("FILE IS NOT AVAILABLE!");
return;
}
Serial.println("===> Upload FILE to Node.js Server");
HTTPClient client;
client.begin("http://192.168.1.124:8888/uploadAudio");
client.addHeader("Content-Type", "audio/wav");
int httpResponseCode = client.sendRequest("POST", &file, file.size());
Serial.print("httpResponseCode : ");
Serial.println(httpResponseCode);
if(httpResponseCode == 200){
String response = client.getString();
Serial.println("==================== Transcription ====================");
Serial.println(response);
Serial.println("==================== End ====================");
}else{
Serial.println("Error");
}
file.close();
client.end();
}

View File

@@ -0,0 +1,60 @@
// "file-system" package; used by the request handler to create the write
// stream that stores the uploaded recording.
var fs = require("file-system");
const http = require("http");
// HTTP server instance; the request handler is registered on it and it is
// started at the bottom of this file.
const server = http.createServer();
// Where the uploaded audio is stored and where speechToTextAPI() reads it from.
const fileName = "./resources/recording.wav";
// Handles incoming HTTP requests.
//
// POST /uploadAudio: streams the request body into `fileName`, waits until
// the file has been completely written, runs speech-to-text on it and replies
// 200 with the transcription as plain text. Replies 500 if storing or
// transcribing the audio fails.
// Anything else: logs the problem and replies 405.
server.on("request", (request, response) => {
  if (request.method === "POST" && request.url === "/uploadAudio") {
    // The body is binary audio, so no text encoding is set on the stream.
    const recordingFile = fs.createWriteStream(fileName);

    // Every failure path answers the client instead of leaving it hanging.
    const fail = (message, err) => {
      console.error(message, err);
      if (!response.headersSent) {
        response.writeHead(500, { "Content-Type": "text/plain" });
      }
      response.end(message);
    };

    recordingFile.on("error", (err) => fail("Failed to store the recording", err));

    // "finish" fires only after all data has been flushed to the file, so the
    // transcription never reads a partially written recording.
    recordingFile.on("finish", async function() {
      try {
        const transciption = await speechToTextAPI();
        response.writeHead(200, { "Content-Type": "text/plain" });
        response.end(transciption);
      } catch (err) {
        fail("Transcription failed", err);
      }
    });

    // pipe() forwards every chunk and ends the file stream when the upload ends.
    request.pipe(recordingFile);
  } else {
    console.log("Error Check your POST request");
    response.writeHead(405, { "Content-Type": "text/plain" });
    // Without end() the client would wait forever for the response to complete.
    response.end();
  }
});
/**
 * Sends the stored recording (`fileName`) to Google Cloud Speech-to-Text and
 * returns the recognised text.
 *
 * The audio is submitted base64-encoded as LINEAR16, 16000 Hz, "en-US".
 * The first alternative of every result is taken and the pieces are joined
 * with newlines. The transcription is also logged to the console.
 *
 * @returns {Promise<string>} the transcription
 */
async function speechToTextAPI() {
  const speech = require("@google-cloud/speech");
  const fs = require("fs");

  const client = new speech.SpeechClient();

  // The API expects the audio content as a base64 string.
  const audioBytes = fs.readFileSync(fileName).toString("base64");

  const [response] = await client.recognize({
    audio: { content: audioBytes },
    config: {
      encoding: "LINEAR16",
      sampleRateHertz: 16000,
      languageCode: "en-US"
    }
  });

  // Keep only the top alternative of each recognised segment.
  const pieces = [];
  for (const result of response.results) {
    pieces.push(result.alternatives[0].transcript);
  }
  const transcription = pieces.join("\n");

  console.log(`Transcription: ${transcription}`);
  return transcription;
}
// TCP port the upload server listens on.
const port = 8888;
// Start accepting connections.
server.listen(port);
console.log(`Listening at ${port}`);

View File

@@ -0,0 +1,35 @@
/**
 * Standalone check of the Speech-to-Text setup: transcribes
 * "./resources/recording.wav" (LINEAR16, 16000 Hz, "en-US") with Google Cloud
 * Speech-to-Text and logs the result.
 */
async function main() {
  const speech = require("@google-cloud/speech");
  const fs = require("fs");

  const client = new speech.SpeechClient();

  // The name of the audio file to transcribe
  const fileName = "./resources/recording.wav";

  // The API expects the audio content as a base64 string.
  const audioBytes = fs.readFileSync(fileName).toString("base64");

  const [response] = await client.recognize({
    audio: { content: audioBytes },
    config: {
      encoding: "LINEAR16",
      sampleRateHertz: 16000,
      languageCode: "en-US"
    }
  });

  // Keep only the top alternative of each recognised segment.
  const pieces = [];
  for (const result of response.results) {
    pieces.push(result.alternatives[0].transcript);
  }
  const transcription = pieces.join("\n");

  console.log(`Transcription: ${transcription}`);
}

// Run the script; any failure is reported on stderr.
main().catch(console.error);