mirror of
https://github.com/0015/ThatProject.git
synced 2026-01-12 09:17:42 +03:00
ESP32 | INMP441 | Tutorial - [Part.5] Speech-to-Text Powered by Google Cloud machine learning
This commit is contained in:
@@ -0,0 +1,284 @@
|
|||||||
|
#include <driver/i2s.h>
|
||||||
|
#include <SPIFFS.h>
|
||||||
|
#include <WiFi.h>
|
||||||
|
#include <HTTPClient.h>
|
||||||
|
|
||||||
|
// ---- I2S pin assignment (GPIO numbers handed to i2s_set_pin) ----
#define I2S_SCK 2   // bit clock
#define I2S_WS 15   // word select
#define I2S_SD 13   // serial data in
#define I2S_PORT I2S_NUM_0

// ---- Capture format ----
#define I2S_CHANNEL_NUM (1)
#define I2S_SAMPLE_RATE (16000)
#define I2S_SAMPLE_BITS (16)

// ---- Buffering / recording length ----
#define I2S_READ_LEN (16 * 1024)  // bytes requested per i2s_read() call
#define RECORD_TIME (20) //Seconds

// Number of audio payload bytes in one recording:
// channels * samples per second * bytes per sample * seconds.
#define FLASH_RECORD_SIZE (I2S_CHANNEL_NUM * I2S_SAMPLE_RATE * I2S_SAMPLE_BITS / 8 * RECORD_TIME)
|
||||||
|
|
||||||
|
// Handle to the recording on SPIFFS; opened for writing by SPIFFSInit() and
// re-opened for reading by uploadFile().
File file;

// SPIFFS path of the recording.
const char filename[] = "/recording.wav";

// Size in bytes of the WAV header produced by wavHeader().
const int headerSize = 44;

// Set to true by wifiConnect() once the connection is up; checked by
// i2s_adc() before uploading.
bool isWIFIConnected = false;
|
||||||
|
|
||||||
|
// Arduino entry point. Brings up the serial port, SPIFFS and the I2S driver,
// then starts the recording task and, half a second later, the Wi-Fi task.
void setup() {
  const uint32_t recordTaskStack = 1024 * 2;
  const uint32_t wifiTaskStack = 4096;

  Serial.begin(115200);
  SPIFFSInit();
  i2sInit();

  // Recording runs at priority 1, Wi-Fi connection handling at priority 0.
  xTaskCreate(i2s_adc, "i2s_adc", recordTaskStack, NULL, 1, NULL);
  delay(500);
  xTaskCreate(wifiConnect, "wifi_Connect", wifiTaskStack, NULL, 0, NULL);
}
|
||||||
|
|
||||||
|
// Arduino main loop. Left empty on purpose: recording, upload and Wi-Fi
// handling all run in the tasks created by setup().
void loop() {
}
|
||||||
|
|
||||||
|
void SPIFFSInit(){
|
||||||
|
if(!SPIFFS.begin(true)){
|
||||||
|
Serial.println("SPIFFS initialisation failed!");
|
||||||
|
while(1) yield();
|
||||||
|
}
|
||||||
|
|
||||||
|
SPIFFS.remove(filename);
|
||||||
|
file = SPIFFS.open(filename, FILE_WRITE);
|
||||||
|
if(!file){
|
||||||
|
Serial.println("File is not available!");
|
||||||
|
}
|
||||||
|
|
||||||
|
byte header[headerSize];
|
||||||
|
wavHeader(header, FLASH_RECORD_SIZE);
|
||||||
|
|
||||||
|
file.write(header, headerSize);
|
||||||
|
listSPIFFS();
|
||||||
|
}
|
||||||
|
|
||||||
|
void i2sInit(){
|
||||||
|
i2s_config_t i2s_config = {
|
||||||
|
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
|
||||||
|
.sample_rate = I2S_SAMPLE_RATE,
|
||||||
|
.bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
|
||||||
|
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
|
||||||
|
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
|
||||||
|
.intr_alloc_flags = 0,
|
||||||
|
.dma_buf_count = 64,
|
||||||
|
.dma_buf_len = 1024,
|
||||||
|
.use_apll = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
|
||||||
|
|
||||||
|
const i2s_pin_config_t pin_config = {
|
||||||
|
.bck_io_num = I2S_SCK,
|
||||||
|
.ws_io_num = I2S_WS,
|
||||||
|
.data_out_num = -1,
|
||||||
|
.data_in_num = I2S_SD
|
||||||
|
};
|
||||||
|
|
||||||
|
i2s_set_pin(I2S_PORT, &pin_config);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Rescales raw sample bytes from s_buff into d_buff.
//
// The input is consumed two bytes at a time. Each pair is combined
// little-endian, only the low 12 bits are kept, and the result is scaled by
// 256 / 2048 (i.e. divided by 8). Two bytes are written per input pair: a
// zero byte followed by the scaled value truncated to 8 bits, so 12-bit
// values of 2048 and above wrap around.
//
// d_buff  destination, must hold at least `len` rounded down to even bytes
// s_buff  source bytes
// len     number of source bytes; a trailing odd byte is ignored so the
//         function never reads or writes past `len`
void i2s_adc_data_scale(uint8_t * d_buff, uint8_t* s_buff, uint32_t len)
{
    uint32_t out = 0;
    // `in + 1 < len` guarantees that both bytes of the pair are inside the buffer.
    for (uint32_t in = 0; in + 1 < len; in += 2) {
        const uint32_t dac_value =
            (static_cast<uint32_t>(s_buff[in + 1] & 0xf) << 8) | s_buff[in];
        d_buff[out++] = 0;
        d_buff[out++] = static_cast<uint8_t>(dac_value * 256 / 2048);
    }
}
|
||||||
|
|
||||||
|
void i2s_adc(void *arg)
|
||||||
|
{
|
||||||
|
|
||||||
|
int i2s_read_len = I2S_READ_LEN;
|
||||||
|
int flash_wr_size = 0;
|
||||||
|
size_t bytes_read;
|
||||||
|
|
||||||
|
char* i2s_read_buff = (char*) calloc(i2s_read_len, sizeof(char));
|
||||||
|
uint8_t* flash_write_buff = (uint8_t*) calloc(i2s_read_len, sizeof(char));
|
||||||
|
|
||||||
|
i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
|
||||||
|
i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
|
||||||
|
|
||||||
|
Serial.println(" *** Recording Start *** ");
|
||||||
|
while (flash_wr_size < FLASH_RECORD_SIZE) {
|
||||||
|
//read data from I2S bus, in this case, from ADC.
|
||||||
|
i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
|
||||||
|
//example_disp_buf((uint8_t*) i2s_read_buff, 64);
|
||||||
|
//save original data from I2S(ADC) into flash.
|
||||||
|
i2s_adc_data_scale(flash_write_buff, (uint8_t*)i2s_read_buff, i2s_read_len);
|
||||||
|
file.write((const byte*) flash_write_buff, i2s_read_len);
|
||||||
|
flash_wr_size += i2s_read_len;
|
||||||
|
ets_printf("Sound recording %u%%\n", flash_wr_size * 100 / FLASH_RECORD_SIZE);
|
||||||
|
ets_printf("Never Used Stack Size: %u\n", uxTaskGetStackHighWaterMark(NULL));
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
|
|
||||||
|
free(i2s_read_buff);
|
||||||
|
i2s_read_buff = NULL;
|
||||||
|
free(flash_write_buff);
|
||||||
|
flash_write_buff = NULL;
|
||||||
|
|
||||||
|
listSPIFFS();
|
||||||
|
|
||||||
|
if(isWIFIConnected){
|
||||||
|
uploadFile();
|
||||||
|
}
|
||||||
|
|
||||||
|
vTaskDelete(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Debug helper: dumps `length` bytes of `buf` to stdout as two-digit hex
// values, eight per line, framed by "======" marker lines.
void example_disp_buf(uint8_t* buf, int length)
{
    printf("======\n");

    int shown = 0;
    while (shown < length) {
        printf("%02x ", buf[shown]);
        ++shown;
        // Break the line after every eighth byte.
        if (shown % 8 == 0) {
            printf("\n");
        }
    }

    printf("======\n");
}
|
||||||
|
|
||||||
|
// Fills `header` (at least headerSize = 44 bytes) with a canonical RIFF/WAVE
// header for uncompressed PCM audio: mono, 16000 Hz, 16 bits per sample.
//
// header   destination buffer
// wavSize  number of audio payload bytes that will follow the header
//
// All multi-byte fields are little-endian. The byte-rate field is derived
// from the format values (16000 * 1 * 16 / 8 = 32000); the previous
// hand-written bytes encoded 0x00017D00 = 97536 there.
// The format constants below must stay in sync with the I2S_SAMPLE_RATE,
// I2S_CHANNEL_NUM and I2S_SAMPLE_BITS settings used for capture.
void wavHeader(byte* header, int wavSize){
  // Stores the low `width` bytes of `value` little-endian at header[offset].
  auto putLE = [header](int offset, unsigned int value, int width) {
    for (int i = 0; i < width; ++i) {
      header[offset + i] = (byte)((value >> (8 * i)) & 0xFF);
    }
  };
  // Copies a four-character chunk tag to header[offset].
  auto putTag = [header](int offset, const char* tag) {
    for (int i = 0; i < 4; ++i) {
      header[offset + i] = (byte)tag[i];
    }
  };

  const unsigned int sampleRate = 16000;
  const unsigned int channels = 1;
  const unsigned int bitsPerSample = 16;
  const unsigned int blockAlign = channels * bitsPerSample / 8;  // bytes per sample frame
  const unsigned int byteRate = sampleRate * blockAlign;         // bytes per second

  putTag(0, "RIFF");
  putLE(4, wavSize + headerSize - 8, 4);  // RIFF chunk size: everything after this field
  putTag(8, "WAVE");

  putTag(12, "fmt ");
  putLE(16, 16, 4);             // fmt chunk size
  putLE(20, 1, 2);              // audio format 1 = PCM
  putLE(22, channels, 2);
  putLE(24, sampleRate, 4);
  putLE(28, byteRate, 4);
  putLE(32, blockAlign, 2);
  putLE(34, bitsPerSample, 2);

  putTag(36, "data");
  putLE(40, wavSize, 4);        // payload size
}
|
||||||
|
|
||||||
|
|
||||||
|
void listSPIFFS(void) {
|
||||||
|
Serial.println(F("\r\nListing SPIFFS files:"));
|
||||||
|
static const char line[] PROGMEM = "=================================================";
|
||||||
|
|
||||||
|
Serial.println(FPSTR(line));
|
||||||
|
Serial.println(F(" File name Size"));
|
||||||
|
Serial.println(FPSTR(line));
|
||||||
|
|
||||||
|
fs::File root = SPIFFS.open("/");
|
||||||
|
if (!root) {
|
||||||
|
Serial.println(F("Failed to open directory"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!root.isDirectory()) {
|
||||||
|
Serial.println(F("Not a directory"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs::File file = root.openNextFile();
|
||||||
|
while (file) {
|
||||||
|
|
||||||
|
if (file.isDirectory()) {
|
||||||
|
Serial.print("DIR : ");
|
||||||
|
String fileName = file.name();
|
||||||
|
Serial.print(fileName);
|
||||||
|
} else {
|
||||||
|
String fileName = file.name();
|
||||||
|
Serial.print(" " + fileName);
|
||||||
|
// File path can be 31 characters maximum in SPIFFS
|
||||||
|
int spaces = 33 - fileName.length(); // Tabulate nicely
|
||||||
|
if (spaces < 1) spaces = 1;
|
||||||
|
while (spaces--) Serial.print(" ");
|
||||||
|
String fileSize = (String) file.size();
|
||||||
|
spaces = 10 - fileSize.length(); // Tabulate nicely
|
||||||
|
if (spaces < 1) spaces = 1;
|
||||||
|
while (spaces--) Serial.print(" ");
|
||||||
|
Serial.println(fileSize + " bytes");
|
||||||
|
}
|
||||||
|
|
||||||
|
file = root.openNextFile();
|
||||||
|
}
|
||||||
|
|
||||||
|
Serial.println(FPSTR(line));
|
||||||
|
Serial.println();
|
||||||
|
delay(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
void wifiConnect(void *pvParameters){
|
||||||
|
isWIFIConnected = false;
|
||||||
|
char* ssid = "<YOUR_WIFI_SSID>";
|
||||||
|
char* password = "<YOUR_WIFI_PW>";
|
||||||
|
|
||||||
|
WiFi.begin(ssid, password);
|
||||||
|
while(WiFi.status() != WL_CONNECTED){
|
||||||
|
vTaskDelay(500);
|
||||||
|
Serial.print(".");
|
||||||
|
}
|
||||||
|
isWIFIConnected = true;
|
||||||
|
while(true){
|
||||||
|
vTaskDelay(1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void uploadFile(){
|
||||||
|
file = SPIFFS.open(filename, FILE_READ);
|
||||||
|
if(!file){
|
||||||
|
Serial.println("FILE IS NOT AVAILABLE!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Serial.println("===> Upload FILE to Node.js Server");
|
||||||
|
|
||||||
|
HTTPClient client;
|
||||||
|
client.begin("http://192.168.1.124:8888/uploadAudio");
|
||||||
|
client.addHeader("Content-Type", "audio/wav");
|
||||||
|
int httpResponseCode = client.sendRequest("POST", &file, file.size());
|
||||||
|
Serial.print("httpResponseCode : ");
|
||||||
|
Serial.println(httpResponseCode);
|
||||||
|
|
||||||
|
if(httpResponseCode == 200){
|
||||||
|
String response = client.getString();
|
||||||
|
Serial.println("==================== Transcription ====================");
|
||||||
|
Serial.println(response);
|
||||||
|
Serial.println("==================== End ====================");
|
||||||
|
}else{
|
||||||
|
Serial.println("Error");
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
|
client.end();
|
||||||
|
}
|
||||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,60 @@
|
|||||||
|
var fs = require("file-system");
|
||||||
|
const http = require("http");
|
||||||
|
const server = http.createServer();
|
||||||
|
const fileName = "./resources/recording.wav";
|
||||||
|
|
||||||
|
// Handles incoming HTTP requests.
//
// POST /uploadAudio: the request body is streamed to `fileName`; once the
// file has been completely flushed to disk it is transcribed and the text is
// returned with status 200. Any failure while saving or transcribing yields
// a 500 so the client is never left waiting.
// Everything else: answered with 405.
server.on("request", (request, response) => {
  // Sends a plain-text reply unless one has already been started.
  const reply = (status, body) => {
    if (response.headersSent) {
      return;
    }
    response.writeHead(status, { "Content-Type": "text/plain" });
    response.end(body);
  };

  if (request.method === "POST" && request.url === "/uploadAudio") {
    // The body is binary WAV data, so no text encoding is set on the stream.
    const recordingFile = fs.createWriteStream(fileName);

    recordingFile.on("error", (err) => {
      console.error("Failed to store the recording:", err);
      reply(500, "Failed to store the recording");
    });

    request.on("error", (err) => {
      console.error("Upload aborted:", err);
      recordingFile.destroy();
    });

    // "finish" fires only after all data has been handed to the file system,
    // so the transcription step never reads a half-written file.
    recordingFile.on("finish", async () => {
      try {
        const transcription = await speechToTextAPI();
        reply(200, transcription);
      } catch (err) {
        console.error("Transcription failed:", err);
        reply(500, "Transcription failed");
      }
    });

    // pipe() forwards every chunk and ends the file when the request ends.
    request.pipe(recordingFile);
  } else {
    console.log("Error Check your POST request");
    reply(405, "Method Not Allowed");
  }
});
|
||||||
|
|
||||||
|
// Sends the stored recording (`fileName`) to Google Cloud Speech-to-Text and
// resolves with the transcript: the first alternative of every result,
// joined with newlines. The transcript is also logged to the console.
async function speechToTextAPI() {
  const speech = require("@google-cloud/speech");
  const fs = require("fs");

  const client = new speech.SpeechClient();

  // The API expects the audio content base64-encoded.
  const audioBytes = fs.readFileSync(fileName).toString("base64");

  const [response] = await client.recognize({
    audio: { content: audioBytes },
    config: {
      encoding: "LINEAR16",
      sampleRateHertz: 16000,
      languageCode: "en-US"
    }
  });

  const lines = [];
  for (const result of response.results) {
    lines.push(result.alternatives[0].transcript);
  }
  const transcription = lines.join("\n");

  console.log(`Transcription: ${transcription}`);
  return transcription;
}
|
||||||
|
|
||||||
|
// Start accepting connections; the ESP32 sketch posts to this port.
const port = 8888;
server.listen(port);
console.log(`Listening at ${port}`);
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
// Standalone check: transcribes ./resources/recording.wav with Google Cloud
// Speech-to-Text and logs the result. Errors are reported via console.error.
async function main() {
  const speech = require("@google-cloud/speech");
  const fs = require("fs");

  const client = new speech.SpeechClient();

  // The name of the audio file to transcribe
  const fileName = "./resources/recording.wav";

  // The API expects the audio content base64-encoded.
  const audioBytes = fs.readFileSync(fileName).toString("base64");

  const [response] = await client.recognize({
    audio: { content: audioBytes },
    config: {
      encoding: "LINEAR16",
      sampleRateHertz: 16000,
      languageCode: "en-US"
    }
  });

  // Use the first alternative of every result, one per line.
  const pieces = [];
  for (const result of response.results) {
    pieces.push(result.alternatives[0].transcript);
  }
  const transcription = pieces.join("\n");

  console.log(`Transcription: ${transcription}`);
}

main().catch(console.error);
|
||||||
Reference in New Issue
Block a user