ESP32 | INMP441 | Tutorial - [Part.5] Speech-to-Text Powered by Google Cloud machine learning

2026-01-12 17:27:43 +03:00 · 2020-03-25 01:54:11 -07:00
parent a5b975ac8a
commit dca18e76a9
5 changed files with 379 additions and 0 deletions
--- a/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/resources/_recording.wav
+++ b/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/resources/_recording.wav
--- a/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/resources/recording.wav
+++ b/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/resources/recording.wav
--- a/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/speechAPIServer.js
+++ b/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/speechAPIServer.js
@@ -0,0 +1,60 @@
+var fs = require("file-system");
+const http = require("http");
+const server = http.createServer(); // HTTP server; its "request" listener is registered further down in this file
+const fileName = "./resources/recording.wav"; // path the uploaded audio is written to and later read back from for transcription
+
+server.on("request", (request, response) => {
+	if (request.method == "POST" && request.url === "/uploadAudio") {
+		var recordingFile = fs.createWriteStream(fileName, { encoding: "utf8" });
+		request.on("data", function(data) {
+			recordingFile.write(data);
+		});
+
+		request.on("end", async function() {
+			recordingFile.end();
+			const transciption = await speechToTextAPI();
+			response.writeHead(200, { "Content-Type": "text/plain" });
+			response.end(transciption);
+		});
+	} else {
+		console.log("Error Check your POST request");
+		response.writeHead(405, { "Content-Type": "text/plain" });
+	}
+});
+
+async function speechToTextAPI() {
+	// Imports the Google Cloud client library
+	const speech = require("@google-cloud/speech");
+	const fs = require("fs");
+
+	// Creates a client
+	const client = new speech.SpeechClient();
+
+	// Reads a local audio file and converts it to base64
+	const file = fs.readFileSync(fileName);
+	const audioBytes = file.toString("base64");
+
+	// The audio file's encoding, sample rate in hertz, and BCP-47 language code
+	const audio = {
+		content: audioBytes
+	};
+	const config = {
+		encoding: "LINEAR16",
+		sampleRateHertz: 16000,
+		languageCode: "en-US"
+	};
+	const request = {
+		audio: audio,
+		config: config
+	};
+
+	// Detects speech in the audio file
+	const [response] = await client.recognize(request);
+	const transcription = response.results.map((result) => result.alternatives[0].transcript).join("\n");
+	console.log(`Transcription: ${transcription}`);
+	return transcription;
+}
+
+const port = 8888;
+server.listen(port);
+console.log(`Listening at ${port}`);
--- a/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/speechAPItest.js
+++ b/ESP32_MICROPHONE/ESP32_INMP441_SPEECH_TO_TEXT/NodejsServer/speechAPItest.js
@@ -0,0 +1,35 @@
+async function main() {
+	// Imports the Google Cloud client library
+	const speech = require("@google-cloud/speech");
+	const fs = require("fs");
+
+	// Creates a client
+	const client = new speech.SpeechClient();
+
+	// The name of the audio file to transcribe
+	const fileName = "./resources/recording.wav";
+
+	// Reads a local audio file and converts it to base64
+	const file = fs.readFileSync(fileName);
+	const audioBytes = file.toString("base64");
+
+	// The audio file's encoding, sample rate in hertz, and BCP-47 language code
+	const audio = {
+		content: audioBytes
+	};
+	const config = {
+		encoding: "LINEAR16",
+		sampleRateHertz: 16000,
+		languageCode: "en-US"
+	};
+	const request = {
+		audio: audio,
+		config: config
+	};
+
+	// Detects speech in the audio file
+	const [response] = await client.recognize(request);
+	const transcription = response.results.map((result) => result.alternatives[0].transcript).join("\n");
+	console.log(`Transcription: ${transcription}`);
+}
+main().catch(console.error);