Speech2Text using WL's built-in neural network
Microphone capture function

The .js cell below defines a front-end function GetVoice: it records microphone audio for a given number of milliseconds and resolves with the clip encoded as a base64 string (OGG/Opus). The cell returns a small status label so the recording state stays visible in the output.
.js
// Status label shown in the cell output while recording
const label = document.createElement('div');
label.classList.add('rounded', 'p-1', 'text-small');
label.innerText = "Not recording";
label.style.background = 'lightgray';

// GetVoice[interval] records microphone audio for `interval` milliseconds
// and resolves with the clip as a base64-encoded OGG/Opus string
core.GetVoice = async (args, env) => {
  const interval = await interpretate(args[0], env);
  const promise = new Deferred();

  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
      const mediaRecorder = new MediaRecorder(stream);
      let chunks = [];

      // Collect encoded audio chunks as they arrive
      mediaRecorder.ondataavailable = event => {
        chunks.push(event.data);
      };

      // When recording stops, assemble the chunks and read them back as base64
      mediaRecorder.onstop = () => {
        const audioBlob = new Blob(chunks, { type: 'audio/ogg; codecs=opus' });
        const reader = new FileReader();
        reader.onloadend = () => {
          label.style.background = 'lightgray';
          label.innerText = "Finished";
          // Drop the "data:...;base64," prefix of the data URL
          const base64data = reader.result.split(',')[1];
          promise.resolve(base64data);
        };
        reader.readAsDataURL(audioBlob);
      };

      mediaRecorder.start();
      label.innerText = "Recording...";
      label.style.background = 'green';

      // Stop automatically after the requested interval
      setTimeout(() => {
        mediaRecorder.stop();
      }, interval);
    })
    .catch(error => {
      console.error('Error accessing microphone:', error);
      label.innerText = "Error accessing microphone";
      label.style.background = 'red';
    });

  return promise.promise;
};

return label;
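
The Deferred helper used above is provided by the WLJS front-end runtime: it exposes a promise together with its resolve/reject callbacks, so the recorder's event handlers can settle it later. If your environment does not ship one, a minimal sketch looks like this (an illustration, not the runtime's actual implementation):

// Minimal Deferred sketch: a Promise with its resolve/reject exposed as methods
class Deferred {
  constructor() {
    this.promise = new Promise((resolve, reject) => {
      this.resolve = resolve;
      this.reject = reject;
    });
  }
}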
Widget

The widget wires both sides together: clicking the button calls GetVoice in the browser via FrontFetchAsync, and the returned base64 string is decoded with ImportString and transcribed by SpeechRecognize on the kernel.
Module[{text = " ", win = CurrentWindow[], status = " "}, Panel[Column[{
  {EventHandler[InputButton["Speak"], Function[Null,
    status = "Listening...";
    (* record 3 seconds in the browser, then transcribe on the kernel *)
    Then[FrontFetchAsync[GetVoice[3000], "Window" -> win], Function[str,
      status = "Processing...";
      text = StringJoin[text, "\n", SpeechRecognize[ImportString[str, "Base64"], Method -> "NeuralNetwork"]];
      status = " ";
    ]]
  ]], TextView[status // Offload]} // Row,
  HTMLView[text // Offload, "Style" -> "width:500px"]
}], Style["Click and speak", 10]]]
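
To sanity-check the kernel-side recognition in isolation, you can run SpeechRecognize directly on a recorded audio file, skipping the browser round-trip (the file name below is a placeholder assumption):

(* placeholder file name; tests only the recognition step *)
SpeechRecognize[Import["sample.ogg"], Method -> "NeuralNetwork"]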