Detect specific spoken keywords and wake phrases in real time using decibri and sherpa-onnx. Runs entirely offline with no API key, no cloud service, and no network dependency.
This integration captures live microphone audio using decibri and feeds it to a sherpa-onnx keyword spotting (KWS) engine. When a user speaks one of your defined keywords or phrases, the engine detects it and reports which phrase was matched.
Choose this for wake word detection, voice command triggers, or any scenario where you need to listen for specific phrases without transcribing everything.
Download a KWS model from the sherpa-onnx releases. For example, the Zipformer transducer KWS model trained on WenetSpeech (a Mandarin Chinese corpus, so its keywords are Chinese phrases):
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
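This creates a directory containing the model files: the encoder, decoder, and joiner ONNX models (named like encoder-epoch-12-avg-2-chunk-16-left-64.onnx), tokens.txt, and a keywords.txt file. BPE-based models additionally include a bpe.model file.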
Define your model paths and the keywords you want to detect. Keywords are read from the keywords.txt file, where each keyword is written as a sequence of tokens taken from the model's tokens.txt file (BPE pieces for BPE-based models).
const Decibri = require('decibri');
const sherpa = require('sherpa-onnx');
// Directory created by extracting the downloaded model archive.
const modelDir = './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01';

/** Build the path of a file that lives inside the model directory. */
const modelFile = (fileName) => `${modelDir}/${fileName}`;

// Keyword spotter settings: feature extraction, transducer model files,
// token table, and the list of keywords to listen for.
const config = {
  featConfig: {
    sampleRate: 16000,
    featureDim: 80,
  },
  modelConfig: {
    transducer: {
      encoder: modelFile('encoder-epoch-12-avg-2-chunk-16-left-64.onnx'),
      decoder: modelFile('decoder-epoch-12-avg-2-chunk-16-left-64.onnx'),
      joiner: modelFile('joiner-epoch-12-avg-2-chunk-16-left-64.onnx'),
    },
    tokens: modelFile('tokens.txt'),
    numThreads: 2,
    provider: 'cpu',
  },
  keywordsFile: modelFile('keywords.txt'),
};
Instantiate the keyword spotter and create a detection stream.
// Build the keyword spotter from the config defined above.
const kws = new sherpa.KeywordSpotter(config);
// One stream receives the audio and carries the decoding state for it.
const stream = kws.createStream();
Create a decibri instance at 16 kHz mono to match the model's expected input.
// 16 kHz, single channel: the same sample rate declared in featConfig.
const mic = new Decibri({ sampleRate: 16000, channels: 1 });
Convert each incoming Int16 buffer to Float32, feed it to the KWS engine, and check for keyword detections.
// Handle each chunk of microphone audio: convert it, feed it to the keyword
// spotter, and report any keyword that is detected.
mic.on('data', (chunk) => {
  // Convert Int16 PCM to Float32.
  // A DataView is used rather than an Int16Array view because typed-array
  // views throw a RangeError when chunk.byteOffset is not a multiple of 2.
  // Assumes little-endian samples, which is what the Int16Array view read on
  // little-endian hosts.
  const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
  const sampleCount = Math.floor(chunk.byteLength / 2);
  const float32 = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    float32[i] = view.getInt16(i * 2, true) / 32768;
  }

  // Feed audio to the KWS engine
  stream.acceptWaveform(16000, float32);

  // Decode while frames are ready, checking for a detection after every step
  // so a keyword found mid-chunk is not overwritten by later decoding.
  while (kws.isReady(stream)) {
    kws.decode(stream);

    const keyword = kws.getResult(stream).keyword;
    if (keyword) {
      console.log(`Detected: "${keyword}"`);
      // The sherpa-onnx examples reset the stream right after a detection so
      // decoding starts fresh and the same hit is not reported again.
      kws.reset(stream);
    }
  }
});
Stop the microphone and free resources when the user presses Ctrl+C.
/**
 * Stop microphone capture, free the detection stream and the keyword
 * spotter, then exit with status 0.
 */
function shutdown() {
  mic.stop();
  stream.free();
  kws.free();
  process.exit(0);
}

// Ctrl+C delivers SIGINT; clean up before leaving.
process.on('SIGINT', shutdown);

console.log('Listening for keywords... (Ctrl+C to stop)');
const Decibri = require('decibri');
const sherpa = require('sherpa-onnx');
// Directory created by extracting the downloaded model archive.
const modelDir = './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01';

/** Build the path of a file that lives inside the model directory. */
const modelFile = (fileName) => `${modelDir}/${fileName}`;

// Keyword spotter settings: feature extraction, transducer model files,
// token table, and the list of keywords to listen for.
const config = {
  featConfig: {
    sampleRate: 16000,
    featureDim: 80,
  },
  modelConfig: {
    transducer: {
      encoder: modelFile('encoder-epoch-12-avg-2-chunk-16-left-64.onnx'),
      decoder: modelFile('decoder-epoch-12-avg-2-chunk-16-left-64.onnx'),
      joiner: modelFile('joiner-epoch-12-avg-2-chunk-16-left-64.onnx'),
    },
    tokens: modelFile('tokens.txt'),
    numThreads: 2,
    provider: 'cpu',
  },
  keywordsFile: modelFile('keywords.txt'),
};
// Build the keyword spotter from the config and open the stream that
// receives the microphone audio.
const kws = new sherpa.KeywordSpotter(config);
const stream = kws.createStream();
// Capture 16 kHz single-channel audio, the same sample rate declared in featConfig.
const mic = new Decibri({ sampleRate: 16000, channels: 1 });
// Handle each chunk of microphone audio: convert it, feed it to the keyword
// spotter, and report any keyword that is detected.
mic.on('data', (chunk) => {
  // Convert Int16 PCM to Float32.
  // A DataView is used rather than an Int16Array view because typed-array
  // views throw a RangeError when chunk.byteOffset is not a multiple of 2.
  // Assumes little-endian samples, which is what the Int16Array view read on
  // little-endian hosts.
  const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
  const sampleCount = Math.floor(chunk.byteLength / 2);
  const float32 = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    float32[i] = view.getInt16(i * 2, true) / 32768;
  }

  // Feed audio to the KWS engine
  stream.acceptWaveform(16000, float32);

  // Decode while frames are ready, checking for a detection after every step
  // so a keyword found mid-chunk is not overwritten by later decoding.
  while (kws.isReady(stream)) {
    kws.decode(stream);

    const keyword = kws.getResult(stream).keyword;
    if (keyword) {
      console.log(`Detected: "${keyword}"`);
      // The sherpa-onnx examples reset the stream right after a detection so
      // decoding starts fresh and the same hit is not reported again.
      kws.reset(stream);
    }
  }
});
/**
 * Stop microphone capture, free the detection stream and the keyword
 * spotter, then exit with status 0.
 */
function shutdown() {
  mic.stop();
  stream.free();
  kws.free();
  process.exit(0);
}

// Ctrl+C delivers SIGINT; clean up before leaving.
process.on('SIGINT', shutdown);

console.log('Listening for keywords... (Ctrl+C to stop)');