I am trying to build a siren detector that can recognize police and fire engine sirens. Since a siren is a long, continuous sound, I have found that using 2 second samples seems to do a good job at removing false positives. I think it is because most similar sounds do not last as long.
I have it working very reliably and it is able to pick up sirens even when they are very faint. It does have occasional false positives, such as trucks backing up, people singing, and jazz (a saxophone sounds similar).
Like @janvda I tried switching from MFCC to MFE and I am seeing similar things. The accuracy of the MFE based model is only a few percent lower than MFCC. When I run it against the training data, the accuracy is much lower. The MFE based model appears to be better at blocking false positives, but it is not as good at picking out sirens. This seems to be especially true when the siren is quieter, so the background silence gets amplified more during normalization.
I am using Arduino because I am lazy and they make it easy to record to SD and add in LoRa. I want to do continuous classification, perhaps with smaller sample windows. The documentation is really helpful but I wanted to see if there was some good code to start from or some pointers. I am targeting the Adafruit Feather Sense board, which is Nrf52840 based.
Could I use the run_nn_continuous() function as a starting point? I am sure you guys have already looked into this, so I want to check if there are some gotchas I will run into.
Awesome! I was able to pull together something that seems to be working well. I have 2 of the same HW setups so I am going to try running the continuous version against the standard version. Here is the basic gist of it:
// If your target is limited in memory remove this macro to save 10K RAM
#define EIDSP_QUANTIZE_FILTERBANK 0
/* Includes ---------------------------------------------------------------- */
#include <PDM.h>
#include <urban-sound_inference.h>
#include <edge-impulse-sdk/dsp/numpy.hpp>
#include <SPI.h>
#include <Wire.h>
#define BUFFER_LENGTH EI_CLASSIFIER_SLICE_SIZE //EI_CLASSIFIER_RAW_SAMPLE_COUNT
// Blinky on receipt
#define LED 13
/** Audio buffers, pointers and selectors */
typedef struct {
int16_t *buffer;       // double-buffer half A: ISR fills one half while loop() classifies the other
int16_t *other_buffer; // double-buffer half B
bool use_buffer;       // selects which half the ISR writes into; toggled by loop() after each slice
uint8_t buf_ready;     // 1 = a full slice is captured and ready for inference; loop() clears it to re-arm the ISR
uint32_t buf_count;    // samples written into the active half so far
uint32_t n_samples;    // samples per slice (set to BUFFER_LENGTH == EI_CLASSIFIER_SLICE_SIZE)
} inference_t;
static inference_t inference;           // shared between loop() and the PDM interrupt callback
static bool record_ready = false;       // set once PDM capture starts; only read in commented-out ISR code
static signed short sampleBuffer[2048]; // staging buffer the PDM FIFO is drained into
static bool debug_nn = false; // Set this to true to see e.g. features generated from the raw signal
static int samples_saved = 0;           // NOTE(review): appears unused in this excerpt
/**
* @brief Arduino setup function
*/
void setup()
{
Serial.begin(115200);
// summary of inferencing settings (from model_metadata.h)
ei_printf("Inferencing settings:\n");
ei_printf("\tInterval: %.2f ms.\n", (float)EI_CLASSIFIER_INTERVAL_MS);
ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
ei_printf("\tSample length: %d ms.\n", BUFFER_LENGTH / 16);
ei_printf("\tNo. of classes: %d\n", sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0]));
run_classifier_init();
if (microphone_inference_start(BUFFER_LENGTH) == false) {
ei_printf("ERR: Failed to setup audio sampling\r\n");
return;
}
}
// Starts negative so a full model window of slices is accumulated before
// predictions are first printed.
int print_results = -(EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW);
/**
* @brief Arduino main function. Runs the inferencing loop.
*/
void loop()
{
ei_printf("Recording...\n");
bool m = microphone_inference_record();
if (!m) {
ei_printf("ERR: Failed to record audio...\n");
return;
}
inference.use_buffer = !inference.use_buffer;
inference.buf_ready = 0;
inference.buf_count = 0;
ei_printf("Recording done\n");
signal_t signal;
signal.total_length = BUFFER_LENGTH;
signal.get_data = µphone_audio_signal_get_data;
ei_impulse_result_t result = { 0 };
EI_IMPULSE_ERROR r = run_classifier_continuous(&signal, &result, false);
if (r != EI_IMPULSE_OK) {
ei_printf("ERR: Failed to run classifier (%d)\n", r);
}
ei_printf("Amount already recorded: %d of %d, is it done: %d\n",inference.buf_count, inference.n_samples, inference.buf_ready);
if (++print_results >= (EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW >> 1)) {
// print the predictions
ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms.): \n",
result.timing.dsp, result.timing.classification, result.timing.anomaly);
for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
ei_printf(" %s: %.5f\n", result.classification[ix].label,
result.classification[ix].value);
}
#if EI_CLASSIFIER_HAS_ANOMALY == 1
ei_printf(" anomaly score: %.3f\n", result.anomaly);
#endif
print_results = 0;
}
int noise = (int) (result.classification[0].value * 100);
int siren = (int) (result.classification[1].value * 100);
}
/**
* @brief Printf function uses vsnprintf and output using Arduino Serial
*
* @param[in] format Variable argument list
*/
/**
 * @brief printf-style output routed through the Arduino Serial port.
 *
 * Formats the arguments into a static 1 KB buffer with vsnprintf and
 * writes the resulting C string to Serial. When SERIAL_SOUND is
 * defined, the formatted text is discarded instead of written.
 *
 * @param[in] format printf-style format string, followed by its arguments
 */
void ei_printf(const char *format, ...) {
    static char print_buf[1024] = { 0 };

    va_list ap;
    va_start(ap, format);
    int written = vsnprintf(print_buf, sizeof(print_buf), format, ap);
    va_end(ap);

#ifndef SERIAL_SOUND
    if (written > 0) {
        Serial.write(print_buf);
    }
#endif
}
/**
* @brief PDM buffer full callback
* Get data and call audio thread callback
*/
static void pdm_data_ready_inference_callback(void)
{
int bytesAvailable = PDM.available();
// read into the sample buffer
int bytesRead = PDM.read((char *)&sampleBuffer[0], bytesAvailable);
//if (record_ready == true || inference.buf_ready == 1) {
if (inference.buf_ready != 1) {
for(int i = 0; i < bytesRead>>1; i++) {
if (inference.use_buffer) {
inference.buffer[inference.buf_count++] = sampleBuffer[i];
} else {
inference.other_buffer[inference.buf_count++] = sampleBuffer[i];
}
if(inference.buf_count >= inference.n_samples) {
inference.buf_count = 0;
inference.buf_ready = 1;
break;
}
}
}
}
/**
* @brief Init inferencing struct and setup/start PDM
*
* @param[in] n_samples The n samples
*
* @return { description_of_the_return_value }
*/
/**
 * @brief Allocate the double buffer and configure/start the PDM microphone.
 *
 * @param[in] n_samples Number of 16-bit samples per slice (one buffer half)
 *
 * @return true on success; false if an allocation or PDM startup failed.
 *         On failure, everything acquired so far is released.
 */
static bool microphone_inference_start(uint32_t n_samples)
{
    inference.buffer = (int16_t *)malloc(n_samples * sizeof(int16_t));
    if (inference.buffer == NULL) {
        ei_printf("ERR: not enough memory for buffer 1\r\n");
        return false;
    }

    inference.other_buffer = (int16_t *)malloc(n_samples * sizeof(int16_t));
    if (inference.other_buffer == NULL) {
        ei_printf("ERR: not enough memory for buffer 2\r\n");
        free(inference.buffer); // don't leak the first half on partial failure
        inference.buffer = NULL;
        return false;
    }

    inference.buf_count = 0;
    inference.n_samples = n_samples;
    inference.buf_ready = 0;

    // configure the data receive callback
    PDM.onReceive(&pdm_data_ready_inference_callback);
    // optionally set the gain, defaults to 20
    PDM.setGain(60);
    PDM.setBufferSize(4096);

    // initialize PDM with:
    // - one channel (mono mode)
    // - a 16 kHz sample rate
    if (!PDM.begin(1, EI_CLASSIFIER_FREQUENCY)) {
        ei_printf("Failed to start PDM!");
        // The original fell through and returned true here, so setup()
        // never learned the microphone was dead. Clean up and fail.
        free(inference.buffer);
        free(inference.other_buffer);
        inference.buffer = NULL;
        inference.other_buffer = NULL;
        return false;
    }

    record_ready = true;
    return true;
}
/**
* @brief Wait on new data
*
* @return True when finished
*/
/**
 * @brief Block until the PDM ISR reports a full slice.
 *
 * Polls inference.buf_ready (raised from interrupt context) every 10 ms.
 *
 * @return Always true once a slice has been captured.
 */
static bool microphone_inference_record(void)
{
    for (;;) {
        if (inference.buf_ready != 0) {
            break;
        }
        delay(10);
    }
    return true;
}
/**
* Get raw audio signal data
*/
/**
 * Get raw audio signal data for the classifier.
 *
 * The halves were swapped before inference started, so the half the ISR
 * is NOT currently filling (use_buffer inverted) holds the slice that
 * was just recorded; convert that one to float for the DSP pipeline.
 */
static int microphone_audio_signal_get_data(size_t offset, size_t length, float *out_ptr)
{
    int16_t *src = inference.use_buffer ? inference.other_buffer : inference.buffer;
    numpy::int16_to_float(&src[offset], out_ptr, length);
    return 0;
}
/**
* @brief Stop PDM and release buffers
*/
/**
 * @brief Stop PDM capture and release the double buffer.
 */
static void microphone_inference_end(void)
{
    PDM.end();
    free(inference.buffer);
    free(inference.other_buffer);
    // Null the pointers so a repeated call (or a late ISR firing) cannot
    // double-free or write through dangling pointers.
    inference.buffer = NULL;
    inference.other_buffer = NULL;
}
#if !defined(EI_CLASSIFIER_SENSOR) || EI_CLASSIFIER_SENSOR != EI_CLASSIFIER_SENSOR_MICROPHONE
#error "Invalid model for current sensor."
#endif
I have 2 of the same HW setups so I am going to try running the continuous version against the standard version.
No real benefit for non-continuous. Underlying algorithms are the same, we just do some smart incremental feature generation for continuous mode, so would just stick to that!
I am making a “cough sensing project” with few modifications in the above code.
Could you help me with this? I have very little time to do this project. It would be helpful if you could contact me at brandmnk@gmail.com
@tiriotis, you can definitely use it as a base. I’m not sure what microphone the Adafruit Feather Sense packs, but I assume there’s a driver for it in Zephyr already - so you might need to swap the drivers out, but the general ideas should be fine.
However, you can modify the macro EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW to run continuous inferencing on smaller windows. It has an impact on both memory and timing; for more technical details you can read the documentation about continuous audio sampling: https://docs.edgeimpulse.com/docs/continuous-audio-sampling.
I am checking different values for the size of the double buffer and I see that as I reduce the size it doesn’t have any impact on the model. For example, for 1 sec samples at 16 kHz and 4 windows, what is the optimal value for the size of the buffer?
@tiriotis, what do you mean? The buffer should always be equal to EI_CLASSIFIER_SLICE_SIZE which is dependent on the slices per window. If you lower it you’ll be writing in uninitialized memory.