Counting objects using FOMO on ESP-EYE

Question/Issue: I want to count objects in a captured image, and I have followed the steps in this tutorial: Count objects using FOMO - Edge Impulse Documentation

I am using an ESP-EYE with ESP-IDF 4.4. I downloaded this source code, "GitHub - edgeimpulse/firmware-espressif-esp32: Edge Impulse firmware for the Espressif ESP-EYE (ESP32) Development board", and modified it as described in the tutorial:

ei_run_camera_impulse.cpp:
/* Edge Impulse ingestion SDK
 * Copyright (c) 2022 EdgeImpulse Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Include ----------------------------------------------------------------- */
#include "model-parameters/model_metadata.h"

#if defined(EI_CLASSIFIER_SENSOR) && EI_CLASSIFIER_SENSOR == EI_CLASSIFIER_SENSOR_CAMERA

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"
#include "edge-impulse-sdk/dsp/image/image.hpp"
#include "ei_camera.h"
#include "firmware-sdk/at_base64_lib.h"
#include "firmware-sdk/jpeg/encode_as_jpg.h"
#include "stdint.h"
#include "ei_device_espressif_esp32.h"
#include "ei_run_impulse.h"

#include "esp_timer.h"

#define DWORD_ALIGN_PTR(a) ((a & 0x3) ? (((uintptr_t)a + 0x4) & ~(uintptr_t)0x3) : a)

typedef enum {
    INFERENCE_STOPPED,
    INFERENCE_WAITING,
    INFERENCE_SAMPLING,
    INFERENCE_DATA_READY
} inference_state_t;

static inference_state_t state = INFERENCE_STOPPED;
static uint64_t last_inference_ts = 0;

static bool debug_mode = false;
static bool continuous_mode = false;

static uint8_t *snapshot_buf = nullptr;
static uint32_t snapshot_buf_size;

static ei_device_snapshot_resolutions_t snapshot_resolution;
static ei_device_snapshot_resolutions_t fb_resolution;

static bool resize_required = false;
static uint32_t inference_delay;

// Define the top of the image and the number of columns
static int TOP_Y = 50;
static int NUM_COLS = 5;
static int COL_WIDTH = EI_CLASSIFIER_INPUT_WIDTH / NUM_COLS;
static int MAX_ITEMS = 10;

// Define the factor of the width/height which determines the threshold
// for detection of the object’s movement between frames:
static float DETECT_FACTOR = 1.5;

// Initialize variables
std::vector<int> count(NUM_COLS, 0);
int countsum = 0;
int notfoundframes = 0;
std::vector<std::vector<ei_impulse_result_bounding_box_t> > previous_blobs(NUM_COLS);

static int ei_camera_get_data(size_t offset, size_t length, float *out_ptr)
{
// we already have a RGB888 buffer, so recalculate offset into pixel index
size_t pixel_ix = offset * 3;
size_t pixels_left = length;
size_t out_ptr_ix = 0;

while (pixels_left != 0) {
    out_ptr[out_ptr_ix] = (snapshot_buf[pixel_ix] << 16) + (snapshot_buf[pixel_ix + 1] << 8) + snapshot_buf[pixel_ix + 2];

    // go to the next pixel
    out_ptr_ix++;
    pixel_ix+=3;
    pixels_left--;
}

// and done!
return 0;

}

void ei_run_impulse(void)
{
switch(state) {
    case INFERENCE_STOPPED:
        // nothing to do
        return;
    case INFERENCE_WAITING:
        if(ei_read_timer_ms() < (last_inference_ts + inference_delay)) {
            return;
        }
        state = INFERENCE_DATA_READY;
        break;
    case INFERENCE_SAMPLING:
    case INFERENCE_DATA_READY:
        if(continuous_mode == true) {
            state = INFERENCE_WAITING;
        }
        break;
    default:
        break;
}

uint8_t *jpeg_image;
uint32_t jpeg_image_size = 0;

EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

ei_printf("Taking photo...\n");

if(camera->ei_camera_capture_jpeg(&jpeg_image, &jpeg_image_size) == false) {
    ei_printf("ERR: Failed to take a snapshot!\n");
    return;
}

snapshot_buf = (uint8_t*)ei_malloc(snapshot_buf_size);

// check if allocation was successful
if(snapshot_buf == nullptr) {
    ei_printf("ERR: Failed to allocate snapshot buffer!\n");
    return;
}

if(camera->ei_camera_jpeg_to_rgb888(jpeg_image, jpeg_image_size, snapshot_buf) == false) {
    ei_printf("ERR: Failed to decode JPEG image\n");
    ei_free(snapshot_buf);
    ei_free(jpeg_image);
    return;
}

ei_free(jpeg_image);
jpeg_image_size = 0;

int64_t fr_start = esp_timer_get_time();

if (resize_required) {
    ei::image::processing::crop_and_interpolate_rgb888(
        snapshot_buf,
        fb_resolution.width,
        fb_resolution.height,
        snapshot_buf,
        snapshot_resolution.width,
        snapshot_resolution.height);
}
int64_t fr_end = esp_timer_get_time();

if (debug_mode) {
    ei_printf("Time resizing: %d\n", (uint32_t)((fr_end - fr_start)/1000));
}

ei::signal_t signal;
signal.total_length = EI_CLASSIFIER_INPUT_WIDTH * EI_CLASSIFIER_INPUT_HEIGHT;
signal.get_data = &ei_camera_get_data;

// print and discard JPEG buffer before inference to free some memory
if (debug_mode) {
    ei_printf("Begin output\n");
    ei_printf("Framebuffer: ");
    // base64_encode((const char*)jpeg_image, jpeg_image_size, &ei_putchar);
    int ret = encode_rgb888_signal_as_jpg_and_output_base64(&signal, EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
    ei_printf("\r\n");
    if(ret != 0) {
        ei_printf("ERR: Failed to encode frame as JPEG (%d)\n", ret);
    }
}

// run the impulse: DSP, neural network and the Anomaly algorithm
ei_impulse_result_t result = { 0 };

EI_IMPULSE_ERROR ei_error = run_classifier(&signal, &result, false);
if (ei_error != EI_IMPULSE_OK) {
    ei_printf("ERR: Failed to run impulse (%d)\n", ei_error);
    ei_free(snapshot_buf);
    return;
}
ei_free(snapshot_buf);

// print the predictions
ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms., Count: %d ): \n",
          result.timing.dsp, result.timing.classification, result.timing.anomaly, countsum);

#if EI_CLASSIFIER_OBJECT_DETECTION == 1
bool bb_found = result.bounding_boxes[0].value > 0;
std::vector<std::vector<ei_impulse_result_bounding_box_t> > current_blobs(NUM_COLS);
for (size_t ix = 0; ix < result.bounding_boxes_count; ix++) {
    auto bb = result.bounding_boxes[ix];
    if (bb.value == 0) {
        continue;
    }
    // Check which column the blob is in
    int col = int(bb.x / COL_WIDTH);
    // Check if blob is within DETECT_FACTOR*h of a blob detected in the previous frame and treat as the same object
    for (auto blob : previous_blobs[col]) {
        if (abs(int(bb.x - blob.x)) < DETECT_FACTOR * (bb.width + blob.width) && abs(int(bb.y - blob.y)) < DETECT_FACTOR * (bb.height + blob.height)) {
            // Check this blob has "moved" across the Y threshold
            if (blob.y >= TOP_Y && bb.y < TOP_Y) {
                // Increment count for this column if blob has left the top of the image
                count[col]++;
                countsum++;
            }
        }
    }
    // Add current blob to list
    current_blobs[col].push_back(bb);
    //ei_printf(" %s (%f) [ x: %u, y: %u, width: %u, height: %u ]\n", bb.label, bb.value, bb.x, bb.y, bb.width, bb.height);
}
previous_blobs = std::move(current_blobs);
ei_printf(" Count: %d\n", countsum);
if (bb_found) {
    ei_printf(" Count: %d\n", countsum);
    notfoundframes = 0;
}
else {
    notfoundframes++;
    if (notfoundframes == 1) {
        ei_printf(" No objects found\n");
    }
    else {
        ei_printf(" Count: %d\n", countsum);
    }
}

#else
for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
    ei_printf(" %s: %.5f\n", result.classification[ix].label,
              result.classification[ix].value);
}

#if EI_CLASSIFIER_HAS_ANOMALY == 1
ei_printf(" anomaly score: %.3f\n", result.anomaly);
#endif
#endif

if (debug_mode) {
    ei_printf("\r\n----------------------------------\r\n");
    ei_printf("End output\r\n");
}

if(continuous_mode == false) {
    ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
}

}

void ei_start_impulse(bool continuous, bool debug, bool use_max_uart_speed)
{
snapshot_resolution.width = EI_CLASSIFIER_INPUT_WIDTH;
snapshot_resolution.height = EI_CLASSIFIER_INPUT_HEIGHT;

debug_mode = debug;
continuous_mode = continuous;

EiDeviceESP32* dev = static_cast<EiDeviceESP32*>(EiDeviceESP32::get_device());
EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

// check if minimum suitable sensor resolution is the same as
// desired snapshot resolution
// if not we need to resize later
fb_resolution = camera->search_resolution(snapshot_resolution.width, snapshot_resolution.height);

if (snapshot_resolution.width != fb_resolution.width || snapshot_resolution.height != fb_resolution.height) {
    resize_required = true;
}

if (!camera->init(snapshot_resolution.width, snapshot_resolution.height)) {
    ei_printf("Failed to init camera, check if camera is connected!\n");
    return;
}

snapshot_buf_size = fb_resolution.width * fb_resolution.height * 3;

// summary of inferencing settings (from model_metadata.h)
ei_printf("Inferencing settings:\n");
ei_printf("\tImage resolution: %dx%d\n", EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
ei_printf("\tNo. of classes: %d\n", sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0]));

if(continuous_mode == true) {
    inference_delay = 10000000;
    state = INFERENCE_DATA_READY;
}
else {
    inference_delay = 10000000;  // Set the image-capture and counting interval here
    state = INFERENCE_WAITING;
    ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
}

if (use_max_uart_speed) {
    ei_printf("OK\r\n");
    ei_sleep(100);
    dev->set_max_data_output_baudrate();
    ei_sleep(100);
}

while(!ei_user_invoke_stop()) {
    // Loop to make sure the image-capture and object-counting process keeps running
    while (state != INFERENCE_STOPPED) {
        ei_run_impulse();
        ei_printf("Countsum: %d\n", countsum);  // In ra biến countsum sau mỗi lần chạy ei_run_impulse
        ei_sleep(1);
    }

    if (!continuous_mode) {
        state = INFERENCE_WAITING;
        ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
        ei_sleep(inference_delay / 1000);
    }
}

ei_stop_impulse();

if (use_max_uart_speed) {
    ei_printf("\r\nOK\r\n");
    ei_sleep(100);
    dev->set_default_data_output_baudrate();
    ei_sleep(100);
}

}

void ei_stop_impulse(void)
{
state = INFERENCE_STOPPED;
}

bool is_inference_running(void)
{
return (state != INFERENCE_STOPPED);
}

#endif /* defined(EI_CLASSIFIER_SENSOR) && EI_CLASSIFIER_SENSOR == EI_CLASSIFIER_SENSOR_CAMERA */

main.cpp:
/* Edge Impulse ingestion SDK
 * Copyright (c) 2022 EdgeImpulse Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Include ----------------------------------------------------------------- */
#include "driver/gpio.h"
#include "sdkconfig.h"
#include <stdio.h>
#include "ei_device_espressif_esp32.h"
#include "ei_at_handlers.h"
#include "ei_classifier_porting.h"
#include "ei_run_impulse.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"

EiDeviceInfo *EiDevInfo = dynamic_cast<EiDeviceInfo *>(EiDeviceESP32::get_device());
static ATServer *at;

uint32_t elapsed_time = 0;

// Check whether 10 seconds have elapsed yet
bool is_10_seconds_elapsed() {
    if (elapsed_time >= 10000) { // 10 seconds = 10000 ms
        return true;
    }
    return false;
}

extern "C" void app_main() {
gpio_pad_select_gpio(GPIO_NUM_21);
gpio_reset_pin(GPIO_NUM_21);

gpio_pad_select_gpio(GPIO_NUM_22);
gpio_reset_pin(GPIO_NUM_22);    

gpio_set_direction(GPIO_NUM_21, GPIO_MODE_OUTPUT);
gpio_set_direction(GPIO_NUM_22, GPIO_MODE_OUTPUT);    

EiDeviceESP32* dev = static_cast<EiDeviceESP32*>(EiDeviceESP32::get_device());

ei_printf(
    "Hello DMM\r\n"
    "Compiled on %s %s\r\n",
    __DATE__,
    __TIME__);

at = ei_at_init(dev);
ei_printf("Type AT+HELP to see a list of commands.\r\n");
at->print_prompt();

dev->set_state(eiStateFinished);

while(1) {
    if (is_10_seconds_elapsed()) {
        break;
    }

    // Start Edge Impulse inference
    ei_start_impulse(false, false, false);

    // Check if inference is running and print count
    while (is_inference_running()) {
        ei_run_impulse();
    }

    elapsed_time += 1000; // add 1 second (1000 ms)
    
    vTaskDelay(1000 / portTICK_PERIOD_MS); 
}

}

AND THIS IS MY OUTPUT:
Starting inferencing in 2 seconds...
Predictions (DSP: 324 ms., Classification: 4 ms., Anomaly: 0 ms.):
    helloworld: 0.000000
    noise: 0.894531
    unknown: 0.105469
Starting inferencing in 2 seconds...
Predictions (DSP: 324 ms., Classification: 4 ms., Anomaly: 0 ms.):
    helloworld: 0.000000
    noise: 0.941406
    unknown: 0.058594
Starting inferencing in 2 seconds...
There is nothing in the output about the image-capture process, and no results from the object-counting system. Why?

I want the system to: take an image => process the image and count the objects => print only the results.
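
For reference, this is roughly the single-shot flow I am trying to end up with, written only as a sketch that would sit in ei_run_camera_impulse.cpp next to the code above. It reuses the camera and classifier calls from that file, skips the resize step and the per-column tracking from the tutorial, assumes the camera and snapshot_buf_size were already set up by ei_start_impulse(), and the function name capture_and_count_once is just a placeholder:

static void capture_and_count_once(void)
{
    uint8_t *jpeg_image = nullptr;
    uint32_t jpeg_image_size = 0;

    EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

    // 1) take an image
    if (camera->ei_camera_capture_jpeg(&jpeg_image, &jpeg_image_size) == false) {
        ei_printf("ERR: Failed to take a snapshot!\n");
        return;
    }

    // 2) decode the JPEG into an RGB888 buffer for the classifier
    snapshot_buf = (uint8_t*)ei_malloc(snapshot_buf_size);
    if (snapshot_buf == nullptr) {
        ei_free(jpeg_image);
        return;
    }
    if (camera->ei_camera_jpeg_to_rgb888(jpeg_image, jpeg_image_size, snapshot_buf) == false) {
        ei_free(snapshot_buf);
        ei_free(jpeg_image);
        return;
    }
    ei_free(jpeg_image);

    // 3) run the impulse on the RGB888 buffer
    ei::signal_t signal;
    signal.total_length = EI_CLASSIFIER_INPUT_WIDTH * EI_CLASSIFIER_INPUT_HEIGHT;
    signal.get_data = &ei_camera_get_data;

    ei_impulse_result_t result = { 0 };
    if (run_classifier(&signal, &result, false) != EI_IMPULSE_OK) {
        ei_free(snapshot_buf);
        return;
    }
    ei_free(snapshot_buf);

#if EI_CLASSIFIER_OBJECT_DETECTION == 1
    // 4) count the bounding boxes found in this frame and print only that
    size_t objects_in_frame = 0;
    for (size_t ix = 0; ix < result.bounding_boxes_count; ix++) {
        if (result.bounding_boxes[ix].value > 0) {
            objects_in_frame++;
        }
    }
    ei_printf("Objects in frame: %u\n", (unsigned)objects_in_frame);
#endif
}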
Please help me!


The code posted does count objects. Did you deploy a FOMO Impulse?
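
Your output shows classification labels (helloworld / noise / unknown), which looks like an audio/keyword project rather than an object detection one, and the counting prints only happen inside the EI_CLASSIFIER_OBJECT_DETECTION branch. The Predictions line also does not show the "Count:" field you added, which suggests the board may still be running an older, unmodified build. A quick way to confirm which kind of model was actually compiled into the firmware is to print the model metadata once at startup; a minimal sketch (the function name print_impulse_info is just an example, call it e.g. from app_main()):

#include "model-parameters/model_metadata.h"
#include "ei_classifier_porting.h" // for ei_printf()

// Print which kind of impulse was compiled into this firmware build
static void print_impulse_info(void)
{
#if EI_CLASSIFIER_OBJECT_DETECTION == 1
    ei_printf("Object detection model compiled in, input %dx%d\n",
              EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
#else
    ei_printf("Classification model compiled in (no object detection), %d classes\n",
              EI_CLASSIFIER_LABEL_COUNT);
#endif
}

If this prints the classification branch, the model-parameters and tflite-model folders in the firmware still come from a different (non-FOMO) project and need to be replaced with the ones from your FOMO deployment before rebuilding and flashing.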