Counting objects using FOMO on ESP-EYE

Question/Issue: I want to count objects in a captured image, and I have followed the steps in this tutorial: Count objects using FOMO - Edge Impulse Documentation

I am using an ESP-EYE with ESP-IDF 4.4. I downloaded this source code, "GitHub - edgeimpulse/firmware-espressif-esp32: Edge Impulse firmware for the Espressif ESP-EYE (ESP32) Development board", and modified it as described in the tutorial:

ei_run_camera_impulse.cpp:
/* Edge Impulse ingestion SDK
 * Copyright (c) 2022 EdgeImpulse Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Include ----------------------------------------------------------------- */
#include "model-parameters/model_metadata.h"

#if defined(EI_CLASSIFIER_SENSOR) && EI_CLASSIFIER_SENSOR == EI_CLASSIFIER_SENSOR_CAMERA

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"
#include "edge-impulse-sdk/dsp/image/image.hpp"
#include "ei_camera.h"
#include "firmware-sdk/at_base64_lib.h"
#include "firmware-sdk/jpeg/encode_as_jpg.h"
#include "stdint.h"
#include "ei_device_espressif_esp32.h"
#include "ei_run_impulse.h"

#include "esp_timer.h"

#define DWORD_ALIGN_PTR(a) ((a & 0x3) ? (((uintptr_t)a + 0x4) & ~(uintptr_t)0x3) : a)

typedef enum {
    INFERENCE_STOPPED,
    INFERENCE_WAITING,
    INFERENCE_SAMPLING,
    INFERENCE_DATA_READY
} inference_state_t;

static inference_state_t state = INFERENCE_STOPPED;
static uint64_t last_inference_ts = 0;

static bool debug_mode = false;
static bool continuous_mode = false;

static uint8_t *snapshot_buf = nullptr;
static uint32_t snapshot_buf_size;

static ei_device_snapshot_resolutions_t snapshot_resolution;
static ei_device_snapshot_resolutions_t fb_resolution;

static bool resize_required = false;
static uint32_t inference_delay;

// Define the top of the image and the number of columns
static int TOP_Y = 50;
static int NUM_COLS = 5;
static int COL_WIDTH = EI_CLASSIFIER_INPUT_WIDTH / NUM_COLS;
static int MAX_ITEMS = 10;

// Define the factor of the width/height which determines the threshold
// for detection of the object’s movement between frames:
static float DETECT_FACTOR = 1.5;

// Initialize variables
std::vector<int> count(NUM_COLS, 0);
int countsum = 0;
int notfoundframes = 0;
std::vector<std::vector<ei_impulse_result_bounding_box_t> > previous_blobs(NUM_COLS);

static int ei_camera_get_data(size_t offset, size_t length, float *out_ptr)
{
// we already have a RGB888 buffer, so recalculate offset into pixel index
size_t pixel_ix = offset * 3;
size_t pixels_left = length;
size_t out_ptr_ix = 0;

while (pixels_left != 0) {
    out_ptr[out_ptr_ix] = (snapshot_buf[pixel_ix] << 16) + (snapshot_buf[pixel_ix + 1] << 8) + snapshot_buf[pixel_ix + 2];

    // go to the next pixel
    out_ptr_ix++;
    pixel_ix+=3;
    pixels_left--;
}

// and done!
return 0;

}

void ei_run_impulse(void)
{
switch(state) {
    case INFERENCE_STOPPED:
        // nothing to do
        return;
    case INFERENCE_WAITING:
        if(ei_read_timer_ms() < (last_inference_ts + inference_delay)) {
            return;
        }
        state = INFERENCE_DATA_READY;
        break;
    case INFERENCE_SAMPLING:
    case INFERENCE_DATA_READY:
        if(continuous_mode == true) {
            state = INFERENCE_WAITING;
        }
        break;
    default:
        break;
}

uint8_t *jpeg_image;
uint32_t jpeg_image_size = 0;

EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

ei_printf("Taking photo...\n");

if(camera->ei_camera_capture_jpeg(&jpeg_image, &jpeg_image_size) == false) {
    ei_printf("ERR: Failed to take a snapshot!\n");
    return;
}

snapshot_buf = (uint8_t*)ei_malloc(snapshot_buf_size);

// check if allocation was successful
if(snapshot_buf == nullptr) {
    ei_printf("ERR: Failed to allocate snapshot buffer!\n");
    return;
}

if(camera->ei_camera_jpeg_to_rgb888(jpeg_image, jpeg_image_size, snapshot_buf) == false) {
    ei_printf("ERR: Failed to decode JPEG image\n");
    ei_free(snapshot_buf);
    ei_free(jpeg_image);
    return;
}

ei_free(jpeg_image);
jpeg_image_size = 0;

int64_t fr_start = esp_timer_get_time();

if (resize_required) {
    ei::image::processing::crop_and_interpolate_rgb888(
        snapshot_buf,
        fb_resolution.width,
        fb_resolution.height,
        snapshot_buf,
        snapshot_resolution.width,
        snapshot_resolution.height);
}
int64_t fr_end = esp_timer_get_time();

if (debug_mode) {
    ei_printf("Time resizing: %d\n", (uint32_t)((fr_end - fr_start)/1000));
}

ei::signal_t signal;
signal.total_length = EI_CLASSIFIER_INPUT_WIDTH * EI_CLASSIFIER_INPUT_HEIGHT;
signal.get_data = &ei_camera_get_data;

// print and discard JPEG buffer before inference to free some memory
if (debug_mode) {
    ei_printf("Begin output\n");
    ei_printf("Framebuffer: ");
    // base64_encode((const char*)jpeg_image, jpeg_image_size, &ei_putchar);
    int ret = encode_rgb888_signal_as_jpg_and_output_base64(&signal, EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
    ei_printf("\r\n");
    if(ret != 0) {
        ei_printf("ERR: Failed to encode frame as JPEG (%d)\n", ret);
    }
}

// run the impulse: DSP, neural network and the Anomaly algorithm
ei_impulse_result_t result = { 0 };

EI_IMPULSE_ERROR ei_error = run_classifier(&signal, &result, false);
if (ei_error != EI_IMPULSE_OK) {
    ei_printf("ERR: Failed to run impulse (%d)\n", ei_error);
    ei_free(snapshot_buf);
    return;
}
ei_free(snapshot_buf);

// print the predictions
ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms., Count: %d ): \n",
          result.timing.dsp, result.timing.classification, result.timing.anomaly, countsum);

#if EI_CLASSIFIER_OBJECT_DETECTION == 1
bool bb_found = result.bounding_boxes[0].value > 0;
std::vector<std::vector<ei_impulse_result_bounding_box_t> > current_blobs(NUM_COLS);
for (size_t ix = 0; ix < result.bounding_boxes_count; ix++) {
    auto bb = result.bounding_boxes[ix];
    if (bb.value == 0) {
        continue;
    }
    // Check which column the blob is in
    int col = int(bb.x / COL_WIDTH);
    // Check if blob is within DETECT_FACTOR*h of a blob detected in the previous frame and treat as the same object
    for (auto blob : previous_blobs[col]) {
        if (abs(int(bb.x - blob.x)) < DETECT_FACTOR * (bb.width + blob.width) && abs(int(bb.y - blob.y)) < DETECT_FACTOR * (bb.height + blob.height)) {
            // Check this blob has "moved" across the Y threshold
            if (blob.y >= TOP_Y && bb.y < TOP_Y) {
                // Increment count for this column if blob has left the top of the image
                count[col]++;
                countsum++;
            }
        }
    }
    // Add current blob to list
    current_blobs[col].push_back(bb);
    //ei_printf(" %s (%f) [ x: %u, y: %u, width: %u, height: %u ]\n", bb.label, bb.value, bb.x, bb.y, bb.width, bb.height);
}
previous_blobs = std::move(current_blobs);
ei_printf(" Count: %d\n", countsum);
if (bb_found) {
    ei_printf(" Count: %d\n", countsum);
    notfoundframes = 0;
}
else {
    notfoundframes++;
    if (notfoundframes == 1) {
        ei_printf(" No objects found\n");
    }
    else {
        ei_printf(" Count: %d\n", countsum);
    }
}

#else
for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
    ei_printf(" %s: %.5f\n", result.classification[ix].label,
              result.classification[ix].value);
}

#if EI_CLASSIFIER_HAS_ANOMALY == 1
ei_printf(" anomaly score: %.3f\n", result.anomaly);
#endif
#endif

if (debug_mode) {
    ei_printf("\r\n----------------------------------\r\n");
    ei_printf("End output\r\n");
}

if(continuous_mode == false) {
    ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
}

}

void ei_start_impulse(bool continuous, bool debug, bool use_max_uart_speed)
{
snapshot_resolution.width = EI_CLASSIFIER_INPUT_WIDTH;
snapshot_resolution.height = EI_CLASSIFIER_INPUT_HEIGHT;

debug_mode = debug;
continuous_mode = continuous;

EiDeviceESP32* dev = static_cast<EiDeviceESP32*>(EiDeviceESP32::get_device());
EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

// check if minimum suitable sensor resolution is the same as
// desired snapshot resolution
// if not we need to resize later
fb_resolution = camera->search_resolution(snapshot_resolution.width, snapshot_resolution.height);

if (snapshot_resolution.width != fb_resolution.width || snapshot_resolution.height != fb_resolution.height) {
    resize_required = true;
}

if (!camera->init(snapshot_resolution.width, snapshot_resolution.height)) {
    ei_printf("Failed to init camera, check if camera is connected!\n");
    return;
}

snapshot_buf_size = fb_resolution.width * fb_resolution.height * 3;

// summary of inferencing settings (from model_metadata.h)
ei_printf("Inferencing settings:\n");
ei_printf("\tImage resolution: %dx%d\n", EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
ei_printf("\tNo. of classes: %d\n", sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0]));

if(continuous_mode == true) {
    inference_delay = 10000000;
    state = INFERENCE_DATA_READY;
}
else {
    inference_delay = 10000000;  // Set the image-capture and counting interval here
    state = INFERENCE_WAITING;
    ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
}

if (use_max_uart_speed) {
    ei_printf("OK\r\n");
    ei_sleep(100);
    dev->set_max_data_output_baudrate();
    ei_sleep(100);
}

while(!ei_user_invoke_stop()) {
    // Loop to make sure the image-capture and object-counting process keeps running
    while (state != INFERENCE_STOPPED) {
        ei_run_impulse();
        ei_printf("Countsum: %d\n", countsum);  // In ra biến countsum sau mỗi lần chạy ei_run_impulse
        ei_sleep(1);
    }

    if (!continuous_mode) {
        state = INFERENCE_WAITING;
        ei_printf("Starting inferencing in %d seconds...\n", inference_delay / 1000);
        ei_sleep(inference_delay / 1000);
    }
}

ei_stop_impulse();

if (use_max_uart_speed) {
    ei_printf("\r\nOK\r\n");
    ei_sleep(100);
    dev->set_default_data_output_baudrate();
    ei_sleep(100);
}

}

void ei_stop_impulse(void)
{
state = INFERENCE_STOPPED;
}

bool is_inference_running(void)
{
return (state != INFERENCE_STOPPED);
}

#endif /* defined(EI_CLASSIFIER_SENSOR) && EI_CLASSIFIER_SENSOR == EI_CLASSIFIER_SENSOR_CAMERA */

main.cpp:
/* Edge Impulse ingestion SDK
 * Copyright (c) 2022 EdgeImpulse Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Include ----------------------------------------------------------------- */
#include "driver/gpio.h"
#include "sdkconfig.h"
#include <stdio.h>
#include "ei_device_espressif_esp32.h"
#include "ei_at_handlers.h"
#include "ei_classifier_porting.h"
#include "ei_run_impulse.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"

EiDeviceInfo *EiDevInfo = dynamic_cast<EiDeviceInfo *>(EiDeviceESP32::get_device());
static ATServer *at;

uint32_t elapsed_time = 0;

// Check whether 10 seconds have elapsed yet
bool is_10_seconds_elapsed() {
    if (elapsed_time >= 10000) { // 10 seconds = 10000 ms
        return true;
    }
    return false;
}

extern "C" void app_main() {
gpio_pad_select_gpio(GPIO_NUM_21);
gpio_reset_pin(GPIO_NUM_21);

gpio_pad_select_gpio(GPIO_NUM_22);
gpio_reset_pin(GPIO_NUM_22);    

gpio_set_direction(GPIO_NUM_21, GPIO_MODE_OUTPUT);
gpio_set_direction(GPIO_NUM_22, GPIO_MODE_OUTPUT);    

EiDeviceESP32* dev = static_cast<EiDeviceESP32*>(EiDeviceESP32::get_device());

ei_printf(
    "Hello DMM\r\n"
    "Compiled on %s %s\r\n",
    __DATE__,
    __TIME__);

at = ei_at_init(dev);
ei_printf("Type AT+HELP to see a list of commands.\r\n");
at->print_prompt();

dev->set_state(eiStateFinished);

while(1) {
    if (is_10_seconds_elapsed()) {
        break;
    }

    // Start Edge Impulse inference
    ei_start_impulse(false, false, false);

    // Check if inference is running and print count
    while (is_inference_running()) {
        ei_run_impulse();
    }

    elapsed_time += 1000; // add 1 second (1000 ms)
    
    vTaskDelay(1000 / portTICK_PERIOD_MS); 
}

}

AND THIS IS MY OUTPUT:
Starting inferencing in 2 seconds...
Predictions (DSP: 324 ms., Classification: 4 ms., Anomaly: 0 ms.):
    helloworld: 0.000000
    noise: 0.894531
    unknown: 0.105469
Starting inferencing in 2 seconds...
Predictions (DSP: 324 ms., Classification: 4 ms., Anomaly: 0 ms.):
    helloworld: 0.000000
    noise: 0.941406
    unknown: 0.058594
Starting inferencing in 2 seconds...
There is nothing in the output about the image-capture process, and no results from the object-counting system. Why?

I want the system to: take an image => process the image and count the objects => print only the results.
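
For reference, this is roughly the single-shot flow I am trying to end up with, written only as a sketch that would sit in ei_run_camera_impulse.cpp next to the code above. It reuses the camera and classifier calls from that file, skips the resize step and the per-column tracking from the tutorial, assumes the camera and snapshot_buf_size were already set up by ei_start_impulse(), and the function name capture_and_count_once is just a placeholder:

static void capture_and_count_once(void)
{
    uint8_t *jpeg_image = nullptr;
    uint32_t jpeg_image_size = 0;

    EiCameraESP32 *camera = static_cast<EiCameraESP32*>(EiCameraESP32::get_camera());

    // 1) take an image
    if (camera->ei_camera_capture_jpeg(&jpeg_image, &jpeg_image_size) == false) {
        ei_printf("ERR: Failed to take a snapshot!\n");
        return;
    }

    // 2) decode the JPEG into an RGB888 buffer for the classifier
    snapshot_buf = (uint8_t*)ei_malloc(snapshot_buf_size);
    if (snapshot_buf == nullptr) {
        ei_free(jpeg_image);
        return;
    }
    if (camera->ei_camera_jpeg_to_rgb888(jpeg_image, jpeg_image_size, snapshot_buf) == false) {
        ei_free(snapshot_buf);
        ei_free(jpeg_image);
        return;
    }
    ei_free(jpeg_image);

    // 3) run the impulse on the RGB888 buffer
    ei::signal_t signal;
    signal.total_length = EI_CLASSIFIER_INPUT_WIDTH * EI_CLASSIFIER_INPUT_HEIGHT;
    signal.get_data = &ei_camera_get_data;

    ei_impulse_result_t result = { 0 };
    if (run_classifier(&signal, &result, false) != EI_IMPULSE_OK) {
        ei_free(snapshot_buf);
        return;
    }
    ei_free(snapshot_buf);

#if EI_CLASSIFIER_OBJECT_DETECTION == 1
    // 4) count the bounding boxes found in this frame and print only that
    size_t objects_in_frame = 0;
    for (size_t ix = 0; ix < result.bounding_boxes_count; ix++) {
        if (result.bounding_boxes[ix].value > 0) {
            objects_in_frame++;
        }
    }
    ei_printf("Objects in frame: %u\n", (unsigned)objects_in_frame);
#endif
}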
Please help me!


The code posted does count objects. Did you deploy a FOMO Impulse?
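
Your output shows classification labels (helloworld / noise / unknown), which looks like an audio/keyword project rather than an object detection one, and the counting prints only happen inside the EI_CLASSIFIER_OBJECT_DETECTION branch. The Predictions line also does not show the "Count:" field you added, which suggests the board may still be running an older, unmodified build. A quick way to confirm which kind of model was actually compiled into the firmware is to print the model metadata once at startup; a minimal sketch (the function name print_impulse_info is just an example, call it e.g. from app_main()):

#include "model-parameters/model_metadata.h"
#include "ei_classifier_porting.h" // for ei_printf()

// Print which kind of impulse was compiled into this firmware build
static void print_impulse_info(void)
{
#if EI_CLASSIFIER_OBJECT_DETECTION == 1
    ei_printf("Object detection model compiled in, input %dx%d\n",
              EI_CLASSIFIER_INPUT_WIDTH, EI_CLASSIFIER_INPUT_HEIGHT);
#else
    ei_printf("Classification model compiled in (no object detection), %d classes\n",
              EI_CLASSIFIER_LABEL_COUNT);
#endif
}

If this prints the classification branch, the model-parameters and tflite-model folders in the firmware still come from a different (non-FOMO) project and need to be replaced with the ones from your FOMO deployment before rebuilding and flashing.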