Program freezes at inference start (Edge Impulse + Zephyr + ETHOS_U55)

Hello,

I am working on an Alif Ensemble E7 (Cortex-M55 + Ethos-U55) running Zephyr RTOS, and I am trying to deploy an Edge Impulse model that should run on the Ethos-U55 NPU.

My goal is to offload the inference to the Ethos-U NPU. I was able to build and flash the firmware, and the boot log correctly shows:

  • “Starting EI realtime classifier…”
  • “Ethos-U device is ready”
  • and then just before inference: === start inference ===

But after that, the program never continues: I never see the “after run_classifier” log (=== end inference ===). It doesn’t reboot and doesn’t appear to crash, but no more printf/ei_printf output appears after the call to run_classifier().

main.cpp :

#include <zephyr/kernel.h>
#include <zephyr/device.h>
#include <zephyr/drivers/sensor.h>
#include <zephyr/drivers/gpio.h>
#include <stdio.h>

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"

// LED
#define RED_LED_NODE DT_ALIAS(led0)
static const struct gpio_dt_spec led = GPIO_DT_SPEC_GET(RED_LED_NODE, gpios);

// Accéléromètre
static const struct device *sensor = DEVICE_DT_GET(DT_NODELABEL(lis2dux12_body));


static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
static size_t feat_index = 0;


static int get_feature_callback(size_t offset, size_t length, float *out_ptr)
{
  static uint32_t n = 0;
  if ((n++ % 50) == 0)
  {
    ei_printf("[cb] off=%u len=%u\n", (unsigned)offset, (unsigned)length);
  }
  memcpy(out_ptr, features + offset, length * sizeof(float));
  return 0;
}

static ei_impulse_result_t g_result;

K_THREAD_STACK_DEFINE(hb_stack, 1024);
static struct k_thread hb_thread;

static void hb_fn(void *, void *, void *)
{
  while (1)
  {
    ei_printf("[hb] t=%llu\n", ei_read_timer_ms());
    k_msleep(1000);
  }
}

auto main() -> int
{

  printf("Starting EI realtime classifier...\n");


  if (!device_is_ready(sensor))
  {
    printf("Accelerometer NOT READY\n");
    return 0;
  }

  // --- LED ---
  if (gpio_is_ready_dt(&led))
  {
    gpio_pin_configure_dt(&led, GPIO_OUTPUT);
  }

  const size_t frame_size = EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE;

  // --- Vérif Ethos-U ---
  const struct device *ethosu = DEVICE_DT_GET(DT_NODELABEL(ethosu0));
  if (!device_is_ready(ethosu))
  {
    printf("Ethos-U device not ready\n");
  }
  else
  {
    printf("Ethos-U device is ready\n");
  }

  while (true)
  {

    gpio_pin_toggle_dt(&led);

    struct sensor_value accel[3];
    int rc = sensor_sample_fetch_chan(sensor, SENSOR_CHAN_ACCEL_XYZ);

    if (rc != 0)
    {
      printf("sensor_sample_fetch_chan error: %d\n", rc);
      k_msleep(10);
      continue;
    }
    rc = sensor_channel_get(sensor, SENSOR_CHAN_ACCEL_XYZ, accel);

    if (rc != 0)
    {
      printf("sensor_channel_get error: %d\n", rc);
      k_msleep(10);
      continue;
    }
    float ax = sensor_value_to_double(&accel[0]);
    float ay = sensor_value_to_double(&accel[1]);
    float az = sensor_value_to_double(&accel[2]);

    // printf("%.3f,%.3f,%.3f\n", ax, ay, az);

    features[feat_index++] = ax;
    features[feat_index++] = ay;
    features[feat_index++] = az;

    if (feat_index >= frame_size)
    {

      feat_index = 0;

      signal_t signal;
      signal.total_length = frame_size;
      signal.get_data = get_feature_callback;

      // uint64_t start = k_uptime_get();
      // ei_printf("Running inference...\n");
      ei_printf("\n=== start inference ===\n");
      EI_IMPULSE_ERROR ei_status = run_classifier(&signal, &g_result, false);
      ei_printf("\n=== end inference ===\n");
      // uint64_t end = k_uptime_get();

      // printf("Inference time: %llu ms\n", end - start);

      if (ei_status != EI_IMPULSE_OK)
      {
        printf("EI classifier error: %d\n", ei_status);
      }
      else
      {

        printf("\n=== PREDICTION ===\n");

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          printf("%s : %.3f\n",
                 g_result.classification[i].label,
                 g_result.classification[i].value);
        }

        float best_value = -1.0f;
        const char *best_label = "unknown";

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          if (g_result.classification[i].value > best_value)
          {
            best_value = g_result.classification[i].value;
            best_label = g_result.classification[i].label;
          }
        }

        printf("**Detected state : %s (%.3f)\n", best_label, best_value);
        // ei_printf("Arena size: %d bytes\n", EI_CLASSIFIER_TFLITE_LARGEST_ARENA_SIZE);
      }
    }

    k_msleep(EI_CLASSIFIER_INTERVAL_MS);
    //  k_msleep(5); // 200 Hz
  }

  return 0;
}

prj.conf :

# --- SYSTEM ---
CONFIG_CPP=y
CONFIG_STD_CPP17=y
CONFIG_NEWLIB_LIBC=y
CONFIG_NEWLIB_LIBC_FLOAT_PRINTF=y

CONFIG_REQUIRES_FULL_LIBC=y
CONFIG_REQUIRES_FULL_LIBCPP=y
CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

# --- MEMORY ---

CONFIG_MAIN_STACK_SIZE=65536
CONFIG_ISR_STACK_SIZE=8192
CONFIG_HEAP_MEM_POOL_SIZE=131072
CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=8192


# --- LOGGING ---
CONFIG_LOG=y
CONFIG_LOG_MODE_IMMEDIATE=y

# --- UART ---
CONFIG_SERIAL=y
CONFIG_UART_CONSOLE=y

# --- SENSORS ---
CONFIG_GPIO=y
CONFIG_I2C=y
CONFIG_SENSOR=y
CONFIG_LIS2DUX12=y

# --- ETHOS-U55 (NPU) ---
CONFIG_ARM_ETHOS_U=y
CONFIG_ARM_ETHOS_U55_256=y

CONFIG_CMSIS_DSP=y
# CONFIG_CMSIS_NN=y

CONFIG_FPU=y
CONFIG_FPU_SHARING=y


CONFIG_CMSIS_DSP_STATISTICS=y



CONFIG_CMSIS_DSP_TRANSFORM=y

CONFIG_CMSIS_DSP_MATRIX=y

CONFIG_CMSIS_DSP_FILTERING=y
CONFIG_CMSIS_DSP_DISTANCE=y


CONFIG_ASSERT=y
CONFIG_INIT_STACKS=y
CONFIG_STACK_SENTINEL=y
CONFIG_THREAD_STACK_INFO=y
CONFIG_EXCEPTION_STACK_TRACE=y
CONFIG_FAULT_DUMP=2

CmakeLists.txt :

cmake_minimum_required(VERSION 3.13.1)

set(EXTRA_CONF_FILE ../../base.conf)
set(PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)

list(APPEND ZEPHYR_EXTRA_MODULES ${PROJECT_DIR}/drivers)

find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})

project(hp_app)

add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC=1)

# --- CONFIGURATION ETHOS-U55 ---
if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+)-([0-9]+)$")
  set(ETHOSU_ARCH "${CMAKE_MATCH_1}")
  set(ETHOSU_MACS "${CMAKE_MATCH_2}")

  add_compile_definitions(ETHOSU_ARCH_U55=1)
  add_compile_definitions(EI_CLASSIFIER_TFLITE_ENABLE_ETHOS_U55=1)

  if(ETHOSU_MACS STREQUAL "256")
    set(CONFIG_ARM_ETHOS_U55_256 y CACHE BOOL "" FORCE)
  elseif(ETHOSU_MACS STREQUAL "128")
    set(CONFIG_ARM_ETHOS_U55_128 y CACHE BOOL "" FORCE)
  endif()
else()
  message(FATAL_ERROR "Missing -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-256")
endif()

# --- DEFINITIONS ---
target_compile_definitions(app PRIVATE
    TF_LITE_STATIC_MEMORY
    EI_CLASSIFIER_SENSOR=EI_CLASSIFIER_SENSOR_UNKNOWN
    
    EIDSP_USE_CMSIS_DSP=1
    EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN=0
    EIDSP_LOAD_CMSIS_DSP_SOURCES=0
    
    ARM_MATH_CM55
    ARM_MATH_LOOPUNROLL
)

# --- INCLUSIONS ---
zephyr_include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk
  ${CMAKE_CURRENT_SOURCE_DIR}/model-parameters
  ${CMAKE_CURRENT_SOURCE_DIR}/tflite-model
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/flatbuffers/include
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/gemmlowp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/ruy
)

# --- SOURCES ---
target_sources(app PRIVATE main.cpp)

# Edge Impulse Core
file(GLOB_RECURSE EI_DSP "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.c")
file(GLOB_RECURSE EI_CLASSIFIER "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.c")

# Third Party
file(GLOB_RECURSE EI_THIRD_PARTY 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cc"
)

# TensorFlow Lite Micro
file(GLOB_RECURSE EI_TENSORFLOW 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cc"
)

# Model
file(GLOB_RECURSE EI_MODEL 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cc"
)
file(GLOB_RECURSE EI_PARAMS 
    "${CMAKE_CURRENT_SOURCE_DIR}/model-parameters/*.cpp"
)

# Porting Layer
target_sources(app PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp")

# Assemblage
target_sources(app PRIVATE
    ${EI_DSP}
    ${EI_CLASSIFIER}
    ${EI_THIRD_PARTY}
    ${EI_TENSORFLOW}
    ${EI_MODEL}
    ${EI_PARAMS}
)

# --- OPTIONS ---
target_compile_options(app PRIVATE 
    -flax-vector-conversions 
    -Wno-strict-aliasing 
    -Wno-unused-parameter
    -Wno-deprecated-declarations
    -Wno-format
)

set_property(TARGET app PROPERTY CXX_STANDARD 17)

target_link_libraries(app PRIVATE -lm)

Thank you in advance for your help.

Best regards,

Rizelaine

Hello everyone,

I managed to solve my previous “Bus Fault” / memory corruption crashes by disabling “CONFIG_NEWLIB_LIBC=y” in my prj.conf. It seems it was causing issues with the available memory layout.

However, I am now facing a new error immediately when inference starts:

image

know Error -3 usually points to an Out of Memory issue. Since my model is quite small (reported as ~7.4KB RAM on the dashboard), I tried to increase the Heap size significantly in prj.conf, but the error persists.

Here is my current configuration:

CONFIG_MAIN_STACK_SIZE=16384
CONFIG_HEAP_MEM_POOL_SIZE=524288  
CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

Has anyone encountered this error?

Thanks in advance for your help!

1 Like

Hi @Rizelaine

Seems you can’t allocate tensor_arena.
Can you test calling ei_malloc and ei_free to check fi you can allocate dinamically ?

What deployment did you end to use ? Eon compiled or tflite?

You can also try to define
EI_CLASSIFIER_ALLOCATION_STATIC
and, if you want to place in a specific memory area, something like
EI_TENSOR_ARENA_LOCATION: “.tensor_arena_buf_dtcm”
but this needs the linker script to be modified

See C++ library - Edge Impulse Documentation

fv

1 Like

Thank you for your help!

I am currently using a standard TensorFlow Lite deployment.

By implementing a manual static arena in my main.cpp and overriding ei_malloc, I managed to get past the AllocateTensors() failed (-3) error. It seems the standard heap was indeed too fragmented for the NPU’s requirements.

main.cpp :


#include <zephyr/kernel.h>
#include <zephyr/device.h>
#include <zephyr/cache.h>
#include <zephyr/drivers/sensor.h>
#include <zephyr/drivers/gpio.h>
#include <stdio.h>
#include <stdlib.h>

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"

__aligned(16) static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};

// 2. ARÈNE STATIQUE ALIGNÉE
static uint8_t my_tensor_arena[307200] __aligned(16);

void *ei_malloc(size_t size)
{
  if (size > sizeof(my_tensor_arena))
    return NULL;
  return my_tensor_arena;
}
void ei_free(void *ptr) {}

// // --- FONCTIONS D'ALLOCATION ---

// static uint8_t ei_tensor_arena[262144] __attribute__((aligned(16)));

// static size_t arena_offset = 0;

// void *ei_malloc(size_t size)
// {
//   // Alignement manuel sur 16 octets
//   size_t align_offset = (16 - (arena_offset % 16)) % 16;
//   size_t alloc_size = size + align_offset;

//   // Vérifie si on a encore de la place
//   if (arena_offset + alloc_size > sizeof(ei_tensor_arena))
//   {
//     printk("ERREUR: Arena pleine ! Demande: %d, Dispo: %d\n",
//            (int)size, (int)(sizeof(ei_tensor_arena) - arena_offset));
//     return NULL;
//   }

//   // On donne l'adresse libre
//   void *ptr = &ei_tensor_arena[arena_offset + align_offset];

//   // On avance le pointeur pour la prochaine fois
//   arena_offset += alloc_size;

//   return ptr;
// }

// void *ei_calloc(size_t nitems, size_t size)
// {
//   size_t total_size = nitems * size;
//   void *ptr = ei_malloc(total_size);
//   if (ptr)
//   {
//     memset(ptr, 0, total_size);
//   }
//   return ptr;
// }

// void ei_free(void *ptr)
// {
// }
// // -------------------------------------------------------

// LED
#define RED_LED_NODE DT_ALIAS(led0)
static const struct gpio_dt_spec led = GPIO_DT_SPEC_GET(RED_LED_NODE, gpios);

// Accéléromètre
static const struct device *sensor = DEVICE_DT_GET(DT_NODELABEL(lis2dux12_body));

// Buffer : 100 échantillons * 3 axes = 300 valeurs
// static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
//__aligned(16) static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
static size_t feat_index = 0;

// Callback utilisé par Edge Impulse pour lire les données

static int get_feature_callback(size_t offset, size_t length, float *out_ptr)
{
  static uint32_t n = 0;
  if ((n++ % 50) == 0)
  {
    ei_printf("[cb] off=%u len=%u\n", (unsigned)offset, (unsigned)length);
  }
  memcpy(out_ptr, features + offset, length * sizeof(float));
  return 0;
}

static ei_impulse_result_t g_result;

K_THREAD_STACK_DEFINE(hb_stack, 1024);
static struct k_thread hb_thread;

static void hb_fn(void *, void *, void *)
{
  while (1)
  {
    ei_printf("[hb] t=%llu\n", ei_read_timer_ms());
    k_msleep(1000);
  }
}

auto main() -> int
{

  printf("Starting EI realtime classifier...\n");

  // int *toto = (int *)ei_malloc(50);
  // printf("Toto %p\n", toto);

  // --- Vérif capteur ---
  if (!device_is_ready(sensor))
  {
    printf("Accelerometer NOT READY\n");
    return 0;
  }

  // --- LED ---
  if (gpio_is_ready_dt(&led))
  {
    gpio_pin_configure_dt(&led, GPIO_OUTPUT);
  }

  // Une prédiction = 100 échantillons @100 Hz -> 1 seconde de données
  const size_t frame_size = EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE;

  // --- Vérif Ethos-U ---
  const struct device *ethosu = DEVICE_DT_GET(DT_NODELABEL(ethosu0));
  if (!device_is_ready(ethosu))
  {
    printf("Ethos-U device not ready\n");
  }
  else
  {
    printf("Ethos-U device is ready\n");
  }

  while (true)
  {

    gpio_pin_toggle_dt(&led);

    struct sensor_value accel[3];
    int rc = sensor_sample_fetch_chan(sensor, SENSOR_CHAN_ACCEL_XYZ);

    if (rc != 0)
    {
      printf("sensor_sample_fetch_chan error: %d\n", rc);
      k_msleep(10);
      continue;
    }
    rc = sensor_channel_get(sensor, SENSOR_CHAN_ACCEL_XYZ, accel);

    if (rc != 0)
    {
      printf("sensor_channel_get error: %d\n", rc);
      k_msleep(10);
      continue;
    }
    float ax = sensor_value_to_double(&accel[0]);
    float ay = sensor_value_to_double(&accel[1]);
    float az = sensor_value_to_double(&accel[2]);

    // printf("%.3f,%.3f,%.3f\n", ax, ay, az);

    features[feat_index++] = ax;
    features[feat_index++] = ay;
    features[feat_index++] = az;

    // Quand la fenêtre est pleine, on lance le modèle
    if (feat_index >= frame_size)
    {
      // On repars au début pour la prochaine fenêtre
      feat_index = 0;

      // Création du signal attendu par EI
      signal_t signal;
      signal.total_length = frame_size;
      signal.get_data = get_feature_callback;

      // Lancer le modèle
      // EI_IMPULSE_ERROR ei_status = run_classifier(&signal, &g_result, false);

      // uint64_t start = k_uptime_get();
      // ei_printf("Running inference...\n");

      sys_cache_data_flush_all();

      ei_printf("\n=== start inference ===\n");
      EI_IMPULSE_ERROR ei_status = run_classifier(&signal, &g_result, true);
      ei_printf("\n=== end inference ===\n");
      // uint64_t end = k_uptime_get();

      // printf("Inference time: %llu ms\n", end - start);

      if (ei_status != EI_IMPULSE_OK)
      {
        printf("EI classifier error: %d\n", ei_status);
      }
      else
      {

        printf("\n=== PREDICTION ===\n");

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          printf("%s : %.3f\n",
                 g_result.classification[i].label,
                 g_result.classification[i].value);
        }

        // === Trouver la classe dominante ===
        float best_value = -1.0f;
        const char *best_label = "unknown";

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          if (g_result.classification[i].value > best_value)
          {
            best_value = g_result.classification[i].value;
            best_label = g_result.classification[i].label;
          }
        }

        // printf("**Detected state : %s (%.3f)\n", best_label, best_value);
        // ei_printf("Arena size: %d bytes\n", EI_CLASSIFIER_TFLITE_LARGEST_ARENA_SIZE);
      }
    }

    // Attendre 10 ms pour respecter 100 Hz
    k_msleep(EI_CLASSIFIER_INTERVAL_MS);
    //  k_msleep(5); // 200 Hz
  }

  return 0;
}

prj.conf :

# --- SYSTEM ---
CONFIG_CPP=y
CONFIG_STD_CPP17=y
# CONFIG_NEWLIB_LIBC=y
CONFIG_NEWLIB_LIBC_FLOAT_PRINTF=y

# CONFIG_REQUIRES_FULL_LIBC=y
# CONFIG_REQUIRES_FULL_LIBCPP=y

CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

# --- MEMORY ---

CONFIG_MAIN_STACK_SIZE=16384
# CONFIG_ISR_STACK_SIZE=16384
CONFIG_HEAP_MEM_POOL_SIZE=16384
# CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=16384

CONFIG_CACHE_MANAGEMENT=y


# --- LOGGING ---
CONFIG_LOG=y
CONFIG_LOG_MODE_IMMEDIATE=y

# --- UART ---
# CONFIG_SERIAL=y
# CONFIG_UART_CONSOLE=y

# --- SENSORS ---
CONFIG_GPIO=y
CONFIG_I2C=y
CONFIG_SENSOR=y
CONFIG_LIS2DUX12=y

# --- ETHOS-U55 (NPU) ---
CONFIG_ARM_ETHOS_U=y
CONFIG_ARM_ETHOS_U55_256=y

#CONFIG_CMSIS_DSP=y
CONFIG_CMSIS_NN=y


# CONFIG_FPU=y
# CONFIG_FPU_SHARING=y

# Active les fonctions de statistiques (Mean, Variance, RMS...)
# Corrige: arm_mean_f32, arm_rms_f32, arm_var_f32
CONFIG_CMSIS_DSP_STATISTICS=y

# Active les fonctions de transformation (FFT)
# Corrige: arm_rfft_fast_init_f32, arm_rfft_fast_f32
CONFIG_CMSIS_DSP_TRANSFORM=y

# Active les fonctions de matrices
# Corrige: arm_mat_trans_f32, arm_mat_scale_f32
CONFIG_CMSIS_DSP_MATRIX=y
# CONFIG_CMSIS_DSP_FASTMATH=y
# CONFIG_CMSIS_DSP_COMPLEXMATH=y

cmakelists.txt :

cmake_minimum_required(VERSION 3.13.1)

set(EXTRA_CONF_FILE ../../base.conf)
set(PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)

list(APPEND ZEPHYR_EXTRA_MODULES ${PROJECT_DIR}/drivers)

find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})

project(hp_app)

# add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC=1)
# add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC_SIZE=307200)
add_compile_definitions(EI_TENSOR_ARENA_LOCATION=".tensor_arena_buf_dtcm")

# --- CONFIGURATION ETHOS-U55 ---
if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+)-([0-9]+)$")
  set(ETHOSU_ARCH "${CMAKE_MATCH_1}")
  set(ETHOSU_MACS "${CMAKE_MATCH_2}")

  add_compile_definitions(ETHOSU_ARCH_U55=1)
  add_compile_definitions(EI_CLASSIFIER_TFLITE_ENABLE_ETHOS_U55=1)

  if(ETHOSU_MACS STREQUAL "256")
    set(CONFIG_ARM_ETHOS_U55_256 y CACHE BOOL "" FORCE)
  elseif(ETHOSU_MACS STREQUAL "128")
    set(CONFIG_ARM_ETHOS_U55_128 y CACHE BOOL "" FORCE)
  endif()
else()
  message(FATAL_ERROR "Missing -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-256")
endif()

# --- DEFINITIONS ---
target_compile_definitions(app PRIVATE
    # TF_LITE_STATIC_MEMORY
    EI_CLASSIFIER_SENSOR=EI_CLASSIFIER_SENSOR_UNKNOWN
    EIDSP_USE_CMSIS_DSP=1
    EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN=0
    EIDSP_LOAD_CMSIS_DSP_SOURCES=0
    ARM_MATH_CM55
    ARM_MATH_LOOPUNROLL
    ETHOSU55
)

# --- INCLUSIONS ---
zephyr_include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk
  ${CMAKE_CURRENT_SOURCE_DIR}/model-parameters
  ${CMAKE_CURRENT_SOURCE_DIR}/tflite-model
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/flatbuffers/include
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/gemmlowp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/ruy
)

# --- SOURCES ---
target_sources(app PRIVATE main.cpp)

# Edge Impulse Core
file(GLOB_RECURSE EI_DSP "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.c")
file(GLOB_RECURSE EI_CLASSIFIER "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.c")

# Third Party
file(GLOB_RECURSE EI_THIRD_PARTY 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cc"
)

# TensorFlow Lite Micro
file(GLOB_RECURSE EI_TENSORFLOW 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cc"
)

# Model
file(GLOB_RECURSE EI_MODEL 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cc"
)
file(GLOB_RECURSE EI_PARAMS 
    "${CMAKE_CURRENT_SOURCE_DIR}/model-parameters/*.cpp"
)

# Porting Layer
target_sources(app PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp")

# Assemblage
target_sources(app PRIVATE
    ${EI_DSP}
    ${EI_CLASSIFIER}
    ${EI_THIRD_PARTY}
    ${EI_TENSORFLOW}
    ${EI_MODEL}
    ${EI_PARAMS}
)

# --- OPTIONS ---
target_compile_options(app PRIVATE 
    -flax-vector-conversions 
    -Wno-strict-aliasing 
    -Wno-unused-parameter
    -Wno-deprecated-declarations
    -Wno-format
    -mfpu=auto
)


set_property(TARGET app PROPERTY CXX_STANDARD 17)

target_link_libraries(app PRIVATE -lm)

The application now crashes immediately after the first inference callback with an Unaligned memory access fault:

I have already made sure that my tensor_arena and my features buffer are aligned using __aligned(16) as required by the Ethos-U. I also tried to perform a cache flush using sys_cache_data_flush_all() before calling run_classifier, but the crash persists.

I am a bit stuck on how to resolve this memory alignment issue on the Cortex-M55. Does this look like a common issue when the arena is placed in standard SRAM instead of DTCM?

Any further advice would be greatly appreciated.

Best regards,

Rizelaine

Hello,

I managed to correct my previous error by adding this “uint8_t tensor_arena[kTensorArenaSize] attribute((aligned(16)));” in main_functions.cpp.
I now have the following error :

Does anyone know what is causing this error and how I can correct it?

Thank you in advance for your help.

Best regards,

Hi @Rizelaine

I will check what kind of error it is, in your code remember to add the code for the ei_free !
regards,
fv

@Rizelaine
I don’t see thie initialization of the NPU, see

You should call ethosu_init

fv

(post deleted by author)