Program freezes at inference start (Edge Impulse + Zephyr + ETHOS_U55)

Rizelaine · December 16, 2025, 1:52pm

Hello,

I am working on an Alif Ensemble E7 (Cortex-M55 + Ethos-U55) running Zephyr RTOS, and I am trying to deploy an Edge Impulse model that should run on the Ethos-U55 NPU.

My goal is to offload the inference to the Ethos-U NPU. I was able to build and flash the firmware, and the boot log correctly shows:

“Starting EI realtime classifier…”
“Ethos-U device is ready”
and then just before inference: === start inference ===

But after that, the program never continues: I never see the “after run_classifier” log (=== end inference ===). It doesn’t reboot and doesn’t appear to crash, but no more printf/ei_printf output appears after the call to run_classifier().

main.cpp :

#include <zephyr/kernel.h>
#include <zephyr/device.h>
#include <zephyr/drivers/sensor.h>
#include <zephyr/drivers/gpio.h>
#include <stdio.h>

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"

// LED
#define RED_LED_NODE DT_ALIAS(led0)
static const struct gpio_dt_spec led = GPIO_DT_SPEC_GET(RED_LED_NODE, gpios);

// Accéléromètre
static const struct device *sensor = DEVICE_DT_GET(DT_NODELABEL(lis2dux12_body));


static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
static size_t feat_index = 0;


/**
 * Signal read callback given to the Edge Impulse SDK through signal_t::get_data.
 *
 * Copies `length` floats, starting at element `offset` of the file-level
 * `features` window, into `out_ptr`.
 *
 * @param offset   Index of the first element to copy from `features`.
 * @param length   Number of float elements to copy.
 * @param out_ptr  Destination buffer; must have room for `length` floats.
 * @return 0 on success, -1 if the request is invalid (null destination or a
 *         range that does not lie entirely inside `features`).
 */
static int get_feature_callback(size_t offset, size_t length, float *out_ptr)
{
  // Trace only every 50th call so the console is not flooded.
  static uint32_t n = 0;
  if ((n++ % 50) == 0)
  {
    ei_printf("[cb] off=%u len=%u\n", (unsigned)offset, (unsigned)length);
  }

  // Refuse any request that would read past the end of the window. The check
  // is written as `length > capacity - offset` so it cannot wrap around.
  const size_t capacity = sizeof(features) / sizeof(features[0]);
  if (out_ptr == NULL || offset > capacity || length > capacity - offset)
  {
    return -1;
  }

  memcpy(out_ptr, features + offset, length * sizeof(float));
  return 0;
}

static ei_impulse_result_t g_result;

K_THREAD_STACK_DEFINE(hb_stack, 1024);
static struct k_thread hb_thread;

/**
 * Heartbeat thread entry point: prints the current Edge Impulse timer value
 * once per second, forever. The three Zephyr thread arguments are unused.
 *
 * NOTE(review): no k_thread_create() call for hb_thread/hb_stack is visible in
 * this listing, so this function may never actually run — confirm.
 */
static void hb_fn(void *, void *, void *)
{
  for (;;)
  {
    const uint64_t now_ms = ei_read_timer_ms();
    ei_printf("[hb] t=%llu\n", now_ms);
    k_msleep(1000);
  }
}

/**
 * Application entry point.
 *
 * Checks that the accelerometer and the Ethos-U device are ready, then loops
 * forever: reads one X/Y/Z acceleration sample per iteration into the
 * file-level `features` window and, each time the window is full, runs the
 * Edge Impulse classifier on it and prints the per-class scores and the
 * best-scoring class.
 *
 * @return 0 if a required device is not ready; otherwise never returns.
 */
auto main() -> int
{
  printf("Starting EI realtime classifier...\n");

  if (!device_is_ready(sensor))
  {
    printf("Accelerometer NOT READY\n");
    return 0;
  }

  // The LED is only an activity indicator: drive it only if it was actually
  // configured, instead of toggling an unconfigured pin on every iteration.
  const bool led_ok = gpio_is_ready_dt(&led) &&
                      (gpio_pin_configure_dt(&led, GPIO_OUTPUT) == 0);

  const size_t frame_size = EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE;

  // Stop here when the NPU is unavailable: starting the sampling loop and
  // calling run_classifier() anyway cannot be expected to complete.
  const struct device *ethosu = DEVICE_DT_GET(DT_NODELABEL(ethosu0));
  if (!device_is_ready(ethosu))
  {
    printf("Ethos-U device not ready\n");
    return 0;
  }
  printf("Ethos-U device is ready\n");

  while (true)
  {
    if (led_ok)
    {
      gpio_pin_toggle_dt(&led);
    }

    int rc = sensor_sample_fetch_chan(sensor, SENSOR_CHAN_ACCEL_XYZ);
    if (rc != 0)
    {
      printf("sensor_sample_fetch_chan error: %d\n", rc);
      k_msleep(10);
      continue;
    }

    struct sensor_value accel[3];
    rc = sensor_channel_get(sensor, SENSOR_CHAN_ACCEL_XYZ, accel);
    if (rc != 0)
    {
      printf("sensor_channel_get error: %d\n", rc);
      k_msleep(10);
      continue;
    }

    // Append X, Y, Z to the window. The `feat_index < frame_size` guard keeps
    // the writes inside `features` even if the frame size is not a multiple
    // of three.
    for (size_t axis = 0; axis < 3 && feat_index < frame_size; axis++)
    {
      features[feat_index++] = static_cast<float>(sensor_value_to_double(&accel[axis]));
    }

    if (feat_index >= frame_size)
    {
      // Window complete: restart filling from the beginning for the next one.
      feat_index = 0;

      signal_t signal;
      signal.total_length = frame_size;
      signal.get_data = get_feature_callback;

      ei_printf("\n=== start inference ===\n");
      EI_IMPULSE_ERROR ei_status = run_classifier(&signal, &g_result, false);
      ei_printf("\n=== end inference ===\n");

      if (ei_status != EI_IMPULSE_OK)
      {
        printf("EI classifier error: %d\n", static_cast<int>(ei_status));
      }
      else
      {
        printf("\n=== PREDICTION ===\n");

        // Print every class score and track the highest one in a single pass.
        float best_value = -1.0f;
        const char *best_label = "unknown";

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          const float score = g_result.classification[i].value;
          const char *label = g_result.classification[i].label;

          printf("%s : %.3f\n", label, score);

          if (score > best_value)
          {
            best_value = score;
            best_label = label;
          }
        }

        printf("**Detected state : %s (%.3f)\n", best_label, best_value);
      }
    }

    // Pace the loop at the sampling interval the model was built for.
    k_msleep(EI_CLASSIFIER_INTERVAL_MS);
  }
}

prj.conf :

# --- SYSTEM ---
CONFIG_CPP=y
CONFIG_STD_CPP17=y
CONFIG_NEWLIB_LIBC=y
CONFIG_NEWLIB_LIBC_FLOAT_PRINTF=y

CONFIG_REQUIRES_FULL_LIBC=y
CONFIG_REQUIRES_FULL_LIBCPP=y
CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

# --- MEMORY ---

CONFIG_MAIN_STACK_SIZE=65536
CONFIG_ISR_STACK_SIZE=8192
CONFIG_HEAP_MEM_POOL_SIZE=131072
CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=8192


# --- LOGGING ---
CONFIG_LOG=y
CONFIG_LOG_MODE_IMMEDIATE=y

# --- UART ---
CONFIG_SERIAL=y
CONFIG_UART_CONSOLE=y

# --- SENSORS ---
CONFIG_GPIO=y
CONFIG_I2C=y
CONFIG_SENSOR=y
CONFIG_LIS2DUX12=y

# --- ETHOS-U55 (NPU) ---
CONFIG_ARM_ETHOS_U=y
CONFIG_ARM_ETHOS_U55_256=y

CONFIG_CMSIS_DSP=y
# CONFIG_CMSIS_NN=y

CONFIG_FPU=y
CONFIG_FPU_SHARING=y


CONFIG_CMSIS_DSP_STATISTICS=y



CONFIG_CMSIS_DSP_TRANSFORM=y

CONFIG_CMSIS_DSP_MATRIX=y

CONFIG_CMSIS_DSP_FILTERING=y
CONFIG_CMSIS_DSP_DISTANCE=y


CONFIG_ASSERT=y
CONFIG_INIT_STACKS=y
CONFIG_STACK_SENTINEL=y
CONFIG_THREAD_STACK_INFO=y
CONFIG_EXCEPTION_STACK_TRACE=y
CONFIG_FAULT_DUMP=2

CmakeLists.txt :

cmake_minimum_required(VERSION 3.13.1)

set(EXTRA_CONF_FILE ../../base.conf)
set(PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)

list(APPEND ZEPHYR_EXTRA_MODULES ${PROJECT_DIR}/drivers)

find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})

project(hp_app)

add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC=1)

# --- CONFIGURATION ETHOS-U55 ---
# Parse ETHOSU_TARGET_NPU_CONFIG, expected in the form "ethos-<arch>-<macs>"
# (for example "ethos-u55-256"). The first capture group is the architecture
# ("u55"), the second is the MAC count ("256"). Configuration stops with a
# fatal error when the variable is missing or does not match.
#
# NOTE(review): the pattern accepts any "u<digits>" architecture, yet the
# U55-specific macros below are defined unconditionally, and ETHOSU_ARCH is not
# read anywhere in this block — confirm this is intended.
# NOTE(review): MAC counts other than 256 or 128 pass the pattern but set no
# CONFIG_* variable, without any diagnostic.
# NOTE(review): the CONFIG_* cache variables are set after find_package(Zephyr)
# has already run — confirm they actually reach Kconfig.
if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+)-([0-9]+)$")
  set(ETHOSU_ARCH "${CMAKE_MATCH_1}")
  set(ETHOSU_MACS "${CMAKE_MATCH_2}")

  # Compile definitions selecting the Ethos-U55 code paths.
  add_compile_definitions(ETHOSU_ARCH_U55=1)
  add_compile_definitions(EI_CLASSIFIER_TFLITE_ENABLE_ETHOS_U55=1)

  # Force the cache variable that corresponds to the requested MAC count.
  if(ETHOSU_MACS STREQUAL "256")
    set(CONFIG_ARM_ETHOS_U55_256 y CACHE BOOL "" FORCE)
  elseif(ETHOSU_MACS STREQUAL "128")
    set(CONFIG_ARM_ETHOS_U55_128 y CACHE BOOL "" FORCE)
  endif()
else()
  message(FATAL_ERROR "Missing -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-256")
endif()

# --- DEFINITIONS ---
target_compile_definitions(app PRIVATE
    TF_LITE_STATIC_MEMORY
    EI_CLASSIFIER_SENSOR=EI_CLASSIFIER_SENSOR_UNKNOWN
    
    EIDSP_USE_CMSIS_DSP=1
    EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN=0
    EIDSP_LOAD_CMSIS_DSP_SOURCES=0
    
    ARM_MATH_CM55
    ARM_MATH_LOOPUNROLL
)

# --- INCLUSIONS ---
zephyr_include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk
  ${CMAKE_CURRENT_SOURCE_DIR}/model-parameters
  ${CMAKE_CURRENT_SOURCE_DIR}/tflite-model
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/flatbuffers/include
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/gemmlowp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/ruy
)

# --- SOURCES ---
target_sources(app PRIVATE main.cpp)

# Edge Impulse Core
file(GLOB_RECURSE EI_DSP "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.c")
file(GLOB_RECURSE EI_CLASSIFIER "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.c")

# Third Party
file(GLOB_RECURSE EI_THIRD_PARTY 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cc"
)

# TensorFlow Lite Micro
file(GLOB_RECURSE EI_TENSORFLOW 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cc"
)

# Model
file(GLOB_RECURSE EI_MODEL 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cc"
)
file(GLOB_RECURSE EI_PARAMS 
    "${CMAKE_CURRENT_SOURCE_DIR}/model-parameters/*.cpp"
)

# Porting Layer
target_sources(app PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp")

# Assemblage
target_sources(app PRIVATE
    ${EI_DSP}
    ${EI_CLASSIFIER}
    ${EI_THIRD_PARTY}
    ${EI_TENSORFLOW}
    ${EI_MODEL}
    ${EI_PARAMS}
)

# --- OPTIONS ---
target_compile_options(app PRIVATE 
    -flax-vector-conversions 
    -Wno-strict-aliasing 
    -Wno-unused-parameter
    -Wno-deprecated-declarations
    -Wno-format
)

set_property(TARGET app PROPERTY CXX_STANDARD 17)

target_link_libraries(app PRIVATE -lm)

Thank you in advance for your help.

Best regards,

Rizelaine

Rizelaine · December 18, 2025, 2:09pm

Hello everyone,

I managed to solve my previous “Bus Fault” / memory corruption crashes by disabling “CONFIG_NEWLIB_LIBC=y” in my prj.conf. It seems it was causing issues with the available memory layout.

However, I am now facing a new error immediately when inference starts:

I know that error -3 usually points to an out-of-memory issue. Since my model is quite small (reported as ~7.4 KB of RAM on the dashboard), I tried to increase the heap size significantly in prj.conf, but the error persists.

Here is my current configuration:

CONFIG_MAIN_STACK_SIZE=16384
CONFIG_HEAP_MEM_POOL_SIZE=524288  
CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

Has anyone encountered this error?

Thanks in advance for your help!

ei_francesco · December 18, 2025, 2:43pm

Hi @Rizelaine

Seems you can’t allocate tensor_arena.
Can you test calling ei_malloc and ei_free to check if you can allocate dynamically?

Which deployment did you end up using: EON-compiled or TFLite?

You can also try to define
EI_CLASSIFIER_ALLOCATION_STATIC
and, if you want to place in a specific memory area, something like
EI_TENSOR_ARENA_LOCATION: “.tensor_arena_buf_dtcm”
but this needs the linker script to be modified

See C++ library - Edge Impulse Documentation

fv

Rizelaine · December 18, 2025, 4:16pm

Thank you for your help!

I am currently using a standard TensorFlow Lite deployment.

By implementing a manual static arena in my main.cpp and overriding ei_malloc, I managed to get past the AllocateTensors() failed (-3) error. It seems the standard heap was indeed too fragmented for the NPU’s requirements.

main.cpp :


#include <zephyr/kernel.h>
#include <zephyr/device.h>
#include <zephyr/cache.h>
#include <zephyr/drivers/sensor.h>
#include <zephyr/drivers/gpio.h>
#include <stdio.h>
#include <stdlib.h>

#include "edge-impulse-sdk/classifier/ei_run_classifier.h"

__aligned(16) static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};

// 2. ARÈNE STATIQUE ALIGNÉE
static uint8_t my_tensor_arena[307200] __aligned(16);

void *ei_malloc(size_t size)
{
  if (size > sizeof(my_tensor_arena))
    return NULL;
  return my_tensor_arena;
}
void ei_free(void *ptr) {}

// // --- FONCTIONS D'ALLOCATION ---

// static uint8_t ei_tensor_arena[262144] __attribute__((aligned(16)));

// static size_t arena_offset = 0;

// void *ei_malloc(size_t size)
// {
//   // Alignement manuel sur 16 octets
//   size_t align_offset = (16 - (arena_offset % 16)) % 16;
//   size_t alloc_size = size + align_offset;

//   // Vérifie si on a encore de la place
//   if (arena_offset + alloc_size > sizeof(ei_tensor_arena))
//   {
//     printk("ERREUR: Arena pleine ! Demande: %d, Dispo: %d\n",
//            (int)size, (int)(sizeof(ei_tensor_arena) - arena_offset));
//     return NULL;
//   }

//   // On donne l'adresse libre
//   void *ptr = &ei_tensor_arena[arena_offset + align_offset];

//   // On avance le pointeur pour la prochaine fois
//   arena_offset += alloc_size;

//   return ptr;
// }

// void *ei_calloc(size_t nitems, size_t size)
// {
//   size_t total_size = nitems * size;
//   void *ptr = ei_malloc(total_size);
//   if (ptr)
//   {
//     memset(ptr, 0, total_size);
//   }
//   return ptr;
// }

// void ei_free(void *ptr)
// {
// }
// // -------------------------------------------------------

// LED
#define RED_LED_NODE DT_ALIAS(led0)
static const struct gpio_dt_spec led = GPIO_DT_SPEC_GET(RED_LED_NODE, gpios);

// Accéléromètre
static const struct device *sensor = DEVICE_DT_GET(DT_NODELABEL(lis2dux12_body));

// Buffer : 100 échantillons * 3 axes = 300 valeurs
// static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
//__aligned(16) static float features[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE] = {0};
static size_t feat_index = 0;

// Callback utilisé par Edge Impulse pour lire les données

/**
 * Callback used by Edge Impulse (via signal_t::get_data) to read the data.
 *
 * Copies `length` floats from the file-level `features` window, beginning at
 * element `offset`, into `out_ptr`.
 *
 * @param offset   First element of `features` to copy.
 * @param length   Number of float elements requested.
 * @param out_ptr  Destination buffer with room for `length` floats.
 * @return 0 on success, -1 when `out_ptr` is null or the requested range does
 *         not fit inside `features`.
 */
static int get_feature_callback(size_t offset, size_t length, float *out_ptr)
{
  // Log one call out of every 50 to keep the trace readable.
  static uint32_t n = 0;
  if ((n++ % 50) == 0)
  {
    ei_printf("[cb] off=%u len=%u\n", (unsigned)offset, (unsigned)length);
  }

  // Validate the range before copying; `length > count - offset` avoids the
  // wrap-around that `offset + length` could suffer.
  const size_t count = sizeof(features) / sizeof(features[0]);
  if (out_ptr == NULL || offset > count || length > count - offset)
  {
    return -1;
  }

  memcpy(out_ptr, features + offset, length * sizeof(float));
  return 0;
}

static ei_impulse_result_t g_result;

K_THREAD_STACK_DEFINE(hb_stack, 1024);
static struct k_thread hb_thread;

/**
 * Heartbeat thread body: once per second, forever, logs the value returned by
 * ei_read_timer_ms(). None of the three thread arguments is used.
 *
 * NOTE(review): this listing shows hb_stack and hb_thread being defined but no
 * call that starts the thread — confirm whether the heartbeat is ever launched.
 */
static void hb_fn(void *, void *, void *)
{
  do
  {
    const uint64_t uptime_ms = ei_read_timer_ms();
    ei_printf("[hb] t=%llu\n", uptime_ms);
    k_msleep(1000);
  } while (true);
}

/**
 * Application entry point.
 *
 * Verifies that the accelerometer and the Ethos-U device are ready, then loops
 * forever: each iteration appends one X/Y/Z acceleration sample to the
 * file-level `features` window; when the window is full, the data cache is
 * flushed, the Edge Impulse classifier is run (with SDK debug output enabled)
 * and the per-class scores are printed.
 *
 * @return 0 if a required device is not ready; otherwise never returns.
 */
auto main() -> int
{
  printf("Starting EI realtime classifier...\n");

  // --- Sensor check ---
  if (!device_is_ready(sensor))
  {
    printf("Accelerometer NOT READY\n");
    return 0;
  }

  // --- LED ---
  // Activity indicator only: remember whether it could be configured so the
  // loop never toggles an unconfigured pin.
  const bool led_ok = gpio_is_ready_dt(&led) &&
                      (gpio_pin_configure_dt(&led, GPIO_OUTPUT) == 0);

  // Number of float values making up one classifier input window.
  const size_t frame_size = EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE;

  // --- Ethos-U check ---
  // Stop here when the NPU is unavailable: starting the sampling loop and
  // calling run_classifier() anyway cannot be expected to complete.
  const struct device *ethosu = DEVICE_DT_GET(DT_NODELABEL(ethosu0));
  if (!device_is_ready(ethosu))
  {
    printf("Ethos-U device not ready\n");
    return 0;
  }
  printf("Ethos-U device is ready\n");

  while (true)
  {
    if (led_ok)
    {
      gpio_pin_toggle_dt(&led);
    }

    int rc = sensor_sample_fetch_chan(sensor, SENSOR_CHAN_ACCEL_XYZ);
    if (rc != 0)
    {
      printf("sensor_sample_fetch_chan error: %d\n", rc);
      k_msleep(10);
      continue;
    }

    struct sensor_value accel[3];
    rc = sensor_channel_get(sensor, SENSOR_CHAN_ACCEL_XYZ, accel);
    if (rc != 0)
    {
      printf("sensor_channel_get error: %d\n", rc);
      k_msleep(10);
      continue;
    }

    // Append X, Y, Z to the window. The `feat_index < frame_size` guard keeps
    // the writes inside `features` even if the frame size is not a multiple
    // of three.
    for (size_t axis = 0; axis < 3 && feat_index < frame_size; axis++)
    {
      features[feat_index++] = static_cast<float>(sensor_value_to_double(&accel[axis]));
    }

    // When the window is full, run the model.
    if (feat_index >= frame_size)
    {
      // Start again from the beginning for the next window.
      feat_index = 0;

      // Build the signal object expected by Edge Impulse.
      signal_t signal;
      signal.total_length = frame_size;
      signal.get_data = get_feature_callback;

      // Write back the data cache before the classifier runs.
      sys_cache_data_flush_all();

      ei_printf("\n=== start inference ===\n");
      EI_IMPULSE_ERROR ei_status = run_classifier(&signal, &g_result, true);
      ei_printf("\n=== end inference ===\n");

      if (ei_status != EI_IMPULSE_OK)
      {
        printf("EI classifier error: %d\n", static_cast<int>(ei_status));
      }
      else
      {
        printf("\n=== PREDICTION ===\n");

        for (size_t i = 0; i < EI_CLASSIFIER_LABEL_COUNT; i++)
        {
          printf("%s : %.3f\n",
                 g_result.classification[i].label,
                 g_result.classification[i].value);
        }
      }
    }

    // Wait one sampling interval before reading the next sample.
    k_msleep(EI_CLASSIFIER_INTERVAL_MS);
  }
}

prj.conf :

# --- SYSTEM ---
CONFIG_CPP=y
CONFIG_STD_CPP17=y
# CONFIG_NEWLIB_LIBC=y
CONFIG_NEWLIB_LIBC_FLOAT_PRINTF=y

# CONFIG_REQUIRES_FULL_LIBC=y
# CONFIG_REQUIRES_FULL_LIBCPP=y

CONFIG_NEWLIB_LIBC_MIN_REQUIRED_HEAP_SIZE=8192

# --- MEMORY ---

CONFIG_MAIN_STACK_SIZE=16384
# CONFIG_ISR_STACK_SIZE=16384
CONFIG_HEAP_MEM_POOL_SIZE=16384
# CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE=16384

CONFIG_CACHE_MANAGEMENT=y


# --- LOGGING ---
CONFIG_LOG=y
CONFIG_LOG_MODE_IMMEDIATE=y

# --- UART ---
# CONFIG_SERIAL=y
# CONFIG_UART_CONSOLE=y

# --- SENSORS ---
CONFIG_GPIO=y
CONFIG_I2C=y
CONFIG_SENSOR=y
CONFIG_LIS2DUX12=y

# --- ETHOS-U55 (NPU) ---
CONFIG_ARM_ETHOS_U=y
CONFIG_ARM_ETHOS_U55_256=y

#CONFIG_CMSIS_DSP=y
CONFIG_CMSIS_NN=y


# CONFIG_FPU=y
# CONFIG_FPU_SHARING=y

# Active les fonctions de statistiques (Mean, Variance, RMS...)
# Corrige: arm_mean_f32, arm_rms_f32, arm_var_f32
CONFIG_CMSIS_DSP_STATISTICS=y

# Active les fonctions de transformation (FFT)
# Corrige: arm_rfft_fast_init_f32, arm_rfft_fast_f32
CONFIG_CMSIS_DSP_TRANSFORM=y

# Active les fonctions de matrices
# Corrige: arm_mat_trans_f32, arm_mat_scale_f32
CONFIG_CMSIS_DSP_MATRIX=y
# CONFIG_CMSIS_DSP_FASTMATH=y
# CONFIG_CMSIS_DSP_COMPLEXMATH=y

cmakelists.txt :

cmake_minimum_required(VERSION 3.13.1)

set(EXTRA_CONF_FILE ../../base.conf)
set(PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)

list(APPEND ZEPHYR_EXTRA_MODULES ${PROJECT_DIR}/drivers)

find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})

project(hp_app)

# add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC=1)
# add_compile_definitions(EI_CLASSIFIER_ALLOCATION_STATIC_SIZE=307200)
add_compile_definitions(EI_TENSOR_ARENA_LOCATION=".tensor_arena_buf_dtcm")

# --- CONFIGURATION ETHOS-U55 ---
# Validate and decode ETHOSU_TARGET_NPU_CONFIG ("ethos-<arch>-<macs>", e.g.
# "ethos-u55-256"): capture group 1 is the architecture, capture group 2 the
# MAC count. A missing or malformed value aborts configuration.
#
# NOTE(review): any "u<digits>" architecture matches, but the U55 definitions
# are added regardless of what was captured, and ETHOSU_ARCH is not used inside
# this block — confirm this is intended.
# NOTE(review): only MAC counts 256 and 128 are handled; other values are
# accepted by the pattern and then ignored without a message.
# NOTE(review): the CONFIG_* cache variables are written after
# find_package(Zephyr) — confirm they are actually picked up by Kconfig.
if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+)-([0-9]+)$")
  set(ETHOSU_ARCH "${CMAKE_MATCH_1}")
  set(ETHOSU_MACS "${CMAKE_MATCH_2}")

  # Compile definitions enabling the Ethos-U55 code paths.
  add_compile_definitions(ETHOSU_ARCH_U55=1)
  add_compile_definitions(EI_CLASSIFIER_TFLITE_ENABLE_ETHOS_U55=1)

  # Force the cache variable matching the requested MAC count.
  if(ETHOSU_MACS STREQUAL "256")
    set(CONFIG_ARM_ETHOS_U55_256 y CACHE BOOL "" FORCE)
  elseif(ETHOSU_MACS STREQUAL "128")
    set(CONFIG_ARM_ETHOS_U55_128 y CACHE BOOL "" FORCE)
  endif()
else()
  message(FATAL_ERROR "Missing -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-256")
endif()

# --- DEFINITIONS ---
target_compile_definitions(app PRIVATE
    # TF_LITE_STATIC_MEMORY
    EI_CLASSIFIER_SENSOR=EI_CLASSIFIER_SENSOR_UNKNOWN
    EIDSP_USE_CMSIS_DSP=1
    EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN=0
    EIDSP_LOAD_CMSIS_DSP_SOURCES=0
    ARM_MATH_CM55
    ARM_MATH_LOOPUNROLL
    ETHOSU55
)

# --- INCLUSIONS ---
zephyr_include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk
  ${CMAKE_CURRENT_SOURCE_DIR}/model-parameters
  ${CMAKE_CURRENT_SOURCE_DIR}/tflite-model
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/flatbuffers/include
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/gemmlowp
  ${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/ruy
)

# --- SOURCES ---
target_sources(app PRIVATE main.cpp)

# Edge Impulse Core
file(GLOB_RECURSE EI_DSP "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/dsp/*.c")
file(GLOB_RECURSE EI_CLASSIFIER "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/classifier/*.c")

# Third Party
file(GLOB_RECURSE EI_THIRD_PARTY 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/third_party/*.cc"
)

# TensorFlow Lite Micro
file(GLOB_RECURSE EI_TENSORFLOW 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.c" 
    "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/tensorflow/*.cc"
)

# Model
file(GLOB_RECURSE EI_MODEL 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cpp" 
    "${CMAKE_CURRENT_SOURCE_DIR}/tflite-model/*.cc"
)
file(GLOB_RECURSE EI_PARAMS 
    "${CMAKE_CURRENT_SOURCE_DIR}/model-parameters/*.cpp"
)

# Porting Layer
target_sources(app PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp")

# Assemblage
target_sources(app PRIVATE
    ${EI_DSP}
    ${EI_CLASSIFIER}
    ${EI_THIRD_PARTY}
    ${EI_TENSORFLOW}
    ${EI_MODEL}
    ${EI_PARAMS}
)

# --- OPTIONS ---
target_compile_options(app PRIVATE 
    -flax-vector-conversions 
    -Wno-strict-aliasing 
    -Wno-unused-parameter
    -Wno-deprecated-declarations
    -Wno-format
    -mfpu=auto
)


set_property(TARGET app PROPERTY CXX_STANDARD 17)

target_link_libraries(app PRIVATE -lm)

The application now crashes immediately after the first inference callback with an Unaligned memory access fault:

I have already made sure that my tensor_arena and my features buffer are aligned using __aligned(16) as required by the Ethos-U. I also tried to perform a cache flush using sys_cache_data_flush_all() before calling run_classifier, but the crash persists.

I am a bit stuck on how to resolve this memory alignment issue on the Cortex-M55. Does this look like a common issue when the arena is placed in standard SRAM instead of DTCM?

Any further advice would be greatly appreciated.

Best regards,

Rizelaine

Rizelaine · December 23, 2025, 10:41am

Hello,

I managed to correct my previous error by adding `uint8_t tensor_arena[kTensorArenaSize] __attribute__((aligned(16)));` in main_functions.cpp.
I now have the following error :

Does anyone know what is causing this error and how I can correct it?

Thank you in advance for your help.

Best regards,

ei_francesco · December 30, 2025, 10:44am

Hi @Rizelaine

I will check what kind of error it is, in your code remember to add the code for the ei_free !
regards,
fv

ei_francesco · December 30, 2025, 2:59pm

@Rizelaine
I don’t see the initialization of the NPU; see

github.com

edgeimpulse/firmware-alif-csolution/blob/main/app/npu/npu_handler.cpp

/* The Clear BSD License
 *
 * Copyright (c) 2025 EdgeImpulse Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted (subject to the limitations in the disclaimer
 * below) provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 *   * Neither the name of the copyright holder nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *

This file has been truncated. show original

You should call ethosu_init

fv

ei_francesco · January 12, 2026, 2:25pm

Hi @Rizelaine
were you able to run inference ?

We recently added the Zephyr deployment for Ethos, let me know if you find it useful.

fv

Rizelaine · January 12, 2026, 4:26pm

Hi Francesco! Yes, I was finally able to run the inference successfully.

To make it work, I had to manually fine-tune the Ethos-U core driver integration within the CMake build system. Specifically, I:

Mapped the log levels between Zephyr and the Ethos-U driver to ensure better debugging.
Configured the NPU memory regions (QCONFIG and REGIONCFG) specifically for the Alif Ensemble family.
Optimized the AXI bus transactions by adjusting the burst lengths (max beats) and memory attributes for the U55/U85 architectures to ensure stable data transit between the NPU and the SRAM.

Best regards

ei_francesco · January 12, 2026, 6:44pm

That’s great to hear!