Assistance Required: MoveNet Pose Estimation Code for the NPU on i.MX 8M Plus

adarshkv · ‎02-25-2025

Dear NXP support,

I’m trying to run the MoveNet model (https://github.com/NXP/eiq-model-zoo/tree/main/tasks/vision/pose-estimation/movenet) on the i.MX 8M Plus NPU using libvx_delegate.so, but I’m hitting unsupported ops followed by a segmentation fault. Here’s the output:

Loading NPU delegate from /usr/lib/libvx_delegate.so
Vx delegate: allowed_builtin_code set to 0.
Vx delegate: error_during_init set to 0.
Vx delegate: error_during_prepare set to 0.
Vx delegate: error_during_invoke set to 0.
ERROR: Fallback unsupported op 53 to TfLite
ERROR: Fallback unsupported op 90 to TfLite
ERROR: Fallback unsupported op 53 to TfLite
ERROR: Fallback unsupported op 53 to TfLite
ERROR: Fallback unsupported op 88 to TfLite
ERROR: Fallback unsupported op 90 to TfLite
ERROR: Fallback unsupported op 53 to TfLite
ERROR: Fallback unsupported op 53 to TfLite
ERROR: Fallback unsupported op 88 to TfLite
Segmentation fault

I’ve attached my Python script (example_npu.py) and the MoveNet model. The script works on the CPU but crashes when run on the NPU.

Could you provide an updated code example for running movenet.tflite (or a similar model) on the NPU that addresses these errors? Assistance with model optimization would also help.

import cv2

import tensorflow as tf

import numpy as np

import time

# Model and image files used by this example.
MODEL_FILENAME = "movenet_opt.tflite"  # Use optimized model
IMAGE_FILENAME = "example_input.jpg"

# Drawing colours passed to cv2.line / cv2.circle (OpenCV uses BGR order).
LINE_COLOR = (255, 128, 0)
POINT_COLOR = (0, 0, 255)

# Shared library that implements the NPU (VX) delegate.
DELEGATE_PATH = "/usr/lib/libvx_delegate.so"

# The original paste left `keypoints_def` as a placeholder and never defined
# `connections`, so the drawing loop failed with NameError.
# NOTE(review): the values below follow the standard 17-keypoint MoveNet/COCO
# ordering and skeleton edges; confirm they match the example this script was
# derived from.
keypoints_def = [
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
]

# Pairs of indices into `keypoints_def` that are joined by a skeleton line.
connections = [
    (0, 1),
    (0, 2),
    (1, 3),
    (2, 4),
    (0, 5),
    (0, 6),
    (5, 7),
    (7, 9),
    (6, 8),
    (8, 10),
    (5, 6),
    (5, 11),
    (6, 12),
    (11, 12),
    (11, 13),
    (13, 15),
    (12, 14),
    (14, 16),
]

# Load the NPU delegate, then build the interpreter on top of it.
# NOTE(review): "logging_level" has not been confirmed as an option the VX
# delegate understands -- check the delegate's documented option keys.
delegate_options = {"logging_level": "DEBUG"}  # Adjust based on NXP docs

print(f"Loading NPU delegate from {DELEGATE_PATH}")
try:
    npu_delegate = tf.lite.experimental.load_delegate(DELEGATE_PATH, delegate_options)
except (ValueError, OSError, RuntimeError) as e:
    # ValueError: the delegate failed to initialise; OSError: the shared
    # library could not be opened; RuntimeError: unsupported platform.
    print(f"Failed to load delegate: {e}")
    raise SystemExit(1)

# Initialize interpreter. The delegate is attached while the interpreter is
# being constructed, so construction is guarded as well as tensor allocation.
# A native crash inside the delegate (e.g. a segmentation fault) terminates
# the process and cannot be caught here.
try:
    interpreter = tf.lite.Interpreter(
        model_path=MODEL_FILENAME,
        experimental_delegates=[npu_delegate],
    )
except (ValueError, RuntimeError) as e:
    print(f"Failed to create interpreter: {e}")
    raise SystemExit(1)

try:
    interpreter.allocate_tensors()
except RuntimeError as e:
    print(f"Failed to allocate tensors: {e}")
    raise SystemExit(1)

def load_image(filename, input_size=192):
    """Read an image from disk and prepare a batched copy for the model.

    Args:
        filename: Path of the image file to read.
        input_size: Side length, in pixels, of the square image handed to
            the model. Defaults to 192, the value this script used.

    Returns:
        A tuple ``(orig_image, image)``: the image exactly as OpenCV decoded
        it, and the resized copy with a leading batch axis of length 1.

    Raises:
        ValueError: If OpenCV could not read ``filename``.
    """
    orig_image = cv2.imread(filename, cv2.IMREAD_COLOR)
    if orig_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Failed to load image: {filename}")

    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (input_size, input_size))
    # NOTE(review): reversing the last axis undoes the BGR->RGB conversion
    # above, so the model receives BGR data. Kept as in the original script;
    # confirm which channel order the model was trained/quantized with.
    image = image[..., ::-1]
    image = np.expand_dims(image, axis=0)
    return orig_image, image

def run_inference(interpreter, image):
    """Run one inference and return the first output tensor as float32.

    Args:
        interpreter: An interpreter exposing ``get_input_details``,
            ``get_output_details``, ``set_tensor``, ``invoke`` and
            ``get_tensor``; its tensors must already be allocated.
        image: Array written to the first input tensor after being cast to
            ``uint8``.

    Returns:
        The first output tensor converted to ``numpy.float32``.
    """
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # NOTE(review): the cast assumes the model's input tensor is uint8 --
    # confirm against input_details[0]['dtype'] for the model in use.
    interpreter.set_tensor(input_details[0]['index'], image.astype(np.uint8))
    interpreter.invoke()

    return interpreter.get_tensor(output_details[0]['index']).astype(np.float32)

# Load image
try:
    orig_image, processed_image = load_image(IMAGE_FILENAME)
except ValueError as e:
    print(e)
    raise SystemExit(1)

# Run inference. perf_counter is a monotonic clock intended for measuring
# short intervals.
# NOTE(review): this times a single, first invoke; on an accelerator the
# first call may include one-off warm-up work -- confirm before treating the
# figure as steady-state latency.
start = time.perf_counter()
out = run_inference(interpreter, processed_image)[0, 0, ...]
end = time.perf_counter()
print("Inference time:", (end - start) * 1000, "ms")

# Draw results
# NOTE(review): assumes each row of `out` is (y, x, ...) normalised to
# [0, 1] -- confirm against the model's output specification.
# shape[0] is the image height and shape[1] its width.
h, w = orig_image.shape[0:2]
out[:, 0] *= h
out[:, 1] *= w

# OpenCV drawing calls take points as (x, y), hence the swapped columns.
points = [(int(row[1]), int(row[0])) for row in out]

for i, j in connections:
    cv2.line(orig_image, points[i], points[j], LINE_COLOR, 5)

for point in points:
    cv2.circle(orig_image, point, 5, POINT_COLOR, 10)

cv2.imwrite("example_output.jpg", orig_image)

# Show the annotated image and wait for a key press before closing.
cv2.imshow('Pose Estimation', orig_image)
cv2.waitKey(0)
cv2.destroyAllWindows()