Hello @danielchen
The BSP version is 6.12.20.
I checked /proc/version:
Linux version 6.12.20-lts-next-gdfaf2136deb2 (oe-user@oe-host) (aarch64-poky-linux-gcc (GCC) 14.2.0, GNU ld (GNU Binutils) 2.44) #1 SMP PREEMPT Wed Jun 4 10:15:09 UTC 2025
I am using the i.MX 95 EVK.
I used the same tool to convert the mobilenet.tflite file. When I convert the model with the MCU_SDK_25.09.00+Linux_6.12.34_2.1.0 converter, the output looks more useful, even though it is still not the same as with the CPU model. However, when I use the Linux_6.6.3_1.0.0 converter, the same issue occurs as with the custom model: the output changes (results below, followed by a short comparison sketch).
NPU (Linux_6.6.3_1.0.0):
--- Inference Results ---
Inference Time (average): 0.50 ms
Model Output: (1, 1001)
[[0 0 0 ... 0 0 0]]
Non-zero class scores:
All class scores are zero!
NPU (MCU_SDK_25.09.00+Linux_6.12.34_2.1.0):
--- Inference Results ---
Inference Time (average): 2.00 ms
Model Output: (1, 1001)
[[0 6 0 ... 0 0 0]]
Non-zero class scores:
Class ID: 1 Score: 6
Class ID: 114 Score: 6
Class ID: 117 Score: 6
Class ID: 118 Score: 6
Class ID: 123 Score: 6
Class ID: 125 Score: 6
Class ID: 242 Score: 6
Class ID: 245 Score: 6
Class ID: 246 Score: 6
Class ID: 250 Score: 6
Class ID: 251 Score: 6
Class ID: 253 Score: 6
Class ID: 256 Score: 6
Class ID: 257 Score: 6
Class ID: 370 Score: 6
Class ID: 373 Score: 6
Class ID: 374 Score: 6
Class ID: 379 Score: 6
Class ID: 381 Score: 6
Class ID: 457 Score: 6
Class ID: 498 Score: 6
Class ID: 501 Score: 6
Class ID: 502 Score: 6
Class ID: 507 Score: 6
Class ID: 508 Score: 5
Class ID: 509 Score: 6
Class ID: 513 Score: 6
Class ID: 514 Score: 6
Class ID: 626 Score: 6
Class ID: 629 Score: 6
Class ID: 630 Score: 6
Class ID: 635 Score: 6
Class ID: 637 Score: 6
Class ID: 709 Score: 3
Class ID: 754 Score: 6
Class ID: 757 Score: 6
Class ID: 758 Score: 6
Class ID: 763 Score: 6
Class ID: 765 Score: 6
Class ID: 882 Score: 6
Class ID: 885 Score: 6
Class ID: 886 Score: 6
Class ID: 890 Score: 6
Class ID: 891 Score: 6
Class ID: 893 Score: 2
Predicted class ID: 1
Predicted class score: 6
Min score: 0 Max score: 6
CPU:
--- Inference Results ---
Inference Time (average): 116.31 ms
Model Output: (1, 1001)
[[0 0 0 ... 0 0 0]]
Non-zero class scores:
Class ID: 439 Score: 18
Class ID: 441 Score: 1
Class ID: 454 Score: 1
Class ID: 496 Score: 1
Class ID: 505 Score: 3
Class ID: 506 Score: 7
Class ID: 527 Score: 2
Class ID: 528 Score: 1
Class ID: 544 Score: 1
Class ID: 549 Score: 1
Class ID: 551 Score: 1
Class ID: 554 Score: 1
Class ID: 573 Score: 15
Class ID: 599 Score: 2
Class ID: 605 Score: 112
Class ID: 620 Score: 11
Class ID: 627 Score: 1
Class ID: 630 Score: 1
Class ID: 645 Score: 1
Class ID: 665 Score: 2
Class ID: 697 Score: 6
Class ID: 712 Score: 13
Class ID: 726 Score: 1
Class ID: 738 Score: 2
Class ID: 744 Score: 1
Class ID: 774 Score: 3
Class ID: 783 Score: 8
Class ID: 800 Score: 1
Class ID: 805 Score: 2
Class ID: 847 Score: 5
Class ID: 852 Score: 3
Class ID: 895 Score: 1
Class ID: 899 Score: 3
Class ID: 900 Score: 5
Class ID: 906 Score: 3
Class ID: 908 Score: 1
Class ID: 967 Score: 3
Predicted class ID: 605
Predicted class score: 112
Min score: 0 Max score: 112
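Since the scores above are the raw quantized values from the uint8 output tensor, I find it easier to compare the two runs after dequantizing with each model's output scale and zero-point. Below is a minimal sketch of that idea; it assumes the raw outputs were saved to cpu_output.npy / npu_output.npy (hypothetical file names), and the scale/zero-point values are placeholders rather than the ones reported on the EVK.

```python
# Sketch: dequantize and compare saved CPU/NPU outputs (not part of the script below).
# Assumes the raw outputs were dumped with np.save() and that the (scale, zero_point)
# values come from interpreter.get_output_details()[0]['quantization'].
import numpy as np

def dequantize(raw_scores, scale, zero_point):
    """Map raw quantized scores back to real-valued scores."""
    return scale * (raw_scores.astype(np.float32) - zero_point)

def top_k(scores, k=5):
    """Return the k highest-scoring class IDs, best first."""
    return np.argsort(scores.ravel())[::-1][:k]

if __name__ == '__main__':
    cpu_raw = np.load('cpu_output.npy')   # hypothetical dump of the CPU run
    npu_raw = np.load('npu_output.npy')   # hypothetical dump of the NPU run
    # Placeholder quantization parameters; the real ones should be read from
    # get_output_details() on the EVK.
    cpu_scores = dequantize(cpu_raw, scale=0.00390625, zero_point=0)
    npu_scores = dequantize(npu_raw, scale=0.00390625, zero_point=0)
    print('CPU top-5:', top_k(cpu_scores))
    print('NPU top-5:', top_k(npu_scores))
```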
Python inference code:

```python
import time
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.lite.python.interpreter import Interpreter
from tensorflow.lite.python.interpreter import load_delegate
from tqdm import tqdm

MODEL_PATH_NPU = './mobilenet_v1_1.0_224_quant_IMX_95.tflite'
MODEL_PATH_CPU = './mobilenet_v1_1.0_224_quant.tflite'
IMAGE_PATH = './train.jpg'
USE_NPU_DELEGATE = True
NPU_DELEGATE_PATH = '/usr/lib/liblitert_neutron_delegate.so'
INFERENCE_WARMUP_STEPS = 10
INFERENCE_STEPS = 10


class TFLiteModel:
    def __init__(self, model_path, use_npu_delegate, npu_delegate_path):
        # Load the external (NPU) delegate if requested, otherwise run on the CPU.
        if use_npu_delegate:
            ext_delegate = [load_delegate(npu_delegate_path)]
            self.interpreter = Interpreter(model_path=model_path, experimental_delegates=ext_delegate)
        else:
            self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def load_image(self, image_path, input_mean, input_std):
        # Resize the image to the model's input resolution.
        # Note: input_mean and input_std are currently unused; the quantized
        # model is fed raw uint8 pixel values.
        image_height = self.input_details[0]['shape'][1]
        image_width = self.input_details[0]['shape'][2]
        img = Image.open(image_path).resize((image_width, image_height))
        img_array = np.array(img)
        input_data = np.expand_dims(img_array, axis=0)
        return input_data

    def infer(self, input_tensor):
        self.interpreter.set_tensor(self.input_details[0]['index'], input_tensor)
        self.interpreter.invoke()
        output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
        return output_data


if __name__ == '__main__':
    if not USE_NPU_DELEGATE:
        MODEL_PATH = MODEL_PATH_CPU
    else:
        MODEL_PATH = MODEL_PATH_NPU

    tf_lite_model = TFLiteModel(MODEL_PATH, USE_NPU_DELEGATE, NPU_DELEGATE_PATH)
    input_tensor = tf_lite_model.load_image(IMAGE_PATH, input_mean=[0.485, 0.456, 0.406], input_std=[0.229, 0.224, 0.225])

    # Warm up
    for _ in tqdm(range(INFERENCE_WARMUP_STEPS), desc="Warming up", total=INFERENCE_WARMUP_STEPS):
        tf_lite_model.infer(input_tensor)

    # Actual inference
    average_inference_time = 0
    for _ in tqdm(range(INFERENCE_STEPS), desc="Inference", total=INFERENCE_STEPS):
        start_time = time.time()
        output_data = tf_lite_model.infer(input_tensor)
        stop_time = time.time()
        average_inference_time += (stop_time - start_time) * 1000
    average_inference_time /= INFERENCE_STEPS

    print('\n--- Inference Results ---')
    print(f'Inference Time (average): {average_inference_time:.2f} ms')
    print(f'Model Output: {output_data.shape} \n {output_data}')

    # List all non-zero class scores
    print('Non-zero class scores:')
    number_of_zero_scores = 0
    for class_id, score in enumerate(output_data[0]):
        if np.abs(score) > 1e-6:
            print(f' Class ID: {class_id} Score: {score}')
        else:
            number_of_zero_scores += 1
    if number_of_zero_scores == len(output_data[0]):
        print(' All class scores are zero!')
    else:
        id = np.argmax(output_data)
        print(f'\nPredicted class ID: {id}')
        print(f'Predicted class score: {output_data[0][id]}')
        print(f'Min score: {np.min(output_data)} Max score: {np.max(output_data)}')
    print('-------------------------\n')

    # Plot a heat map with the logits for each class
    class_width_pixels = 100
    plt.figure(figsize=(20, 4))
    plt.imshow(output_data, aspect='auto', cmap='viridis', extent=[0, output_data.shape[1]*class_width_pixels, 0, 1])
    plt.colorbar()
    plt.xlabel('Class ID')
    plt.title('Model Output Scores per Class')
    plt.savefig(f'model_output_scores_{"NPU" if USE_NPU_DELEGATE else "CPU"}.png')
    plt.close()
```
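As a sanity check on the preprocessing (input_mean/input_std are never applied in load_image), the short sketch below only prints the dtype, shape, and quantization parameters that the interpreter reports for the model's input and output tensors. It is not part of the original script and just reuses the CPU model path from it.

```python
# Sketch: print the quantization parameters the model actually expects, so the
# raw uint8 image data and the printed integer scores can be interpreted.
import numpy as np
from tensorflow.lite.python.interpreter import Interpreter

interpreter = Interpreter(model_path='./mobilenet_v1_1.0_224_quant.tflite')
interpreter.allocate_tensors()

for detail in interpreter.get_input_details() + interpreter.get_output_details():
    scale, zero_point = detail['quantization']
    print(f"{detail['name']}: dtype={np.dtype(detail['dtype']).name}, "
          f"shape={detail['shape']}, scale={scale}, zero_point={zero_point}")
```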