TfLite NPU 実行エラー op_layout_inference.cc:MapAxis:177 Map axis failed(マップ軸の変換に失敗しました) みなさん、すみません。Pythonから移植した独自のC++プログラムをビルドし、手元のi.MX 8M Plusボード上で人物検出用のシンプルなモデルを実行しようとしました。 コード: 1. main.cpp // main.cpp
#include "detector.h"
#include
// Entry point: loads one image, runs the detector on it and prints each detection.
// Exactly one command-line argument is expected; it is used as the image path.
// Returns 1 when the argument count is wrong, when Detector::init_model() fails or
// when the image cannot be read; returns 0 otherwise.
int main(int argc, char* argv[]) {
if (argc != 2) {
// NOTE(review): the usage text prints only a blank after the program name; the
// argument placeholder appears to be missing from the string.
std::cerr << "Usage: " << argv[0] << " " << std::endl;
return 1;
}
std::string image_path = argv[1];
// Model file, delegate library, input resolution and thresholds are hard-coded here.
std::string model_path = "model.tflite";
std::string delegate_path = "/usr/lib/libvx_delegate.so";
cv::Size input_size(192, 192);
float score_th = 0.5;
float nms_th = 0.4;
Detector detector(model_path, delegate_path, input_size, score_th, nms_th);
if (!detector.init_model()) {
return 1;
}
cv::Mat image = cv::imread(image_path);
if (image.empty()) {
std::cerr << "Failed to load image from " << image_path << std::endl;
return 1;
}
auto [bboxes, scores] = detector.detect(image);
// Draw every returned box on the image and log it together with its score.
for (size_t i = 0; i < bboxes.size(); ++i) {
cv::rectangle(image, bboxes[i], cv::Scalar(0, 255, 0), 2);
std::cout << "Detected bbox: " << bboxes[i] << " with score: " << scores[i] << std::endl;
}
// Display is disabled, so no window exists when waitKey() is called below.
// cv::imshow("Detections", image);
cv::waitKey(0);
return 0;
} 2. 検出器.h // detector.h
#ifndef DETECTOR_H
#define DETECTOR_H
#include
#include
#include
#include
#include
#include
#include
#include "delegate_main.h"
class Detector {
public:
Detector(const std::string& model_path,
const std::string& delegate_path,
const cv::Size& input_shape,
float score_th,
float nms_th);
bool init_model();
std::pair<:vector><:rect>, std::vector > detect(const cv::Mat& image);
private:
std::string model_path_;
std::string delegate_path_;
cv::Size input_shape_;
float score_th_;
float nms_th_;
std::unique_ptr<:interpreter> interpreter_;
std::pair<:mat> preprocess(const cv::Mat& image, const cv::Size& input_size);
std::tuple<:vector><:rect>, std::vector , std::vector > postprocess(cv::Mat& outputs,
const cv::Size& img_size,
float ratio,
float score_th,
float nms_th);
void meshgrid(const cv::Range& x_range, const cv::Range& y_range, cv::Mat& xv, cv::Mat& yv);
std::tuple<:vector><:rect>, std::vector , std::vector > nms(const std::vector<:rect>& bboxes,
const std::vector & scores,
float score_th,
float nms_th);
};
#endif // DETECTOR_H 3. detector.cpp // detector.cpp
#include "detector.h"
#include
#include <algorithm>
#include <cmath>
Detector::Detector(const std::string& model_path,
const std::string& delegate_path,
const cv::Size& input_shape,
float score_th,
float nms_th)
: model_path_(model_path),
delegate_path_(delegate_path),
input_shape_(input_shape),
score_th_(score_th),
nms_th_(nms_th) {}
bool Detector::init_model() {
auto model = tflite::FlatBufferModel::BuildFromFile(model_path_.c_str());
if (!model) {
std::cerr << "Failed to load model from " << model_path_ << std::endl;
return false;
}
auto ext_delegate_option = TfLiteExternalDelegateOptionsDefault(delegate_path_.c_str());
auto ext_delegate_ptr = TfLiteExternalDelegateCreate(&ext_delegate_option);
if (!ext_delegate_ptr) {
std::cerr << "Failed to create external delegate" << std::endl;
return false;
}
tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom(kNbgCustomOp, tflite::ops::custom::Register_VSI_NPU_PRECOMPILED());
tflite::InterpreterBuilder builder(*model, resolver);
builder(&interpreter_);
if (!interpreter_) {
std::cerr << "Failed to build interpreter" << std::endl;
return false;
}
interpreter_->ModifyGraphWithDelegate(ext_delegate_ptr);
if (interpreter_->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate tensors" << std::endl;
return false;
}
return true;
}
std::pair<:mat> Detector::preprocess(const cv::Mat& image, const cv::Size& input_size) {
float ratio = std::min(static_cast (input_size.width) / image.cols,
static_cast (input_size.height) / image.rows);
cv::Size new_size(static_cast (image.cols * ratio), static_cast (image.rows * ratio));
cv::Mat resized_image;
cv::resize(image, resized_image, new_size, 0, 0, cv::INTER_LINEAR);
cv::Mat padded_image = cv::Mat::ones(input_size, CV_8UC3) * 114;
resized_image.copyTo(padded_image(cv::Rect(0, 0, resized_image.cols, resized_image.rows)));
std::vector<:mat> channels(3);
cv::split(padded_image, channels);
cv::Mat chw_image(3, input_size.height * input_size.width, CV_32F);
for(int i = 0; i < 3; ++i) {
channels[i].convertTo(channels[i], CV_32F);
std::memcpy(chw_image.ptr (i), channels[i].data, channels[i].total() * sizeof(float));
}
cv::Mat reshaped_image = chw_image.reshape(1, {1, 3, input_size.height, input_size.width});
return std::make_pair(reshaped_image, ratio);
}
std::tuple<:vector><:rect>, std::vector , std::vector > Detector::postprocess(cv::Mat& outputs,
const cv::Size& img_size,
float ratio,
float score_th,
float nms_th) {
std::vector<:rect> bboxes;
std::vector scores;
std::vector class_ids;
std::vector strides = {8, 16, 32};
std::vector<:mat> grids;
std::vector<:mat> expanded_strides;
for (int stride : strides) {
int hsize = img_size.height / stride;
int wsize = img_size.width / stride;
cv::Mat xv, yv;
meshgrid(cv::Range(0, wsize - 1), cv::Range(0, hsize - 1), xv, yv);
cv::Mat grid;
cv::hconcat(xv.reshape(1, 1), yv.reshape(1, 1), grid);
grids.push_back(grid.reshape(2, 1));
expanded_strides.push_back(cv::Mat(grid.size(), CV_32F, cv::Scalar(stride)));
}
cv::Mat grid_cat, stride_cat;
cv::vconcat(grids, grid_cat);
cv::vconcat(expanded_strides, stride_cat);
outputs.colRange(2, 4).convertTo(outputs.colRange(2, 4), CV_32F);
cv::Mat exp_colRange(outputs.colRange(2, 4).size(), CV_32F);
cv::exp(outputs.colRange(2, 4), exp_colRange);
outputs.colRange(0, 2) = (outputs.colRange(0, 2) + grid_cat) * stride_cat;
outputs.colRange(2, 4) = exp_colRange.mul(stride_cat);
cv::Mat predictions = outputs.row(0);
cv::Mat bboxes_mat = predictions.colRange(0, 4);
cv::Mat scores_mat = predictions.col(4).mul(predictions.colRange(5, predictions.cols));
scores.assign(scores_mat.begin (), scores_mat.end ());
std::vector<:rect> bboxes_xyxy(bboxes_mat.rows);
for (int i = 0; i < bboxes_mat.rows; ++i) {
float x_center = bboxes_mat.at (i, 0);
float y_center = bboxes_mat.at (i, 1);
float width = bboxes_mat.at (i, 2);
float height = bboxes_mat.at (i, 3);
float x_min = x_center - width / 2.0;
float y_min = y_center - height / 2.0;
float x_max = x_center + width / 2.0;
float y_max = y_center + height / 2.0;
bboxes_xyxy[i] = cv::Rect(cv::Point(x_min / ratio, y_min / ratio), cv::Point(x_max / ratio, y_max / ratio));
}
return nms(bboxes_xyxy, scores, score_th, nms_th);
}
/// Builds two coordinate grids from a pair of ranges.
///
/// With nx = x_range.size() and ny = y_range.size(), both outputs are
/// nx x ny CV_32F matrices where xv(i, j) = x_range.start + i and
/// yv(i, j) = y_range.start + j. cv::Range is half-open, so a range (a, b)
/// contributes the values a .. b - 1.
///
/// NOTE(review): rows follow x and columns follow y, i.e. the transpose of
/// numpy's default meshgrid layout — confirm this is what callers expect.
///
/// @param x_range Range of x coordinates.
/// @param y_range Range of y coordinates.
/// @param xv      Receives the x-coordinate grid.
/// @param yv      Receives the y-coordinate grid.
void Detector::meshgrid(const cv::Range& x_range, const cv::Range& y_range, cv::Mat& xv, cv::Mat& yv) {
    const int x_count = x_range.size();
    const int y_count = y_range.size();

    // Column vectors holding the coordinate values of each axis.
    cv::Mat x_coords(x_count, 1, CV_32F);
    cv::Mat y_coords(y_count, 1, CV_32F);
    for (int i = 0; i < x_count; ++i) {
        x_coords.at<float>(i, 0) = static_cast<float>(x_range.start + i);
    }
    for (int j = 0; j < y_count; ++j) {
        y_coords.at<float>(j, 0) = static_cast<float>(y_range.start + j);
    }

    // Tile the x column across ny columns and the y row across nx rows.
    cv::repeat(x_coords, 1, y_count, xv);
    cv::repeat(y_coords.t(), x_count, 1, yv);
}
std::tuple<:vector><:rect>, std::vector , std::vector > Detector::nms(const std::vector<:rect>& bboxes,
const std::vector & scores,
float score_th,
float nms_th) {
std::vector<:rect> bboxes_filtered;
std::vector scores_filtered;
std::vector class_ids_filtered;
std::vector indices;
cv::dnn::NMSBoxes(bboxes, scores, score_th, nms_th, indices);
for(int idx : indices) {
bboxes_filtered.push_back(bboxes[idx]);
scores_filtered.push_back(scores[idx]);
class_ids_filtered.push_back(0);
}
return std::make_tuple(bboxes_filtered, scores_filtered, class_ids_filtered);
}
// Runs one inference on `image` and returns the detected rectangles with their scores.
// Visible steps: clone and preprocess the image, log the input tensor and image
// dimensions, check that the input tensor is float32 with a non-null data pointer,
// copy the preprocessed data into it, invoke the interpreter, wrap output tensor 0
// in a 1 x N float matrix and return the rectangle list together with the scores.
// An empty pair is returned when a check on the input tensor or the copy fails.
// NOTE(review): several lines below appear to have lost text (template arguments
// and parts of statements); restore them from the original source before building.
std::pair<:vector><:rect>, std::vector > Detector::detect(const cv::Mat& image) {
cv::Mat temp_image = image.clone();
auto [preprocessed_image, ratio] = preprocess(temp_image, input_shape_);
// NOTE(review): the declaration of `input_data` is not visible on the next line,
// although it is used right after it.
std::cout << "Preprocess Completed"<<:endl>tensor(interpreter_->inputs()[0]);
// Tensor dimensions are read in the order batch, channels, height, width.
const uint input_width = input_data->dims->data[3];
const uint input_height = input_data->dims->data[2];
const uint input_channels = input_data->dims->data[1];
const uint batch_size = input_data->dims->data[0];
std::cout << "Expected dimension: "<< batch_size << "x" << input_channels << "x" << input_height << "x" << input_width << std::endl;
const uint image_width = preprocessed_image.size[3];
const uint image_height = preprocessed_image.size[2];
const uint image_channels = preprocessed_image.size[1];
const uint image_batch_size = preprocessed_image.size[0];
std::cout << "Image dimension: "<< image_batch_size << "x" << image_channels << "x" << image_height << "x" << image_width << std::endl;
if(input_data->type !=kTfLiteFloat32){
std::cerr << "input tensor is not of type float" << std::endl;
return std::make_pair(std::vector<:rect>(), std::vector ());
}
if(input_data->data.f == nullptr) {
std::cerr << "input tensor data pointer is null" << std::endl;
return std::make_pair(std::vector<:rect>(), std::vector ());
}
// Copy the preprocessed data into the input tensor, then compare both buffers.
std::memcpy(input_data->data.f, preprocessed_image.ptr (0), batch_size * input_width * input_height * input_channels * sizeof(float));
if(memcmp(input_data->data.f, preprocessed_image.ptr (0),batch_size * input_width * input_height * input_channels * sizeof(float)) != 0){
std::cerr << "data copy to input tensor failed" << std::endl;
return std::make_pair(std::vector<:rect>(), std::vector ());
}
else{
// NOTE(review): no closing brace for this else branch is visible, and the
// result of Invoke() is not checked on the line below.
std::cout << "Set up Input Tensor Completed"<<:endl>Invoke();
std::cout << "Inference Completed"<<:endl>typed_output_tensor (0);
// Number of float elements in output tensor 0.
size_t output_size = interpreter_->tensor(interpreter_->outputs()[0])->bytes / sizeof(float);
// Wraps the output buffer without copying it.
cv::Mat results(1, output_size, CV_32F, output_tensor);
// NOTE(review): `bboxes_xyxy` and `scores` are used below but their definition
// is not visible; presumably they come from a postprocess() call — confirm.
std::cout << "Get Results Completed"<<:endl> result_rect_list;
for (size_t i = 0; i < bboxes_xyxy.size(); ++i) {
result_rect_list.push_back(bboxes_xyxy[i]);
}
// Returning the list of rectangles and the associated scores
return {result_rect_list, scores};
} 私のボードイメージはNanbield 6.6.3_1.0.0フルイメージです VX DelegateとNPUを使用して実行しようとしましたが、コードを実行すると問題が発生しました root@imx8mpevk:/run/media/SD CARD-sda1/test_npu# ./detector_app lena_color_512.tif
INFO: Vx delegate: allowed_cache_mode set to 0.
INFO: Vx delegate: device num set to 0.
INFO: Vx delegate: allowed_builtin_code set to 0.
INFO: Vx delegate: error_during_init set to 0.
INFO: Vx delegate: error_during_prepare set to 0.
INFO: Vx delegate: error_during_invoke set to 0.
Preprocess Completed
Expected dimension: 1x3x192x192
Image dimension: 1x3x192x192
Set up Input Tensor Completed
E [/usr/src/debug/tim-vx/1.1.88-r[ 126.612163] audit: type=1701 audit(1695250801.923:18): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=1270 comm="detector_app" exe=2F72756E2F6D656469612F534420434152442D736461312F746573745F6E70752F6465746563746F725F617070 sig=6 res=1
0/src/tim/transform/ops/op_layout_inference.cc:MapAxis:177]Map axis failed.
detector_app: /usr/src/debug/tim-vx/1.1.88-r0/src/tim/transform/ops/op_layout_inference.cc:178: uint32_t tim::transform::OpLayoutInfer::MapAxis(const std::vector &, uint32_t): Assertion `false' failed.
Aborted (core dumped) また、gdbでデバッグ実行も試しましたが、次のような結果になりました。 (gdb) set args lena_color_512.tif
(gdb) run
Starting program: /run/media/SD CARD-sda1/test_npu/detector_app lena_color_512.tif
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
INFO: Vx delegate: allowed_cache_mode set to 0.
INFO: Vx delegate: device num set to 0.
INFO: Vx delegate: allowed_builtin_code set to 0.
INFO: Vx delegate: error_during_init set to 0.
INFO: Vx delegate: error_during_prepare set to 0.
INFO: Vx delegate: error_during_invoke set to 0.
Preprocess Completed
Expected dimension: 1x3x192x192
Image dimension: 1x3x192x192
Set up Input Tensor Completed
[New Thread 0xfffff146cf00 (LWP 1660)]
E [/usr/src/debug/tim-vx/1.1.88-r0/src/tim/transform/ops/op_layout_inference.cc:MapAxis:177]Map axis failed.
detector_app: /usr/src/debug/tim-vx/1.1.88-r0/src/tim/transform/ops/op_layout_inference.cc:178: uint32_t tim::transform::OpLayoutInfer::MapAxis(const std::vector &, uint32_t): Assertion `false' failed.
Thread 1 "detector_app" received signal SIGABRT, Aborted.
__pthread_kill_implementation (threadid= , signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
44 pthread_kill.c: No such file or directory.
(gdb) bt
#0 __pthread_kill_implementation (threadid= , signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
#1 0x0000fffff69c0568 in __pthread_kill_internal (signo=6, threadid= ) at pthread_kill.c:78
#2 0x0000fffff697acd0 in __GI_raise (sig=sig@entry=6) at /usr/src/debug/glibc/2.38+git-r0/sysdeps/posix/raise.c:26
#3 0x0000fffff6966ef0 in __GI_abort () at abort.c:79
#4 0x0000fffff69743f8 in __assert_fail_base (fmt=0xfffff6a8a8e8 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0xfffff1ffdcf0 "false",
file=file@entry=0xfffff1fff568 "/usr/src/debug/tim-vx/1.1.88-r0/src/tim/transform/ops/op_layout_inference.cc", line=line@entry=178,
function=function@entry=0xfffff1fff5d8 "uint32_t tim::transform::OpLayoutInfer::MapAxis(const std::vector &, uint32_t)") at assert.c:92
#5 0x0000fffff6974470 in __assert_fail (assertion=0xfffff1ffdcf0 "false", file=0xfffff1fff568 "/usr/src/debug/tim-vx/1.1.88-r0/src/tim/transform/ops/op_layout_inference.cc", line=178,
function=0xfffff1fff5d8 "uint32_t tim::transform::OpLayoutInfer::MapAxis(const std::vector &, uint32_t)") at assert.c:101
#6 0x0000fffff1fa5f74 in tim::transform::OpLayoutInfer::MapAxis(std::vector > const&, unsigned int) () from /usr/lib/libtim-vx.so
#7 0x0000fffff1f6a1b0 in ?? () from /usr/lib/libtim-vx.so
#8 0x0000fffff1f4e5f4 in tim::transform::layout_inference_impl::HandleLayoutInfer(std::shared_ptr<:transform::layout_inference_impl::layoutinfercontext>&, std::shared_ptr<:vx::operation> const&) () from /usr/lib/libtim-vx.so
#9 0x0000fffff1f531f4 in tim::transform::LayoutInference(std::shared_ptr<:vx::graph> const&, std::shared_ptr<:vx::context>&, std::map<:shared_ptr><:vx::tensor>, std::shared_ptr<:transform::ipermutevector>, std::less<:shared_ptr><:vx::tensor> >, std::allocator<:pair><:shared_ptr><:vx::tensor> const, std::shared_ptr<:transform::ipermutevector> > > >) () from /usr/lib/libtim-vx.so
#10 0x0000fffff23d85ac in vx::delegate::Delegate::Invoke(vx::delegate::OpData const&, TfLiteContext*, TfLiteNode*) () from /usr/lib/libvx_delegate.so
#11 0x0000fffff7be9d9c in tflite::Subgraph::InvokeImpl() () from /usr/lib/libtensorflow-lite.so.2.14.0
#12 0x0000fffff7bea388 in tflite::Subgraph::Invoke() () from /usr/lib/libtensorflow-lite.so.2.14.0
#13 0x0000fffff7bd440c in tflite::impl::Interpreter::Invoke() () from /usr/lib/libtensorflow-lite.so.2.14.0
#14 0x0000aaaaaaaa62e0 in Detector::detect (this=this@entry=0xfffffffff890, image=...)
at /home/ubuntu/imx-yocto-bsp/sdk/sysroots/armv8a-poky-linux/usr/include/c++/13.2.0/bits/unique_ptr.h:199
#15 0x0000aaaaaaaa35b0 in main (argc= , argv= ) at /home/ubuntu/imx-yocto-bsp/tflite_test/build_minim/main.cpp:29 何が問題なのか、手がかりをお持ちの方はいらっしゃいますか?ここで何が起きているのか、自分では分かりません。分かっているのは、op_layout_inference.cc:MapAxis:177 の「Map axis failed」でアサーションが失敗している、ということだけです。 よろしくお願いいたします i.MX 8ファミリ | i.MX 8QuadMax (8QM) | 8QuadPlus Re:TfLite NPU実行エラー op_layout_inference.cc:MapAxis:177マップ軸が失敗しました 問題の原因を見つけました。どうやら次の行がエラーの原因だったようです。 resolver.AddCustom(kNbgCustomOp, tflite::ops::custom::Register_VSI_NPU_PRECOMPILED()); そのため、ひとまずこの行を無効にしたところ、嘘のように動作するようになりました。なぜこの行がエラーを引き起こすのか、どなたか説明していただけるとありがたいですが、ひとまずアプリ開発を先に進められるようになりました。 モデルについては、Pythonコードで確認したところエラーは発生しなかったので、モデル自体はNPUでの実行に対応しているようです。 ありがとうございます Re:TfLite NPU実行エラー op_layout_inference.cc:MapAxis:177マップ軸が失敗しました モデルが NPU/VX デリゲートでの実行に対応していない可能性はありますか? というのも、CPUで実行しようとすると別のエラーが発生したためです(StridedSliceレイヤーの1つに関連するものですが、CPUでの実行についてはまだきちんと確認できていません)。 Re:TfLite NPU実行エラー op_layout_inference.cc:MapAxis:177マップ軸が失敗しました Hello,
ご認識のとおり、ゼロ引数のコンストラクタは private にされているため、呼び出すことができないようになっています。このアサーションは、それにもかかわらず呼び出しが発生した場合に、違反をエラーとして知らせるために置かれているように見えます。
よろしくお願いします。
View full article