Thanks Bio;
Perhaps my question was not clear enough.
I am running an OpenCV 3.0 code (after verifying that an OpenCL code works on my IMX8M machine).
This OpenCV code (attached below) uses UMat instead of Mat, which instructs the OpenCV layer to use the T-API
and to run OpenCL kernels for the implementation of the OpenCV function "goodFeaturesToTrack()".
The OpenCV OpenCL implementation launches its work items in a manner that causes the OpenCL layer to violate CL_DEVICE_MAX_WORK_GROUP_SIZE.
On my platform I found that CL_DEVICE_MAX_WORK_GROUP_SIZE is 1024, both in total and in each dimension:
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(WorkSize), &WorkSize, NULL);
Now, when I run the OpenCV code below, the "goodFeaturesToTrack()" function is executed on the OpenCL runtime, and OpenCV tries to enqueue the following CL kernels with a wrong "globalsize", which results in OpenCL errors:
OpenCL error CL_INVALID_WORK_GROUP_SIZE (-54) during call:
clEnqueueNDRangeKernel('sobel3', dims=2, globalsize=512x512x1, localsize=16x16x1)
OpenCL error CL_INVALID_WORK_GROUP_SIZE (-54) during call:
clEnqueueNDRangeKernel('MaxEigenVal', dims=1, globalsize=4096x1x1, localsize=1024x1x1)
My original question was:
1. Why does OpenCV 3.0, which uses the T-API, configure the work items incorrectly so that it violates CL_DEVICE_MAX_WORK_GROUP_SIZE=1024?
2. How can I set or limit the globalsize/localsize that OpenCV uses when queuing CL kernels into the OpenCL runtime environment, to prevent this behavior?
Regards,
Guy
My Program Code
==================
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
using namespace cv;
using namespace std;
/// Global variables
Mat src, src_gray;      // host-side images; not used by the code shown here
UMat usrc, usrc_gray;   // source image and its grayscale version, held as UMat
/// Trackbar-controlled upper bound on the number of corners to detect.
/// NOTE(review): the initial value (500) is larger than maxTrackbar (100), so the
/// slider cannot represent its own starting position - confirm this is intended.
int maxCorners = 500;
int maxTrackbar = 100;  // upper end of the "Max corners:" trackbar
RNG rng(12345);         // fixed seed; supplies the random colors of the drawn corners
/// Window title. Must be const: a string literal cannot bind to a non-const char*
/// in standard C++ (ill-formed since C++11).
const char* source_window = "Image";
/// Function header
void goodFeaturesToTrack_Demo( int, void* );
/**
* @function main
*/
int main( int argc, char** argv )
{
/// Load source image and convert it to gray
//src = imread( argv[1], 1 );
imread( argv[1], 1 ).copyTo(usrc);
cvtColor( usrc, usrc_gray, CV_BGR2GRAY );
/// Create Window
namedWindow( source_window, CV_WINDOW_AUTOSIZE );
/// Create Trackbar to set the number of corners
createTrackbar( "Max corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo );
imshow( source_window, usrc );
goodFeaturesToTrack_Demo( 0, 0 );
waitKey(0);
return(0);
}
/**
* @function goodFeaturesToTrack_Demo
* @brief Apply Shi-Tomasi corner detector to the global grayscale image and
*        display the detected corners as filled, randomly colored circles.
*
* Has the (int, void*) signature of a trackbar callback; both arguments are
* ignored. The corner limit is read from the global maxCorners, which is
* clamped to at least 1 here.
*/
void goodFeaturesToTrack_Demo( int, void* )
{
  /// The slider can reach 0; keep the requested corner count at 1 or more
  if( maxCorners < 1 ) { maxCorners = 1; }

  /// Parameters for Shi-Tomasi algorithm
  vector<Point2f> corners;
  const double qualityLevel = 0.01;
  const double minDistance = 10;
  const int blockSize = 3;
  const bool useHarrisDetector = false;
  const double k = 0.04;

  /// Copy the source image so the circles are drawn on a fresh image each call
  UMat ucopy = usrc.clone();

  /// Apply corner detection (empty Mat() = no mask)
  goodFeaturesToTrack( usrc_gray,
                       corners,
                       maxCorners,
                       qualityLevel,
                       minDistance,
                       Mat(),
                       blockSize,
                       useHarrisDetector,
                       k );

  /// Draw corners detected
  cout<<"** Number of corners detected: "<<corners.size()<<endl;
  const int r = 4;
  /// size_t index: corners.size() is unsigned, so an int index would mix signedness
  for( size_t i = 0; i < corners.size(); ++i )
  {
    circle( ucopy, corners[i], r, Scalar(rng.uniform(0,255), rng.uniform(0,255),
            rng.uniform(0,255)), -1, 8, 0 );
  }

  /// Show what you got
  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
  imshow( source_window, ucopy );
}