Hi,
I'm using the Yocto release warrior-fsl-4.19.35-mx8mq-v1.0 from:
https://github.com/varigit/variscite-bsp-platform.git
My OpenCV version is 4.0.1
Here is the OpenCL info:

I have not applied any patches for the GPU.
GPU memory is 256 MB.
Code example:
/// Benchmarks element-wise cv::multiply on two 1024x768, 8-bit, single-channel
/// cv::UMat images and prints the mean wall-clock time per call in milliseconds.
///
/// @return Always 0.
int testUMAT ()
{
    const int counter = 100;            // number of timed multiply calls
    // Upper bound passed to cv::randu; same value the original computed as (int)pow(2, 8).
    const int randomUpperBound = 256;

    cv::Mat testMat (768,1024,CV_8UC1 );
    cv::Mat testNuc (768,1024,CV_8UC1 );
    // UMat counterparts of the two host images
    cv::UMat testMatGpu, testNucGpu;
    // Randomizing image
    cv::randu(testMat, 0, randomUpperBound);
    cv::randu(testNuc, 0, randomUpperBound);
    testMat.copyTo(testMatGpu);
    testNuc.copyTo(testNucGpu);

    // NOTE(review): the timed region presumably includes any one-time OpenCL
    // kernel setup done by the first call, and queued device work may still be
    // in flight when the clock is stopped — confirm; a warm-up call and an
    // explicit cv::ocl::finish() before stopping the clock may be needed.
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < counter; ++i)
    {
        // In place: the product overwrites testNucGpu, so every iteration
        // multiplies by the previous iteration's result.
        cv::multiply(testMatGpu, testNucGpu, testNucGpu);
    }
    const auto stop = std::chrono::steady_clock::now();

    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    // microseconds / (1000 * calls) == milliseconds per call
    std::cout << "End test:" << duration.count() / (1000.0 * counter) << std::endl;
    return 0;
}
Thanks!