AnsweredAssumed Answered

vectors adding error by opencl on i.mx6d

Question asked by zheng fudi on Aug 10, 2016
Latest reply on Aug 10, 2016 by igorpadykov

Hi all,

I am learning OpenCL on the i.MX6D, and I wrote kernel code like this:

/*
 * helloworld - per-pixel byte kernel over a width x height image.
 *
 * Each work-item handles one element. Dimension 0 of the NDRange is the
 * column (x, 0..width-1) and dimension 1 is the row (y, 0..height-1),
 * matching a host global work size of {width, height}.
 *
 * input1, input2: source images, width * height bytes each, row-major.
 * output:         destination image, width * height bytes, row-major.
 * width, height:  image dimensions in elements.
 */
__kernel void helloworld (__global uchar *input1,
                          __global uchar *input2,
                          __global uchar *output,
                          int width,
                          int height)    /* no trailing comma: it is a syntax error */
{
    int x = get_global_id (0);
    int y = get_global_id (1);
    int id;

    /* Ignore work-items outside the image so a padded global size cannot
       write out of bounds. */
    if (x >= width || y >= height)
        return;

    id = (y * width) + x;

    output[id] = input1[id];    // ok

    // output[id] = input2[id];    // ok

    // output[id] = input1[id] + input2[id];    // error

    // output[id] = 0x01;    // error
}

 

and host code like this:

#include <stdio.h>

#include <stdlib.h>

#include <assert.h>

#include "../include/CL/cl.h"

 

/* Status codes returned by FSLCL_LoadKernelSource: FSLCL_ERROR on failure,
   FSLCL_SUCCESS (an alias of OpenCL's CL_SUCCESS) on success. */
#define FSLCL_ERROR -1

#define FSLCL_SUCCESS CL_SUCCESS

 

/* Upper bound (0x100000 bytes = 1 MiB) on how much kernel source is read from disk. */
#define MAX_SOURCE_SIZE (0x100000)

 

/* Device buffer bound to kernel argument 0 (first input image). */
cl_mem buffer_input = NULL;

/* Device buffer bound to kernel argument 1 (second input image). */
cl_mem buffer_input2 = NULL;

/* Device buffer bound to kernel argument 2 (result image). */
cl_mem buffer_output = NULL;

/* NOTE(review): not referenced anywhere in the code shown. */
cl_kernel buffer_kernel = NULL;

/* NOTE(review): not referenced anywhere in the code shown. */
size_t buffer_size = 0;

/* Image dimensions in elements; also passed to the kernel as arguments 3 and 4. */
int buffer_width = 1024;

int buffer_height = 1024;

/* OpenCL objects created by main(), in the order platform, device, context,
   command queue, program, kernel. */
cl_platform_id platform_id;

cl_device_id device_id;

cl_context context;

cl_command_queue cq;

cl_program program;

cl_kernel kernel;

 

/* Kernel source text loaded from a file, together with its length in bytes. */
typedef struct fsl_kernel_src_str
{
    char *src;   /* heap-allocated, NUL-terminated source text */
    long size;   /* number of bytes read into src */
} fsl_kernel_src;

 

/*
 * FSLCL_LoadKernelSource - read an OpenCL kernel source file into memory.
 *
 * filename: path of the file to read.
 * kernel:   receives the result. On success kernel->src points to a
 *           malloc'ed, NUL-terminated copy of the file and kernel->size is
 *           its length in bytes; the caller owns kernel->src and must free()
 *           it. On failure kernel->src is NULL and kernel->size is 0.
 *
 * Returns FSLCL_SUCCESS on success. Returns FSLCL_ERROR if an argument is
 * NULL, the file cannot be opened, is empty, is larger than MAX_SOURCE_SIZE,
 * cannot be read completely, or memory cannot be allocated.
 */
cl_int FSLCL_LoadKernelSource (char *filename, fsl_kernel_src *kernel)
{
    FILE *fp = NULL;
    long file_size = -1;
    size_t bytes_read;

    if (filename == NULL || kernel == NULL)
    {
        return FSLCL_ERROR;
    }

    kernel->src = NULL;
    kernel->size = 0;

    fp = fopen (filename, "rb");
    if (fp == NULL)
    {
        printf ("\nFailed to open: %s\n", filename);
        return FSLCL_ERROR;
    }

    /* Determine the file length; ftell() returns -1 on failure. */
    if (fseek (fp, 0, SEEK_END) == 0)
    {
        file_size = ftell (fp);
    }

    if (file_size < 1 || file_size > MAX_SOURCE_SIZE)
    {
        printf ("read file(%s) fail, ret=%ld.\n", filename, file_size);
        fclose (fp);
        return FSLCL_ERROR;
    }

    rewind (fp);

    /* One extra byte for the terminating NUL. */
    kernel->src = (char *) malloc ((size_t) file_size + 1);
    if (! kernel->src)
    {
        printf ("\nError Allocating memory to load CL program");
        fclose (fp);
        return FSLCL_ERROR;
    }

    /* Read exactly the measured length, never more than was allocated. */
    bytes_read = fread (kernel->src, 1, (size_t) file_size, fp);

    fclose (fp);
    fp = NULL;

    printf("close fp.\n");

    if (bytes_read != (size_t) file_size)
    {
        printf ("read file(%s) fail, ret=%ld.\n", filename, (long) bytes_read);
        free (kernel->src);
        kernel->src = NULL;
        return FSLCL_ERROR;
    }

    kernel->src[bytes_read] = '\0';
    kernel->size = (long) bytes_read;

    return FSLCL_SUCCESS;
}

 

int main (int argc, char **argv)

{

    int dimension = 2;

    size_t global[2] = {buffer_width, buffer_height};

    size_t local[2] = {4, 16};

    int size_2d = buffer_width * buffer_height;

    cl_int ret;

    cl_int platforms;

    char **data;

    char **data2;

    char **data0;

    int i, j;

 

    data0= (char **) malloc (buffer_width * sizeof (char *));

    data = (char **) malloc (buffer_width * sizeof (char *));

    data2 = (char **) malloc (buffer_width * sizeof (char *));

 

    if(NULL == data0 || NULL == data || NULL == data2) {

        printf("malloc1 err: %x, %x, %x\n", data0, data, data2);

        return -1;

    }

 

    for (i = 0; i < buffer_width; i++)

    {

        data0[i] = (char *) malloc (buffer_height * sizeof (char));

        data[i] = (char *) malloc (buffer_height * sizeof (char));

       

        data2[i] = (char *) malloc (buffer_height * sizeof (char));

 

        if(NULL == data0[i] || NULL == data[i] || NULL == data2[i]) {

            printf("malloc2 err in i%d: %x, %x, %x\n", i, data0[i], data[i], data2[i]);

            return -1;

        }       

    }

 

    for (i = 0; i < buffer_width; i++)

    {

        for (j = 0; j < buffer_height; j++)

        {

            data0[i][j] = 0;

            data[i][j] = 0;

            data2[i][j] = 0;

        }

    }

 

    ret = clGetPlatformIDs (1, &platform_id, &platforms );

    printf("clGetPlatformIDs ret = %d\n", ret);

    assert (ret == CL_SUCCESS);

 

    cl_int devices;

    ret = clGetDeviceIDs (platform_id,CL_DEVICE_TYPE_GPU,1,&device_id, &devices);

    assert (ret == CL_SUCCESS);

 

    cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0};

    context = clCreateContext(properties, devices, &device_id, NULL,NULL,&ret);

    assert (ret == CL_SUCCESS);

    printf("clCreateContext ret =%d\n", ret);

 

    cq = clCreateCommandQueue(context, device_id, 0, &ret);

    assert (ret == CL_SUCCESS);

    printf("clCreateCommandQueue ret =%d\n", ret);

 

    fsl_kernel_src app_kernel;

    ret = FSLCL_LoadKernelSource ((char *)"helloworld.cl", &app_kernel);

    printf("FSLCL_LoadKernelSource ret =%d\n", ret);

 

    // Submit the source code of the kernel to OpenCL

    program = clCreateProgramWithSource (context, 1, (const char **)&app_kernel.src, 0,&ret);

    printf("clCreateProgramWithSource ret =%d\n", ret);

 

    // and compile it (after this we could extract the compiled version)

    if (ret == CL_SUCCESS)

        ret = clBuildProgram (program, 1, device_id, NULL, NULL, NULL);

 

    if (ret < 0)

    {

        printf ("Failed\n");

        printf ("\nReturn: %d\n", ret);

        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, app_kernel.size, app_kernel.src, NULL);

        printf ("\n%s", app_kernel.src);

    }

    assert(ret == CL_SUCCESS);

 

    buffer_input = clCreateBuffer (context, CL_MEM_READ_ONLY, size_2d, NULL, &ret);

    printf("clCreateBuffer for input ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

    buffer_input2 = clCreateBuffer (context, CL_MEM_READ_ONLY, size_2d, NULL, &ret);

    printf("clCreateBuffer for input2 ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

 

    buffer_output = clCreateBuffer (context, CL_MEM_WRITE_ONLY , size_2d, NULL, &ret);

    assert (ret == CL_SUCCESS);

 

    // get a handle and map parameters for the kernel

    kernel = clCreateKernel(program, "helloworld", &ret);

    printf("clCreateKernel ret =%d\n", ret);

    usleep(100*1000);

    assert (ret == CL_SUCCESS);

    clSetKernelArg (kernel, 0, sizeof(cl_mem), &buffer_input);

    clSetKernelArg (kernel, 1, sizeof(cl_mem), &buffer_input2);

    clSetKernelArg (kernel, 2, sizeof(cl_mem), &buffer_output);

    clSetKernelArg (kernel, 3, sizeof(int), &buffer_width);

    clSetKernelArg (kernel, 4, sizeof(int), &buffer_height);

 

    for (i = 0; i < buffer_width; i++)

    {

        for (j = 0; j < buffer_height; j++)

        {

            data0[i][j] = rand () % 10;

 

            data[i][j] = rand () % 10;

        }

    }

 

    ret = clEnqueueWriteBuffer (cq, buffer_input, CL_TRUE, 0, size_2d, data0, 0, NULL, NULL);

    printf("clEnqueueWriteBuffer input1 ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

 

    ret = clEnqueueWriteBuffer (cq, buffer_input2, CL_TRUE, 0, size_2d, data, 0, NULL, NULL);

    printf("clEnqueueWriteBuffer input2 ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

 

    ret = clEnqueueNDRangeKernel(cq, kernel, dimension, NULL, &global, &local, 0, NULL, NULL);

    printf("clEnqueueNDRangeKernel ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

 

    clFinish(cq);

 

    ret = clEnqueueReadBuffer(cq, buffer_output, CL_TRUE, 0, size_2d, data2, 0, NULL, NULL);

    printf("clEnqueueReadBuffer ret =%d\n", ret);

    assert (ret == CL_SUCCESS);

 

    printf ("\nResult:\n");

    for (i = 0; i < buffer_width; i++)

    {

        for (j = 0; j < buffer_height; j++)

        {

            // printf ("\n%d , %d -- %d", data0[i][j], data[i][j], data2[i][j]);

            printf("%d, ", data2[i][j]);

        }

    }

    printf ("\n");

 

    clFlush( cq);

    clFinish(cq);

    clReleaseMemObject (buffer_input);

    clReleaseMemObject (buffer_input2);

    clReleaseMemObject (buffer_output);

    clReleaseContext(context);

    clReleaseKernel(kernel);

    clReleaseProgram(program);

    clReleaseCommandQueue(cq);

 

    for (i = 0; i < buffer_width; i++)

    {

        free(data0[i]);

        free(data[i]);

        free(data2[i]);

    }

    free(data0);

    free(data);

    free(data2);

    return 0;

}

 

Question:

If the computing method in kernel code is "output[id] = input1[id];" or "output[id] = input2[id];", the whole program works well.

But if the computation in the kernel code is changed to "output[id] = input1[id]+input2[id];" or "output[id] = 0x01;", the program crashes with a segmentation fault. I have confirmed that the line that triggers it is "printf("%d, ", data2[i][j]);" in the host code, but I am not sure what the true cause is. I would really appreciate it if someone could help me.

Outcomes