# Host-side result array with one float32 cell per (row, column) position.
# It is allocated directly as float32 so that no intermediate float64 array
# has to be created and then converted.
res_hist = np.zeros((height, width), dtype=np.float32)
# Device buffer that uses the host array's memory as its backing store
# (USE_HOST_PTR); the kernel is only expected to write to it (WRITE_ONLY).
res_hist_g = cl.Buffer(ctx, mf.WRITE_ONLY | mf.USE_HOST_PTR, hostbuf=res_hist)
# The global work size is the shape of res_hist, i.e. one work-item per output
# element; passing None as the local size leaves the work-group size to the
# OpenCL implementation.
kernel(queue, res_hist.shape, None, input_buffer, res_hist_g)
# This copy is necessary: even with USE_HOST_PTR the OpenCL implementation is
# allowed to keep the buffer contents cached in device memory, so res_hist is
# not guaranteed to contain the kernel's output until it is read back
# explicitly. is_blocking=True makes the call return only after the transfer
# has completed, so res_hist is safe to use right away.
cl.enqueue_copy(queue, res_hist, res_hist_g, is_blocking=True)