I couldn't make the driver's built-in banding feature work, but I found a workaround which looks pretty good to me (i.e. no visible artifacts). The idea is that the work is divided into chunks that are no larger than 1024x1024, but instead of bands, the buffer is divided into blocks. For example, to scale RGB8888 432x968 to RGB8888 1296x968 (a 3x horizontal scale), the output width of 1296 exceeds the 1024 output limit while the height of 968 doesn't. To get the correct number of passes, start from the output: 1296/2=648, which is within the output limit, and 968 can be left unchanged because it is already within the limit. The work can therefore be done in 2 passes with the following code (Buf_input and Buf_temp are Android DMA_BUFFER pointers, but they could be allocated differently; please refer to rogeriorps/ipu-examples · GitHub for examples of how to set up the IPU and allocate and map memory in a generic Linux fashion):
struct ipu_task task;
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.input.paddr = Buf_input->phy_offset;
task.output.paddr = Buf_temp->phy_offset;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.input.paddr = Buf_input->phy_offset+216*968*4;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
When both the horizontal and vertical sizes exceed the 1024x1024 limit, the work can be split similarly but may need more than 2 passes. For example, to scale 864x1944 to 2592x1944 the following code will work (it requires 8 equal-size passes of 968 rows each, plus a 9th pass for the 32 leftover rows):
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset;
task.output.paddr = Buf_temp->phy_offset;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*2;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*2;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*3;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*3;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*4;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*4;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*5;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*5;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*6;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*6;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*7;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*7;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
memset(&task, 0, sizeof(task));
task.input.width = 216;
task.input.height = 32;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 32;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_input->phy_offset+216*968*4*8;
task.output.paddr = Buf_temp->phy_offset+216*3*968*4*8;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("scale x3: ioct(IPU_QUEUE_TASK) fail");
}
As a side note, I also found a way to accelerate memcpy() with the IPU. A 1:1 scale won't work, but if some change is specified (for example, a horizontal flip) it will work:
memset(&task, 0, sizeof(task));
task.input.width = 216*3;
task.input.height = 968;
task.input.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.width = 216*3;
task.output.height = 968;
task.output.format = v4l2_fourcc('R', 'G', 'B', '4');
task.output.rotate = IPU_ROTATE_HORIZ_FLIP;
task.input.paddr = Buf_temp->phy_offset;
task.output.paddr = Buf_input->phy_offset;
if (ioctl(mIpuFd, IPU_QUEUE_TASK, &task) < 0) {
CAMERA_LOG_ERR("ioct(IPU_QUEUE_TASK) fail");
}
To undo the horizontal flip, another such operation is required, so with a temporary buffer a fast memcpy can be achieved.