diff options
| -rw-r--r-- | drivers/gpu/drm/imx/dw_hdmi-imx.c | 5 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/imx-drm-core.c | 11 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/imx-ldb.c | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/imx-tve.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/ipuv3-crtc.c | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/ipuv3-plane.c | 18 | ||||
| -rw-r--r-- | drivers/gpu/drm/imx/parallel-display.c | 10 | ||||
| -rw-r--r-- | drivers/gpu/ipu-v3/ipu-cpmem.c | 52 | ||||
| -rw-r--r-- | drivers/gpu/ipu-v3/ipu-ic.c | 52 | ||||
| -rw-r--r-- | drivers/gpu/ipu-v3/ipu-image-convert.c | 1019 | ||||
| -rw-r--r-- | include/video/imx-ipu-v3.h | 9 |
11 files changed, 940 insertions, 268 deletions
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index fe6becdcc29e..77a26fd3a44a 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2011-2013 Freescale Semiconductor, Inc. * * derived from imx-hdmi.c(renamed to bridge/dw_hdmi.c now) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/platform_device.h> diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 0e6942f21a4e..820c7e3878f0 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -1,17 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Freescale i.MX drm driver * * Copyright (C) 2011 Sascha Hauer, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <linux/component.h> #include <linux/device.h> diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 3bd0f8a18e74..2c5bbe317353 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * i.MX drm driver - LVDS display bridge * * Copyright (C) 2012 Sascha Hauer, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/module.h> diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index cffd3310240e..293dd5752583 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * i.MX drm driver - Television Encoder (TVEv2) * * Copyright (C) 2013 Philipp Zabel, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/clk.h> @@ -442,7 +434,7 @@ static int clk_tve_di_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } -static struct clk_ops clk_tve_di_ops = { +static const struct clk_ops clk_tve_di_ops = { .round_rate = clk_tve_di_round_rate, .set_rate = clk_tve_di_set_rate, .recalc_rate = clk_tve_di_recalc_rate, diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 7d4b710b837a..058b53c0aa7e 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * i.MX IPUv3 Graphics driver * * Copyright (C) 2011 Sascha Hauer, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/component.h> #include <linux/module.h> diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 40605fdf0e33..c390924de93d 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * i.MX IPUv3 DP Overlay Planes * * Copyright (C) 2013 Philipp Zabel, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <drm/drmP.h> @@ -236,9 +228,15 @@ static void ipu_plane_enable(struct ipu_plane *ipu_plane) void ipu_plane_disable(struct ipu_plane *ipu_plane, bool disable_dp_channel) { + int ret; + DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__); - ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50); + ret = ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50); + if (ret == -ETIMEDOUT) { + DRM_ERROR("[PLANE:%d] IDMAC timeout\n", + ipu_plane->base.base.id); + } if (ipu_plane->dp && disable_dp_channel) ipu_dp_disable_channel(ipu_plane->dp, false); diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index aefd04e18f93..f3ce51121dd6 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * i.MX drm driver - parallel display implementation * * Copyright (C) 2012 Sascha Hauer, Pengutronix - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/component.h> diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index a9d2501500a1..163fadb8a33a 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -259,6 +259,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority); void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf) { + WARN_ON_ONCE(buf & 0x7); + if (bufnum) ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3); else @@ -268,6 +270,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer); void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off) { + WARN_ON_ONCE((u_off & 0x7) || (v_off & 0x7)); + ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8); ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8); } @@ -435,6 +439,8 @@ void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch, unsigned int uv_stride, unsigned int u_offset, unsigned int v_offset) { + WARN_ON_ONCE((u_offset & 0x7) || (v_offset & 0x7)); + ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1); ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8); ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8); @@ -739,48 +745,56 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) switch (pix->pixelformat) { case V4L2_PIX_FMT_YUV420: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : U_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : V_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, u_offset, v_offset); break; case V4L2_PIX_FMT_YVU420: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : V_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : U_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, - v_offset, u_offset); + u_offset, v_offset); break; case V4L2_PIX_FMT_YUV422P: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : U2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : V2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, u_offset, v_offset); break; case V4L2_PIX_FMT_NV12: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = UV_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = 0; + u_offset = image->u_offset ? + image->u_offset : UV_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? image->v_offset : 0; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, u_offset, v_offset); break; case V4L2_PIX_FMT_NV16: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = UV2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = 0; + u_offset = image->u_offset ? + image->u_offset : UV2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? image->v_offset : 0; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, u_offset, v_offset); diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c index 67cc820253a9..594c3cbc8291 100644 --- a/drivers/gpu/ipu-v3/ipu-ic.c +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -442,36 +442,40 @@ unlock: } EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init); -int ipu_ic_task_init(struct ipu_ic *ic, - int in_width, int in_height, - int out_width, int out_height, - enum ipu_color_space in_cs, - enum ipu_color_space out_cs) +int ipu_ic_task_init_rsc(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs, + u32 rsc) { struct ipu_ic_priv *priv = ic->priv; - u32 reg, downsize_coeff, resize_coeff; + u32 downsize_coeff, resize_coeff; unsigned long flags; int ret = 0; - /* Setup vertical resizing */ - ret = calc_resize_coeffs(ic, in_height, out_height, - &resize_coeff, &downsize_coeff); - if (ret) - return ret; + if (!rsc) { + /* Setup vertical resizing */ - reg = (downsize_coeff << 30) | (resize_coeff << 16); + ret = calc_resize_coeffs(ic, in_height, out_height, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + rsc = (downsize_coeff << 30) | (resize_coeff << 16); - /* Setup horizontal resizing */ - ret = calc_resize_coeffs(ic, in_width, out_width, - &resize_coeff, &downsize_coeff); - if (ret) - return ret; + /* Setup horizontal resizing */ + ret = calc_resize_coeffs(ic, in_width, out_width, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; - reg |= (downsize_coeff << 14) | resize_coeff; + rsc |= (downsize_coeff << 14) | resize_coeff; + } spin_lock_irqsave(&priv->lock, flags); - ipu_ic_write(ic, reg, ic->reg->rsc); + ipu_ic_write(ic, rsc, ic->reg->rsc); /* Setup color space conversion */ ic->in_cs = in_cs; @@ -487,6 +491,16 @@ unlock: spin_unlock_irqrestore(&priv->lock, flags); return ret; } + +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs) +{ + return ipu_ic_task_init_rsc(ic, in_width, in_height, out_width, + out_height, in_cs, out_cs, 0); +} EXPORT_SYMBOL_GPL(ipu_ic_task_init); int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index f4081962784c..13103ab86050 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -37,17 +37,36 @@ * when double_buffering boolean is set). * * Note that the input frame must be split up into the same number - * of tiles as the output frame. + * of tiles as the output frame: * - * FIXME: at this point there is no attempt to deal with visible seams - * at the tile boundaries when upscaling. The seams are caused by a reset - * of the bilinear upscale interpolation when starting a new tile. The - * seams are barely visible for small upscale factors, but become - * increasingly visible as the upscale factor gets larger, since more - * interpolated pixels get thrown out at the tile boundaries. A possilble - * fix might be to overlap tiles of different sizes, but this must be done - * while also maintaining the IDMAC dma buffer address alignment and 8x8 IRT - * alignment restrictions of each tile. + * +---------+-----+ + * +-----+---+ | A | B | + * | A | B | | | | + * +-----+---+ --> +---------+-----+ + * | C | D | | C | D | + * +-----+---+ | | | + * +---------+-----+ + * + * Clockwise 90° rotations are handled by first rescaling into a + * reusable temporary tile buffer and then rotating with the 8x8 + * block rotator, writing to the correct destination: + * + * +-----+-----+ + * | | | + * +-----+---+ +---------+ | C | A | + * | A | B | | A,B, | | | | | + * +-----+---+ --> | C,D | | --> | | | + * | C | D | +---------+ +-----+-----+ + * +-----+---+ | D | B | + * | | | + * +-----+-----+ + * + * If the 8x8 block rotator is used, horizontal or vertical flipping + * is done during the rotation step, otherwise flipping is done + * during the scaling step. + * With rotation or flipping, tile order changes between input and + * output image. Tiles are numbered row major from top left to bottom + * right for both input and output image. */ #define MAX_STRIPES_W 4 @@ -84,6 +103,8 @@ struct ipu_image_convert_dma_chan { struct ipu_image_tile { u32 width; u32 height; + u32 left; + u32 top; /* size and strides are in bytes */ u32 size; u32 stride; @@ -135,6 +156,12 @@ struct ipu_image_convert_ctx { struct ipu_image_convert_image in; struct ipu_image_convert_image out; enum ipu_rotate_mode rot_mode; + u32 downsize_coeff_h; + u32 downsize_coeff_v; + u32 image_resize_coeff_h; + u32 image_resize_coeff_v; + u32 resize_coeffs_h[MAX_STRIPES_W]; + u32 resize_coeffs_v[MAX_STRIPES_H]; /* intermediate buffer for rotation */ struct ipu_image_convert_dma_buf rot_intermediate[2]; @@ -300,12 +327,11 @@ static void dump_format(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_priv *priv = chan->priv; dev_dbg(priv->ipu->dev, - "task %u: ctx %p: %s format: %dx%d (%dx%d tiles of size %dx%d), %c%c%c%c\n", + "task %u: ctx %p: %s format: %dx%d (%dx%d tiles), %c%c%c%c\n", chan->ic_task, ctx, ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input", ic_image->base.pix.width, ic_image->base.pix.height, ic_image->num_cols, ic_image->num_rows, - ic_image->tile[0].width, ic_image->tile[0].height, ic_image->fmt->fourcc & 0xff, (ic_image->fmt->fourcc >> 8) & 0xff, (ic_image->fmt->fourcc >> 16) & 0xff, @@ -353,24 +379,459 @@ static int alloc_dma_buf(struct ipu_image_convert_priv *priv, static inline int num_stripes(int dim) { - if (dim <= 1024) - return 1; - else if (dim <= 2048) + return (dim - 1) / 1024 + 1; +} + +/* + * Calculate downsizing coefficients, which are the same for all tiles, + * and bilinear resizing coefficients, which are used to find the best + * seam positions. + */ +static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, + struct ipu_image *in, + struct ipu_image *out) +{ + u32 downsized_width = in->rect.width; + u32 downsized_height = in->rect.height; + u32 downsize_coeff_v = 0; + u32 downsize_coeff_h = 0; + u32 resized_width = out->rect.width; + u32 resized_height = out->rect.height; + u32 resize_coeff_h; + u32 resize_coeff_v; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + resized_width = out->rect.height; + resized_height = out->rect.width; + } + + /* Do not let invalid input lead to an endless loop below */ + if (WARN_ON(resized_width == 0 || resized_height == 0)) + return -EINVAL; + + while (downsized_width >= resized_width * 2) { + downsized_width >>= 1; + downsize_coeff_h++; + } + + while (downsized_height >= resized_height * 2) { + downsized_height >>= 1; + downsize_coeff_v++; + } + + /* + * Calculate the bilinear resizing coefficients that could be used if + * we were converting with a single tile. The bottom right output pixel + * should sample as close as possible to the bottom right input pixel + * out of the decimator, but not overshoot it: + */ + resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1); + resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1); + + dev_dbg(ctx->chan->priv->ipu->dev, + "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n", + __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v, + resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows); + + if (downsize_coeff_h > 2 || downsize_coeff_v > 2 || + resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff) + return -EINVAL; + + ctx->downsize_coeff_h = downsize_coeff_h; + ctx->downsize_coeff_v = downsize_coeff_v; + ctx->image_resize_coeff_h = resize_coeff_h; + ctx->image_resize_coeff_v = resize_coeff_v; + + return 0; +} + +#define round_closest(x, y) round_down((x) + (y)/2, (y)) + +/* + * Find the best aligned seam position in the inverval [out_start, out_end]. + * Rotation and image offsets are out of scope. + * + * @out_start: start of inverval, must be within 1024 pixels / lines + * of out_end + * @out_end: end of interval, smaller than or equal to out_edge + * @in_edge: input right / bottom edge + * @out_edge: output right / bottom edge + * @in_align: input alignment, either horizontal 8-byte line start address + * alignment, or pixel alignment due to image format + * @out_align: output alignment, either horizontal 8-byte line start address + * alignment, or pixel alignment due to image format or rotator + * block size + * @in_burst: horizontal input burst size in case of horizontal flip + * @out_burst: horizontal output burst size or rotator block size + * @downsize_coeff: downsizing section coefficient + * @resize_coeff: main processing section resizing coefficient + * @_in_seam: aligned input seam position return value + * @_out_seam: aligned output seam position return value + */ +static void find_best_seam(struct ipu_image_convert_ctx *ctx, + unsigned int out_start, + unsigned int out_end, + unsigned int in_edge, + unsigned int out_edge, + unsigned int in_align, + unsigned int out_align, + unsigned int in_burst, + unsigned int out_burst, + unsigned int downsize_coeff, + unsigned int resize_coeff, + u32 *_in_seam, + u32 *_out_seam) +{ + struct device *dev = ctx->chan->priv->ipu->dev; + unsigned int out_pos; + /* Input / output seam position candidates */ + unsigned int out_seam = 0; + unsigned int in_seam = 0; + unsigned int min_diff = UINT_MAX; + + /* + * Output tiles must start at a multiple of 8 bytes horizontally and + * possibly at an even line horizontally depending on the pixel format. + * Only consider output aligned positions for the seam. + */ + out_start = round_up(out_start, out_align); + for (out_pos = out_start; out_pos < out_end; out_pos += out_align) { + unsigned int in_pos; + unsigned int in_pos_aligned; + unsigned int abs_diff; + + /* + * Tiles in the right row / bottom column may not be allowed to + * overshoot horizontally / vertically. out_burst may be the + * actual DMA burst size, or the rotator block size. + */ + if ((out_burst > 1) && (out_edge - out_pos) % out_burst) + continue; + + /* + * Input sample position, corresponding to out_pos, 19.13 fixed + * point. + */ + in_pos = (out_pos * resize_coeff) << downsize_coeff; + /* + * The closest input sample position that we could actually + * start the input tile at, 19.13 fixed point. + */ + in_pos_aligned = round_closest(in_pos, 8192U * in_align); + + if ((in_burst > 1) && + (in_edge - in_pos_aligned / 8192U) % in_burst) + continue; + + if (in_pos < in_pos_aligned) + abs_diff = in_pos_aligned - in_pos; + else + abs_diff = in_pos - in_pos_aligned; + + if (abs_diff < min_diff) { + in_seam = in_pos_aligned; + out_seam = out_pos; + min_diff = abs_diff; + } + } + + *_out_seam = out_seam; + /* Convert 19.13 fixed point to integer seam position */ + *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U); + + dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n", + __func__, out_seam, out_align, out_start, out_end, + *_in_seam, in_align, min_diff / 8192, + DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192)); +} + +/* + * Tile left edges are required to be aligned to multiples of 8 bytes + * by the IDMAC. + */ +static inline u32 tile_left_align(const struct ipu_image_pixfmt *fmt) +{ + if (fmt->planar) + return fmt->uv_packed ? 8 : 8 * fmt->uv_width_dec; + else + return fmt->bpp == 32 ? 2 : fmt->bpp == 16 ? 4 : 8; +} + +/* + * Tile top edge alignment is only limited by chroma subsampling. + */ +static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt) +{ + return fmt->uv_height_dec > 1 ? 2 : 1; +} + +static inline u32 tile_width_align(enum ipu_image_convert_type type, + const struct ipu_image_pixfmt *fmt, + enum ipu_rotate_mode rot_mode) +{ + if (type == IMAGE_CONVERT_IN) { + /* + * The IC burst reads 8 pixels at a time. Reading beyond the + * end of the line is usually acceptable. Those pixels are + * ignored, unless the IC has to write the scaled line in + * reverse. + */ + return (!ipu_rot_mode_is_irt(rot_mode) && + (rot_mode & IPU_ROT_BIT_HFLIP)) ? 8 : 2; + } + + /* + * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled + * formats to guarantee 8-byte aligned line start addresses in the + * chroma planes when IRT is used. Align to 8x8 pixel IRT block size + * for all other formats. + */ + return (ipu_rot_mode_is_irt(rot_mode) && + fmt->planar && !fmt->uv_packed) ? + 8 * fmt->uv_width_dec : 8; +} + +static inline u32 tile_height_align(enum ipu_image_convert_type type, + const struct ipu_image_pixfmt *fmt, + enum ipu_rotate_mode rot_mode) +{ + if (type == IMAGE_CONVERT_IN || !ipu_rot_mode_is_irt(rot_mode)) return 2; + + /* + * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled + * formats to guarantee 8-byte aligned line start addresses in the + * chroma planes when IRT is used. Align to 8x8 pixel IRT block size + * for all other formats. + */ + return (fmt->planar && !fmt->uv_packed) ? 8 * fmt->uv_width_dec : 8; +} + +/* + * Fill in left position and width and for all tiles in an input column, and + * for all corresponding output tiles. If the 90° rotator is used, the output + * tiles are in a row, and output tile top position and height are set. + */ +static void fill_tile_column(struct ipu_image_convert_ctx *ctx, + unsigned int col, + struct ipu_image_convert_image *in, + unsigned int in_left, unsigned int in_width, + struct ipu_image_convert_image *out, + unsigned int out_left, unsigned int out_width) +{ + unsigned int row, tile_idx; + struct ipu_image_tile *in_tile, *out_tile; + + for (row = 0; row < in->num_rows; row++) { + tile_idx = in->num_cols * row + col; + in_tile = &in->tile[tile_idx]; + out_tile = &out->tile[ctx->out_tile_map[tile_idx]]; + + in_tile->left = in_left; + in_tile->width = in_width; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + out_tile->top = out_left; + out_tile->height = out_width; + } else { + out_tile->left = out_left; + out_tile->width = out_width; + } + } +} + +/* + * Fill in top position and height and for all tiles in an input row, and + * for all corresponding output tiles. If the 90° rotator is used, the output + * tiles are in a column, and output tile left position and width are set. + */ +static void fill_tile_row(struct ipu_image_convert_ctx *ctx, unsigned int row, + struct ipu_image_convert_image *in, + unsigned int in_top, unsigned int in_height, + struct ipu_image_convert_image *out, + unsigned int out_top, unsigned int out_height) +{ + unsigned int col, tile_idx; + struct ipu_image_tile *in_tile, *out_tile; + + for (col = 0; col < in->num_cols; col++) { + tile_idx = in->num_cols * row + col; + in_tile = &in->tile[tile_idx]; + out_tile = &out->tile[ctx->out_tile_map[tile_idx]]; + + in_tile->top = in_top; + in_tile->height = in_height; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + out_tile->left = out_top; + out_tile->width = out_height; + } else { + out_tile->top = out_top; + out_tile->height = out_height; + } + } +} + +/* + * Find the best horizontal and vertical seam positions to split into tiles. + * Minimize the fractional part of the input sampling position for the + * top / left pixels of each tile. + */ +static void find_seams(struct ipu_image_convert_ctx *ctx, + struct ipu_image_convert_image *in, + struct ipu_image_convert_image *out) +{ + struct device *dev = ctx->chan->priv->ipu->dev; + unsigned int resized_width = out->base.rect.width; + unsigned int resized_height = out->base.rect.height; + unsigned int col; + unsigned int row; + unsigned int in_left_align = tile_left_align(in->fmt); + unsigned int in_top_align = tile_top_align(in->fmt); + unsigned int out_left_align = tile_left_align(out->fmt); + unsigned int out_top_align = tile_top_align(out->fmt); + unsigned int out_width_align = tile_width_align(out->type, out->fmt, + ctx->rot_mode); + unsigned int out_height_align = tile_height_align(out->type, out->fmt, + ctx->rot_mode); + unsigned int in_right = in->base.rect.width; + unsigned int in_bottom = in->base.rect.height; + unsigned int out_right = out->base.rect.width; + unsigned int out_bottom = out->base.rect.height; + unsigned int flipped_out_left; + unsigned int flipped_out_top; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + /* Switch width/height and align top left to IRT block size */ + resized_width = out->base.rect.height; + resized_height = out->base.rect.width; + out_left_align = out_height_align; + out_top_align = out_width_align; + out_width_align = out_left_align; + out_height_align = out_top_align; + out_right = out->base.rect.height; + out_bottom = out->base.rect.width; + } + + for (col = in->num_cols - 1; col > 0; col--) { + bool allow_in_overshoot = ipu_rot_mode_is_irt(ctx->rot_mode) || + !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); + bool allow_out_overshoot = (col < in->num_cols - 1) && + !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); + unsigned int out_start; + unsigned int out_end; + unsigned int in_left; + unsigned int out_left; + + /* + * Align input width to burst length if the scaling step flips + * horizontally. + */ + + /* Start within 1024 pixels of the right edge */ + out_start = max_t(int, 0, out_right - 1024); + /* End before having to add more columns to the left */ + out_end = min_t(unsigned int, out_right, col * 1024); + + find_best_seam(ctx, out_start, out_end, + in_right, out_right, + in_left_align, out_left_align, + allow_in_overshoot ? 1 : 8 /* burst length */, + allow_out_overshoot ? 1 : out_width_align, + ctx->downsize_coeff_h, ctx->image_resize_coeff_h, + &in_left, &out_left); + + if (ctx->rot_mode & IPU_ROT_BIT_HFLIP) + flipped_out_left = resized_width - out_right; + else + flipped_out_left = out_left; + + fill_tile_column(ctx, col, in, in_left, in_right - in_left, + out, flipped_out_left, out_right - out_left); + + dev_dbg(dev, "%s: col %u: %u, %u -> %u, %u\n", __func__, col, + in_left, in_right - in_left, + flipped_out_left, out_right - out_left); + + in_right = in_left; + out_right = out_left; + } + + flipped_out_left = (ctx->rot_mode & IPU_ROT_BIT_HFLIP) ? + resized_width - out_right : 0; + + fill_tile_column(ctx, 0, in, 0, in_right, + out, flipped_out_left, out_right); + + dev_dbg(dev, "%s: col 0: 0, %u -> %u, %u\n", __func__, + in_right, flipped_out_left, out_right); + + for (row = in->num_rows - 1; row > 0; row--) { + bool allow_overshoot = row < in->num_rows - 1; + unsigned int out_start; + unsigned int out_end; + unsigned int in_top; + unsigned int out_top; + + /* Start within 1024 lines of the bottom edge */ + out_start = max_t(int, 0, out_bottom - 1024); + /* End before having to add more rows above */ + out_end = min_t(unsigned int, out_bottom, row * 1024); + + find_best_seam(ctx, out_start, out_end, + in_bottom, out_bottom, + in_top_align, out_top_align, + 1, allow_overshoot ? 1 : out_height_align, + ctx->downsize_coeff_v, ctx->image_resize_coeff_v, + &in_top, &out_top); + + if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^ + ipu_rot_mode_is_irt(ctx->rot_mode)) + flipped_out_top = resized_height - out_bottom; + else + flipped_out_top = out_top; + + fill_tile_row(ctx, row, in, in_top, in_bottom - in_top, + out, flipped_out_top, out_bottom - out_top); + + dev_dbg(dev, "%s: row %u: %u, %u -> %u, %u\n", __func__, row, + in_top, in_bottom - in_top, + flipped_out_top, out_bottom - out_top); + + in_bottom = in_top; + out_bottom = out_top; + } + + if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^ + ipu_rot_mode_is_irt(ctx->rot_mode)) + flipped_out_top = resized_height - out_bottom; else - return 4; + flipped_out_top = 0; + + fill_tile_row(ctx, 0, in, 0, in_bottom, + out, flipped_out_top, out_bottom); + + dev_dbg(dev, "%s: row 0: 0, %u -> %u, %u\n", __func__, + in_bottom, flipped_out_top, out_bottom); } static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { - int i; + struct ipu_image_convert_chan *chan = ctx->chan; + struct ipu_image_convert_priv *priv = chan->priv; + unsigned int i; for (i = 0; i < ctx->num_tiles; i++) { - struct ipu_image_tile *tile = &image->tile[i]; + struct ipu_image_tile *tile; + const unsigned int row = i / image->num_cols; + const unsigned int col = i % image->num_cols; + + if (image->type == IMAGE_CONVERT_OUT) + tile = &image->tile[ctx->out_tile_map[i]]; + else + tile = &image->tile[i]; - tile->height = image->base.pix.height / image->num_rows; - tile->width = image->base.pix.width / image->num_cols; tile->si |
