950-0551-drm-vc4-Add-support-for-gamma-on-BCM2711.patch 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. From fc26e29e257c8d737b78e4581f7ffd9be338a70c Mon Sep 17 00:00:00 2001
  2. From: Dave Stevenson <[email protected]>
  3. Date: Tue, 27 Apr 2021 14:24:21 +0200
  4. Subject: [PATCH] drm/vc4: Add support for gamma on BCM2711
  5. BCM2711 changes from a 256 entry lookup table to a 16 point
  6. piecewise linear function, because the pipeline bit depth has
  7. increased enough to make a LUT unwieldy.
  8. Implement a simple conversion from the 256 entry LUT that userspace
  9. is likely to expect into 16 evenly spread points in the PWL. This
  10. could be improved with curve fitting at a later date.
  11. Co-developed-by: Juerg Haefliger <[email protected]>
  12. Signed-off-by: Juerg Haefliger <[email protected]>
  13. Signed-off-by: Dave Stevenson <[email protected]>
  14. Signed-off-by: Maxime Ripard <[email protected]>
  15. ---
  16. drivers/gpu/drm/vc4/vc4_crtc.c | 35 +++++++++++---
  17. drivers/gpu/drm/vc4/vc4_drv.h | 28 +++++++++--
  18. drivers/gpu/drm/vc4/vc4_hvs.c | 87 ++++++++++++++++++++++++++++++++--
  19. drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++++++
  20. 4 files changed, 159 insertions(+), 13 deletions(-)
  21. --- a/drivers/gpu/drm/vc4/vc4_crtc.c
  22. +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
  23. @@ -1148,19 +1148,42 @@ int vc4_crtc_init(struct drm_device *drm
  24. if (!vc4->hvs->hvs5) {
  25. drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
  26. + } else {
  27. + /* This is a lie for hvs5 which uses a 16 point PWL, but it
  28. + * allows for something smarter than just 16 linearly spaced
  29. + * segments. Conversion is done in vc5_hvs_update_gamma_lut.
  30. + */
  31. + drm_mode_crtc_set_gamma_size(crtc, 256);
  32. + }
  33. - drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
  34. + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
  35. + if (!vc4->hvs->hvs5) {
  36. /* We support CTM, but only for one CRTC at a time. It's therefore
  37. * implemented as private driver state in vc4_kms, not here.
  38. */
  39. drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
  40. - }
  41. - for (i = 0; i < crtc->gamma_size; i++) {
  42. - vc4_crtc->lut_r[i] = i;
  43. - vc4_crtc->lut_g[i] = i;
  44. - vc4_crtc->lut_b[i] = i;
  45. + /* Initialize the VC4 gamma LUTs */
  46. + for (i = 0; i < crtc->gamma_size; i++) {
  47. + vc4_crtc->lut_r[i] = i;
  48. + vc4_crtc->lut_g[i] = i;
  49. + vc4_crtc->lut_b[i] = i;
  50. + }
  51. + } else {
  52. + /* Initialize the VC5 gamma PWL entries. Assume 12-bit pipeline,
  53. + * evenly spread over full range.
  54. + */
  55. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
  56. + vc4_crtc->pwl_r[i] =
  57. + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
  58. + vc4_crtc->pwl_g[i] =
  59. + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
  60. + vc4_crtc->pwl_b[i] =
  61. + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
  62. + vc4_crtc->pwl_a[i] =
  63. + VC5_HVS_SET_GAMMA_ENTRY(i << 8, i << 12, 1 << 8);
  64. + }
  65. }
  66. return 0;
  67. --- a/drivers/gpu/drm/vc4/vc4_drv.h
  68. +++ b/drivers/gpu/drm/vc4/vc4_drv.h
  69. @@ -19,6 +19,7 @@
  70. #include <drm/drm_modeset_lock.h>
  71. #include "uapi/drm/vc4_drm.h"
  72. +#include "vc4_regs.h"
  73. struct drm_device;
  74. struct drm_gem_object;
  75. @@ -481,6 +482,17 @@ struct vc4_pv_data {
  76. };
  77. +struct vc5_gamma_entry {
  78. + u32 x_c_terms;
  79. + u32 grad_term;
  80. +};
  81. +
  82. +#define VC5_HVS_SET_GAMMA_ENTRY(x, c, g) (struct vc5_gamma_entry){ \
  83. + .x_c_terms = VC4_SET_FIELD((x), SCALER5_DSPGAMMA_OFF_X) | \
  84. + VC4_SET_FIELD((c), SCALER5_DSPGAMMA_OFF_C), \
  85. + .grad_term = (g) \
  86. +}
  87. +
  88. struct vc4_crtc {
  89. struct drm_crtc base;
  90. struct platform_device *pdev;
  91. @@ -490,9 +502,19 @@ struct vc4_crtc {
  92. /* Timestamp at start of vblank irq - unaffected by lock delays. */
  93. ktime_t t_vblank;
  94. - u8 lut_r[256];
  95. - u8 lut_g[256];
  96. - u8 lut_b[256];
  97. + union {
  98. + struct { /* VC4 gamma LUT */
  99. + u8 lut_r[256];
  100. + u8 lut_g[256];
  101. + u8 lut_b[256];
  102. + };
  103. + struct { /* VC5 gamma PWL entries */
  104. + struct vc5_gamma_entry pwl_r[SCALER5_DSPGAMMA_NUM_POINTS];
  105. + struct vc5_gamma_entry pwl_g[SCALER5_DSPGAMMA_NUM_POINTS];
  106. + struct vc5_gamma_entry pwl_b[SCALER5_DSPGAMMA_NUM_POINTS];
  107. + struct vc5_gamma_entry pwl_a[SCALER5_DSPGAMMA_NUM_POINTS];
  108. + };
  109. + };
  110. struct drm_pending_vblank_event *event;
  111. --- a/drivers/gpu/drm/vc4/vc4_hvs.c
  112. +++ b/drivers/gpu/drm/vc4/vc4_hvs.c
  113. @@ -259,6 +259,80 @@ u8 vc4_hvs_get_fifo_frame_count(struct d
  114. return field;
  115. }
  116. +static void vc5_hvs_write_gamma_entry(struct vc4_dev *vc4,
  117. + u32 offset,
  118. + struct vc5_gamma_entry *gamma)
  119. +{
  120. + HVS_WRITE(offset, gamma->x_c_terms);
  121. + HVS_WRITE(offset + 4, gamma->grad_term);
  122. +}
  123. +
  124. +static void vc5_hvs_lut_load(struct drm_crtc *crtc)
  125. +{
  126. + struct drm_device *dev = crtc->dev;
  127. + struct vc4_dev *vc4 = to_vc4_dev(dev);
  128. + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
  129. + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
  130. + u32 i;
  131. + u32 offset = SCALER5_DSPGAMMA_START +
  132. + vc4_state->assigned_channel * SCALER5_DSPGAMMA_CHAN_OFFSET;
  133. +
  134. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
  135. + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_r[i]);
  136. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
  137. + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_g[i]);
  138. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
  139. + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_b[i]);
  140. +
  141. + if (vc4_state->assigned_channel == 2) {
  142. + /* Alpha only valid on channel 2 */
  143. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++, offset += 8)
  144. + vc5_hvs_write_gamma_entry(vc4, offset, &vc4_crtc->pwl_a[i]);
  145. + }
  146. +}
  147. +
  148. +static void vc5_hvs_update_gamma_lut(struct drm_crtc *crtc)
  149. +{
  150. + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
  151. + struct drm_color_lut *lut = crtc->state->gamma_lut->data;
  152. + unsigned int step, i;
  153. + u32 start, end;
  154. +
  155. +#define VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl, chan) \
  156. + start = drm_color_lut_extract(lut[i * step].chan, 12); \
  157. + end = drm_color_lut_extract(lut[(i + 1) * step - 1].chan, 12); \
  158. + \
  159. + /* Negative gradients not permitted by the hardware, so \
  160. + * flatten such points out. \
  161. + */ \
  162. + if (end < start) \
  163. + end = start; \
  164. + \
  165. + /* Assume 12bit pipeline. \
  166. + * X evenly spread over full range (12 bit). \
  167. + * C as U12.4 format. \
  168. + * Gradient as U4.8 format. \
  169. + */ \
  170. + vc4_crtc->pwl[i] = \
  171. + VC5_HVS_SET_GAMMA_ENTRY(i << 8, start << 4, \
  172. + ((end - start) << 4) / (step - 1))
  173. +
  174. + /* HVS5 has a 16 point piecewise linear function for each colour
  175. + * channel (including alpha on channel 2) on each display channel.
  176. + *
  177. + * Currently take a crude subsample of the gamma LUT, but this could
  178. + * be improved to implement curve fitting.
  179. + */
  180. + step = crtc->gamma_size / SCALER5_DSPGAMMA_NUM_POINTS;
  181. + for (i = 0; i < SCALER5_DSPGAMMA_NUM_POINTS; i++) {
  182. + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_r, red);
  183. + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_g, green);
  184. + VC5_HVS_UPDATE_GAMMA_ENTRY_FROM_LUT(pwl_b, blue);
  185. + }
  186. +
  187. + vc5_hvs_lut_load(crtc);
  188. +}
  189. +
  190. int vc4_hvs_get_fifo_from_output(struct drm_device *dev, unsigned int output)
  191. {
  192. struct vc4_dev *vc4 = to_vc4_dev(dev);
  193. @@ -352,14 +426,16 @@ static int vc4_hvs_init_channel(struct v
  194. dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE;
  195. HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
  196. - SCALER_DISPBKGND_AUTOHS |
  197. - ((!vc4->hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) |
  198. + SCALER_DISPBKGND_AUTOHS | SCALER_DISPBKGND_GAMMA |
  199. (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
  200. /* Reload the LUT, since the SRAMs would have been disabled if
  201. * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
  202. */
  203. - vc4_hvs_lut_load(crtc);
  204. + if (!vc4->hvs->hvs5)
  205. + vc4_hvs_lut_load(crtc);
  206. + else
  207. + vc5_hvs_lut_load(crtc);
  208. return 0;
  209. }
  210. @@ -557,7 +633,10 @@ void vc4_hvs_atomic_flush(struct drm_crt
  211. u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_state->assigned_channel));
  212. if (crtc->state->gamma_lut) {
  213. - vc4_hvs_update_gamma_lut(crtc);
  214. + if (!vc4->hvs->hvs5)
  215. + vc4_hvs_update_gamma_lut(crtc);
  216. + else
  217. + vc5_hvs_update_gamma_lut(crtc);
  218. dispbkgndx |= SCALER_DISPBKGND_GAMMA;
  219. } else {
  220. /* Unsetting DISPBKGND_GAMMA skips the gamma lut step
  221. --- a/drivers/gpu/drm/vc4/vc4_regs.h
  222. +++ b/drivers/gpu/drm/vc4/vc4_regs.h
  223. @@ -505,6 +505,28 @@
  224. #define SCALER_DLIST_START 0x00002000
  225. #define SCALER_DLIST_SIZE 0x00004000
  226. +/* Gamma PWL for each channel. 16 points for each of 4 colour channels (alpha
  227. + * only on channel 2). 8 bytes per entry, offsets first, then gradient:
  228. + * Y = GRAD * X + C
  229. + *
  230. + * Values for X and C are left justified, and vary depending on the width of
  231. + * the HVS channel:
  232. + * 8-bit pipeline: X uses [31:24], C is U8.8 format, and GRAD is U4.8.
  233. + * 12-bit pipeline: X uses [31:20], C is U12.4 format, and GRAD is U4.8.
  234. + *
  235. + * The 3 HVS channels start at 0x400 offsets (ie chan 1 starts at 0x2400, and
  236. + * chan 2 at 0x2800).
  237. + */
  238. +#define SCALER5_DSPGAMMA_NUM_POINTS 16
  239. +#define SCALER5_DSPGAMMA_START 0x00002000
  240. +#define SCALER5_DSPGAMMA_CHAN_OFFSET 0x400
  241. +# define SCALER5_DSPGAMMA_OFF_X_MASK VC4_MASK(31, 20)
  242. +# define SCALER5_DSPGAMMA_OFF_X_SHIFT 20
  243. +# define SCALER5_DSPGAMMA_OFF_C_MASK VC4_MASK(15, 0)
  244. +# define SCALER5_DSPGAMMA_OFF_C_SHIFT 0
  245. +# define SCALER5_DSPGAMMA_GRAD_MASK VC4_MASK(11, 0)
  246. +# define SCALER5_DSPGAMMA_GRAD_SHIFT 0
  247. +
  248. #define SCALER5_DLIST_START 0x00004000
  249. # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)