format_conversion.effect 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. /******************************************************************************
  2. Copyright (C) 2014 by Hugh Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. //#define DEBUGGING
  /* Transform multiplied with the vertex position in VSDefault. */
  uniform float4x4 ViewProj;

  /* Plane boundaries compared against the computed byte offset by the
   * PSNV12 / PSPlanar420 / PSPlanar444 packing shaders. */
  uniform float u_plane_offset;
  uniform float v_plane_offset;

  /* Dimension values supplied by the host.
   * NOTE(review): judging by how they are used, the "_i" names look like
   * reciprocals and the "_d2" names like halves -- confirm against the code
   * that sets these parameters. */
  uniform float width;
  uniform float height;
  uniform float width_i;
  uniform float height_i;
  uniform float width_d2;
  uniform float height_d2;
  uniform float width_d2_i;
  uniform float height_d2_i;
  uniform float input_width;
  uniform float input_height;
  uniform float input_width_i;
  uniform float input_height_i;
  uniform float input_width_i_d2;
  uniform float input_height_i_d2;

  /* Integer values used by the *_Reverse shaders and GetIntOffsetColor for
   * integer texel addressing. */
  uniform int int_width;
  uniform int int_input_width;
  uniform int int_u_plane_offset;
  uniform int int_v_plane_offset;

  /* Matrix applied to float4(yuv, 1.0) by the *_Reverse shaders, and the
   * per-channel clamp range applied to yuv beforehand. */
  uniform float4x4 color_matrix;
  uniform float3 color_range_min = {0.0, 0.0, 0.0};
  uniform float3 color_range_max = {1.0, 1.0, 1.0};

  /* Source texture read by every pixel shader in this file. */
  uniform texture2d image;
  /* Sampler used by every image.Sample() call in this file: linear filtering
   * with clamped addressing on both axes. */
  sampler_state def_sampler {
      Filter   = Linear;
      AddressU = Clamp;
      AddressV = Clamp;
  };
  /* Vertex data used both as vertex shader input/output and as pixel shader
   * input: a position and one texture coordinate. */
  struct VertInOut {
      float4 pos : POSITION;
      float2 uv  : TEXCOORD0;
  };
  /* Vertex shader shared by all techniques: transforms the position (with w
   * forced to 1.0) by ViewProj and forwards the texture coordinate as-is. */
  VertInOut VSDefault(VertInOut vert_in)
  {
      VertInOut result;

      result.uv  = vert_in.uv;
      result.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);

      return result;
  }
  /* Small bias added before floor()/fmod()/int() conversions to guard against
   * internal GPU precision issues, with fmod in particular. */
#define PRECISION_OFFSET 0.2
  /*
   * Packing pixel shader for the NV12 technique.
   *
   * The output position is turned into a byte offset (four bytes per output
   * pixel).  Offsets below u_plane_offset gather channel .y from four
   * horizontally adjacent texels of image; all other offsets return the .rb
   * channels of two samples taken two texels apart.
   */
  float4 PSNV12(VertInOut vert_in) : TARGET
  {
      float out_row = floor(vert_in.uv.y * input_height);
      float byte_offset = floor((out_row + vert_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
#endif
          /* column/row of the first texel, moved to the texel center */
          float x0 = floor(fmod(byte_offset, width)) * width_i;
          float y0 = floor(byte_offset * width_i) * height_i;
          x0 += width_i * 0.5;
          y0 += height_i * 0.5;

          /* step one texel at a time so all four texels are hit exactly */
          float x1 = x0 + width_i;
          float x2 = x1 + width_i;
          float x3 = x2 + width_i;

          float4 s0 = image.Sample(def_sampler, float2(x0, y0));
          float4 s1 = image.Sample(def_sampler, float2(x1, y0));
          float4 s2 = image.Sample(def_sampler, float2(x2, y0));
          float4 s3 = image.Sample(def_sampler, float2(x3, y0));

          /* second channel of each sample, in sample order */
          return float4(s0.y, s1.y, s2.y, s3.y);
      }

#ifdef DEBUGGING
      return float4(0.5, 0.2, 0.5, 0.2);
#endif
      float plane_offset = byte_offset - u_plane_offset;
      float two_texels = width_i * 2.0;

      float cx = floor(fmod(plane_offset, width)) * width_i;
      float cy = floor(plane_offset * width_i) * height_d2_i;

      /* sit on the border shared by a 2x2 group of texels so the linear
       * sampler averages them */
      cx += width_i;
      cy += height_i;

      float2 first_pos  = float2(cx, cy);
      float2 second_pos = float2(cx + two_texels, cy);

      return float4(
          image.Sample(def_sampler, first_pos).rb,
          image.Sample(def_sampler, second_pos).rb
      );
  }
  /* Returns channel .y of image sampled at the interpolated texture
   * coordinate. */
  float PSNV12_Y(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return texel.y;
  }
  /* Returns channels .x and .z of image sampled at the interpolated texture
   * coordinate. */
  float2 PSNV12_UV(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return float2(texel.x, texel.z);
  }
  /*
   * Packing pixel shader for the Planar420 technique.
   *
   * The output position is turned into a byte offset (four bytes per output
   * pixel).  Four sample positions are derived from that offset, image is
   * sampled at each, and one channel of the four samples is returned:
   *   - offset <  u_plane_offset : channel .y of each sample
   *   - offset <  v_plane_offset : channel .x of each sample
   *   - otherwise                : channel .z of each sample
   */
  float4 PSPlanar420(VertInOut vert_in) : TARGET
  {
      float v_mul = floor(vert_in.uv.y * input_height);
      float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      float2 sample_pos[4];

      if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
#endif
          float lum_u = floor(fmod(byte_offset, width)) * width_i;
          float lum_v = floor(byte_offset * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          lum_u += width_i * 0.5;
          lum_v += height_i * 0.5;

          sample_pos[0] = float2(lum_u, lum_v);
          sample_pos[1] = float2(lum_u += width_i, lum_v);
          sample_pos[2] = float2(lum_u += width_i, lum_v);
          sample_pos[3] = float2(lum_u + width_i, lum_v);
      } else {
#ifdef DEBUGGING
          return ((byte_offset < v_plane_offset) ?
              float4(0.5, 0.5, 0.5, 0.5) :
              float4(0.2, 0.2, 0.2, 0.2));
#endif
          /* offset relative to the start of whichever plane we are in */
          float new_offset = byte_offset -
              ((byte_offset < v_plane_offset) ?
              u_plane_offset : v_plane_offset);

          float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
          float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
          float width_i2 = width_i*2.0;

          /* move to the borders of each set of 4 pixels to force it
           * to do bilinear averaging */
          ch_u += width_i;
          ch_v += height_i;

          /* set up coordinates for next chroma line, in case
           * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
           * between the current and the next chroma line; do note that the
           * next chroma line is two source lines below the current source
           * line */
          float ch_u_n = 0. + width_i;
          float ch_v_n = ch_v + height_i * 3;

          sample_pos[0] = float2(ch_u, ch_v);
          sample_pos[1] = float2(ch_u += width_i2, ch_v);

          ch_u += width_i2;

          /* check if ch_u overflowed the current source and chroma line */
          if (ch_u > 1.0) {
              /* The last two samples come from the start of the next
               * chroma line.  Both slots must be written: assigning
               * slot 2 twice would leave slot 3 uninitialized. */
              sample_pos[2] = float2(ch_u_n, ch_v_n);
              sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
          } else {
              sample_pos[2] = float2(ch_u, ch_v);
              sample_pos[3] = float2(ch_u + width_i2, ch_v);
          }
      }

      float4x4 out_val = float4x4(
          image.Sample(def_sampler, sample_pos[0]),
          image.Sample(def_sampler, sample_pos[1]),
          image.Sample(def_sampler, sample_pos[2]),
          image.Sample(def_sampler, sample_pos[3])
      );

      /* after the transpose, row N holds channel N of all four samples */
      out_val = transpose(out_val);

      if (byte_offset < u_plane_offset)
          return out_val[1];
      else if (byte_offset < v_plane_offset)
          return out_val[0];
      else
          return out_val[2];
  }
  /*
   * Packing pixel shader for the Planar444 technique.
   *
   * The output position is turned into a byte offset (four bytes per output
   * pixel), made relative to the plane it falls in, and mapped to four
   * horizontally adjacent texels of image.  One channel of those four samples
   * is returned: .y below u_plane_offset, .x below v_plane_offset, .z
   * otherwise.
   */
  float4 PSPlanar444(VertInOut vert_in) : TARGET
  {
      float out_row = floor(vert_in.uv.y * input_height);
      float byte_offset = floor((out_row + vert_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      /* start of the plane this offset belongs to (0 for the first plane) */
      float plane_start = (byte_offset >= v_plane_offset)
          ? v_plane_offset
          : ((byte_offset >= u_plane_offset) ? u_plane_offset : 0.0);
      float plane_offset = byte_offset - plane_start;

      /* column/row of the first texel, moved to the texel center */
      float x0 = floor(fmod(plane_offset, width)) * width_i;
      float y0 = floor(plane_offset * width_i) * height_i;
      x0 += width_i * 0.5;
      y0 += height_i * 0.5;

      /* step one texel at a time so all four texels are hit exactly */
      float x1 = x0 + width_i;
      float x2 = x1 + width_i;
      float x3 = x2 + width_i;

      float4 s0 = image.Sample(def_sampler, float2(x0, y0));
      float4 s1 = image.Sample(def_sampler, float2(x1, y0));
      float4 s2 = image.Sample(def_sampler, float2(x2, y0));
      float4 s3 = image.Sample(def_sampler, float2(x3, y0));

      if (byte_offset < u_plane_offset)
          return float4(s0.y, s1.y, s2.y, s3.y);

      if (byte_offset < v_plane_offset)
          return float4(s0.x, s1.x, s2.x, s3.x);

      return float4(s0.z, s1.z, s2.z, s3.z);
  }
  /* Loads channel .r of the texel addressed by a linear offset into image,
   * using int_input_width as the row length. */
  float GetIntOffsetColor(int offset)
  {
      int column = offset % int_input_width;
      int row    = offset / int_input_width;

      return image.Load(int3(column, row, 0)).r;
  }
  /*
   * Unpacking pixel shader shared by the UYVY/YUY2/YVYU techniques.
   *
   * One texel of image supplies two luma values plus one u and one v value;
   * the *_pos arguments are the channel indices of each inside that texel.
   * The luma is picked by whether the output column is odd or even, then the
   * triple is clamped to the color range and multiplied by color_matrix.
   */
  float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
          int y0_pos, int y1_pos) : TARGET
  {
      /* 1.0 on odd output columns, 0.0 on even ones */
      float is_odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));

      /* snap to the packed texel holding this pixel pair, then add the
       * input_width_i_d2 offset */
      float packed_x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
              width_d2_i;
      packed_x += input_width_i_d2;

      float4 texel = image.Sample(def_sampler, float2(packed_x, vert_in.uv.y));

      float luma = is_odd > 0.5 ? texel[y1_pos] : texel[y0_pos];
      float3 yuv = float3(luma, texel[u_pos], texel[v_pos]);

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /*
   * Unpacking pixel shader for the I420_Reverse technique.
   *
   * Fetches one value from each of three planes stored in image (the second
   * and third planes are addressed at half resolution in both directions),
   * clamps the triple to the color range and multiplies it by color_matrix.
   */
  float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int first_plane_index = py * int_width + px;
      /* one entry per 2x2 pixel group */
      int sub_index = (py / 2) * (int_width / 2) + px / 2;

      float c0 = GetIntOffsetColor(first_plane_index);
      float c1 = GetIntOffsetColor(int_u_plane_offset + sub_index);
      float c2 = GetIntOffsetColor(int_v_plane_offset + sub_index);

      float3 yuv = clamp(float3(c0, c1, c2), color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /*
   * Unpacking pixel shader for the I444_Reverse technique.
   *
   * Fetches one value from each of three planes stored in image (all three
   * addressed with the same per-pixel index), clamps the triple to the color
   * range and multiplies it by color_matrix.
   */
  float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      /* same index inside every plane */
      int pixel_index = py * int_width + px;

      float c0 = GetIntOffsetColor(pixel_index);
      float c1 = GetIntOffsetColor(int_u_plane_offset + pixel_index);
      float c2 = GetIntOffsetColor(int_v_plane_offset + pixel_index);

      float3 yuv = clamp(float3(c0, c1, c2), color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /*
   * Unpacking pixel shader for the NV12_Reverse technique.
   *
   * Fetches one value from the first plane and two adjacent values from the
   * region starting at int_u_plane_offset (addressed at half resolution in
   * both directions, two values per entry), clamps the triple to the color
   * range and multiplies it by color_matrix.
   */
  float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int first_plane_index = py * int_width + px;
      /* one entry per 2x2 pixel group, two values per entry */
      int pair_index = (py / 2) * (int_width / 2) + px / 2;
      int pair_start = int_u_plane_offset + pair_index * 2;

      float c0 = GetIntOffsetColor(first_plane_index);
      float c1 = GetIntOffsetColor(pair_start);
      float c2 = GetIntOffsetColor(pair_start + 1);

      float3 yuv = clamp(float3(c0, c1, c2), color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Loads channel .x of the addressed texel, rescales it from the
   * 16/255..235/255 range to 0..1 (saturated) and returns it as opaque
   * grey. */
  float4 PSY800_Limited(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float raw = image.Load(int3(px, py, 0)).x;
      float grey = saturate((raw - (16.0 / 255.0)) * (255.0 / 219.0));

      return float4(grey, grey, grey, 1.0);
  }
  /* Loads channel .x of the addressed texel and returns it unchanged as
   * opaque grey. */
  float4 PSY800_Full(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float4 texel = image.Load(int3(px, py, 0));

      return float4(texel.xxx, 1.0);
  }
  /* Loads the addressed texel and rescales its rgb channels from the
   * 16/255..235/255 range to 0..1 (saturated); alpha is passed through. */
  float4 PSRGB_Limited(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float4 texel = image.Load(int3(px, py, 0));
      float3 expanded = saturate((texel.rgb - (16.0 / 255.0)) * (255.0 / 219.0));

      return float4(expanded, texel.a);
  }
  /* Planar420: single pass, VSDefault + PSPlanar420. */
  technique Planar420
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420(vert_in);
      }
  }
  /* Planar444: single pass, VSDefault + PSPlanar444. */
  technique Planar444
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar444(vert_in);
      }
  }
  /* NV12: single pass, VSDefault + PSNV12. */
  technique NV12
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12(vert_in);
      }
  }
  /* NV12_Y: single pass, VSDefault + PSNV12_Y. */
  technique NV12_Y
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_Y(vert_in);
      }
  }
  /* NV12_UV: single pass, VSDefault + PSNV12_UV. */
  technique NV12_UV
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_UV(vert_in);
      }
  }
  /* UYVY_Reverse: single pass, VSDefault + PSPacked422_Reverse with channel
   * indices u_pos = 2, v_pos = 0, y0_pos = 1, y1_pos = 3. */
  technique UYVY_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
      }
  }
  /* YUY2_Reverse: single pass, VSDefault + PSPacked422_Reverse with channel
   * indices u_pos = 1, v_pos = 3, y0_pos = 2, y1_pos = 0. */
  technique YUY2_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
      }
  }
  /* YVYU_Reverse: single pass, VSDefault + PSPacked422_Reverse with channel
   * indices u_pos = 3, v_pos = 1, y0_pos = 2, y1_pos = 0. */
  technique YVYU_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
      }
  }
  /* I420_Reverse: single pass, VSDefault + PSPlanar420_Reverse. */
  technique I420_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420_Reverse(vert_in);
      }
  }
  /* I444_Reverse: single pass, VSDefault + PSPlanar444_Reverse. */
  technique I444_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar444_Reverse(vert_in);
      }
  }
  /* NV12_Reverse: single pass, VSDefault + PSNV12_Reverse. */
  technique NV12_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_Reverse(vert_in);
      }
  }
  /* Y800_Limited: single pass, VSDefault + PSY800_Limited. */
  technique Y800_Limited
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSY800_Limited(vert_in);
      }
  }
  /* Y800_Full: single pass, VSDefault + PSY800_Full. */
  technique Y800_Full
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSY800_Full(vert_in);
      }
  }
  /* RGB_Limited: single pass, VSDefault + PSRGB_Limited. */
  technique RGB_Limited
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSRGB_Limited(vert_in);
      }
  }