format_conversion.effect 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. /******************************************************************************
  2. Copyright (C) 2014 by Hugh Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. //#define DEBUGGING
  /* Matrix applied to vertex positions in VSDefault. */
  uniform float4x4 ViewProj;

  /* Thresholds compared against the running byte offset in the planar and
   * NV12 pixel shaders to decide which plane a fragment belongs to. */
  uniform float u_plane_offset;
  uniform float v_plane_offset;

  /* Frame dimensions plus companion factors.  The "_i" values are only ever
   * used as multipliers where a division by the matching dimension would be
   * expected (presumably reciprocals), and "_d2" presumably means "halved";
   * the values are supplied from outside this file -- confirm at the caller. */
  uniform float width;
  uniform float height;
  uniform float width_i;
  uniform float height_i;
  uniform float width_d2;
  uniform float height_d2;
  uniform float width_d2_i;
  uniform float height_d2_i;

  /* Same family of values for the input image (naming suggests so; only
   * input_height and input_width_i_d2 are referenced by the shaders below). */
  uniform float input_width;
  uniform float input_height;
  uniform float input_width_i;
  uniform float input_height_i;
  uniform float input_width_i_d2;
  uniform float input_height_i_d2;

  /* Integer values used by the *_Reverse shaders and GetIntOffsetColor for
   * integer offset arithmetic and image.Load addressing. */
  uniform int int_width;
  uniform int int_input_width;
  uniform int int_u_plane_offset;
  uniform int int_v_plane_offset;

  /* Source texture read by every pixel shader below. */
  uniform texture2d image;
  /* Linear-filtered sampler, clamped on both axes; used by the image.Sample
   * calls below. */
  sampler_state def_sampler {
      Filter   = Linear;
      AddressU = Clamp;
      AddressV = Clamp;
  };
  /* Data passed into the vertex shader and on to the pixel shaders:
   * a position and one set of texture coordinates. */
  struct VertInOut {
      float4 pos : POSITION;
      float2 uv  : TEXCOORD0;
  };
  /* Shared vertex shader: multiplies the position (w forced to 1.0) by
   * ViewProj and forwards the texture coordinates untouched. */
  VertInOut VSDefault(VertInOut vert_in)
  {
      VertInOut result;
      float4 homogeneous = float4(vert_in.pos.xyz, 1.0);

      result.pos = mul(homogeneous, ViewProj);
      result.uv = vert_in.uv;
      return result;
  }
  /* Small bias added to computed offsets before they go through fmod, floor
   * or int conversion, to prevent internal GPU precision issues (with fmod
   * in particular). */
  #define PRECISION_OFFSET 0.2
  /* Pixel shader for the NV12 technique.
   *
   * A byte offset is derived from the fragment's texture coordinates (with
   * PRECISION_OFFSET added).  Offsets below u_plane_offset take the first
   * path: four horizontally adjacent texel centers are sampled and their .y
   * channels are returned together as one float4.  Every other offset takes
   * the second path: two samples positioned on texel borders (so the linear
   * filter averages neighbours) contribute their .r and .b channels. */
  float4 PSNV12(VertInOut vert_in) : TARGET
  {
      float row = floor(vert_in.uv.y * input_height);
      float byte_offset =
          floor((row + vert_in.uv.x) * width) * 4.0 + PRECISION_OFFSET;

      if (byte_offset < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif
          /* texel-center coordinates of the first of the four pixels */
          float x0 = floor(fmod(byte_offset, width)) * width_i
                   + width_i * 0.5;
          float y0 = floor(byte_offset * width_i) * height_i
                   + height_i * 0.5;

          /* step one texel to the right for each following pixel */
          float x1 = x0 + width_i;
          float x2 = x1 + width_i;
          float x3 = x2 + width_i;

          float4 s0 = image.Sample(def_sampler, float2(x0, y0));
          float4 s1 = image.Sample(def_sampler, float2(x1, y0));
          float4 s2 = image.Sample(def_sampler, float2(x2, y0));
          float4 s3 = image.Sample(def_sampler, float2(x3, y0));

          /* gather the second channel of each sample */
          return float4(s0.y, s1.y, s2.y, s3.y);
      } else {
  #ifdef DEBUGGING
          return float4(0.5, 0.2, 0.5, 0.2);
  #endif
          float plane_offset = byte_offset - u_plane_offset;
          float two_texels = width_i * 2.0;

          /* sit on the border shared by each group of 4 pixels so that
           * bilinear filtering averages them */
          float cx = floor(fmod(plane_offset, width)) * width_i + width_i;
          float cy = floor(plane_offset * width_i) * height_d2_i + height_i;

          float4 left  = image.Sample(def_sampler, float2(cx, cy));
          float4 right = image.Sample(def_sampler,
                                      float2(cx + two_texels, cy));

          return float4(left.rb, right.rb);
      }
  }
  /* Single-channel output: the .y component of the texel sampled at the
   * fragment's own texture coordinates. */
  float PSNV12_Y(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return texel.y;
  }
  /* Two-channel output: the .x and .z components of the texel sampled at the
   * fragment's own texture coordinates. */
  float2 PSNV12_UV(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return texel.xz;
  }
  /* Pixel shader for the Planar420 technique.
   *
   * Each output texel packs four values.  A byte offset is derived from the
   * fragment's texture coordinates and compared against u_plane_offset and
   * v_plane_offset to pick one of three cases:
   *   - below u_plane_offset:  four adjacent texel centers, channel .y
   *   - below v_plane_offset:  four border-positioned samples, channel .x
   *   - otherwise:             four border-positioned samples, channel .z
   *
   * Fix over the previous version: when a group of four chroma samples
   * spills onto the next chroma line, the fourth coordinate was written to
   * sample_pos[2] a second time, leaving sample_pos[3] uninitialized and
   * discarding the third coordinate.  It is now stored in sample_pos[3]. */
  float4 PSPlanar420(VertInOut vert_in) : TARGET
  {
      float v_mul = floor(vert_in.uv.y * input_height);

      float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      float2 sample_pos[4];

      if (byte_offset < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif

          float lum_u = floor(fmod(byte_offset, width)) * width_i;
          float lum_v = floor(byte_offset * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          lum_u += width_i * 0.5;
          lum_v += height_i * 0.5;

          sample_pos[0] = float2(lum_u, lum_v);
          sample_pos[1] = float2(lum_u += width_i, lum_v);
          sample_pos[2] = float2(lum_u += width_i, lum_v);
          sample_pos[3] = float2(lum_u + width_i, lum_v);

      } else {
  #ifdef DEBUGGING
          return ((byte_offset < v_plane_offset) ?
              float4(0.5, 0.5, 0.5, 0.5) :
              float4(0.2, 0.2, 0.2, 0.2));
  #endif

          /* offset relative to the start of whichever plane we are in */
          float new_offset = byte_offset -
              ((byte_offset < v_plane_offset) ?
              u_plane_offset : v_plane_offset);

          float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
          float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
          float width_i2 = width_i * 2.0;

          /* move to the borders of each set of 4 pixels to force it
           * to do bilinear averaging */
          ch_u += width_i;
          ch_v += height_i;

          /* set up coordinates for next chroma line, in case
           * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
           * between the current and the next chroma line; do note that the
           * next chroma line is two source lines below the current source
           * line */
          float ch_u_n = 0. + width_i;
          float ch_v_n = ch_v + height_i * 3;

          sample_pos[0] = float2(ch_u, ch_v);
          sample_pos[1] = float2(ch_u += width_i2, ch_v);

          ch_u += width_i2;

          /* check if ch_u overflowed the current source and chroma line */
          if (ch_u > 1.0) {
              /* remaining two samples come from the start of the next
               * chroma line */
              sample_pos[2] = float2(ch_u_n, ch_v_n);
              sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
          } else {
              sample_pos[2] = float2(ch_u, ch_v);
              sample_pos[3] = float2(ch_u + width_i2, ch_v);
          }
      }

      /* one sample per matrix row; after the transpose each row holds a
       * single channel gathered across the four samples */
      float4x4 out_val = float4x4(
          image.Sample(def_sampler, sample_pos[0]),
          image.Sample(def_sampler, sample_pos[1]),
          image.Sample(def_sampler, sample_pos[2]),
          image.Sample(def_sampler, sample_pos[3])
      );

      out_val = transpose(out_val);

      if (byte_offset < u_plane_offset)
          return out_val[1];
      else if (byte_offset < v_plane_offset)
          return out_val[0];
      else
          return out_val[2];
  }
  /* Pixel shader for the Planar444 technique.
   *
   * Derives a byte offset from the fragment's texture coordinates, rebases
   * it onto the start of the plane it falls in, samples four horizontally
   * adjacent texel centers and returns one channel gathered from all four:
   * .y below u_plane_offset, .x below v_plane_offset, .z otherwise. */
  float4 PSPlanar444(VertInOut vert_in) : TARGET
  {
      float row = floor(vert_in.uv.y * input_height);
      float byte_offset =
          floor((row + vert_in.uv.x) * width) * 4.0 + PRECISION_OFFSET;

      /* start offset of the plane this fragment lands in (0 for the first) */
      float plane_start = 0.0;
      if (byte_offset >= v_plane_offset)
          plane_start = v_plane_offset;
      else if (byte_offset >= u_plane_offset)
          plane_start = u_plane_offset;

      float plane_offset = byte_offset - plane_start;

      /* texel-center coordinates of the first of the four pixels */
      float x0 = floor(fmod(plane_offset, width)) * width_i + width_i * 0.5;
      float y0 = floor(plane_offset * width_i) * height_i + height_i * 0.5;

      /* step one texel to the right for each following pixel */
      float x1 = x0 + width_i;
      float x2 = x1 + width_i;
      float x3 = x2 + width_i;

      float4 s0 = image.Sample(def_sampler, float2(x0, y0));
      float4 s1 = image.Sample(def_sampler, float2(x1, y0));
      float4 s2 = image.Sample(def_sampler, float2(x2, y0));
      float4 s3 = image.Sample(def_sampler, float2(x3, y0));

      if (byte_offset < u_plane_offset)
          return float4(s0.y, s1.y, s2.y, s3.y);

      if (byte_offset < v_plane_offset)
          return float4(s0.x, s1.x, s2.x, s3.x);

      return float4(s0.z, s1.z, s2.z, s3.z);
  }
  /* Reads the .r channel of the texel addressed by a linear offset: the
   * offset is split into a column (offset % int_input_width) and a row
   * (offset / int_input_width) and fetched with image.Load. */
  float GetIntOffsetColor(int offset)
  {
      int column = offset % int_input_width;
      int row = offset / int_input_width;

      return image.Load(int3(column, row, 0)).r;
  }
  /* Shared pixel shader for the packed 4:2:2 "_Reverse" techniques.
   *
   * u_pos, v_pos, y0_pos and y1_pos are component indices into the sampled
   * texel.  The first output channel is texel[y1_pos] for odd output columns
   * and texel[y0_pos] for even ones; the second and third channels are
   * texel[u_pos] and texel[v_pos]; the fourth is 1.0. */
  float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
          int y0_pos, int y1_pos) : TARGET
  {
      /* 1.0 when the output column index is odd, 0.0 when even */
      float column = width * vert_in.uv.x + PRECISION_OFFSET;
      float odd = floor(fmod(column, 2.0));

      /* snap x using the halved width, then add input_width_i_d2 */
      float sample_x =
          floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) * width_d2_i;
      sample_x += input_width_i_d2;

      float4 texel = image.Sample(def_sampler,
                                  float2(sample_x, vert_in.uv.y));

      float luma;
      if (odd > 0.5)
          luma = texel[y1_pos];
      else
          luma = texel[y0_pos];

      return float4(luma, texel[u_pos], texel[v_pos], 1.0);
  }
  /* Pixel shader for the I420_Reverse technique.
   *
   * Converts the fragment's texture coordinates to integer pixel coordinates,
   * then fetches three values by linear offset: one at y * int_width + x,
   * and one from each of the two plane offsets (int_u_plane_offset and
   * int_v_plane_offset) at the half-resolution position.  Alpha is 1.0. */
  float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      /* position inside a plane with halved width and height */
      int half_res_offset = (py / 2) * (int_width / 2) + px / 2;

      float first  = GetIntOffsetColor(py * int_width + px);
      float second = GetIntOffsetColor(int_u_plane_offset + half_res_offset);
      float third  = GetIntOffsetColor(int_v_plane_offset + half_res_offset);

      return float4(first, second, third, 1.0);
  }
  /* Pixel shader for the NV12_Reverse technique.
   *
   * Converts the fragment's texture coordinates to integer pixel coordinates,
   * then fetches three values by linear offset: one at y * int_width + x,
   * and two consecutive values starting at int_u_plane_offset plus twice the
   * half-resolution position.  Alpha is 1.0. */
  float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      /* two values per half-resolution position, stored back to back */
      int half_res_offset = (py / 2) * (int_width / 2) + px / 2;
      int pair_start = int_u_plane_offset + half_res_offset * 2;

      float first  = GetIntOffsetColor(py * int_width + px);
      float second = GetIntOffsetColor(pair_start);
      float third  = GetIntOffsetColor(pair_start + 1);

      return float4(first, second, third, 1.0);
  }
  /* Planar420: VSDefault + PSPlanar420. */
  technique Planar420
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420(vert_in);
      }
  }
  /* Planar444: VSDefault + PSPlanar444. */
  technique Planar444
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar444(vert_in);
      }
  }
  /* NV12: VSDefault + PSNV12 (both cases handled in one pass). */
  technique NV12
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12(vert_in);
      }
  }
  /* NV12_Y: VSDefault + PSNV12_Y (single-channel output). */
  technique NV12_Y
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_Y(vert_in);
      }
  }
  /* NV12_UV: VSDefault + PSNV12_UV (two-channel output). */
  technique NV12_UV
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_UV(vert_in);
      }
  }
  /* UYVY_Reverse: PSPacked422_Reverse with component indices
   * u = 2, v = 0, y0 = 1, y1 = 3. */
  technique UYVY_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
      }
  }
  /* YUY2_Reverse: PSPacked422_Reverse with component indices
   * u = 1, v = 3, y0 = 2, y1 = 0. */
  technique YUY2_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
      }
  }
  /* YVYU_Reverse: PSPacked422_Reverse with component indices
   * u = 3, v = 1, y0 = 2, y1 = 0. */
  technique YVYU_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
      }
  }
  /* I420_Reverse: VSDefault + PSPlanar420_Reverse. */
  technique I420_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420_Reverse(vert_in);
      }
  }
  /* NV12_Reverse: VSDefault + PSNV12_Reverse. */
  technique NV12_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_Reverse(vert_in);
      }
  }