format_conversion.effect 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /******************************************************************************
  2. Copyright (C) 2014 by Hugh Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  /* Uncomment to make PSNV12 and PSPlanar420 return fixed per-plane colours
   * instead of sampled data. */
  //#define DEBUGGING

  /* Matrix applied to the vertex position in VSDefault. */
  uniform float4x4 ViewProj;

  /* Thresholds compared against the computed byte offset to decide which
   * plane (luma, U, V) an output texel belongs to. */
  uniform float u_plane_offset;
  uniform float v_plane_offset;

  /* Dimensions used for texel addressing.  The "_i" variants are used as
   * multipliers where a division by the matching value would be expected, and
   * the "_d2" variants are used for half-resolution chroma addressing --
   * presumably reciprocals and halves; confirm against the host code that
   * sets them. */
  uniform float width;
  uniform float height;
  uniform float width_i;
  uniform float height_i;
  uniform float width_d2;
  uniform float height_d2;
  uniform float width_d2_i;
  uniform float height_d2_i;

  /* Same naming scheme, prefixed with "input_". */
  uniform float input_width;
  uniform float input_height;
  uniform float input_width_i;
  uniform float input_height_i;
  uniform float input_width_i_d2;
  uniform float input_height_i_d2;

  /* Integer values used by the *_Reverse shaders, which address texels with
   * integer arithmetic. */
  uniform int int_width;
  uniform int int_input_width;
  uniform int int_u_plane_offset;
  uniform int int_v_plane_offset;

  /* Texture read by every pixel shader in this file. */
  uniform texture2d image;
  /* Sampler shared by all Sample() calls: linear filtering, with coordinates
   * clamped on both axes. */
  sampler_state def_sampler {
      Filter   = Linear;
      AddressU = Clamp;
      AddressV = Clamp;
  };
  /* Vertex data passed into the vertex shader and on to the pixel shaders:
   * a position and one texture coordinate. */
  struct VertInOut {
      float4 pos : POSITION;
      float2 uv  : TEXCOORD0;
  };
  /*
   * Vertex shader used by every technique in this file.
   *
   * Transforms the vertex position by ViewProj (forcing w to 1.0) and passes
   * the texture coordinate through unchanged.
   */
  VertInOut VSDefault(VertInOut vert_in)
  {
      VertInOut result;
      float4 position = float4(vert_in.pos.xyz, 1.0);

      result.uv  = vert_in.uv;
      result.pos = mul(position, ViewProj);
      return result;
  }
  /* Small bias added before floor()/fmod()/int() so that internal GPU
   * precision issues (with fmod in particular) do not push a value that
   * should sit exactly on a whole number just below it. */
#define PRECISION_OFFSET 0.2
  /*
   * Pixel shader for the NV12 technique.
   *
   * Derives a linear offset (four units per output texel) from the texture
   * coordinate.  Offsets below u_plane_offset take the luma path: four
   * horizontally consecutive texels are sampled at their centres and their
   * .g channels are packed into the result.  All other offsets take the
   * chroma path: two positions, each placed on a texel border so the linear
   * sampler averages the neighbours, are sampled and their .rb pairs are
   * packed into the result.
   */
  float4 PSNV12(VertInOut vert_in) : TARGET
  {
      float row = floor(vert_in.uv.y * input_height);
      float byte_pos = floor((row + vert_in.uv.x) * width) * 4.0
                       + PRECISION_OFFSET;

      if (byte_pos < u_plane_offset) {
#ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
#endif
          /* centre of the first texel, then step one texel at a time */
          float u0 = floor(fmod(byte_pos, width)) * width_i + width_i * 0.5;
          float v  = floor(byte_pos * width_i) * height_i + height_i * 0.5;
          float u1 = u0 + width_i;
          float u2 = u1 + width_i;
          float u3 = u2 + width_i;

          return float4(
              image.Sample(def_sampler, float2(u0, v)).g,
              image.Sample(def_sampler, float2(u1, v)).g,
              image.Sample(def_sampler, float2(u2, v)).g,
              image.Sample(def_sampler, float2(u3, v)).g
          );
      }

#ifdef DEBUGGING
      return float4(0.5, 0.2, 0.5, 0.2);
#endif
      float plane_pos = byte_pos - u_plane_offset;

      /* sit on the border of each group of texels so the linear filter
       * averages them */
      float chroma_u = floor(fmod(plane_pos, width)) * width_i + width_i;
      float chroma_v = floor(plane_pos * width_i) * height_d2_i + height_i;

      float2 first_tap  = float2(chroma_u, chroma_v);
      float2 second_tap = float2(chroma_u + width_i * 2.0, chroma_v);

      return float4(
          image.Sample(def_sampler, first_tap).rb,
          image.Sample(def_sampler, second_tap).rb
      );
  }
  /*
   * Pixel shader for the Planar420 technique.
   *
   * Derives a linear offset (four units per output texel) from the texture
   * coordinate and classifies it against u_plane_offset / v_plane_offset.
   * Four horizontally spaced positions are sampled in every case:
   *   - below u_plane_offset: texel centres, one texel apart, .g returned;
   *   - below v_plane_offset: texel borders, two texels apart, .r returned;
   *   - otherwise:            texel borders, two texels apart, .b returned.
   */
  float4 PSPlanar420(VertInOut vert_in) : TARGET
  {
      float row = floor(vert_in.uv.y * input_height);
      float byte_pos = floor((row + vert_in.uv.x) * width) * 4.0
                       + PRECISION_OFFSET;

      float tap_u;
      float tap_v;
      float tap_step;

      if (byte_pos < u_plane_offset) {
#ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
#endif
          /* start at the centre of the first texel */
          tap_u = floor(fmod(byte_pos, width)) * width_i + width_i * 0.5;
          tap_v = floor(byte_pos * width_i) * height_i + height_i * 0.5;
          tap_step = width_i;
      } else {
#ifdef DEBUGGING
          return ((byte_pos < v_plane_offset) ?
                  float4(0.5, 0.5, 0.5, 0.5) :
                  float4(0.2, 0.2, 0.2, 0.2));
#endif
          float plane_base = (byte_pos < v_plane_offset) ?
                             u_plane_offset : v_plane_offset;
          float plane_pos = byte_pos - plane_base;

          /* start on a texel border so the linear filter averages the
           * surrounding texels */
          tap_u = floor(fmod(plane_pos, width_d2)) * width_d2_i + width_i;
          tap_v = floor(plane_pos * width_d2_i) * height_d2_i + height_i;
          tap_step = width_i * 2.0;
      }

      float4 s0 = image.Sample(def_sampler, float2(tap_u, tap_v));
      tap_u += tap_step;
      float4 s1 = image.Sample(def_sampler, float2(tap_u, tap_v));
      tap_u += tap_step;
      float4 s2 = image.Sample(def_sampler, float2(tap_u, tap_v));
      tap_u += tap_step;
      float4 s3 = image.Sample(def_sampler, float2(tap_u, tap_v));

      /* gather one channel from the four samples, chosen by plane */
      if (byte_pos < u_plane_offset)
          return float4(s0.g, s1.g, s2.g, s3.g);
      if (byte_pos < v_plane_offset)
          return float4(s0.r, s1.r, s2.r, s3.r);
      return float4(s0.b, s1.b, s2.b, s3.b);
  }
  /*
   * Pixel shader for the Planar444 technique.
   *
   * Derives a linear offset (four units per output texel) from the texture
   * coordinate, rebases it to the start of the plane it falls in, and samples
   * four horizontally consecutive texels at their centres.  The channel
   * returned from those four samples depends on the plane: .g below
   * u_plane_offset, .r below v_plane_offset, .b otherwise.
   */
  float4 PSPlanar444(VertInOut vert_in) : TARGET
  {
      float row = floor(vert_in.uv.y * input_height);
      float byte_pos = floor((row + vert_in.uv.x) * width) * 4.0
                       + PRECISION_OFFSET;

      /* offset of the plane containing byte_pos (0 for the first plane) */
      float plane_base = 0.0;
      if (byte_pos >= v_plane_offset)
          plane_base = v_plane_offset;
      else if (byte_pos >= u_plane_offset)
          plane_base = u_plane_offset;

      float plane_pos = byte_pos - plane_base;

      /* centre of the first texel, then step one texel at a time */
      float u0 = floor(fmod(plane_pos, width)) * width_i + width_i * 0.5;
      float v  = floor(plane_pos * width_i) * height_i + height_i * 0.5;
      float u1 = u0 + width_i;
      float u2 = u1 + width_i;
      float u3 = u2 + width_i;

      float4 s0 = image.Sample(def_sampler, float2(u0, v));
      float4 s1 = image.Sample(def_sampler, float2(u1, v));
      float4 s2 = image.Sample(def_sampler, float2(u2, v));
      float4 s3 = image.Sample(def_sampler, float2(u3, v));

      if (byte_pos < u_plane_offset)
          return float4(s0.g, s1.g, s2.g, s3.g);
      if (byte_pos < v_plane_offset)
          return float4(s0.r, s1.r, s2.r, s3.r);
      return float4(s0.b, s1.b, s2.b, s3.b);
  }
  /*
   * Reads the .r channel of the texel found at a linear offset into `image`,
   * treating the texture as rows of int_input_width texels.
   */
  float GetIntOffsetColor(int offset)
  {
      int column = offset % int_input_width;
      int row    = offset / int_input_width;

      return image.Load(int2(column, row)).r;
  }
  /*
   * Pixel shader shared by the UYVY/YUY2/YVYU "_Reverse" techniques.
   *
   * Samples one texel of `image` per pair of output columns and rearranges
   * its components.  u_pos, v_pos, y0_pos and y1_pos are component indices
   * into the sampled texel; y0_pos is used for even output columns and
   * y1_pos for odd ones.  The result is (y, u, v, 1.0).
   */
  float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
          int y0_pos, int y1_pos) : TARGET
  {
      /* 1.0 on odd output columns, 0.0 on even ones */
      float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));

      /* snap to the start of the column pair, then apply the
       * input_width_i_d2 offset */
      float sample_u = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
                       width_d2_i;
      sample_u += input_width_i_d2;

      float4 texel = image.Sample(def_sampler, float2(sample_u, vert_in.uv.y));
      float luma = (odd > 0.5) ? texel[y1_pos] : texel[y0_pos];

      return float4(luma, texel[u_pos], texel[v_pos], 1.0);
  }
  /*
   * Pixel shader for the I420_Reverse technique.
   *
   * Converts the texture coordinate to integer pixel coordinates, then reads
   * three values through GetIntOffsetColor: one at the full-resolution offset
   * and two at the half-resolution offset, based at int_u_plane_offset and
   * int_v_plane_offset respectively.  Returns them as (first, second, third,
   * 1.0).
   */
  float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width  + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int luma_index   = py * int_width + px;
      /* one chroma entry per 2x2 block of pixels */
      int chroma_index = (py / 2) * (int_width / 2) + px / 2;

      int u_index = int_u_plane_offset + chroma_index;
      int v_index = int_v_plane_offset + chroma_index;

      return float4(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(u_index),
          GetIntOffsetColor(v_index),
          1.0
      );
  }
  /*
   * Pixel shader for the NV12_Reverse technique.
   *
   * Converts the texture coordinate to integer pixel coordinates, then reads
   * three values through GetIntOffsetColor: one at the full-resolution
   * offset, and two adjacent ones starting at int_u_plane_offset plus twice
   * the half-resolution offset.  Returns them as (first, second, third, 1.0).
   */
  float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width  + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int luma_index   = py * int_width + px;
      /* one chroma pair per 2x2 block of pixels */
      int chroma_index = (py / 2) * (int_width / 2) + px / 2;

      /* the pair occupies two consecutive entries */
      int pair_index = int_u_plane_offset + chroma_index * 2;

      return float4(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(pair_index),
          GetIntOffsetColor(pair_index + 1),
          1.0
      );
  }
  /* Single pass: VSDefault + PSPlanar420. */
  technique Planar420
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420(vert_in);
      }
  }
  /* Single pass: VSDefault + PSPlanar444. */
  technique Planar444
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar444(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12. */
  technique NV12
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12(vert_in);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with
   * u_pos = 2, v_pos = 0, y0_pos = 1, y1_pos = 3. */
  technique UYVY_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with
   * u_pos = 1, v_pos = 3, y0_pos = 2, y1_pos = 0. */
  technique YUY2_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with
   * u_pos = 3, v_pos = 1, y0_pos = 2, y1_pos = 0. */
  technique YVYU_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
      }
  }
  /* Single pass: VSDefault + PSPlanar420_Reverse. */
  technique I420_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSPlanar420_Reverse(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12_Reverse. */
  technique NV12_Reverse
  {
      pass
      {
          vertex_shader = VSDefault(vert_in);
          pixel_shader  = PSNV12_Reverse(vert_in);
      }
  }