format_conversion.effect 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. /******************************************************************************
  2. Copyright (C) 2014 by Hugh Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. //#define DEBUGGING
  /* Plane boundaries: the packing shaders compare their computed byte offset
   * against these to decide which plane (Y, U or V) a fragment belongs to. */
  uniform float u_plane_offset;
  uniform float v_plane_offset;

  /* Frame dimensions as floats.  The "_i" variants are multiplied wherever a
   * division by the matching dimension would be expected, and "_d2" variants
   * are used for half-resolution chroma addressing.
   * NOTE(review): presumably "_i" = reciprocal and "_d2" = divided by two,
   * both supplied by the host application -- confirm against the caller. */
  uniform float width;
  uniform float height;
  uniform float width_i;
  uniform float height_i;
  uniform float width_d2;
  uniform float height_d2;
  uniform float width_d2_i;
  uniform float height_d2_i;

  /* Dimensions of the input texture (same naming scheme as above). */
  uniform float input_width;
  uniform float input_height;
  uniform float input_width_i;
  uniform float input_height_i;
  uniform float input_width_i_d2;
  uniform float input_height_i_d2;

  /* Integer copies used for exact texel addressing in the *_Reverse shaders. */
  uniform int int_width;
  uniform int int_input_width;
  uniform int int_u_plane_offset;
  uniform int int_v_plane_offset;

  /* YUV -> RGB: the *_Reverse shaders clamp YUV to [color_range_min,
   * color_range_max] and then multiply by color_matrix. */
  uniform float4x4 color_matrix;
  uniform float3 color_range_min = {0.0, 0.0, 0.0};
  uniform float3 color_range_max = {1.0, 1.0, 1.0};

  /* RGB -> YUV: each component is dot(vec.xyz, rgb) + vec.w. */
  uniform float4 color_vec_y;
  uniform float4 color_vec_u;
  uniform float4 color_vec_v;

  /* Source texture read by every pixel shader in this file. */
  uniform texture2d image;
  41. uniform texture2d image;
  /* Default sampler: linear filtering with both axes clamped. */
  sampler_state def_sampler {
      Filter   = Linear;
      AddressU = Clamp;
      AddressV = Clamp;
  };
  /* Position-only vertex output; also the pixel-shader input of PSNV12_Y,
   * which reads pos.xy as texel coordinates. */
  struct FragPos {
      float4 pos : POSITION;
  };
  /* Vertex output carrying a texture coordinate together with the position
   * (produced by VSPosTex). */
  struct VertTexPos {
      float2 uv  : TEXCOORD0;
      float4 pos : POSITION;
  };
  /* Pixel-shader input that only needs the interpolated texture coordinate. */
  struct FragTex {
      float2 uv : TEXCOORD0;
  };
  /* Builds a vertex position purely from the vertex id.
   *
   * ids 0, 1, 2 map to (-1,-1), (-1,3), (3,-1), i.e. one oversized triangle
   * that covers the whole [-1,1] clip-space square.
   * NOTE(review): assumes the caller draws exactly three vertices -- confirm. */
  FragPos VSPos(uint id : VERTEXID)
  {
      float hi_part = float(id >> 1);       /* drives X */
      float lo_part = float(id & uint(1));  /* drives Y */

      FragPos result;
      result.pos = float4(hi_part * 4.0 - 1.0,
                          lo_part * 4.0 - 1.0,
                          0.0, 1.0);
      return result;
  }
  /* Same id-derived position as VSPos, plus a matching texture coordinate.
   *
   * U runs 0..2 with the X part of the id.  V runs 0..2 with the Y part when
   * obs_glsl_compile is set, and is flipped (1 - ...) otherwise. */
  VertTexPos VSPosTex(uint id : VERTEXID)
  {
      float hi_part = float(id >> 1);
      float lo_part = float(id & uint(1));

      float tex_v;
      if (obs_glsl_compile)
          tex_v = lo_part * 2.0;
      else
          tex_v = 1.0 - lo_part * 2.0;

      VertTexPos result;
      result.uv  = float2(hi_part * 2.0, tex_v);
      result.pos = float4(hi_part * 4.0 - 1.0,
                          lo_part * 4.0 - 1.0,
                          0.0, 1.0);
      return result;
  }
  /* Small bias added to coordinates/offsets before they are truncated or fed
   * to fmod; used to prevent internal GPU precision issues, with fmod in
   * particular. */
  #define PRECISION_OFFSET 0.2
  /* Packs RGB input into NV12-style output, four values per output texel.
   *
   * The fragment's position is turned into a byte offset.  Offsets below
   * u_plane_offset produce four consecutive Y samples; offsets at or above
   * it produce two interleaved (U, V) pairs taken from bilinearly averaged
   * source pixels.  With DEBUGGING defined, each branch returns a fixed
   * colour instead. */
  float4 PSNV12(FragTex frag_in) : TARGET
  {
      float out_row = floor(frag_in.uv.y * input_height);
      float byte_offset = floor((out_row + frag_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      if (byte_offset < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif
          /* Luma: locate the first of four horizontally adjacent texels. */
          float tex_x = floor(fmod(byte_offset, width)) * width_i;
          float tex_y = floor(byte_offset * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          tex_x += width_i * 0.5;
          tex_y += height_i * 0.5;

          float x0 = tex_x;
          float x1 = x0 + width_i;
          float x2 = x1 + width_i;
          float x3 = x2 + width_i;

          float3 px0 = image.Sample(def_sampler, float2(x0, tex_y)).rgb;
          float3 px1 = image.Sample(def_sampler, float2(x1, tex_y)).rgb;
          float3 px2 = image.Sample(def_sampler, float2(x2, tex_y)).rgb;
          float3 px3 = image.Sample(def_sampler, float2(x3, tex_y)).rgb;

          float4 luma = float4(
              dot(color_vec_y.xyz, px0) + color_vec_y.w,
              dot(color_vec_y.xyz, px1) + color_vec_y.w,
              dot(color_vec_y.xyz, px2) + color_vec_y.w,
              dot(color_vec_y.xyz, px3) + color_vec_y.w
          );
          return luma;
      } else {
  #ifdef DEBUGGING
          return float4(0.5, 0.2, 0.5, 0.2);
  #endif
          /* Chroma: offset is measured from the start of the UV plane. */
          float plane_offset = byte_offset - u_plane_offset;
          float tex_x = floor(fmod(plane_offset, width)) * width_i;
          float tex_y = floor(plane_offset * width_i) * height_d2_i;
          float two_texels = width_i * 2.0;

          /* move to the borders of each set of 4 pixels to force it
           * to do bilinear averaging */
          tex_x += width_i;
          tex_y += height_i;

          float3 px0 = image.Sample(def_sampler,
                                    float2(tex_x, tex_y)).rgb;
          float3 px1 = image.Sample(def_sampler,
                                    float2(tex_x + two_texels, tex_y)).rgb;

          return float4(
              dot(color_vec_u.xyz, px0) + color_vec_u.w,
              dot(color_vec_v.xyz, px0) + color_vec_v.w,
              dot(color_vec_u.xyz, px1) + color_vec_u.w,
              dot(color_vec_v.xyz, px1) + color_vec_v.w
          );
      }
  }
  /* Outputs the Y component of the texel addressed directly by the
   * fragment position (integer Load, no filtering). */
  float PSNV12_Y(FragPos frag_in) : TARGET
  {
      float3 src = image.Load(int3(frag_in.pos.xy, 0)).rgb;
      return dot(color_vec_y.xyz, src) + color_vec_y.w;
  }
  /* Outputs the (U, V) pair for the source colour sampled at frag_in.uv
   * through def_sampler. */
  float2 PSNV12_UV(FragTex frag_in) : TARGET
  {
      float3 src = image.Sample(def_sampler, frag_in.uv).rgb;
      return float2(dot(color_vec_u.xyz, src) + color_vec_u.w,
                    dot(color_vec_v.xyz, src) + color_vec_v.w);
  }
  /* Packs RGB input into planar 4:2:0 output, four values per output texel.
   *
   * The fragment's position is turned into a byte offset, which selects the
   * plane: below u_plane_offset -> Y, below v_plane_offset -> U, else V.
   * Four source positions are sampled and converted with the matching
   * color_vec_*.  With DEBUGGING defined, each branch returns a fixed colour
   * instead.
   *
   * Fix: when the four chroma samples straddle two chroma rows, the
   * original assigned sample_pos2 twice and left sample_pos3 uninitialized;
   * the second assignment now correctly targets sample_pos3. */
  float4 PSPlanar420(FragTex frag_in) : TARGET
  {
      float v_mul = floor(frag_in.uv.y * input_height);
      float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;

      if (byte_offset < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif
          float lum_u = floor(fmod(byte_offset, width)) * width_i;
          float lum_v = floor(byte_offset * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          lum_u += width_i * 0.5;
          lum_v += height_i * 0.5;

          sample_pos0 = float2(lum_u, lum_v);
          sample_pos1 = float2(lum_u += width_i, lum_v);
          sample_pos2 = float2(lum_u += width_i, lum_v);
          sample_pos3 = float2(lum_u + width_i, lum_v);
      } else {
  #ifdef DEBUGGING
          return ((byte_offset < v_plane_offset) ?
                  float4(0.5, 0.5, 0.5, 0.5) :
                  float4(0.2, 0.2, 0.2, 0.2));
  #endif
          /* Offset relative to the start of whichever chroma plane
           * this fragment falls in. */
          float new_offset = byte_offset -
                  ((byte_offset < v_plane_offset) ?
                   u_plane_offset : v_plane_offset);

          float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
          float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
          float width_i2 = width_i * 2.0;

          /* move to the borders of each set of 4 pixels to force it
           * to do bilinear averaging */
          ch_u += width_i;
          ch_v += height_i;

          /* set up coordinates for next chroma line, in case
           * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is
           * split between the current and the next chroma line; do note
           * that the next chroma line is two source lines below the
           * current source line */
          float ch_u_n = width_i;
          float ch_v_n = ch_v + height_i * 3.0;

          sample_pos0 = float2(ch_u, ch_v);
          sample_pos1 = float2(ch_u += width_i2, ch_v);

          ch_u += width_i2;

          /* check if ch_u overflowed the current source and chroma line */
          if (ch_u > 1.0) {
              /* last two samples come from the start of the next
               * chroma line */
              sample_pos2 = float2(ch_u_n, ch_v_n);
              sample_pos3 = float2(ch_u_n + width_i2, ch_v_n);
          } else {
              sample_pos2 = float2(ch_u, ch_v);
              sample_pos3 = float2(ch_u + width_i2, ch_v);
          }
      }

      float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
      float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
      float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
      float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;

      /* Pick the conversion vector for the plane being written. */
      float4 color_vec;
      if (byte_offset < u_plane_offset)
          color_vec = color_vec_y;
      else if (byte_offset < v_plane_offset)
          color_vec = color_vec_u;
      else
          color_vec = color_vec_v;

      return float4(
          dot(color_vec.xyz, rgb0) + color_vec.w,
          dot(color_vec.xyz, rgb1) + color_vec.w,
          dot(color_vec.xyz, rgb2) + color_vec.w,
          dot(color_vec.xyz, rgb3) + color_vec.w
      );
  }
  /* Packs RGB input into planar 4:4:4 output, four values per output texel.
   *
   * All three planes are full resolution, so the same texel addressing is
   * used for each; only the plane base subtracted from the byte offset and
   * the conversion vector differ. */
  float4 PSPlanar444(FragTex frag_in) : TARGET
  {
      float out_row = floor(frag_in.uv.y * input_height);
      float byte_offset = floor((out_row + frag_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      /* Make the offset relative to the start of its plane. */
      float plane_offset = byte_offset;
      if (byte_offset >= v_plane_offset)
          plane_offset -= v_plane_offset;
      else if (byte_offset >= u_plane_offset)
          plane_offset -= u_plane_offset;

      float tex_x = floor(fmod(plane_offset, width)) * width_i;
      float tex_y = floor(plane_offset * width_i) * height_i;

      /* move to texel centers to sample the 4 pixels properly */
      tex_x += width_i * 0.5;
      tex_y += height_i * 0.5;

      float x0 = tex_x;
      float x1 = x0 + width_i;
      float x2 = x1 + width_i;
      float x3 = x2 + width_i;

      float3 px0 = image.Sample(def_sampler, float2(x0, tex_y)).rgb;
      float3 px1 = image.Sample(def_sampler, float2(x1, tex_y)).rgb;
      float3 px2 = image.Sample(def_sampler, float2(x2, tex_y)).rgb;
      float3 px3 = image.Sample(def_sampler, float2(x3, tex_y)).rgb;

      /* Conversion vector for the plane being written (V unless the
       * offset falls in an earlier plane). */
      float4 coeffs = color_vec_v;
      if (byte_offset < u_plane_offset)
          coeffs = color_vec_y;
      else if (byte_offset < v_plane_offset)
          coeffs = color_vec_u;

      return float4(
          dot(coeffs.xyz, px0) + coeffs.w,
          dot(coeffs.xyz, px1) + coeffs.w,
          dot(coeffs.xyz, px2) + coeffs.w,
          dot(coeffs.xyz, px3) + coeffs.w
      );
  }
  /* Reads the red channel of the texel found at a linear offset into the
   * input texture, treating it as rows of int_input_width texels. */
  float GetIntOffsetColor(int offset)
  {
      int tex_x = offset % int_input_width;
      int tex_y = offset / int_input_width;
      return image.Load(int3(tex_x, tex_y, 0)).r;
  }
  /* Converts a packed 4:2:2 texel to RGB.
   *
   * Each source texel holds two luma values plus one U and one V; the
   * *_pos arguments give the channel index of each inside the texel.  Even
   * output columns use y0_pos, odd columns use y1_pos.  The YUV triple is
   * clamped to the configured range and multiplied by color_matrix. */
  float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
                             int y0_pos, int y1_pos) : TARGET
  {
      /* 1.0 on odd output columns, 0.0 on even ones. */
      float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));

      /* Snap X to the packed texel containing this column, then add
       * input_width_i_d2. */
      float tex_x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
                    width_d2_i;
      tex_x += input_width_i_d2;

      float4 texel = image.Sample(def_sampler, float2(tex_x, frag_in.uv.y));

      int luma_pos = (odd > 0.5) ? y1_pos : y0_pos;
      float3 yuv = float3(texel[luma_pos], texel[u_pos], texel[v_pos]);

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts planar 4:2:0 data to RGB.
   *
   * Luma is read at full resolution; chroma is read from half-width,
   * half-height planes starting at int_u_plane_offset / int_v_plane_offset. */
  float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int chroma_index = (py / 2) * (int_width / 2) + px / 2;

      float3 yuv = float3(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(int_u_plane_offset + chroma_index),
          GetIntOffsetColor(int_v_plane_offset + chroma_index)
      );

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts planar 4:2:2 data to RGB.
   *
   * Luma is read at full resolution; chroma planes are half width but full
   * height, starting at int_u_plane_offset / int_v_plane_offset. */
  float4 PSPlanar422_Reverse(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int chroma_index = py * (int_width / 2) + px / 2;

      float3 yuv = float3(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(int_u_plane_offset + chroma_index),
          GetIntOffsetColor(int_v_plane_offset + chroma_index)
      );

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts planar 4:4:4 data to RGB.
   *
   * All three planes are full resolution, so the same linear index is used
   * for luma and (after adding the plane offsets) for both chroma planes. */
  float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int chroma_index = py * int_width + px;

      float3 yuv = float3(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(int_u_plane_offset + chroma_index),
          GetIntOffsetColor(int_v_plane_offset + chroma_index)
      );

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts NV12 data to RGB.
   *
   * Luma is read at full resolution.  Chroma is stored as interleaved
   * pairs after int_u_plane_offset: the first value of the pair is used as
   * U and the following one as V. */
  float4 PSNV12_Reverse(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int pair_index = (py / 2) * (int_width / 2) + px / 2;
      int u_index = int_u_plane_offset + pair_index * 2;

      float3 yuv = float3(
          GetIntOffsetColor(luma_index),
          GetIntOffsetColor(u_index),
          GetIntOffsetColor(u_index + 1)
      );

      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Expands a single-channel texel to opaque grey, rescaling it with
   * (v - 16/255) * 255/219 and saturating the result. */
  float4 PSY800_Limited(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      float raw = image.Load(int3(px, py, 0)).x;
      float grey = saturate((raw - (16.0 / 255.0)) * (255.0 / 219.0));

      return float4(grey, grey, grey, 1.0);
  }
  /* Expands a single-channel texel to opaque grey without any range
   * rescaling. */
  float4 PSY800_Full(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      float3 grey = image.Load(int3(px, py, 0)).xxx;
      return float4(grey, 1.0);
  }
  /* Loads an RGBA texel and rescales its colour channels with
   * (v - 16/255) * 255/219, saturated; alpha is passed through unchanged. */
  float4 PSRGB_Limited(FragTex frag_in) : TARGET
  {
      int px = int(frag_in.uv.x * width + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      float4 texel = image.Load(int3(px, py, 0));
      texel.rgb = saturate((texel.rgb - (16.0 / 255.0)) * (255.0 / 219.0));

      return texel;
  }
  /* Rebuilds an opaque RGB colour from three horizontally adjacent
   * single-channel texels stored in B, G, R order, then rescales it with
   * (v - 16/255) * 255/219, saturated. */
  float4 PSBGR3_Limited(FragTex frag_in) : TARGET
  {
      /* Index of the middle (G) texel of this pixel's triple. */
      int mid = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      float blue  = image.Load(int3(mid - 1, py, 0)).x;
      float green = image.Load(int3(mid, py, 0)).x;
      float red   = image.Load(int3(mid + 1, py, 0)).x;

      float3 colour = float3(red, green, blue);
      colour = saturate((colour - (16.0 / 255.0)) * (255.0 / 219.0));

      return float4(colour, 1.0);
  }
  /* Rebuilds an opaque RGB colour from three horizontally adjacent
   * single-channel texels stored in B, G, R order, without range rescaling. */
  float4 PSBGR3_Full(FragTex frag_in) : TARGET
  {
      /* Index of the middle (G) texel of this pixel's triple. */
      int mid = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
      int py = int(frag_in.uv.y * height + PRECISION_OFFSET);

      float blue  = image.Load(int3(mid - 1, py, 0)).x;
      float green = image.Load(int3(mid, py, 0)).x;
      float red   = image.Load(int3(mid + 1, py, 0)).x;

      return float4(float3(red, green, blue), 1.0);
  }
  /* ---- RGB -> YUV packing techniques ---- */

  technique Planar420
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPlanar420(frag_in);
      }
  }

  technique Planar444
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPlanar444(frag_in);
      }
  }

  technique NV12
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSNV12(frag_in);
      }
  }

  /* Position-only vertex shader: PSNV12_Y addresses texels by pos.xy. */
  technique NV12_Y
  {
      pass
      {
          vertex_shader = VSPos(id);
          pixel_shader  = PSNV12_Y(frag_in);
      }
  }

  technique NV12_UV
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSNV12_UV(frag_in);
      }
  }
  /* ---- Packed 4:2:2 -> RGB techniques ----
   * All three share PSPacked422_Reverse; the integer arguments are the
   * texel channel indices passed as (u_pos, v_pos, y0_pos, y1_pos). */

  technique UYVY_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
      }
  }

  technique YUY2_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
      }
  }

  technique YVYU_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
      }
  }
  /* ---- Planar / semi-planar YUV -> RGB techniques ---- */

  technique I420_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPlanar420_Reverse(frag_in);
      }
  }

  technique I422_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPlanar422_Reverse(frag_in);
      }
  }

  technique I444_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSPlanar444_Reverse(frag_in);
      }
  }

  technique NV12_Reverse
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSNV12_Reverse(frag_in);
      }
  }
  /* ---- Greyscale / RGB / BGR3 techniques ---- */

  technique Y800_Limited
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSY800_Limited(frag_in);
      }
  }

  technique Y800_Full
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSY800_Full(frag_in);
      }
  }

  technique RGB_Limited
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSRGB_Limited(frag_in);
      }
  }

  technique BGR3_Limited
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSBGR3_Limited(frag_in);
      }
  }

  technique BGR3_Full
  {
      pass
      {
          vertex_shader = VSPosTex(id);
          pixel_shader  = PSBGR3_Full(frag_in);
      }
  }