format_conversion.effect 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. /******************************************************************************
  2. Copyright (C) 2014 by Hugh Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. //#define DEBUGGING
  /* Plane boundaries, in bytes.  The packing shaders compare the byte offset
   * of the texel being written against these to decide which plane it
   * belongs to. */
  uniform float u_plane_offset;
  uniform float v_plane_offset;

  /* Image dimensions as floats.  The *_i values are only ever used as
   * multipliers where a division by the matching size would be expected
   * (presumably precomputed reciprocals -- confirm against the code that
   * sets them); *_d2 presumably denotes the halved size. */
  uniform float width;
  uniform float height;
  uniform float width_i;
  uniform float height_i;
  uniform float width_d2;
  /* height_d2 is not referenced by any shader visible in this file. */
  uniform float height_d2;
  uniform float width_d2_i;
  uniform float height_d2_i;

  /* Input-texture dimensions.  Of these only input_height and
   * input_width_i_d2 are referenced by the shaders visible in this file. */
  uniform float input_width;
  uniform float input_height;
  uniform float input_width_i;
  uniform float input_height_i;
  uniform float input_width_i_d2;
  uniform float input_height_i_d2;

  /* Integer values used for the integer address arithmetic in the *_Reverse
   * shaders and in GetIntOffsetColor. */
  uniform int int_width;
  uniform int int_input_width;
  uniform int int_u_plane_offset;
  uniform int int_v_plane_offset;

  /* Colour conversion applied by the *_Reverse shaders: the YUV triple is
   * clamped to [color_range_min, color_range_max] and then multiplied by
   * color_matrix. */
  uniform float4x4 color_matrix;
  uniform float3 color_range_min = {0.0, 0.0, 0.0};
  uniform float3 color_range_max = {1.0, 1.0, 1.0};

  /* Source texture read by every pixel shader in this file. */
  uniform texture2d image;
  /* Sampler used by the image.Sample() calls visible in this file: linear
   * filtering with clamped addressing on both axes. */
  sampler_state def_sampler {
      Filter   = Linear;
      AddressU = Clamp;
      AddressV = Clamp;
  };
  /* Output of VSDefault and input of every pixel shader: a position plus one
   * set of texture coordinates. */
  struct VertInOut {
      float4 pos : POSITION;
      float2 uv  : TEXCOORD0;
  };
  /* Vertex shader that derives position and UV from the vertex index alone.
   *
   * Bit 1 of the index drives x/u and bit 0 drives y/v, so ids 0, 1 and 2
   * map to the positions (-1,-1), (-1,3) and (3,-1).  The V coordinate is
   * flipped unless obs_glsl_compile is set. */
  VertInOut VSDefault(uint id : VERTEXID)
  {
      float hi_bit = float(id >> 1);
      float lo_bit = float(id & uint(1));

      VertInOut result;
      result.pos = float4(hi_bit * 4.0 - 1.0, lo_bit * 4.0 - 1.0, 0.0, 1.0);
      result.uv = float2(hi_bit * 2.0,
                         obs_glsl_compile ? (lo_bit * 2.0)
                                          : (1.0 - lo_bit * 2.0));
      return result;
  }
  /* Small bias added to computed offsets before floor()/fmod()/int()
   * truncation; used to prevent internal GPU precision issues, with fmod in
   * particular. */
  #define PRECISION_OFFSET 0.2
  /* Packs the source image into an NV12-style byte stream, four bytes per
   * output texel.
   *
   * The byte offset of the current output texel is compared against
   * u_plane_offset:
   *   - below it, the .y channel of four horizontally adjacent source texels
   *     is returned;
   *   - at or above it, the .rb channels of two samples are returned, each
   *     sample placed on a texel border so the linear sampler averages the
   *     neighbouring texels.
   * NOTE(review): this presumes the source stores luma in .y and the two
   * chroma components in .x/.z -- confirm against the caller. */
  float4 PSNV12(VertInOut vert_in) : TARGET
  {
      float line_index = floor(vert_in.uv.y * input_height);
      float byte_pos = floor((line_index + vert_in.uv.x) * width) * 4.0;
      byte_pos += PRECISION_OFFSET;

      if (byte_pos < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif
          float x0 = floor(fmod(byte_pos, width)) * width_i;
          float y0 = floor(byte_pos * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          x0 += width_i * 0.5;
          y0 += height_i * 0.5;

          float x1 = x0 + width_i;
          float x2 = x1 + width_i;
          float x3 = x2 + width_i;

          /* Gather the .y channel of the four neighbouring texels. */
          return float4(
              image.Sample(def_sampler, float2(x0, y0)).y,
              image.Sample(def_sampler, float2(x1, y0)).y,
              image.Sample(def_sampler, float2(x2, y0)).y,
              image.Sample(def_sampler, float2(x3, y0)).y
          );
      }

  #ifdef DEBUGGING
      return float4(0.5, 0.2, 0.5, 0.2);
  #endif
      /* Offset relative to the start of the interleaved chroma plane. */
      float plane_pos = byte_pos - u_plane_offset;
      float chroma_x = floor(fmod(plane_pos, width)) * width_i;
      float chroma_y = floor(plane_pos * width_i) * height_d2_i;
      float two_texels = width_i * 2.0;

      /* move to the borders of each set of 4 pixels to force it
       * to do bilinear averaging */
      chroma_x += width_i;
      chroma_y += height_i;

      return float4(
          image.Sample(def_sampler, float2(chroma_x, chroma_y)).rb,
          image.Sample(def_sampler, float2(chroma_x + two_texels, chroma_y)).rb
      );
  }
  /* Single-channel pass: outputs the .y channel of the source texel at the
   * interpolated UV. */
  float PSNV12_Y(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return texel.y;
  }
  /* Two-channel pass: outputs the .x and .z channels of the source texel at
   * the interpolated UV. */
  float2 PSNV12_UV(VertInOut vert_in) : TARGET
  {
      float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
      return texel.xz;
  }
  /* Packs the source image into a three-plane 4:2:0 byte stream, four bytes
   * per output texel.
   *
   * The byte offset of the current output texel selects the plane:
   *   - below u_plane_offset: four horizontally adjacent texel centres are
   *     sampled and their .y channel is returned;
   *   - below v_plane_offset: four chroma positions are sampled and their .x
   *     channel is returned;
   *   - otherwise: the same chroma positions are sampled and their .z channel
   *     is returned.
   * Chroma positions sit on texel borders, two texels apart, so the linear
   * sampler averages the neighbouring texels.  When a group of four chroma
   * bytes straddles the end of a chroma line, the last two samples are taken
   * from the start of the next chroma line. */
  float4 PSPlanar420(VertInOut vert_in) : TARGET
  {
      float v_mul = floor(vert_in.uv.y * input_height);

      /* Linear byte position of this output texel (4 bytes per texel). */
      float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
      byte_offset += PRECISION_OFFSET;

      float2 sample_pos[4];

      if (byte_offset < u_plane_offset) {
  #ifdef DEBUGGING
          return float4(1.0, 1.0, 1.0, 1.0);
  #endif
          float lum_u = floor(fmod(byte_offset, width)) * width_i;
          float lum_v = floor(byte_offset * width_i) * height_i;

          /* move to texel centers to sample the 4 pixels properly */
          lum_u += width_i * 0.5;
          lum_v += height_i * 0.5;

          sample_pos[0] = float2(lum_u, lum_v);
          sample_pos[1] = float2(lum_u += width_i, lum_v);
          sample_pos[2] = float2(lum_u += width_i, lum_v);
          sample_pos[3] = float2(lum_u + width_i, lum_v);
      } else {
  #ifdef DEBUGGING
          return ((byte_offset < v_plane_offset) ?
              float4(0.5, 0.5, 0.5, 0.5) :
              float4(0.2, 0.2, 0.2, 0.2));
  #endif
          /* Offset relative to the start of the plane being written. */
          float new_offset = byte_offset -
              ((byte_offset < v_plane_offset) ?
              u_plane_offset : v_plane_offset);

          float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
          float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
          float width_i2 = width_i * 2.0;

          /* move to the borders of each set of 4 pixels to force it
           * to do bilinear averaging */
          ch_u += width_i;
          ch_v += height_i;

          /* set up coordinates for next chroma line, in case
           * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
           * between the current and the next chroma line; do note that the
           * next chroma line is two source lines below the current source
           * line */
          float ch_u_n = 0. + width_i;
          float ch_v_n = ch_v + height_i * 3;

          sample_pos[0] = float2(ch_u, ch_v);
          sample_pos[1] = float2(ch_u += width_i2, ch_v);

          ch_u += width_i2;

          /* check if ch_u overflowed the current source and chroma line */
          if (ch_u > 1.0) {
              /* Both remaining samples come from the next chroma line; each
               * of the two slots must be written, otherwise the fourth
               * sample below would use an unset coordinate. */
              sample_pos[2] = float2(ch_u_n, ch_v_n);
              sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
          } else {
              sample_pos[2] = float2(ch_u, ch_v);
              sample_pos[3] = float2(ch_u + width_i2, ch_v);
          }
      }

      float4x4 out_val = float4x4(
          image.Sample(def_sampler, sample_pos[0]),
          image.Sample(def_sampler, sample_pos[1]),
          image.Sample(def_sampler, sample_pos[2]),
          image.Sample(def_sampler, sample_pos[3])
      );

      /* After the transpose, entry [k] holds channel k of all four samples. */
      out_val = transpose(out_val);

      if (byte_offset < u_plane_offset)
          return out_val[1];
      else if (byte_offset < v_plane_offset)
          return out_val[0];
      else
          return out_val[2];
  }
  /* Packs the source image into a three-plane 4:4:4 byte stream, four bytes
   * per output texel.
   *
   * All three planes use the same full-resolution addressing; the plane the
   * current byte offset falls into only decides which source channel is
   * returned (.y below u_plane_offset, .x below v_plane_offset, .z
   * otherwise). */
  float4 PSPlanar444(VertInOut vert_in) : TARGET
  {
      float line_index = floor(vert_in.uv.y * input_height);
      float byte_pos = floor((line_index + vert_in.uv.x) * width) * 4.0;
      byte_pos += PRECISION_OFFSET;

      /* Offset relative to the start of whichever plane byte_pos lies in. */
      float plane_pos = byte_pos;
      if (byte_pos >= v_plane_offset)
          plane_pos -= v_plane_offset;
      else if (byte_pos >= u_plane_offset)
          plane_pos -= u_plane_offset;

      float x0 = floor(fmod(plane_pos, width)) * width_i;
      float y0 = floor(plane_pos * width_i) * height_i;

      /* move to texel centers to sample the 4 pixels properly */
      x0 += width_i * 0.5;
      y0 += height_i * 0.5;

      float x1 = x0 + width_i;
      float x2 = x1 + width_i;
      float x3 = x2 + width_i;

      float4 texel0 = image.Sample(def_sampler, float2(x0, y0));
      float4 texel1 = image.Sample(def_sampler, float2(x1, y0));
      float4 texel2 = image.Sample(def_sampler, float2(x2, y0));
      float4 texel3 = image.Sample(def_sampler, float2(x3, y0));

      /* Gather one channel from the four texels, chosen by plane. */
      if (byte_pos < u_plane_offset)
          return float4(texel0.y, texel1.y, texel2.y, texel3.y);
      if (byte_pos < v_plane_offset)
          return float4(texel0.x, texel1.x, texel2.x, texel3.x);
      return float4(texel0.z, texel1.z, texel2.z, texel3.z);
  }
  /* Reads the red channel of the source texel addressed by a linear index:
   * the index is split into a column and a line using int_input_width as the
   * line length. */
  float GetIntOffsetColor(int offset)
  {
      int col = offset % int_input_width;
      int line_index = offset / int_input_width;
      return image.Load(int3(col, line_index, 0)).r;
  }
  /* Converts one pixel of a packed 4:2:2 source to the output colour space.
   *
   * One source texel serves two output pixels.  u_pos, v_pos, y0_pos and
   * y1_pos are the channel indices of that texel holding the two chroma
   * values and the luma of the even and odd pixel respectively.  The
   * resulting YUV triple is clamped to the colour range and multiplied by
   * color_matrix. */
  float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
          int y0_pos, int y1_pos) : TARGET
  {
      /* 1.0 for odd output columns, 0.0 for even ones. */
      float is_odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));

      /* Snap to the left edge of the pixel pair, then apply the
       * input_width_i_d2 offset. */
      float pair_x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
              width_d2_i;
      pair_x += input_width_i_d2;

      float4 texel = image.Sample(def_sampler, float2(pair_x, vert_in.uv.y));

      float luma;
      if (is_odd > 0.5)
          luma = texel[y1_pos];
      else
          luma = texel[y0_pos];

      float3 yuv = float3(luma, texel[u_pos], texel[v_pos]);
      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts one pixel of a three-plane 4:2:0 source to the output colour
   * space.  Luma is read at full resolution; both chroma planes are indexed
   * at half resolution in x and y, starting at int_u_plane_offset and
   * int_v_plane_offset. */
  float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int chroma_index = (py / 2) * (int_width / 2) + px / 2;

      float luma = GetIntOffsetColor(luma_index);
      float chroma_u = GetIntOffsetColor(int_u_plane_offset + chroma_index);
      float chroma_v = GetIntOffsetColor(int_v_plane_offset + chroma_index);

      float3 yuv = float3(luma, chroma_u, chroma_v);
      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts one pixel of a three-plane 4:4:4 source to the output colour
   * space.  All planes share the same full-resolution index; the chroma
   * planes start at int_u_plane_offset and int_v_plane_offset. */
  float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      /* Same index inside each of the three planes. */
      int pixel_index = py * int_width + px;

      float luma = GetIntOffsetColor(pixel_index);
      float chroma_u = GetIntOffsetColor(int_u_plane_offset + pixel_index);
      float chroma_v = GetIntOffsetColor(int_v_plane_offset + pixel_index);

      float3 yuv = float3(luma, chroma_u, chroma_v);
      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Converts one pixel of an NV12-style source (luma plane followed by one
   * interleaved chroma plane) to the output colour space.  The two chroma
   * values of a pixel sit at consecutive indices starting at
   * int_u_plane_offset + 2 * chroma index. */
  float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      int luma_index = py * int_width + px;
      int chroma_index = (py / 2) * (int_width / 2) + px / 2;
      int pair_start = int_u_plane_offset + chroma_index * 2;

      float luma = GetIntOffsetColor(luma_index);
      float chroma_first = GetIntOffsetColor(pair_start);
      float chroma_second = GetIntOffsetColor(pair_start + 1);

      float3 yuv = float3(luma, chroma_first, chroma_second);
      yuv = clamp(yuv, color_range_min, color_range_max);
      return saturate(mul(float4(yuv, 1.0), color_matrix));
  }
  /* Greyscale source, range-expanded: subtracts 16/255 from the .x channel,
   * scales by 255/219, saturates, and replicates the result to RGB with
   * alpha 1. */
  float4 PSY800_Limited(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float raw = image.Load(int3(px, py, 0)).x;
      float grey = saturate((raw - (16.0 / 255.0)) * (255.0 / 219.0));
      return float4(grey, grey, grey, 1.0);
  }
  /* Greyscale source, no range adjustment: replicates the .x channel to RGB
   * with alpha 1. */
  float4 PSY800_Full(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float grey = image.Load(int3(px, py, 0)).x;
      return float4(grey, grey, grey, 1.0);
  }
  /* RGBA source, range-expanded: subtracts 16/255 from the colour channels,
   * scales by 255/219 and saturates; alpha is passed through unchanged. */
  float4 PSRGB_Limited(VertInOut vert_in) : TARGET
  {
      int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float4 texel = image.Load(int3(px, py, 0));
      float3 expanded = saturate((texel.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
      return float4(expanded, texel.a);
  }
  /* Three-bytes-per-pixel source with range expansion.  The source texture
   * is addressed at three texels per output pixel: the texel left of the
   * computed column supplies blue, the column itself green, and the texel to
   * its right red.  Each is then offset by 16/255, scaled by 255/219 and
   * saturated. */
  float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
  {
      int mid_x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float blue = image.Load(int3(mid_x - 1, py, 0)).x;
      float green = image.Load(int3(mid_x, py, 0)).x;
      float red = image.Load(int3(mid_x + 1, py, 0)).x;

      float3 rgb = float3(red, green, blue);
      rgb = saturate((rgb - (16.0 / 255.0)) * (255.0 / 219.0));
      return float4(rgb, 1.0);
  }
  /* Three-bytes-per-pixel source without range adjustment.  The source
   * texture is addressed at three texels per output pixel: the texel left of
   * the computed column supplies blue, the column itself green, and the
   * texel to its right red. */
  float4 PSBGR3_Full(VertInOut vert_in) : TARGET
  {
      int mid_x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
      int py = int(vert_in.uv.y * height + PRECISION_OFFSET);

      float blue = image.Load(int3(mid_x - 1, py, 0)).x;
      float green = image.Load(int3(mid_x, py, 0)).x;
      float red = image.Load(int3(mid_x + 1, py, 0)).x;

      return float4(float3(red, green, blue), 1.0);
  }
  /* Single pass: VSDefault + PSPlanar420. */
  technique Planar420 {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPlanar420(vert_in);
      }
  }
  /* Single pass: VSDefault + PSPlanar444. */
  technique Planar444 {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPlanar444(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12. */
  technique NV12 {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSNV12(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12_Y. */
  technique NV12_Y {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSNV12_Y(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12_UV. */
  technique NV12_UV {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSNV12_UV(vert_in);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with channel indices
   * u_pos = 2, v_pos = 0, y0_pos = 1, y1_pos = 3. */
  technique UYVY_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with channel indices
   * u_pos = 1, v_pos = 3, y0_pos = 2, y1_pos = 0. */
  technique YUY2_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
      }
  }
  /* Single pass: VSDefault + PSPacked422_Reverse with channel indices
   * u_pos = 3, v_pos = 1, y0_pos = 2, y1_pos = 0. */
  technique YVYU_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
      }
  }
  /* Single pass: VSDefault + PSPlanar420_Reverse. */
  technique I420_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPlanar420_Reverse(vert_in);
      }
  }
  /* Single pass: VSDefault + PSPlanar444_Reverse. */
  technique I444_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSPlanar444_Reverse(vert_in);
      }
  }
  /* Single pass: VSDefault + PSNV12_Reverse. */
  technique NV12_Reverse {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSNV12_Reverse(vert_in);
      }
  }
  /* Single pass: VSDefault + PSY800_Limited. */
  technique Y800_Limited {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSY800_Limited(vert_in);
      }
  }
  /* Single pass: VSDefault + PSY800_Full. */
  technique Y800_Full {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSY800_Full(vert_in);
      }
  }
  /* Single pass: VSDefault + PSRGB_Limited. */
  technique RGB_Limited {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSRGB_Limited(vert_in);
      }
  }
  /* Single pass: VSDefault + PSBGR3_Limited. */
  technique BGR3_Limited {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSBGR3_Limited(vert_in);
      }
  }
  /* Single pass: VSDefault + PSBGR3_Full. */
  technique BGR3_Full {
      pass {
          vertex_shader = VSDefault(id);
          pixel_shader  = PSBGR3_Full(vert_in);
      }
  }