123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505 |
- /******************************************************************************
- Copyright (C) 2014 by Hugh Bailey <[email protected]>
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- ******************************************************************************/
- //#define DEBUGGING
- uniform float4x4 ViewProj; /* matrix VSDefault multiplies vertex positions by */
- uniform float u_plane_offset; /* packed byte offset at which the pack shaders switch from the first plane to chroma */
- uniform float v_plane_offset; /* packed byte offset at which the planar pack shaders switch to the third plane */
- uniform float width; /* scales uv.x into a column index; presumably the frame width in pixels -- confirm with the code that sets it */
- uniform float height; /* scales uv.y into a row index in the *_Reverse shaders */
- uniform float width_i; /* used as the width of one texel in UV space; presumably 1.0 / width -- TODO confirm */
- uniform float height_i; /* used as the height of one texel in UV space; presumably 1.0 / height -- TODO confirm */
- uniform float width_d2; /* presumably width / 2 (chroma row length in PSPlanar420) -- TODO confirm */
- uniform float height_d2;
- uniform float width_d2_i; /* presumably 1.0 / width_d2 -- TODO confirm */
- uniform float height_d2_i; /* presumably 1.0 / height_d2 -- TODO confirm */
- uniform float input_width;
- uniform float input_height; /* scales uv.y into the output row index in the pack shaders */
- uniform float input_width_i;
- uniform float input_height_i;
- uniform float input_width_i_d2; /* added to x in PSPacked422_Reverse; presumably half an input texel -- TODO confirm */
- uniform float input_height_i_d2;
- uniform int int_width; /* row length used for linear texel offsets in the planar *_Reverse shaders */
- uniform int int_input_width; /* row length GetIntOffsetColor uses to split a linear offset into x/y */
- uniform int int_u_plane_offset; /* linear offset of the first chroma value read by the planar *_Reverse shaders */
- uniform int int_v_plane_offset; /* linear offset of the second chroma plane read by the planar *_Reverse shaders */
- uniform float4x4 color_matrix; /* applied to float4(yuv, 1.0) in the *_Reverse shaders */
- uniform float3 color_range_min = {0.0, 0.0, 0.0}; /* lower clamp bound applied to yuv before color_matrix */
- uniform float3 color_range_max = {1.0, 1.0, 1.0}; /* upper clamp bound applied to yuv before color_matrix */
- uniform texture2d image; /* the texture every pixel shader in this file reads from */
- sampler_state def_sampler { /* the only sampler used by the Sample() calls in this file */
- Filter = Linear; /* linear filtering: the chroma paths sample on texel borders to get averaged values */
- AddressU = Clamp; /* coordinates outside [0, 1] are clamped horizontally */
- AddressV = Clamp; /* coordinates outside [0, 1] are clamped vertically */
- };
- struct VertInOut { /* vertex layout used as VSDefault input/output and as input of every pixel shader */
- float4 pos : POSITION; /* vertex position */
- float2 uv : TEXCOORD0; /* texture coordinate, passed through unchanged by VSDefault */
- };
- /* Vertex shader shared by every technique: transforms the position by
-  * ViewProj (with w forced to 1.0) and forwards the texture coordinate as is. */
- VertInOut VSDefault(VertInOut vert_in)
- {
-     VertInOut transformed;
-     float4 source_pos = float4(vert_in.pos.xyz, 1.0);
-     transformed.uv = vert_in.uv;
-     transformed.pos = mul(source_pos, ViewProj);
-     return transformed;
- }
- /* used to prevent internal GPU precision issues with fmod in particular;
-  * it is added to a value before fmod()/floor()/int() so that a result landing
-  * marginally below a whole number is not truncated to the previous one */
- #define PRECISION_OFFSET 0.2
- /* Packs the source image into a two-plane layout, four bytes per output texel.
-  *
-  * The output position is first turned into a linear byte offset.  Offsets
-  * below u_plane_offset belong to the first plane: the .y channel of four
-  * horizontally adjacent source texels is returned.  All other offsets belong
-  * to the interleaved chroma plane: two filtered samples are taken and their
-  * .r/.b channels are returned as (r0, b0, r1, b1).
-  *
-  * With DEBUGGING defined a flat colour per plane is returned instead. */
- float4 PSNV12(VertInOut vert_in) : TARGET
- {
-     float dst_row = floor(vert_in.uv.y * input_height);
-     float out_byte = floor((dst_row + vert_in.uv.x) * width) * 4.0;
-     out_byte += PRECISION_OFFSET;
-     if (out_byte < u_plane_offset) {
- #ifdef DEBUGGING
-         return float4(1.0, 1.0, 1.0, 1.0);
- #endif
-         float luma_x0 = floor(fmod(out_byte, width)) * width_i;
-         float luma_y = floor(out_byte * width_i) * height_i;
-         /* shift onto texel centres so each of the four taps reads
-          * exactly one source pixel */
-         luma_x0 += width_i * 0.5;
-         luma_y += height_i * 0.5;
-         /* step one texel at a time, accumulating like a running sum */
-         float luma_x1 = luma_x0 + width_i;
-         float luma_x2 = luma_x1 + width_i;
-         float luma_x3 = luma_x2 + width_i;
-         float4 tap0 = image.Sample(def_sampler, float2(luma_x0, luma_y));
-         float4 tap1 = image.Sample(def_sampler, float2(luma_x1, luma_y));
-         float4 tap2 = image.Sample(def_sampler, float2(luma_x2, luma_y));
-         float4 tap3 = image.Sample(def_sampler, float2(luma_x3, luma_y));
-         return float4(tap0.y, tap1.y, tap2.y, tap3.y);
-     }
- #ifdef DEBUGGING
-     return float4(0.5, 0.2, 0.5, 0.2);
- #endif
-     float chroma_byte = out_byte - u_plane_offset;
-     float chroma_x = floor(fmod(chroma_byte, width)) * width_i;
-     float chroma_y = floor(chroma_byte * width_i) * height_d2_i;
-     float two_texels = width_i*2.0;
-     /* sit on the shared corner of each 2x2 pixel group so the linear
-      * sampler returns their average */
-     chroma_x += width_i;
-     chroma_y += height_i;
-     float4 pair0 = image.Sample(def_sampler, float2(chroma_x, chroma_y));
-     float4 pair1 = image.Sample(def_sampler,
-             float2(chroma_x + two_texels, chroma_y));
-     return float4(pair0.rb, pair1.rb);
- }
- /* Samples the image at the incoming texture coordinate and outputs only its
-  * .y channel. */
- float PSNV12_Y(VertInOut vert_in) : TARGET
- {
-     float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
-     return texel.y;
- }
- /* Samples the image at the incoming texture coordinate and outputs its .x and
-  * .z channels as a two-component value. */
- float2 PSNV12_UV(VertInOut vert_in) : TARGET
- {
-     float4 texel = image.Sample(def_sampler, vert_in.uv.xy);
-     return texel.xz;
- }
- /* Packs the source image into a three-plane layout, four bytes per output
-  * texel.
-  *
-  * The output position is converted to a linear byte offset, which selects the
-  * plane:
-  *   offset <  u_plane_offset : first plane, .y channel of 4 adjacent texels
-  *   offset <  v_plane_offset : second plane, .x channel of 4 filtered samples
-  *   otherwise                : third plane, .z channel of 4 filtered samples
-  *
-  * Chroma rows are width_d2 bytes long, so a group of four chroma samples can
-  * straddle the end of one chroma row; in that case the last two samples are
-  * taken from the start of the next chroma row.
-  *
-  * With DEBUGGING defined a flat colour per plane is returned instead. */
- float4 PSPlanar420(VertInOut vert_in) : TARGET
- {
-     float v_mul = floor(vert_in.uv.y * input_height);
-     float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
-     byte_offset += PRECISION_OFFSET;
-     float2 sample_pos[4];
-     if (byte_offset < u_plane_offset) {
- #ifdef DEBUGGING
-         return float4(1.0, 1.0, 1.0, 1.0);
- #endif
-         float lum_u = floor(fmod(byte_offset, width)) * width_i;
-         float lum_v = floor(byte_offset * width_i) * height_i;
-         /* move to texel centers to sample the 4 pixels properly */
-         lum_u += width_i * 0.5;
-         lum_v += height_i * 0.5;
-         sample_pos[0] = float2(lum_u, lum_v);
-         sample_pos[1] = float2(lum_u += width_i, lum_v);
-         sample_pos[2] = float2(lum_u += width_i, lum_v);
-         sample_pos[3] = float2(lum_u + width_i, lum_v);
-     } else {
- #ifdef DEBUGGING
-         return ((byte_offset < v_plane_offset) ?
-             float4(0.5, 0.5, 0.5, 0.5) :
-             float4(0.2, 0.2, 0.2, 0.2));
- #endif
-         /* make the offset relative to the start of its own chroma plane */
-         float new_offset = byte_offset -
-             ((byte_offset < v_plane_offset) ?
-             u_plane_offset : v_plane_offset);
-         float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
-         float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
-         float width_i2 = width_i*2.0;
-         /* move to the borders of each set of 4 pixels to force it
-          * to do bilinear averaging */
-         ch_u += width_i;
-         ch_v += height_i;
-         /* set up coordinates for next chroma line, in case
-          * (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
-          * between the current and the next chroma line; do note that the next
-          * chroma line is two source lines below the current source line */
-         float ch_u_n = 0. + width_i;
-         float ch_v_n = ch_v + height_i * 3;
-         sample_pos[0] = float2(ch_u, ch_v);
-         sample_pos[1] = float2(ch_u += width_i2, ch_v);
-         ch_u += width_i2;
-         // check if ch_u overflowed the current source and chroma line
-         if (ch_u > 1.0) {
-             /* the last two samples wrap to the first two chroma
-              * positions of the next chroma line; both slots must be
-              * written, otherwise the fourth sample below would read an
-              * unassigned coordinate */
-             sample_pos[2] = float2(ch_u_n, ch_v_n);
-             sample_pos[3] = float2(ch_u_n + width_i2, ch_v_n);
-         } else {
-             sample_pos[2] = float2(ch_u, ch_v);
-             sample_pos[3] = float2(ch_u + width_i2, ch_v);
-         }
-     }
-     float4x4 out_val = float4x4(
-         image.Sample(def_sampler, sample_pos[0]),
-         image.Sample(def_sampler, sample_pos[1]),
-         image.Sample(def_sampler, sample_pos[2]),
-         image.Sample(def_sampler, sample_pos[3])
-     );
-     /* after the transpose, entry k gathers channel k of all four samples */
-     out_val = transpose(out_val);
-     if (byte_offset < u_plane_offset)
-         return out_val[1];
-     else if (byte_offset < v_plane_offset)
-         return out_val[0];
-     else
-         return out_val[2];
- }
- /* Packs the source image into three full-resolution planes, four bytes per
-  * output texel.
-  *
-  * The output position is converted to a linear byte offset; the offset decides
-  * which plane is being written and therefore which channel of the four
-  * adjacent source texels is returned: .y below u_plane_offset, .x below
-  * v_plane_offset, .z from v_plane_offset onwards. */
- float4 PSPlanar444(VertInOut vert_in) : TARGET
- {
-     float dst_row = floor(vert_in.uv.y * input_height);
-     float out_byte = floor((dst_row + vert_in.uv.x) * width) * 4.0;
-     out_byte += PRECISION_OFFSET;
-     /* rebase the offset to the start of the plane it falls into */
-     float plane_byte = out_byte;
-     if (out_byte >= v_plane_offset)
-         plane_byte -= v_plane_offset;
-     else if (out_byte >= u_plane_offset)
-         plane_byte -= u_plane_offset;
-     float tap_x0 = floor(fmod(plane_byte, width)) * width_i;
-     float tap_y = floor(plane_byte * width_i) * height_i;
-     /* shift onto texel centres so each tap reads exactly one pixel */
-     tap_x0 += width_i * 0.5;
-     tap_y += height_i * 0.5;
-     /* step one texel at a time, accumulating like a running sum */
-     float tap_x1 = tap_x0 + width_i;
-     float tap_x2 = tap_x1 + width_i;
-     float tap_x3 = tap_x2 + width_i;
-     float4x4 channels = transpose(float4x4(
-         image.Sample(def_sampler, float2(tap_x0, tap_y)),
-         image.Sample(def_sampler, float2(tap_x1, tap_y)),
-         image.Sample(def_sampler, float2(tap_x2, tap_y)),
-         image.Sample(def_sampler, float2(tap_x3, tap_y))
-     ));
-     /* entry k of the transposed matrix holds channel k of all four taps */
-     float4 plane_bytes = channels[2];
-     if (out_byte < u_plane_offset)
-         plane_bytes = channels[1];
-     else if (out_byte < v_plane_offset)
-         plane_bytes = channels[0];
-     return plane_bytes;
- }
- /* Reads the .r channel of the texel addressed by a linear offset.  The offset
-  * is split into integer x/y coordinates using int_input_width as the row
-  * length, and the texel is fetched unfiltered from mip level 0. */
- float GetIntOffsetColor(int offset)
- {
-     int texel_x = offset % int_input_width;
-     int texel_y = offset / int_input_width;
-     return image.Load(int3(texel_x, texel_y, 0)).r;
- }
- /* Unpacks a packed 4:2:2 texel pair and converts it with color_matrix.
-  *
-  * Each source texel carries two luma values plus one U and one V value; the
-  * *_pos arguments give the channel index of each within the texel.  Even
-  * output columns use y0_pos, odd output columns use y1_pos.  The resulting
-  * yuv triple is clamped to [color_range_min, color_range_max] before the
-  * matrix is applied, and the final colour is saturated. */
- float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
- int y0_pos, int y1_pos) : TARGET
- {
-     float is_odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
-     /* snap to the start of the texel pair, then add the fixed offset */
-     float pair_x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
-         width_d2_i;
-     pair_x += input_width_i_d2;
-     float4 texel = image.Sample(def_sampler, float2(pair_x, vert_in.uv.y));
-     float luma;
-     if (is_odd > 0.5)
-         luma = texel[y1_pos];
-     else
-         luma = texel[y0_pos];
-     float3 yuv = float3(luma, texel[u_pos], texel[v_pos]);
-     yuv = clamp(yuv, color_range_min, color_range_max);
-     float4 converted = mul(float4(yuv, 1.0), color_matrix);
-     return saturate(converted);
- }
- /* Reads one pixel from three-plane data with 2x2-subsampled chroma and
-  * converts it with color_matrix.
-  *
-  * The luma value is fetched at y * int_width + x; both chroma values share the
-  * index (y / 2) * (int_width / 2) + x / 2, relative to int_u_plane_offset and
-  * int_v_plane_offset respectively.  The triple is clamped to the colour range
-  * before conversion and the result is saturated. */
- float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     int chroma_index = (py / 2) * (int_width / 2) + px / 2;
-     float y_val = GetIntOffsetColor(py * int_width + px);
-     float u_val = GetIntOffsetColor(int_u_plane_offset + chroma_index);
-     float v_val = GetIntOffsetColor(int_v_plane_offset + chroma_index);
-     float3 yuv = clamp(float3(y_val, u_val, v_val),
-             color_range_min, color_range_max);
-     float4 converted = mul(float4(yuv, 1.0), color_matrix);
-     return saturate(converted);
- }
- /* Reads one pixel from three full-resolution planes and converts it with
-  * color_matrix.
-  *
-  * All three planes are indexed with the same y * int_width + x value; the
-  * second and third planes are additionally offset by int_u_plane_offset and
-  * int_v_plane_offset.  The triple is clamped to the colour range before
-  * conversion and the result is saturated. */
- float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     int texel_index = py * int_width + px;
-     float y_val = GetIntOffsetColor(texel_index);
-     float u_val = GetIntOffsetColor(int_u_plane_offset + texel_index);
-     float v_val = GetIntOffsetColor(int_v_plane_offset + texel_index);
-     float3 yuv = clamp(float3(y_val, u_val, v_val),
-             color_range_min, color_range_max);
-     float4 converted = mul(float4(yuv, 1.0), color_matrix);
-     return saturate(converted);
- }
- /* Reads one pixel from two-plane data with interleaved, 2x2-subsampled chroma
-  * and converts it with color_matrix.
-  *
-  * The luma value is fetched at y * int_width + x.  The chroma pair starts at
-  * int_u_plane_offset + 2 * ((y / 2) * (int_width / 2) + x / 2); the first
-  * value of the pair is used as U and the following one as V.  The triple is
-  * clamped to the colour range before conversion and the result is saturated. */
- float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     int pair_index = (py / 2) * (int_width / 2) + px / 2;
-     int uv_start = int_u_plane_offset + pair_index * 2;
-     float y_val = GetIntOffsetColor(py * int_width + px);
-     float u_val = GetIntOffsetColor(uv_start);
-     float v_val = GetIntOffsetColor(uv_start + 1);
-     float3 yuv = clamp(float3(y_val, u_val, v_val),
-             color_range_min, color_range_max);
-     float4 converted = mul(float4(yuv, 1.0), color_matrix);
-     return saturate(converted);
- }
- /* Expands a single-channel value from the 16..235 (of 255) range to 0..1 and
-  * outputs it as an opaque grey pixel.  The texel is fetched unfiltered. */
- float4 PSY800_Limited(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     float4 texel = image.Load(int3(px, py, 0));
-     /* subtract the black level, rescale, then clamp to [0, 1] */
-     float grey = saturate((texel.x - (16.0 / 255.0)) * (255.0 / 219.0));
-     return float4(grey, grey, grey, 1.0);
- }
- /* Outputs the .x channel of the addressed texel, unmodified, as an opaque
-  * grey pixel.  The texel is fetched unfiltered. */
- float4 PSY800_Full(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     float grey = image.Load(int3(px, py, 0)).x;
-     return float4(grey, grey, grey, 1.0);
- }
- /* Expands the colour channels of the addressed texel from the 16..235 (of
-  * 255) range to 0..1; the alpha channel is passed through untouched.  The
-  * texel is fetched unfiltered. */
- float4 PSRGB_Limited(VertInOut vert_in) : TARGET
- {
-     int px = int(vert_in.uv.x * width + PRECISION_OFFSET);
-     int py = int(vert_in.uv.y * height + PRECISION_OFFSET);
-     float4 texel = image.Load(int3(px, py, 0));
-     /* subtract the black level, rescale, then clamp to [0, 1] */
-     float3 expanded = (texel.rgb - (16.0 / 255.0)) * (255.0 / 219.0);
-     texel.rgb = saturate(expanded);
-     return texel;
- }
- technique Planar420 /* packs the image into three planes, the last two with 2x2-averaged chroma */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPlanar420(vert_in);
- }
- }
- technique Planar444 /* packs the image into three full-resolution planes */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPlanar444(vert_in);
- }
- }
- technique NV12 /* packs the image into one .y plane followed by an interleaved .r/.b plane */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSNV12(vert_in);
- }
- }
- technique NV12_Y /* single-channel output: the .y channel of the sampled texel */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSNV12_Y(vert_in);
- }
- }
- technique NV12_UV /* two-channel output: the .x and .z channels of the sampled texel */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSNV12_UV(vert_in);
- }
- }
- technique UYVY_Reverse
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3); /* texel channel indices: U=2, V=0, Y0=1, Y1=3 */
- }
- }
- technique YUY2_Reverse
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0); /* texel channel indices: U=1, V=3, Y0=2, Y1=0 */
- }
- }
- technique YVYU_Reverse
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0); /* texel channel indices: U=3, V=1, Y0=2, Y1=0 */
- }
- }
- technique I420_Reverse /* three planes, chroma indexed at half resolution in both directions */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPlanar420_Reverse(vert_in);
- }
- }
- technique I444_Reverse /* three planes, chroma indexed at full resolution */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSPlanar444_Reverse(vert_in);
- }
- }
- technique NV12_Reverse /* two planes, chroma stored as interleaved pairs at half resolution */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSNV12_Reverse(vert_in);
- }
- }
- technique Y800_Limited /* grey output, input expanded from the 16..235 range */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSY800_Limited(vert_in);
- }
- }
- technique Y800_Full /* grey output, input value used as is */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSY800_Full(vert_in);
- }
- }
- technique RGB_Limited /* colour channels expanded from the 16..235 range, alpha kept */
- {
- pass
- {
- vertex_shader = VSDefault(vert_in);
- pixel_shader = PSRGB_Limited(vert_in);
- }
- }
|