/*
 * lanczos_scale.effect
 *
 * Lanczos sharper.
 * Note: this shader is adapted from the GPL bsnes shader, very good stuff
 * there.
 */
  // Matrix applied to every vertex position in VSDefault.
  uniform float4x4 ViewProj;

  // Source texture being resampled.
  uniform texture2d image;

  // Multiplied with UV coordinates to obtain texel coordinates in `image`.
  uniform float2 base_dimension;

  // Used as the UV distance between adjacent texels
  // (presumably 1.0 / base_dimension; supplied by the caller).
  uniform float2 base_dimension_i;

  // Blend factor of the horizontal undistort curve; at 1.0 the curve is the
  // identity (see AspectUndistortX).
  uniform float undistort_factor = 1.0;
  // Sampler used for every image.Sample() call in this effect: coordinates
  // outside the texture are clamped to the edge and fetches are linearly
  // filtered.
  sampler_state textureSampler
  {
      AddressU = Clamp;
      AddressV = Clamp;
      Filter = Linear;
  };
  // Vertex shader input: position and texture coordinate.
  struct VertData {
      float4 pos : POSITION;
      float2 uv : TEXCOORD0;
  };
  // Vertex shader output: pass-through texture coordinate and the
  // transformed position.
  struct VertOut {
      float2 uv : TEXCOORD0;
      float4 pos : POSITION;
  };
  // Pixel shader input: only the texture coordinate is consumed.
  struct FragData {
      float2 uv : TEXCOORD0;
  };
  // Vertex shader: transforms the position by ViewProj (w forced to 1.0) and
  // forwards the texture coordinate unchanged.
  VertOut VSDefault(VertData v_in)
  {
      VertOut result;
      result.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
      result.uv = v_in.uv;
      return result;
  }
  // Lanczos kernel with radius 3:
  //   weight(x) = radius * sin(pi*x) * sin(pi*x / radius) / (pi*x)^2
  // Returns 1.0 at (and extremely close to) x == 0 and 0.0 for |x| >= radius.
  float weight(float x)
  {
      float radius = 3.0;
      float ax = abs(x);

      // The closed form is 0/0 at x == 0 and becomes numerically unsafe once
      // (pi*x)^2 underflows, so treat a small neighbourhood of zero as the
      // limit value. For |x| < 1e-6 the kernel differs from 1.0 by far less
      // than float precision.
      if (ax < 1e-6)
          return 1.0;

      // Outside the kernel support.
      if (ax >= radius)
          return 0.0;

      float PIval = 3.14159265358979323846;
      float x_pi = x * PIval;
      // Derived from radius so the two cannot drift apart.
      float radius_i = 1.0 / radius;
      return radius * sin(x_pi) * sin(x_pi * radius_i) / (x_pi * x_pi);
  }
  // Evaluates the kernel at three offsets spaced two apart:
  // (2x - 3, 2x - 1, 2x + 1).
  float3 weight3(float x)
  {
      float3 taps;
      taps.x = weight(x * 2.0 - 3.0);
      taps.y = weight(x * 2.0 - 1.0);
      taps.z = weight(x * 2.0 + 1.0);
      return taps;
  }
  // Blends a quintic curve with the identity: (1 - a) * x^5 + a * x.
  // With a == 1.0 the result is x; with a == 0.0 it is x^5.
  float AspectUndistortX(float x, float a)
  {
      // The higher the power, the longer the linear part will be.
      float x5 = x * x * x * x * x;
      float linear_part = a * x;
      return (1.0 - a) * x5 + linear_part;
  }
  // Applies the undistort curve to a texture U coordinate using the
  // undistort_factor uniform.
  float AspectUndistortU(float u)
  {
      // Normalize texture coord to -1.0 to 1.0 range, and back.
      float centered = (u - 0.5) * 2.0;
      float curved = AspectUndistortX(centered, undistort_factor);
      return curved * 0.5 + 0.5;
  }
  // Builds a texture coordinate whose U is undistorted; V is passed through.
  float2 undistort_coord(float xpos, float ypos)
  {
      float2 coord;
      coord.x = AspectUndistortU(xpos);
      coord.y = ypos;
      return coord;
  }
  // Samples `image` at the undistorted version of (xpos, ypos).
  float4 undistort_pixel(float xpos, float ypos)
  {
      float2 coord = undistort_coord(xpos, ypos);
      return image.Sample(textureSampler, coord);
  }
  // Weighted sum of six horizontally adjacent undistorted samples on one row.
  // xpos012 / xpos345 hold the U coordinates of taps 0-2 and 3-5;
  // rowtap024 / rowtap135 hold the weights of the even and odd taps.
  float4 undistort_line(float3 xpos012, float3 xpos345, float ypos, float3 rowtap024,
                        float3 rowtap135)
  {
      float4 tap0 = undistort_pixel(xpos012.x, ypos) * rowtap024.x;
      float4 tap1 = undistort_pixel(xpos012.y, ypos) * rowtap135.x;
      float4 tap2 = undistort_pixel(xpos012.z, ypos) * rowtap024.y;
      float4 tap3 = undistort_pixel(xpos345.x, ypos) * rowtap135.y;
      float4 tap4 = undistort_pixel(xpos345.y, ypos) * rowtap024.z;
      float4 tap5 = undistort_pixel(xpos345.z, ypos) * rowtap135.z;
      return tap0 + tap1 + tap2 + tap3 + tap4 + tap5;
  }
  // Resamples `image` at f_in.uv with a separable 6x6 Lanczos (radius 3)
  // kernel.
  //
  // undistort == true : every tap is fetched through undistort_line(), which
  //                     remaps U with the undistort curve (36 Sample calls).
  // undistort == false: outer taps are fetched with image.Load(); the two
  //                     centre taps of each axis are merged into a single
  //                     linearly filtered Sample placed between them.
  //
  // Returns the weighted sum of all taps.
  float4 DrawLanczos(FragData f_in, bool undistort)
  {
      float2 stepxy = base_dimension_i;
      float2 pos = f_in.uv + stepxy * 0.5;
      // Fractional position inside the texel grid; drives the tap weights.
      float2 f = frac(pos * base_dimension);

      // Kernel weights for taps 0/2/4 and 1/3/5 along each axis.
      float2 f_rev_half = (-0.5) * f + 0.5;
      float3 rowtap024 = weight3(f_rev_half.x);
      float3 rowtap135 = weight3(f_rev_half.x + 0.5);
      float3 coltap024 = weight3(f_rev_half.y);
      float3 coltap135 = weight3(f_rev_half.y + 0.5);

      // Normalize so the six weights of each axis sum to 1.
      float rowsum = rowtap024.x + rowtap024.y + rowtap024.z +
                     rowtap135.x + rowtap135.y + rowtap135.z;
      float rowsum_i = 1.0 / rowsum;
      rowtap024 = rowtap024 * rowsum_i;
      rowtap135 = rowtap135 * rowsum_i;

      float colsum = coltap024.x + coltap024.y + coltap024.z +
                     coltap135.x + coltap135.y + coltap135.z;
      float colsum_i = 1.0 / colsum;
      coltap024 = coltap024 * colsum_i;
      coltap135 = coltap135 * colsum_i;

      // UV of the six taps along each axis, one texel apart.
      float2 uv0 = (-2.5 - f) * stepxy + pos;
      float2 uv1 = uv0 + stepxy;
      float2 uv2 = uv1 + stepxy;
      float2 uv3 = uv2 + stepxy;
      float2 uv4 = uv3 + stepxy;
      float2 uv5 = uv4 + stepxy;

      if (undistort) {
          float3 xpos012 = float3(uv0.x, uv1.x, uv2.x);
          float3 xpos345 = float3(uv3.x, uv4.x, uv5.x);
          return undistort_line(xpos012, xpos345, uv0.y, rowtap024, rowtap135) * coltap024.x +
                 undistort_line(xpos012, xpos345, uv1.y, rowtap024, rowtap135) * coltap135.x +
                 undistort_line(xpos012, xpos345, uv2.y, rowtap024, rowtap135) * coltap024.y +
                 undistort_line(xpos012, xpos345, uv3.y, rowtap024, rowtap135) * coltap135.y +
                 undistort_line(xpos012, xpos345, uv4.y, rowtap024, rowtap135) * coltap024.z +
                 undistort_line(xpos012, xpos345, uv5.y, rowtap024, rowtap135) * coltap135.z;
      }

      // Taps 2 and 3 are fetched together: one linearly filtered sample
      // positioned between them according to their relative weights, then
      // scaled by their combined weight.
      float u_weight_sum = rowtap024.y + rowtap135.y;
      float u_middle_offset = rowtap135.y * stepxy.x / u_weight_sum;
      float u_middle = uv2.x + u_middle_offset;

      float v_weight_sum = coltap024.y + coltap135.y;
      float v_middle_offset = coltap135.y * stepxy.y / v_weight_sum;
      float v_middle = uv2.y + v_middle_offset;

      // Integer texel coordinates for the Load() taps. Each tap is derived
      // from the unclamped base and clamped on its own, so that near the
      // top/left edge taps 1, 4 and 5 keep their true positions instead of
      // being shifted by the clamp applied to tap 0. This matches the Clamp
      // addressing used by the Sample() taps.
      float2 coord_limit = base_dimension - 0.5;
      float2 coord_base = uv0 * base_dimension;
      float2 coord0_f = min(max(coord_base, 0.5), coord_limit);
      float2 coord1_f = min(max(coord_base + 1.0, 0.5), coord_limit);
      float2 coord4_f = min(max(coord_base + 4.0, 0.5), coord_limit);
      float2 coord5_f = min(max(coord_base + 5.0, 0.5), coord_limit);

      int2 coord0 = int2(coord0_f);
      int2 coord1 = int2(coord1_f);
      int2 coord4 = int2(coord4_f);
      int2 coord5 = int2(coord5_f);

      // Row 0.
      float4 row0 = image.Load(int3(coord0, 0)) * rowtap024.x;
      row0 += image.Load(int3(coord1.x, coord0.y, 0)) * rowtap135.x;
      row0 += image.Sample(textureSampler, float2(u_middle, uv0.y)) * u_weight_sum;
      row0 += image.Load(int3(coord4.x, coord0.y, 0)) * rowtap024.z;
      row0 += image.Load(int3(coord5.x, coord0.y, 0)) * rowtap135.z;
      float4 total = row0 * coltap024.x;

      // Row 1.
      float4 row1 = image.Load(int3(coord0.x, coord1.y, 0)) * rowtap024.x;
      row1 += image.Load(int3(coord1.x, coord1.y, 0)) * rowtap135.x;
      row1 += image.Sample(textureSampler, float2(u_middle, uv1.y)) * u_weight_sum;
      row1 += image.Load(int3(coord4.x, coord1.y, 0)) * rowtap024.z;
      row1 += image.Load(int3(coord5.x, coord1.y, 0)) * rowtap135.z;
      total += row1 * coltap135.x;

      // Rows 2 and 3 combined: every fetch is a filtered sample at v_middle.
      float4 row23 = image.Sample(textureSampler, float2(uv0.x, v_middle)) * rowtap024.x;
      row23 += image.Sample(textureSampler, float2(uv1.x, v_middle)) * rowtap135.x;
      row23 += image.Sample(textureSampler, float2(u_middle, v_middle)) * u_weight_sum;
      row23 += image.Sample(textureSampler, float2(uv4.x, v_middle)) * rowtap024.z;
      row23 += image.Sample(textureSampler, float2(uv5.x, v_middle)) * rowtap135.z;
      total += row23 * v_weight_sum;

      // Row 4.
      float4 row4 = image.Load(int3(coord0.x, coord4.y, 0)) * rowtap024.x;
      row4 += image.Load(int3(coord1.x, coord4.y, 0)) * rowtap135.x;
      row4 += image.Sample(textureSampler, float2(u_middle, uv4.y)) * u_weight_sum;
      row4 += image.Load(int3(coord4.x, coord4.y, 0)) * rowtap024.z;
      row4 += image.Load(int3(coord5.x, coord4.y, 0)) * rowtap135.z;
      total += row4 * coltap024.z;

      // Row 5.
      float4 row5 = image.Load(int3(coord0.x, coord5.y, 0)) * rowtap024.x;
      row5 += image.Load(int3(coord1.x, coord5.y, 0)) * rowtap135.x;
      row5 += image.Sample(textureSampler, float2(u_middle, uv5.y)) * u_weight_sum;
      row5 += image.Load(int3(coord4.x, coord5.y, 0)) * rowtap024.z;
      row5 += image.Load(int3(coord5, 0)) * rowtap135.z;
      total += row5 * coltap135.z;

      return total;
  }
  // Pixel shader entry point: outputs the Lanczos-filtered colour as is.
  float4 PSDrawLanczosRGBA(FragData f_in, bool undistort) : TARGET
  {
      float4 rgba = DrawLanczos(f_in, undistort);
      return rgba;
  }
  // Pixel shader entry point: Lanczos filter (no undistort) followed by
  // dividing RGB by alpha. RGB becomes 0 when alpha is not positive.
  float4 PSDrawLanczosRGBADivide(FragData f_in) : TARGET
  {
      float4 filtered = DrawLanczos(f_in, false);

      float inv_alpha = 0.0;
      if (filtered.a > 0.0)
          inv_alpha = 1.0 / filtered.a;

      return float4(filtered.rgb * inv_alpha, filtered.a);
  }
  // Lanczos resample without undistortion.
  technique Draw
  {
      pass
      {
          vertex_shader = VSDefault(v_in);
          pixel_shader = PSDrawLanczosRGBA(f_in, false);
      }
  }
  // Lanczos resample followed by dividing RGB by alpha.
  technique DrawAlphaDivide
  {
      pass
      {
          vertex_shader = VSDefault(v_in);
          pixel_shader = PSDrawLanczosRGBADivide(f_in);
      }
  }
  // Lanczos resample with the horizontal undistort curve applied to each tap.
  technique DrawUndistort
  {
      pass
      {
          vertex_shader = VSDefault(v_in);
          pixel_shader = PSDrawLanczosRGBA(f_in, true);
      }
  }
  203. }