xbrz_tools.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
// ****************************************************************************
// * This file is part of the xBRZ project. It is distributed under           *
// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0         *
// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
// *                                                                         *
// * Additionally and as a special exception, the author gives permission     *
// * to link the code of this program with the following libraries            *
// * (or with modified versions that use the same licenses), and distribute   *
// * linked combinations including the two: MAME, FreeFileSync, Snes9x, ePSXe *
// * You must obey the GNU General Public License in all respects for all of  *
// * the code used other than MAME, FreeFileSync, Snes9x, ePSXe.              *
// * If you modify this file, you may extend this exception to your version   *
// * of the file, but you are not obligated to do so. If you do not wish to   *
// * do so, delete this exception statement from your version.                *
// ****************************************************************************
  16. #ifndef XBRZ_TOOLS_H_825480175091875
  17. #define XBRZ_TOOLS_H_825480175091875
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <type_traits>
#include <vector>
  21. namespace xbrz
  22. {
  //extract byte number N of a packed 32-bit value: N = 0 is the least significant byte, N = 3 the most significant
  template <uint32_t N> inline
  unsigned char getByte(uint32_t val)
  {
      //shifting a 32-bit value by 32 bits or more is undefined behavior => reject bad indices at compile time
      static_assert(N < sizeof(uint32_t), "getByte: byte index must be in the range [0, 3]");
      return static_cast<unsigned char>((val >> (8 * N)) & 0xff);
  }

  //channel accessors for a packed pixel: alpha = byte 3, red = byte 2, green = byte 1, blue = byte 0
  inline unsigned char getAlpha(uint32_t pix) { return getByte<3>(pix); }
  inline unsigned char getRed  (uint32_t pix) { return getByte<2>(pix); }
  inline unsigned char getGreen(uint32_t pix) { return getByte<1>(pix); }
  inline unsigned char getBlue (uint32_t pix) { return getByte<0>(pix); }
  //pack four 8-bit channels into one 32-bit pixel: a -> bits 24-31, r -> bits 16-23, g -> bits 8-15, b -> bits 0-7
  inline uint32_t makePixel(unsigned char a, unsigned char r, unsigned char g, unsigned char b)
  {
      //widen to uint32_t *before* shifting: "a << 24" would be evaluated on the int-promoted value
      //and shift into the sign bit for a >= 128
      return (static_cast<uint32_t>(a) << 24) |
             (static_cast<uint32_t>(r) << 16) |
             (static_cast<uint32_t>(g) <<  8) |
             /**/static_cast<uint32_t>(b);
  }

  //pack three 8-bit channels; the top byte (bits 24-31) of the result is zero
  inline uint32_t makePixel(unsigned char r, unsigned char g, unsigned char b)
  {
      return (static_cast<uint32_t>(r) << 16) |
             (static_cast<uint32_t>(g) <<  8) |
             /**/static_cast<uint32_t>(b);
  }
  //RGB555 -> RGB888: each 5-bit channel is moved to the top 5 bits of its byte; the low 3 bits of every byte stay zero
  inline uint32_t rgb555to888(uint16_t pix)
  {
      const uint32_t wide  = pix;
      const uint32_t red   = (wide & 0x7C00) << 9;
      const uint32_t green = (wide & 0x03E0) << 6;
      const uint32_t blue  = (wide & 0x001F) << 3;
      return red | green | blue;
  }

  //RGB565 -> RGB888: as above, but green carries 6 bits and lands in the top 6 bits of its byte
  inline uint32_t rgb565to888(uint16_t pix)
  {
      const uint32_t wide  = pix;
      const uint32_t red   = (wide & 0xF800) << 8;
      const uint32_t green = (wide & 0x07E0) << 5;
      const uint32_t blue  = (wide & 0x001F) << 3;
      return red | green | blue;
  }

  //RGB888 -> RGB555: keep the top 5 bits of each color byte; bits 24-31 of the input are ignored
  inline uint16_t rgb888to555(uint32_t pix)
  {
      const uint32_t red   = (pix & 0xF80000) >> 9;
      const uint32_t green = (pix & 0x00F800) >> 6;
      const uint32_t blue  = (pix & 0x0000F8) >> 3;
      return static_cast<uint16_t>(red | green | blue);
  }

  //RGB888 -> RGB565: keep the top 5 bits of red and blue and the top 6 bits of green; bits 24-31 of the input are ignored
  inline uint16_t rgb888to565(uint32_t pix)
  {
      const uint32_t red   = (pix & 0xF80000) >> 8;
      const uint32_t green = (pix & 0x00FC00) >> 5;
      const uint32_t blue  = (pix & 0x0000F8) >> 3;
      return static_cast<uint16_t>(red | green | blue);
  }
  //move a pixel pointer by a signed number of *bytes* (not elements); constness of Pix carries over to the result
  template <class Pix> inline
  Pix* byteAdvance(Pix* ptr, int bytes)
  {
      static_assert(std::is_integral<typename std::remove_cv<Pix>::type>::value, "Pix* is expected to be cast-able to char*");

      //byte-sized pointer type with the same constness as Pix
      using BytePtr = typename std::conditional<std::is_const<Pix>::value, const char*, char*>::type;

      const BytePtr rawPos = reinterpret_cast<BytePtr>(ptr);
      return reinterpret_cast<Pix*>(rawPos + bytes);
  }
  //paint a rectangle of blockWidth x blockHeight pixels with one color; trg is its top-left pixel, rows are "pitch" bytes apart
  template <class Pix> inline
  void fillBlock(Pix* trg, int pitch /*[bytes]*/, Pix col, int blockWidth, int blockHeight)
  {
      //plain loops on purpose; the std::fill() formulation is kept disabled, as in the original code:
      //    std::fill(row, row + blockWidth, col);
      Pix* row = trg;
      for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
      {
          for (int i = 0; i < blockWidth; ++i)
              row[i] = col;

          row = byteAdvance(row, pitch);
      }
  }
  //nearest-neighbour scaling, going over the *target* image:
  // - slow for upscaling, since the source is read multiple times, missing out on cache
  // - fast for similar image sizes
  //only target rows in [yFirst, yLast) are written, after clamping that range to [0, trgHeight)
  //pitches are row strides in bytes; pixCvrt converts each sampled PixSrc into the PixTrg that gets stored
  template <class PixSrc, class PixTrg, class PixConverter>
  void nearestneighbourScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
                             /**/  PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
                             int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
  {
      static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
      static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
      static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");

      //a pitch that cannot hold one full row of pixels is a caller error: flag it via assert and bail out
      if (!(srcPitch >= srcWidth * static_cast<int>(sizeof(PixSrc)) &&
            trgPitch >= trgWidth * static_cast<int>(sizeof(PixTrg))))
      {
          assert(false);
          return;
      }

      const int rowBegin = std::max(yFirst, 0);
      const int rowEnd   = std::min(yLast, trgHeight);

      if (rowBegin >= rowEnd) return; //nothing to do for this slice
      if (srcHeight <= 0 || srcWidth <= 0) return; //empty source

      for (int yTrg = rowBegin; yTrg < rowEnd; ++yTrg)
      {
          //source row feeding this target row: floor(srcHeight * yTrg / trgHeight)
          const PixSrc* const srcRow = byteAdvance(src, (srcHeight * yTrg / trgHeight) * srcPitch);
          PixTrg*       const trgRow = byteAdvance(trg, yTrg * trgPitch);

          for (int xTrg = 0; xTrg < trgWidth; ++xTrg)
              trgRow[xTrg] = pixCvrt(srcRow[srcWidth * xTrg / trgWidth]);
      }
  }
  //nearest-neighbour scaling, going over the *source* image: fast for upscaling, since the source is read only once
  //each source pixel is converted once via pixCvrt and painted as a rectangular block into the target
  //only source rows in [yFirst, yLast) are processed, after clamping that range to [0, srcHeight)
  //pitches are row strides in bytes
  template <class PixSrc, class PixTrg, class PixConverter>
  void nearestneighbourScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
                                       /**/  PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
                                       int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
  {
      static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
      static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
      static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");

      //a pitch that cannot hold one full row of pixels is a caller error: flag it via assert and bail out
      if (!(srcPitch >= srcWidth * static_cast<int>(sizeof(PixSrc)) &&
            trgPitch >= trgWidth * static_cast<int>(sizeof(PixTrg))))
      {
          assert(false);
          return;
      }

      const int rowBegin = std::max(yFirst, 0);
      const int rowEnd   = std::min(yLast, srcHeight);

      if (rowBegin >= rowEnd) return; //nothing to do for this slice
      if (trgWidth <= 0 || trgHeight <= 0) return; //empty target

      for (int ySrc = rowBegin; ySrc < rowEnd; ++ySrc)
      {
          //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
          // => the target rows of this source row are the integers in [ySrc, ySrc + 1) * trgHeight / srcHeight
          //computed per row (not incrementally) to support MT input slices!
          const int yTrgBegin = ( ySrc      * trgHeight + srcHeight - 1) / srcHeight; //=ceil( ySrc      * trgHeight / srcHeight)
          const int yTrgEnd   = ((ySrc + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil((ySrc + 1) * trgHeight / srcHeight)

          const int blockHeight = yTrgEnd - yTrgBegin;
          if (blockHeight <= 0)
              continue; //this source row does not map to any target row

          const PixSrc* const srcRow = byteAdvance(src, ySrc * srcPitch);
          PixTrg* trgPos = byteAdvance(trg, yTrgBegin * trgPitch); //top-left pixel of the next block to paint

          int xTrgBegin = 0;
          for (int xSrc = 0; xSrc < srcWidth; ++xSrc)
          {
              const int xTrgEnd    = ((xSrc + 1) * trgWidth + srcWidth - 1) / srcWidth;
              const int blockWidth = xTrgEnd - xTrgBegin;
              if (blockWidth <= 0)
                  continue; //this source pixel does not map to any target column

              xTrgBegin = xTrgEnd;

              const auto blockColor = pixCvrt(srcRow[xSrc]);
              fillBlock(trgPos, trgPitch, blockColor, blockWidth, blockHeight);
              trgPos += blockWidth;
          }
      }
  }
  129. template <class PixTrg, class PixConverter>
  130. void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
  131. /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch,
  132. int yFirst, int yLast, PixConverter pixCvrt /*convert uint32_t to PixTrg*/)
  133. {
  134. static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
  135. static_assert(std::is_same<decltype(pixCvrt(uint32_t())), PixTrg>::value, "PixConverter returning wrong pixel format");
  136. if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
  137. trgPitch < trgWidth * static_cast<int>(sizeof(PixTrg)))
  138. {
  139. assert(false);
  140. return;
  141. }
  142. yFirst = std::max(yFirst, 0);
  143. yLast = std::min(yLast, trgHeight);
  144. if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
  145. const double scaleX = static_cast<double>(trgWidth ) / srcWidth;
  146. const double scaleY = static_cast<double>(trgHeight) / srcHeight;
  147. //perf notes:
  148. // -> double-based calculation is (slightly) faster than float
  149. // -> pre-calculation gives significant boost; std::vector<> memory allocation is negligible!
  150. struct CoeffsX
  151. {
  152. int x1 = 0;
  153. int x2 = 0;
  154. double xx1 = 0;
  155. double x2x = 0;
  156. };
  157. std::vector<CoeffsX> buf(trgWidth);
  158. for (int x = 0; x < trgWidth; ++x)
  159. {
  160. const int x1 = srcWidth * x / trgWidth;
  161. int x2 = x1 + 1;
  162. if (x2 == srcWidth) --x2;
  163. const double xx1 = x / scaleX - x1;
  164. const double x2x = 1 - xx1;
  165. buf[x] = { x1, x2, xx1, x2x };
  166. }
  167. for (int y = yFirst; y < yLast; ++y)
  168. {
  169. const int y1 = srcHeight * y / trgHeight;
  170. int y2 = y1 + 1;
  171. if (y2 == srcHeight) --y2;
  172. const double yy1 = y / scaleY - y1;
  173. const double y2y = 1 - yy1;
  174. const uint32_t* const srcLine = byteAdvance(src, y1 * srcPitch);
  175. const uint32_t* const srcLineNext = byteAdvance(src, y2 * srcPitch);
  176. PixTrg* const trgLine = byteAdvance(trg, y * trgPitch);
  177. for (int x = 0; x < trgWidth; ++x)
  178. {
  179. //perf: do NOT "simplify" the variable layout without measurement!
  180. const int x1 = buf[x].x1;
  181. const int x2 = buf[x].x2;
  182. const double xx1 = buf[x].xx1;
  183. const double x2x = buf[x].x2x;
  184. const double x2xy2y = x2x * y2y;
  185. const double xx1y2y = xx1 * y2y;
  186. const double x2xyy1 = x2x * yy1;
  187. const double xx1yy1 = xx1 * yy1;
  188. auto interpolate = [=](int offset)
  189. {
  190. /* https://en.wikipedia.org/wiki/Bilinear_interpolation
  191. (c11(x2 - x) + c21(x - x1)) * (y2 - y ) +
  192. (c12(x2 - x) + c22(x - x1)) * (y - y1) */
  193. const auto c11 = (srcLine [x1] >> (8 * offset)) & 0xff;
  194. const auto c21 = (srcLine [x2] >> (8 * offset)) & 0xff;
  195. const auto c12 = (srcLineNext[x1] >> (8 * offset)) & 0xff;
  196. const auto c22 = (srcLineNext[x2] >> (8 * offset)) & 0xff;
  197. return c11 * x2xy2y + c21 * xx1y2y +
  198. c12 * x2xyy1 + c22 * xx1yy1;
  199. };
  200. const double bi = interpolate(0);
  201. const double gi = interpolate(1);
  202. const double ri = interpolate(2);
  203. const double ai = interpolate(3);
  204. const auto b = static_cast<uint32_t>(bi + 0.5);
  205. const auto g = static_cast<uint32_t>(gi + 0.5);
  206. const auto r = static_cast<uint32_t>(ri + 0.5);
  207. const auto a = static_cast<uint32_t>(ai + 0.5);
  208. const uint32_t trgPix = (a << 24) | (r << 16) | (g << 8) | b;
  209. trgLine[x] = pixCvrt(trgPix);
  210. }
  211. }
  212. }
  213. }
  214. #endif //XBRZ_TOOLS_H_825480175091875