瀏覽代碼

Merge branch 'develop' into resource_generation

Laserlicht 1 年之前
父節點
當前提交
01e55beef1
共有 70 個文件被更改,包括 3793 次插入和 438 次删除
  1. 1 5
      AI/BattleAI/CMakeLists.txt
  2. 0 4
      AI/CMakeLists.txt
  3. 1 5
      AI/Nullkiller/CMakeLists.txt
  4. 5 1
      CMakeLists.txt
  5. 7 0
      client/CMakeLists.txt
  6. 1 1
      client/CPlayerInterface.cpp
  7. 1 1
      client/adventureMap/AdventureMapInterface.cpp
  8. 1 1
      client/adventureMap/CMinimap.cpp
  9. 3 1
      client/battle/BattleFieldController.cpp
  10. 5 31
      client/battle/CreatureAnimation.cpp
  11. 0 2
      client/battle/CreatureAnimation.h
  12. 7 5
      client/eventsSDL/InputSourceMouse.cpp
  13. 1 1
      client/gui/CGuiHandler.cpp
  14. 2 2
      client/gui/CIntObject.cpp
  15. 2 2
      client/gui/WindowHandler.cpp
  16. 9 11
      client/mapView/MapRenderer.cpp
  17. 1 1
      client/mapView/MapRenderer.h
  18. 3 3
      client/mapView/MapViewCache.cpp
  19. 5 5
      client/media/CVideoHandler.cpp
  20. 1 0
      client/render/CAnimation.cpp
  21. 0 50
      client/render/CDefFile.cpp
  22. 53 32
      client/render/Canvas.cpp
  23. 18 6
      client/render/Canvas.h
  24. 1 0
      client/render/Colors.cpp
  25. 3 0
      client/render/Colors.h
  26. 16 7
      client/render/IFont.cpp
  27. 3 1
      client/render/IFont.h
  28. 23 16
      client/render/IImage.h
  29. 1 1
      client/render/IRenderHandler.h
  30. 5 0
      client/render/IScreenHandler.h
  31. 40 1
      client/render/ImageLocator.cpp
  32. 10 0
      client/render/ImageLocator.h
  33. 143 54
      client/renderSDL/CBitmapFont.cpp
  34. 16 4
      client/renderSDL/CBitmapFont.h
  35. 1 1
      client/renderSDL/CBitmapHanFont.h
  36. 7 8
      client/renderSDL/CTrueTypeFont.cpp
  37. 1 1
      client/renderSDL/CTrueTypeFont.h
  38. 2 2
      client/renderSDL/CursorHardware.cpp
  39. 12 5
      client/renderSDL/CursorSoftware.cpp
  40. 149 0
      client/renderSDL/ImageScaled.cpp
  41. 67 0
      client/renderSDL/ImageScaled.h
  42. 94 31
      client/renderSDL/RenderHandler.cpp
  43. 16 6
      client/renderSDL/RenderHandler.h
  44. 313 47
      client/renderSDL/SDLImage.cpp
  45. 36 19
      client/renderSDL/SDLImage.h
  46. 1 1
      client/renderSDL/SDLImageLoader.cpp
  47. 3 3
      client/renderSDL/SDLImageLoader.h
  48. 68 21
      client/renderSDL/SDL_Extensions.cpp
  49. 7 4
      client/renderSDL/SDL_Extensions.h
  50. 75 4
      client/renderSDL/ScreenHandler.cpp
  51. 23 0
      client/renderSDL/ScreenHandler.h
  52. 6 4
      client/widgets/Images.cpp
  53. 1 1
      client/widgets/Images.h
  54. 1 1
      client/windows/CCastleInterface.cpp
  55. 2 2
      client/windows/CMapOverview.cpp
  56. 1 1
      client/windows/CMessage.cpp
  57. 2 2
      client/windows/CSpellWindow.cpp
  58. 1 1
      client/windows/CWindowWithArtifacts.cpp
  59. 66 0
      client/xBRZ/Changelog.txt
  60. 621 0
      client/xBRZ/License.txt
  61. 1367 0
      client/xBRZ/xbrz.cpp
  62. 79 0
      client/xBRZ/xbrz.h
  63. 35 0
      client/xBRZ/xbrz_config.h
  64. 266 0
      client/xBRZ/xbrz_tools.h
  65. 8 2
      config/schemas/settings.json
  66. 21 2
      launcher/settingsView/csettingsview_moc.cpp
  67. 1 0
      launcher/settingsView/csettingsview_moc.h
  68. 42 14
      launcher/settingsView/csettingsview_moc.ui
  69. 3 1
      lib/CMakeLists.txt
  70. 7 0
      lib/Rect.h

+ 1 - 5
AI/BattleAI/CMakeLists.txt

@@ -37,11 +37,7 @@ else()
 endif()
 
 target_include_directories(BattleAI PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(BattleAI PRIVATE vcmi TBB::tbb)
+target_link_libraries(BattleAI PRIVATE vcmi)
 
 vcmi_set_output_dir(BattleAI "AI")
 enable_pch(BattleAI)
-
-if(APPLE_IOS AND NOT USING_CONAN)
-	install(IMPORTED_RUNTIME_ARTIFACTS TBB::tbb LIBRARY DESTINATION ${LIB_DIR}) # CMake 3.21+
-endif()

+ 0 - 4
AI/CMakeLists.txt

@@ -8,10 +8,6 @@ else()
 	option(FORCE_BUNDLED_FL "Force to use FuzzyLite included into VCMI's source tree" OFF)
 endif()
 
-if(TBB_FOUND AND MSVC)
-	   install_vcpkg_imported_tgt(TBB::tbb)
-endif()
-
 #FuzzyLite uses MSVC pragmas in headers, so, we need to disable -Wunknown-pragmas
 if(MINGW)
     add_compile_options(-Wno-unknown-pragmas)

+ 1 - 5
AI/Nullkiller/CMakeLists.txt

@@ -157,11 +157,7 @@ else()
 endif()
 
 target_include_directories(Nullkiller PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(Nullkiller PUBLIC vcmi fuzzylite::fuzzylite TBB::tbb)
+target_link_libraries(Nullkiller PUBLIC vcmi fuzzylite::fuzzylite)
 
 vcmi_set_output_dir(Nullkiller "AI")
 enable_pch(Nullkiller)
-
-if(APPLE_IOS AND NOT USING_CONAN)
-	install(IMPORTED_RUNTIME_ARTIFACTS TBB::tbb LIBRARY DESTINATION ${LIB_DIR}) # CMake 3.21+
-endif()

+ 5 - 1
CMakeLists.txt

@@ -519,7 +519,7 @@ if(ENABLE_LAUNCHER OR ENABLE_EDITOR)
 	endif()
 endif()
 
-if(ENABLE_CLIENT)
+if(NOT ENABLE_MINIMAL_LIB)
 	find_package(TBB REQUIRED)
 endif()
 
@@ -722,6 +722,10 @@ endif()
 
 
 if(WIN32)
+	if(TBB_FOUND AND MSVC)
+		   install_vcpkg_imported_tgt(TBB::tbb)
+	endif()
+
 	if(USING_CONAN)
 		#Conan imports enabled
 		vcmi_install_conan_deps("\${CMAKE_INSTALL_PREFIX}")

+ 7 - 0
client/CMakeLists.txt

@@ -98,6 +98,7 @@ set(client_SRCS
 	renderSDL/CTrueTypeFont.cpp
 	renderSDL/CursorHardware.cpp
 	renderSDL/CursorSoftware.cpp
+	renderSDL/ImageScaled.cpp
 	renderSDL/RenderHandler.cpp
 	renderSDL/SDLImage.cpp
 	renderSDL/SDLImageLoader.cpp
@@ -172,6 +173,8 @@ set(client_SRCS
 	windows/settings/BattleOptionsTab.cpp
 	windows/settings/AdventureOptionsTab.cpp
 
+	xBRZ/xbrz.cpp
+
 	ArtifactsUIController.cpp
 	CGameInfo.cpp
 	CMT.cpp
@@ -303,6 +306,7 @@ set(client_HEADERS
 	renderSDL/CTrueTypeFont.h
 	renderSDL/CursorHardware.h
 	renderSDL/CursorSoftware.h
+	renderSDL/ImageScaled.h
 	renderSDL/RenderHandler.h
 	renderSDL/SDLImage.h
 	renderSDL/SDLImageLoader.h
@@ -380,6 +384,9 @@ set(client_HEADERS
 	windows/settings/BattleOptionsTab.h
 	windows/settings/AdventureOptionsTab.h
 
+	xBRZ/xbrz.h
+	xBRZ/xbrz_tools.h
+
 	ArtifactsUIController.h
 	CGameInfo.h
 	CMT.h

+ 1 - 1
client/CPlayerInterface.cpp

@@ -1145,7 +1145,7 @@ void CPlayerInterface::showMapObjectSelectDialog(QueryID askID, const Component
 		if(t)
 		{
 			auto image = GH.renderHandler().loadImage(AnimationPath::builtin("ITPA"), t->town->clientInfo.icons[t->hasFort()][false] + 2, 0, EImageBlitMode::OPAQUE);
-			image->scaleFast(Point(35, 23));
+			image->scaleTo(Point(35, 23));
 			images.push_back(image);
 		}
 	}

+ 1 - 1
client/adventureMap/AdventureMapInterface.cpp

@@ -453,7 +453,7 @@ void AdventureMapInterface::onPlayerTurnStarted(PlayerColor playerID)
 		widget->getInfoBar()->showDate();
 
 	onHeroChanged(nullptr);
-	Canvas canvas = Canvas::createFromSurface(screen);
+	Canvas canvas = Canvas::createFromSurface(screen, CanvasScalingPolicy::AUTO);
 	showAll(canvas);
 	mapAudio->onPlayerTurnStarted();
 

+ 1 - 1
client/adventureMap/CMinimap.cpp

@@ -73,7 +73,7 @@ void CMinimapInstance::redrawMinimap()
 
 CMinimapInstance::CMinimapInstance(CMinimap *Parent, int Level):
 	parent(Parent),
-	minimap(new Canvas(Point(LOCPLINT->cb->getMapSize().x, LOCPLINT->cb->getMapSize().y))),
+	minimap(new Canvas(Point(LOCPLINT->cb->getMapSize().x, LOCPLINT->cb->getMapSize().y), CanvasScalingPolicy::IGNORE)),
 	level(Level)
 {
 	pos.w = parent->pos.w;

+ 3 - 1
client/battle/BattleFieldController.cpp

@@ -124,6 +124,8 @@ BattleFieldController::BattleFieldController(BattleInterface & owner):
 	rangedFullDamageLimitImages = GH.renderHandler().loadAnimation(AnimationPath::builtin("battle/rangeHighlights/rangeHighlightsGreen.json"), EImageBlitMode::COLORKEY);
 	shootingRangeLimitImages = GH.renderHandler().loadAnimation(AnimationPath::builtin("battle/rangeHighlights/rangeHighlightsRed.json"), EImageBlitMode::COLORKEY);
 
+	cellShade->setShadowEnabled(true);
+
 	if(!owner.siegeController)
 	{
 		auto bfieldType = owner.getBattle()->battleGetBattlefieldType();
@@ -142,7 +144,7 @@ BattleFieldController::BattleFieldController(BattleInterface & owner):
 	pos.w = background->width();
 	pos.h = background->height();
 
-	backgroundWithHexes = std::make_unique<Canvas>(Point(background->width(), background->height()));
+	backgroundWithHexes = std::make_unique<Canvas>(Point(background->width(), background->height()), CanvasScalingPolicy::AUTO);
 
 	updateAccessibleHexes();
 	addUsedEvents(LCLICK | SHOW_POPUP | MOVE | TIME | GESTURE);

+ 5 - 31
client/battle/CreatureAnimation.cpp

@@ -323,33 +323,6 @@ static ColorRGBA genBorderColor(ui8 alpha, const ColorRGBA & base)
 	return ColorRGBA(base.r, base.g, base.b, ui8(base.a * alpha / 256));
 }
 
-static ui8 mixChannels(ui8 c1, ui8 c2, ui8 a1, ui8 a2)
-{
-	return c1*a1 / 256 + c2*a2*(255 - a1) / 256 / 256;
-}
-
-static ColorRGBA addColors(const ColorRGBA & base, const ColorRGBA & over)
-{
-	return ColorRGBA(
-			mixChannels(over.r, base.r, over.a, base.a),
-			mixChannels(over.g, base.g, over.a, base.a),
-			mixChannels(over.b, base.b, over.a, base.a),
-			ui8(over.a + base.a * (255 - over.a) / 256)
-			);
-}
-
-void CreatureAnimation::genSpecialPalette(IImage::SpecialPalette & target)
-{
-	target.resize(8);
-	target[0] = genShadow(0);
-	target[1] = genShadow(shadowAlpha / 2);
-	// colors 2 & 3 are not used in creatures
-	target[4] = genShadow(shadowAlpha);
-	target[5] = genBorderColor(getBorderStrength(elapsedTime), border);
-	target[6] = addColors(genShadow(shadowAlpha),     genBorderColor(getBorderStrength(elapsedTime), border));
-	target[7] = addColors(genShadow(shadowAlpha / 2), genBorderColor(getBorderStrength(elapsedTime), border));
-}
-
 void CreatureAnimation::nextFrame(Canvas & canvas, const ColorFilter & shifter, bool facingRight)
 {
 	ColorRGBA shadowTest = shifter.shiftColor(genShadow(128));
@@ -366,11 +339,12 @@ void CreatureAnimation::nextFrame(Canvas & canvas, const ColorFilter & shifter,
 
 	if(image)
 	{
-		IImage::SpecialPalette SpecialPalette;
-		genSpecialPalette(SpecialPalette);
+		image->setShadowEnabled(true);
+		image->setOverlayEnabled(isIdle());
+		if (isIdle())
+			image->setOverlayColor(genBorderColor(getBorderStrength(elapsedTime), border));
 
-		image->setSpecialPalette(SpecialPalette, IImage::SPECIAL_PALETTE_MASK_CREATURES);
-		image->adjustPalette(shifter, IImage::SPECIAL_PALETTE_MASK_CREATURES);
+		image->adjustPalette(shifter, 0);
 
 		canvas.draw(image, pos.topLeft(), Rect(0, 0, pos.w, pos.h));
 

+ 0 - 2
client/battle/CreatureAnimation.h

@@ -107,9 +107,7 @@ private:
 
 	void endAnimation();
 
-	void genSpecialPalette(IImage::SpecialPalette & target);
 public:
-
 	/// function(s) that will be called when animation ends, after reset to 1st frame
 	/// NOTE that these functions will be fired only once
 	CFunctionList<void()> onAnimationReset;

+ 7 - 5
client/eventsSDL/InputSourceMouse.cpp

@@ -16,6 +16,8 @@
 #include "../gui/EventDispatcher.h"
 #include "../gui/MouseButton.h"
 
+#include "../render/IScreenHandler.h"
+
 #include "../../lib/Point.h"
 #include "../../lib/CConfigHandler.h"
 
@@ -30,8 +32,8 @@ InputSourceMouse::InputSourceMouse()
 
 void InputSourceMouse::handleEventMouseMotion(const SDL_MouseMotionEvent & motion)
 {
-	Point newPosition(motion.x, motion.y);
-	Point distance(-motion.xrel, -motion.yrel);
+	Point newPosition = Point(motion.x, motion.y) / GH.screenHandler().getScalingFactor();
+	Point distance= Point(-motion.xrel, -motion.yrel) / GH.screenHandler().getScalingFactor();
 
 	mouseButtonsMask = motion.state;
 
@@ -45,7 +47,7 @@ void InputSourceMouse::handleEventMouseMotion(const SDL_MouseMotionEvent & motio
 
 void InputSourceMouse::handleEventMouseButtonDown(const SDL_MouseButtonEvent & button)
 {
-	Point position(button.x, button.y);
+	Point position = Point(button.x, button.y) / GH.screenHandler().getScalingFactor();
 
 	switch(button.button)
 	{
@@ -67,12 +69,12 @@ void InputSourceMouse::handleEventMouseButtonDown(const SDL_MouseButtonEvent & b
 
 void InputSourceMouse::handleEventMouseWheel(const SDL_MouseWheelEvent & wheel)
 {
-	GH.events().dispatchMouseScrolled(Point(wheel.x, wheel.y), GH.getCursorPosition());
+	GH.events().dispatchMouseScrolled(Point(wheel.x, wheel.y) / GH.screenHandler().getScalingFactor(), GH.getCursorPosition());
 }
 
 void InputSourceMouse::handleEventMouseButtonUp(const SDL_MouseButtonEvent & button)
 {
-	Point position(button.x, button.y);
+	Point position = Point(button.x, button.y) / GH.screenHandler().getScalingFactor();
 
 	switch(button.button)
 	{

+ 1 - 1
client/gui/CGuiHandler.cpp

@@ -179,7 +179,7 @@ const Point & CGuiHandler::getCursorPosition() const
 
 Point CGuiHandler::screenDimensions() const
 {
-	return Point(screen->w, screen->h);
+	return screenHandlerInstance->getLogicalResolution();
 }
 
 void CGuiHandler::drawFPSCounter()

+ 2 - 2
client/gui/CIntObject.cpp

@@ -233,12 +233,12 @@ void CIntObject::redraw()
 		}
 		else
 		{
-			Canvas buffer = Canvas::createFromSurface(screenBuf);
+			Canvas buffer = Canvas::createFromSurface(screenBuf, CanvasScalingPolicy::AUTO);
 
 			showAll(buffer);
 			if(screenBuf != screen)
 			{
-				Canvas screenBuffer = Canvas::createFromSurface(screen);
+				Canvas screenBuffer = Canvas::createFromSurface(screen, CanvasScalingPolicy::AUTO);
 
 				showAll(screenBuffer);
 			}

+ 2 - 2
client/gui/WindowHandler.cpp

@@ -111,7 +111,7 @@ void WindowHandler::totalRedrawImpl()
 {
 	logGlobal->debug("totalRedraw requested!");
 
-	Canvas target = Canvas::createFromSurface(screen2);
+	Canvas target = Canvas::createFromSurface(screen2, CanvasScalingPolicy::AUTO);
 
 	for(auto & elem : windowsStack)
 		elem->showAll(target);
@@ -134,7 +134,7 @@ void WindowHandler::simpleRedrawImpl()
 	if(windowsStack.size() > 1)
 		CSDL_Ext::blitAt(screen2, 0, 0, screen); //blit background
 
-	Canvas target = Canvas::createFromSurface(screen);
+	Canvas target = Canvas::createFromSurface(screen, CanvasScalingPolicy::AUTO);
 
 	if(!windowsStack.empty())
 		windowsStack.back()->show(target); //blit active interface/window

+ 9 - 11
client/mapView/MapRenderer.cpp

@@ -21,6 +21,7 @@
 #include "../render/IImage.h"
 #include "../render/IRenderHandler.h"
 #include "../render/Colors.h"
+#include "../render/Graphics.h"
 
 #include "../../CCallback.h"
 
@@ -477,23 +478,20 @@ void MapRendererObjects::renderImage(IMapRendererContext & context, Canvas & tar
 		return;
 
 	image->setAlpha(transparency);
-	image->setFlagColor(object->tempOwner);
+	image->setShadowEnabled(true);
+	image->setOverlayEnabled(object->getOwner().isValidPlayer() || object->getOwner() == PlayerColor::NEUTRAL);
+
+	if (object->getOwner().isValidPlayer())
+		image->setOverlayColor(graphics->playerColors[object->getOwner().getNum()]);
+
+	if (object->getOwner() == PlayerColor::NEUTRAL)
+		image->setOverlayColor(graphics->neutralColor);
 
 	Point offsetPixels = context.objectImageOffset(object->id, coordinates);
 
 	if ( offsetPixels.x < image->dimensions().x && offsetPixels.y < image->dimensions().y)
 	{
 		Point imagePos = image->dimensions() - offsetPixels - Point(32, 32);
-
-		//if (transparency == 255)
-		//{
-		//	Canvas intermediate(Point(32,32));
-		//	intermediate.enableTransparency(true);
-		//	image->setBlitMode(EImageBlitMode::OPAQUE);
-		//	intermediate.draw(image, Point(0, 0), Rect(imagePos, Point(32,32)));
-		//	target.draw(intermediate, Point(0,0));
-		//	return;
-		//}
 		target.draw(image, Point(0, 0), Rect(imagePos, Point(32,32)));
 	}
 }

+ 1 - 1
client/mapView/MapRenderer.h

@@ -23,7 +23,7 @@ class CAnimation;
 class IImage;
 class Canvas;
 class IMapRendererContext;
-enum class EImageBlitMode;
+enum class EImageBlitMode : uint8_t;
 
 class MapTileStorage
 {

+ 3 - 3
client/mapView/MapViewCache.cpp

@@ -36,9 +36,9 @@ MapViewCache::MapViewCache(const std::shared_ptr<MapViewModel> & model)
 	, overlayWasVisible(false)
 	, mapRenderer(new MapRenderer())
 	, iconsStorage(GH.renderHandler().loadAnimation(AnimationPath::builtin("VwSymbol"), EImageBlitMode::COLORKEY))
-	, intermediate(new Canvas(Point(32, 32)))
-	, terrain(new Canvas(model->getCacheDimensionsPixels()))
-	, terrainTransition(new Canvas(model->getPixelsVisibleDimensions()))
+	, intermediate(new Canvas(Point(32, 32), CanvasScalingPolicy::AUTO))
+	, terrain(new Canvas(model->getCacheDimensionsPixels(), CanvasScalingPolicy::AUTO))
+	, terrainTransition(new Canvas(model->getPixelsVisibleDimensions(), CanvasScalingPolicy::AUTO))
 {
 	Point visibleSize = model->getTilesVisibleDimensions();
 	terrainChecksum.resize(boost::extents[visibleSize.x][visibleSize.y]);

+ 5 - 5
client/media/CVideoHandler.cpp

@@ -19,6 +19,7 @@
 #include "../eventsSDL/InputHandler.h"
 #include "../gui/CGuiHandler.h"
 #include "../render/Canvas.h"
+#include "../render/IScreenHandler.h"
 #include "../renderSDL/SDL_Extensions.h"
 
 #include "../../lib/filesystem/CInputStream.h"
@@ -182,8 +183,7 @@ void CVideoInstance::prepareOutput(bool scaleToScreenSize, bool useTextureOutput
 	}
 	else
 	{
-		dimensions.x  = getCodecContext()->width;
-		dimensions.y = getCodecContext()->height;
+		dimensions = Point(getCodecContext()->width, getCodecContext()->height) * GH.screenHandler().getScalingFactor();
 	}
 
 	// Allocate a place to put our YUV image on that screen
@@ -207,7 +207,7 @@ void CVideoInstance::prepareOutput(bool scaleToScreenSize, bool useTextureOutput
 	}
 	else
 	{
-		surface = CSDL_Ext::newSurface(dimensions.x, dimensions.y);
+		surface = CSDL_Ext::newSurface(dimensions);
 		sws = sws_getContext(getCodecContext()->width, getCodecContext()->height, getCodecContext()->pix_fmt,
 							 dimensions.x, dimensions.y, AV_PIX_FMT_RGB32,
 							 SWS_BICUBIC, nullptr, nullptr, nullptr);
@@ -362,7 +362,7 @@ void CVideoInstance::show(const Point & position, Canvas & canvas)
 	if(sws == nullptr)
 		throw std::runtime_error("No video to show!");
 
-	CSDL_Ext::blitSurface(surface, canvas.getInternalSurface(), position);
+	CSDL_Ext::blitSurface(surface, canvas.getInternalSurface(), position * GH.screenHandler().getScalingFactor());
 }
 
 double FFMpegStream::getCurrentFrameEndTime() const
@@ -639,7 +639,7 @@ bool CVideoPlayer::playIntroVideo(const VideoPath & name)
 
 void CVideoPlayer::playSpellbookAnimation(const VideoPath & name, const Point & position)
 {
-	openAndPlayVideoImpl(name, position, false, false, false);
+	openAndPlayVideoImpl(name, position * GH.screenHandler().getScalingFactor(), false, false, false);
 }
 
 std::unique_ptr<IVideoInstance> CVideoPlayer::open(const VideoPath & name, bool scaleToScreen)

+ 1 - 0
client/render/CAnimation.cpp

@@ -13,6 +13,7 @@
 #include "../gui/CGuiHandler.h"
 #include "../render/IImage.h"
 #include "../render/IRenderHandler.h"
+#include "../render/IScreenHandler.h"
 
 #include "../../lib/filesystem/Filesystem.h"
 #include "../../lib/json/JsonUtils.h"

+ 0 - 50
client/render/CDefFile.cpp

@@ -36,48 +36,10 @@ enum class DefType : uint32_t
  *  DefFile, class used for def loading                                  *
  *************************************************************************/
 
-static bool colorsSimilar (const SDL_Color & lhs, const SDL_Color & rhs)
-{
-	// it seems that H3 does not requires exact match to replace colors -> (255, 103, 255) gets interpreted as shadow
-	// exact logic is not clear and requires extensive testing with image editing
-	// potential reason is that H3 uses 16-bit color format (565 RGB bits), meaning that 3 least significant bits are lost in red and blue component
-	static const int threshold = 8;
-
-	int diffR = static_cast<int>(lhs.r) - rhs.r;
-	int diffG = static_cast<int>(lhs.g) - rhs.g;
-	int diffB = static_cast<int>(lhs.b) - rhs.b;
-	int diffA = static_cast<int>(lhs.a) - rhs.a;
-
-	return std::abs(diffR) < threshold && std::abs(diffG) < threshold && std::abs(diffB) < threshold && std::abs(diffA) < threshold;
-}
-
 CDefFile::CDefFile(const AnimationPath & Name):
 	data(nullptr),
 	palette(nullptr)
 {
-	//First 8 colors in def palette used for transparency
-	static const SDL_Color sourcePalette[8] = {
-		{0,   255, 255, SDL_ALPHA_OPAQUE},
-		{255, 150, 255, SDL_ALPHA_OPAQUE},
-		{255, 100, 255, SDL_ALPHA_OPAQUE},
-		{255, 50,  255, SDL_ALPHA_OPAQUE},
-		{255, 0,   255, SDL_ALPHA_OPAQUE},
-		{255, 255, 0,   SDL_ALPHA_OPAQUE},
-		{180, 0,   255, SDL_ALPHA_OPAQUE},
-		{0,   255, 0,   SDL_ALPHA_OPAQUE}
-	};
-
-	static const SDL_Color targetPalette[8] = {
-		{0, 0, 0, 0  }, // transparency                  ( used in most images )
-		{0, 0, 0, 64 }, // shadow border                 ( used in battle, adventure map def's )
-		{0, 0, 0, 64 }, // shadow border                 ( used in fog-of-war def's )
-		{0, 0, 0, 128}, // shadow body                   ( used in fog-of-war def's )
-		{0, 0, 0, 128}, // shadow body                   ( used in battle, adventure map def's )
-		{0, 0, 0, 0  }, // selection / owner flag        ( used in battle, adventure map def's )
-		{0, 0, 0, 128}, // shadow body   below selection ( used in battle def's )
-		{0, 0, 0, 64 }  // shadow border below selection ( used in battle def's )
-	};
-
 	data = CResourceHandler::get()->load(Name)->readAll().first;
 
 	palette = std::unique_ptr<SDL_Color[]>(new SDL_Color[256]);
@@ -99,18 +61,6 @@ CDefFile::CDefFile(const AnimationPath & Name):
 		palette[i].a = SDL_ALPHA_OPAQUE;
 	}
 
-	// these colors seems to be used unconditionally
-	palette[0] = targetPalette[0];
-	palette[1] = targetPalette[1];
-	palette[4] = targetPalette[4];
-
-	// rest of special colors are used only if their RGB values are close to H3
-	for (uint32_t i = 0; i < 8; ++i)
-	{
-		if (colorsSimilar(sourcePalette[i], palette[i]))
-			palette[i] = targetPalette[i];
-	}
-
 	for (ui32 i=0; i<totalBlocks; i++)
 	{
 		size_t blockID = read_le_u32(data.get() + it);

+ 53 - 32
client/render/Canvas.cpp

@@ -10,6 +10,8 @@
 #include "StdInc.h"
 #include "Canvas.h"
 
+#include "../gui/CGuiHandler.h"
+#include "../render/IScreenHandler.h"
 #include "../renderSDL/SDL_Extensions.h"
 #include "Colors.h"
 #include "IImage.h"
@@ -19,7 +21,8 @@
 #include <SDL_surface.h>
 #include <SDL_pixels.h>
 
-Canvas::Canvas(SDL_Surface * surface):
+Canvas::Canvas(SDL_Surface * surface, CanvasScalingPolicy scalingPolicy):
+	scalingPolicy(scalingPolicy),
 	surface(surface),
 	renderArea(0,0, surface->w, surface->h)
 {
@@ -27,6 +30,7 @@ Canvas::Canvas(SDL_Surface * surface):
 }
 
 Canvas::Canvas(const Canvas & other):
+	scalingPolicy(other.scalingPolicy),
 	surface(other.surface),
 	renderArea(other.renderArea)
 {
@@ -34,6 +38,7 @@ Canvas::Canvas(const Canvas & other):
 }
 
 Canvas::Canvas(Canvas && other):
+	scalingPolicy(other.scalingPolicy),
 	surface(other.surface),
 	renderArea(other.renderArea)
 {
@@ -43,20 +48,39 @@ Canvas::Canvas(Canvas && other):
 Canvas::Canvas(const Canvas & other, const Rect & newClipRect):
 	Canvas(other)
 {
-	renderArea = other.renderArea.intersect(newClipRect + other.renderArea.topLeft());
+	Rect scaledClipRect( transformPos(newClipRect.topLeft()), transformPos(newClipRect.dimensions()));
+	renderArea = other.renderArea.intersect(scaledClipRect + other.renderArea.topLeft());
 }
 
-Canvas::Canvas(const Point & size):
-	renderArea(Point(0,0), size),
-	surface(CSDL_Ext::newSurface(size.x, size.y))
+Canvas::Canvas(const Point & size, CanvasScalingPolicy scalingPolicy):
+	scalingPolicy(scalingPolicy),
+	surface(CSDL_Ext::newSurface(size * getScalingFactor())),
+	renderArea(Point(0,0), size * getScalingFactor())
 {
 	CSDL_Ext::fillSurface(surface, CSDL_Ext::toSDL(Colors::TRANSPARENCY) );
 	SDL_SetSurfaceBlendMode(surface, SDL_BLENDMODE_NONE);
 }
 
-Canvas Canvas::createFromSurface(SDL_Surface * surface)
+int Canvas::getScalingFactor() const
 {
-	return Canvas(surface);
+	if (scalingPolicy == CanvasScalingPolicy::IGNORE)
+		return 1;
+	return GH.screenHandler().getScalingFactor();
+}
+
+Point Canvas::transformPos(const Point & input)
+{
+	return renderArea.topLeft() + input * getScalingFactor();
+}
+
+Point Canvas::transformSize(const Point & input)
+{
+	return input * getScalingFactor();
+}
+
+Canvas Canvas::createFromSurface(SDL_Surface * surface, CanvasScalingPolicy scalingPolicy)
+{
+	return Canvas(surface, scalingPolicy);
 }
 
 void Canvas::applyTransparency(bool on)
@@ -81,19 +105,20 @@ void Canvas::draw(const std::shared_ptr<IImage>& image, const Point & pos)
 {
 	assert(image);
 	if (image)
-		image->draw(surface, pos + renderArea.topLeft());
+		image->draw(surface, transformPos(pos));
 }
 
 void Canvas::draw(const std::shared_ptr<IImage>& image, const Point & pos, const Rect & sourceRect)
 {
+	Rect realSourceRect = sourceRect * getScalingFactor();
 	assert(image);
 	if (image)
-		image->draw(surface, pos + renderArea.topLeft(), &sourceRect);
+		image->draw(surface, transformPos(pos), &realSourceRect);
 }
 
 void Canvas::draw(const Canvas & image, const Point & pos)
 {
-	CSDL_Ext::blitSurface(image.surface, image.renderArea, surface, renderArea.topLeft() + pos);
+	CSDL_Ext::blitSurface(image.surface, image.renderArea, surface, transformPos(pos));
 }
 
 void Canvas::drawTransparent(const Canvas & image, const Point & pos, double transparency)
@@ -103,42 +128,38 @@ void Canvas::drawTransparent(const Canvas & image, const Point & pos, double tra
 	SDL_GetSurfaceBlendMode(image.surface, &oldMode);
 	SDL_SetSurfaceBlendMode(image.surface, SDL_BLENDMODE_BLEND);
 	SDL_SetSurfaceAlphaMod(image.surface, 255 * transparency);
-	CSDL_Ext::blitSurface(image.surface, image.renderArea, surface, renderArea.topLeft() + pos);
+	CSDL_Ext::blitSurface(image.surface, image.renderArea, surface, transformPos(pos));
 	SDL_SetSurfaceAlphaMod(image.surface, 255);
 	SDL_SetSurfaceBlendMode(image.surface, oldMode);
 }
 
 void Canvas::drawScaled(const Canvas & image, const Point & pos, const Point & targetSize)
 {
-	SDL_Rect targetRect = CSDL_Ext::toSDL(Rect(pos + renderArea.topLeft(), targetSize));
+	SDL_Rect targetRect = CSDL_Ext::toSDL(Rect(transformPos(pos), transformSize(targetSize)));
 	SDL_BlitScaled(image.surface, nullptr, surface, &targetRect);
 }
 
 void Canvas::drawPoint(const Point & dest, const ColorRGBA & color)
 {
-	CSDL_Ext::putPixelWithoutRefreshIfInSurf(surface, dest.x, dest.y, color.r, color.g, color.b, color.a);
+	Point point = transformPos(dest);
+	CSDL_Ext::putPixelWithoutRefreshIfInSurf(surface, point.x, point.y, color.r, color.g, color.b, color.a);
 }
 
 void Canvas::drawLine(const Point & from, const Point & dest, const ColorRGBA & colorFrom, const ColorRGBA & colorDest)
 {
-	CSDL_Ext::drawLine(surface, renderArea.topLeft() + from, renderArea.topLeft() + dest, CSDL_Ext::toSDL(colorFrom), CSDL_Ext::toSDL(colorDest));
-}
-
-void Canvas::drawLineDashed(const Point & from, const Point & dest, const ColorRGBA & color)
-{
-	CSDL_Ext::drawLineDashed(surface, renderArea.topLeft() + from, renderArea.topLeft() + dest, CSDL_Ext::toSDL(color));
+	CSDL_Ext::drawLine(surface, transformPos(from), transformPos(dest), CSDL_Ext::toSDL(colorFrom), CSDL_Ext::toSDL(colorDest), getScalingFactor());
 }
 
 void Canvas::drawBorder(const Rect & target, const ColorRGBA & color, int width)
 {
-	Rect realTarget = target + renderArea.topLeft();
+	Rect realTarget = target * getScalingFactor() + renderArea.topLeft();
 
-	CSDL_Ext::drawBorder(surface, realTarget.x, realTarget.y, realTarget.w, realTarget.h, CSDL_Ext::toSDL(color), width);
+	CSDL_Ext::drawBorder(surface, realTarget.x, realTarget.y, realTarget.w, realTarget.h, CSDL_Ext::toSDL(color), width * getScalingFactor());
 }
 
 void Canvas::drawBorderDashed(const Rect & target, const ColorRGBA & color)
 {
-	Rect realTarget = target + renderArea.topLeft();
+	Rect realTarget = target * getScalingFactor() + renderArea.topLeft();
 
 	CSDL_Ext::drawLineDashed(surface, realTarget.topLeft(),    realTarget.topRight(),    CSDL_Ext::toSDL(color));
 	CSDL_Ext::drawLineDashed(surface, realTarget.bottomLeft(), realTarget.bottomRight(), CSDL_Ext::toSDL(color));
@@ -150,10 +171,10 @@ void Canvas::drawText(const Point & position, const EFonts & font, const ColorRG
 {
 	switch (alignment)
 	{
-	case ETextAlignment::TOPLEFT:      return graphics->fonts[font]->renderTextLeft  (surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::TOPCENTER:    return graphics->fonts[font]->renderTextCenter(surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::CENTER:       return graphics->fonts[font]->renderTextCenter(surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::BOTTOMRIGHT:  return graphics->fonts[font]->renderTextRight (surface, text, colorDest, renderArea.topLeft() + position);
+	case ETextAlignment::TOPLEFT:      return graphics->fonts[font]->renderTextLeft  (surface, text, colorDest, transformPos(position));
+	case ETextAlignment::TOPCENTER:    return graphics->fonts[font]->renderTextCenter(surface, text, colorDest, transformPos(position));
+	case ETextAlignment::CENTER:       return graphics->fonts[font]->renderTextCenter(surface, text, colorDest, transformPos(position));
+	case ETextAlignment::BOTTOMRIGHT:  return graphics->fonts[font]->renderTextRight (surface, text, colorDest, transformPos(position));
 	}
 }
 
@@ -161,23 +182,23 @@ void Canvas::drawText(const Point & position, const EFonts & font, const ColorRG
 {
 	switch (alignment)
 	{
-	case ETextAlignment::TOPLEFT:      return graphics->fonts[font]->renderTextLinesLeft  (surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::TOPCENTER:    return graphics->fonts[font]->renderTextLinesCenter(surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::CENTER:       return graphics->fonts[font]->renderTextLinesCenter(surface, text, colorDest, renderArea.topLeft() + position);
-	case ETextAlignment::BOTTOMRIGHT:  return graphics->fonts[font]->renderTextLinesRight (surface, text, colorDest, renderArea.topLeft() + position);
+	case ETextAlignment::TOPLEFT:      return graphics->fonts[font]->renderTextLinesLeft  (surface, text, colorDest, transformPos(position));
+	case ETextAlignment::TOPCENTER:    return graphics->fonts[font]->renderTextLinesCenter(surface, text, colorDest, transformPos(position));
+	case ETextAlignment::CENTER:       return graphics->fonts[font]->renderTextLinesCenter(surface, text, colorDest, transformPos(position));
+	case ETextAlignment::BOTTOMRIGHT:  return graphics->fonts[font]->renderTextLinesRight (surface, text, colorDest, transformPos(position));
 	}
 }
 
 void Canvas::drawColor(const Rect & target, const ColorRGBA & color)
 {
-	Rect realTarget = target + renderArea.topLeft();
+	Rect realTarget = (target + renderArea.topLeft()) * getScalingFactor();
 
 	CSDL_Ext::fillRect(surface, realTarget, CSDL_Ext::toSDL(color));
 }
 
 void Canvas::drawColorBlended(const Rect & target, const ColorRGBA & color)
 {
-	Rect realTarget = target + renderArea.topLeft();
+	Rect realTarget = (target + renderArea.topLeft()) * getScalingFactor();
 
 	CSDL_Ext::fillRectBlended(surface, realTarget, CSDL_Ext::toSDL(color));
 }

+ 18 - 6
client/render/Canvas.h

@@ -17,9 +17,19 @@ struct SDL_Surface;
 class IImage;
 enum EFonts : int;
 
+enum class CanvasScalingPolicy
+{
+	AUTO,  // automatically scale canvas operations by global scaling factor
+	IGNORE // disable any scaling processing. Scaling factor will be set to 1
+
+};
+
 /// Class that represents surface for drawing on
 class Canvas
 {
+	/// Upscaler awareness. Must be first member for initialization
+	CanvasScalingPolicy scalingPolicy;
+
 	/// Target surface
 	SDL_Surface * surface;
 
@@ -27,11 +37,14 @@ class Canvas
 	Rect renderArea;
 
 	/// constructs canvas using existing surface. Caller maintains ownership on the surface
-	explicit Canvas(SDL_Surface * surface);
+	explicit Canvas(SDL_Surface * surface, CanvasScalingPolicy scalingPolicy);
 
 	/// copy constructor
 	Canvas(const Canvas & other);
 
+	Point transformPos(const Point & input);
+	Point transformSize(const Point & input);
+
 public:
 	Canvas & operator = (const Canvas & other) = delete;
 	Canvas & operator = (Canvas && other) = delete;
@@ -43,11 +56,11 @@ public:
 	Canvas(const Canvas & other, const Rect & clipRect);
 
 	/// constructs canvas of specified size
-	explicit Canvas(const Point & size);
+	explicit Canvas(const Point & size, CanvasScalingPolicy scalingPolicy);
 
 	/// constructs canvas using existing surface. Caller maintains ownership on the surface
 	/// Compatibility method. AVOID USAGE. To be removed once SDL abstraction layer is finished.
-	static Canvas createFromSurface(SDL_Surface * surface);
+	static Canvas createFromSurface(SDL_Surface * surface, CanvasScalingPolicy scalingPolicy);
 
 	~Canvas();
 
@@ -78,9 +91,6 @@ public:
 	/// renders continuous, 1-pixel wide line with color gradient
 	void drawLine(const Point & from, const Point & dest, const ColorRGBA & colorFrom, const ColorRGBA & colorDest);
 
-	/// renders dashed, 1-pixel wide line with specified color
-	void drawLineDashed(const Point & from, const Point & dest, const ColorRGBA & color);
-
 	/// renders rectangular, solid-color border in specified location
 	void drawBorder(const Rect & target, const ColorRGBA & color, int width = 1);
 
@@ -102,6 +112,8 @@ public:
 	/// fills canvas with texture
 	void fillTexture(const std::shared_ptr<IImage>& image);
 
+	int getScalingFactor() const;
+
 	/// Compatibility method. AVOID USAGE. To be removed once SDL abstraction layer is finished.
 	SDL_Surface * getInternalSurface();
 

+ 1 - 0
client/render/Colors.cpp

@@ -15,6 +15,7 @@
 
 const ColorRGBA Colors::YELLOW = { 229, 215, 123, ColorRGBA::ALPHA_OPAQUE };
 const ColorRGBA Colors::WHITE = { 255, 243, 222, ColorRGBA::ALPHA_OPAQUE };
+const ColorRGBA Colors::WHITE_TRUE = { 255, 255, 255, ColorRGBA::ALPHA_OPAQUE };
 const ColorRGBA Colors::METALLIC_GOLD = { 173, 142, 66, ColorRGBA::ALPHA_OPAQUE };
 const ColorRGBA Colors::GREEN = { 0, 255, 0, ColorRGBA::ALPHA_OPAQUE };
 const ColorRGBA Colors::CYAN = { 0, 255, 255, ColorRGBA::ALPHA_OPAQUE };

+ 3 - 0
client/render/Colors.h

@@ -23,6 +23,9 @@ public:
 	/** the standard h3 white color */
 	static const ColorRGBA WHITE;
 
+	/** actual 100% white color */
+	static const ColorRGBA WHITE_TRUE;
+
 	/** the metallic gold color used mostly as a border around buttons */
 	static const ColorRGBA METALLIC_GOLD;
 

+ 16 - 7
client/render/IFont.cpp

@@ -11,9 +11,18 @@
 #include "StdInc.h"
 #include "IFont.h"
 
+#include "../gui/CGuiHandler.h"
+
+#include "../render/IScreenHandler.h"
+
 #include "../../lib/Point.h"
 #include "../../lib/texts/TextOperations.h"
 
+/// Returns scaling factor as currently reported by the screen handler
+int IFont::getScalingFactor() const
+{
+	const auto & screen = GH.screenHandler();
+	return screen.getScalingFactor();
+}
+
 size_t IFont::getStringWidth(const std::string & data) const
 {
 	size_t width = 0;
@@ -32,13 +41,13 @@ void IFont::renderTextLeft(SDL_Surface * surface, const std::string & data, cons
 
 void IFont::renderTextRight(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const
 {
-	Point size((int)getStringWidth(data), (int)getLineHeight());
+	Point size = Point(getStringWidth(data), getLineHeight()) * getScalingFactor(); // width and height of the text, multiplied by scaling factor
 	renderText(surface, data, color, pos - size); // rendering starts one full text extent before pos on both axes
 }
 
 void IFont::renderTextCenter(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const
 {
-	Point size((int)getStringWidth(data), (int)getLineHeight());
+	Point size = Point(getStringWidth(data), getLineHeight()) * getScalingFactor(); // width and height of the text, multiplied by scaling factor
 	renderText(surface, data, color, pos - size / 2); // rendering starts half of text extent before pos on both axes
 }
 
@@ -49,31 +58,31 @@ void IFont::renderTextLinesLeft(SDL_Surface * surface, const std::vector<std::st
 	for(const std::string & line : data)
 	{
 		renderTextLeft(surface, line, color, currPos);
-		currPos.y += (int)getLineHeight();
+		currPos.y += getLineHeight() * getScalingFactor();
 	}
 }
 
 void IFont::renderTextLinesRight(SDL_Surface * surface, const std::vector<std::string> & data, const ColorRGBA & color, const Point & pos) const
 {
 	Point currPos = pos;
-	currPos.y -= (int)data.size() * (int)getLineHeight();
+	currPos.y -= data.size() * getLineHeight() * getScalingFactor(); // move up by combined height of all lines, multiplied by scaling factor
 
 	for(const std::string & line : data)
 	{
 		renderTextRight(surface, line, color, currPos);
-		currPos.y += (int)getLineHeight();
+		currPos.y += getLineHeight() * getScalingFactor(); // step down by one line
 	}
 }
 
 void IFont::renderTextLinesCenter(SDL_Surface * surface, const std::vector<std::string> & data, const ColorRGBA & color, const Point & pos) const
 {
 	Point currPos = pos;
-	currPos.y -= (int)data.size() * (int)getLineHeight() / 2;
+	currPos.y -= data.size() * getLineHeight() / 2 * getScalingFactor(); // move up by half of combined height of all lines, multiplied by scaling factor
 
 	for(const std::string & line : data)
 	{
 		renderTextCenter(surface, line, color, currPos);
-		currPos.y += (int)getLineHeight();
+		currPos.y += getLineHeight() * getScalingFactor(); // step down by one line
 	}
 }
 

+ 3 - 1
client/render/IFont.h

@@ -16,12 +16,14 @@ VCMI_LIB_NAMESPACE_END
 
 struct SDL_Surface;
 
-class IFont
+class IFont : boost::noncopyable
 {
 protected:
 	/// Internal function to render font, see renderTextLeft
 	virtual void renderText(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const = 0;
 
+	int getScalingFactor() const;
+
 public:
 	virtual ~IFont()
 	{}

+ 23 - 16
client/render/IImage.h

@@ -21,10 +21,12 @@ class ColorRGBA;
 VCMI_LIB_NAMESPACE_END
 
 struct SDL_Surface;
+struct SDL_Palette;
 class ColorFilter;
+class ISharedImage;
 
 /// Defines which blit method will be selected when image is used for rendering
-enum class EImageBlitMode
+enum class EImageBlitMode : uint8_t
 {
 	/// Preferred for images that don't need any background
 	/// Indexed or RGBA: Image can have no transparency and can be only used as background
@@ -40,28 +42,22 @@ enum class EImageBlitMode
 	ALPHA
 };
 
-/*
- * Base class for images, can be used for non-animation pictures as well
- */
+/// Base class for images for use in client code.
+/// This class represents current state of image, with potential transformations applied, such as player coloring
 class IImage
 {
 public:
-	using SpecialPalette = std::vector<ColorRGBA>;
-	static constexpr int32_t SPECIAL_PALETTE_MASK_CREATURES = 0b11110011;
-
 	//draws image on surface "where" at position
 	virtual void draw(SDL_Surface * where, const Point & pos, const Rect * src = nullptr) const = 0;
 
-	virtual void scaleFast(const Point & size) = 0;
+	virtual void scaleTo(const Point & size) = 0;
+	virtual void scaleInteger(int factor) = 0;
 
 	virtual void exportBitmap(const boost::filesystem::path & path) const = 0;
 
 	//Change palette to specific player
 	virtual void playerColored(PlayerColor player) = 0;
 
-	//set special color for flag
-	virtual void setFlagColor(PlayerColor player) = 0;
-
 	//test transparency of specific pixel
 	virtual bool isTransparent(const Point & coords) const = 0;
 
@@ -77,23 +73,34 @@ public:
 	virtual void setBlitMode(EImageBlitMode mode) = 0;
 
 	//only indexed bitmaps with 7 special colors
-	virtual void setSpecialPalette(const SpecialPalette & SpecialPalette, uint32_t colorsToSkipMask) = 0;
+	virtual void setOverlayColor(const ColorRGBA & color) = 0;
+
+	virtual void setShadowEnabled(bool on) = 0;
+	virtual void setBodyEnabled(bool on) = 0;
+	virtual void setOverlayEnabled(bool on) = 0;
+	virtual std::shared_ptr<ISharedImage> getSharedImage() const = 0;
 
 	virtual ~IImage() = default;
 };
 
-class IConstImage
+/// Base class for image data, mostly for internal use
+/// Represents unmodified pixel data, usually loaded from file
+/// This image can be shared between multiple image handlers (IImage instances)
+class ISharedImage
 {
 public:
 	virtual Point dimensions() const = 0;
 	virtual void exportBitmap(const boost::filesystem::path & path) const = 0;
 	virtual bool isTransparent(const Point & coords) const = 0;
+	virtual void draw(SDL_Surface * where, SDL_Palette * palette, const Point & dest, const Rect * src, const ColorRGBA & colorMultiplier, uint8_t alpha, EImageBlitMode mode) const = 0; // palette may be nullptr - ImageScaled::draw always passes nullptr
 
 	virtual std::shared_ptr<IImage> createImageReference(EImageBlitMode mode) = 0; // NOTE(review): presumably returned IImage refers to this shared data - confirm in implementations
 
-	virtual std::shared_ptr<IConstImage> horizontalFlip() const = 0;
-	virtual std::shared_ptr<IConstImage> verticalFlip() const = 0;
+	virtual std::shared_ptr<ISharedImage> horizontalFlip() const = 0; // this and following transformations are const and hand their result back as shared image
+	virtual std::shared_ptr<ISharedImage> verticalFlip() const = 0;
+	virtual std::shared_ptr<ISharedImage> scaleInteger(int factor, SDL_Palette * palette) const = 0;
+	virtual std::shared_ptr<ISharedImage> scaleTo(const Point & size, SDL_Palette * palette) const = 0; // ImageScaled::scaleTo passes nullptr as palette
 
 
-	virtual ~IConstImage() = default;
+	virtual ~ISharedImage() = default;
 };

+ 1 - 1
client/render/IRenderHandler.h

@@ -19,7 +19,7 @@ struct SDL_Surface;
 
 class IImage;
 class CAnimation;
-enum class EImageBlitMode;
+enum class EImageBlitMode : uint8_t;
 
 class IRenderHandler : public boost::noncopyable
 {

+ 5 - 0
client/render/IScreenHandler.h

@@ -41,6 +41,11 @@ public:
 	/// Dimensions of render output
 	virtual Point getRenderResolution() const = 0;
 
+	/// Dimensions of logical output. Can be different if scaling is used
+	virtual Point getLogicalResolution() const = 0;
+
+	virtual int getScalingFactor() const = 0;
+
 	/// Window has focus
 	virtual bool hasFocus() = 0;
 };

+ 40 - 1
client/render/ImageLocator.cpp

@@ -10,6 +10,9 @@
 #include "StdInc.h"
 #include "ImageLocator.h"
 
+#include "../gui/CGuiHandler.h"
+#include "IScreenHandler.h"
+
 #include "../../lib/json/JsonNode.h"
 
 
@@ -47,10 +50,46 @@ bool ImageLocator::operator<(const ImageLocator & other) const
 		return defFrame < other.defFrame;
 	if(verticalFlip != other.verticalFlip)
 		return verticalFlip < other.verticalFlip;
-	return horizontalFlip < other.horizontalFlip;
+	if(horizontalFlip != other.horizontalFlip)
+		return horizontalFlip < other.horizontalFlip;
+	if(scalingFactor != other.scalingFactor)
+		return scalingFactor < other.scalingFactor;
+	if(playerColored != other.playerColored)
+		return playerColored < other.playerColored;
+	if(layerShadow != other.layerShadow)
+		return layerShadow < other.layerShadow;
+	if(layerBody != other.layerBody)
+		return layerBody < other.layerBody;
+	if (layerOverlay != other.layerOverlay)
+		return layerOverlay < other.layerOverlay;
+
+	return false;
 }
 
 bool ImageLocator::empty() const // true when locator has neither image nor def file assigned
 {
 	return !image.has_value() && !defFile.has_value();
 }
+
+/// Creates locator that points to same image or def file, frame and group as this one.
+/// All other properties of the result are left at their default values
+ImageLocator ImageLocator::copyFile() const
+{
+	ImageLocator fileOnly;
+
+	fileOnly.image = image;
+	fileOnly.defFile = defFile;
+	fileOnly.defGroup = defGroup;
+	fileOnly.defFrame = defFrame;
+
+	return fileOnly;
+}
+
+/// Creates locator that keeps everything copied by copyFile() plus both flip flags.
+/// All other properties of the result are left at their default values
+ImageLocator ImageLocator::copyFileTransform() const
+{
+	ImageLocator transformed = copyFile();
+
+	transformed.verticalFlip = verticalFlip;
+	transformed.horizontalFlip = horizontalFlip;
+
+	return transformed;
+}
+
+/// Creates copy of this locator. At the moment every field is copied
+ImageLocator ImageLocator::copyFileTransformScale() const
+{
+	ImageLocator fullCopy(*this); // full copy
+	return fullCopy;
+}

+ 10 - 0
client/render/ImageLocator.h

@@ -10,6 +10,7 @@
 #pragma once
 
 #include "../../lib/filesystem/ResourcePath.h"
+#include "../../lib/constants/EntityIdentifiers.h"
 
 struct ImageLocator
 {
@@ -20,6 +21,11 @@ struct ImageLocator
 
 	bool verticalFlip = false;
 	bool horizontalFlip = false;
+	int8_t scalingFactor = 1;
+	PlayerColor playerColored = PlayerColor::CANNOT_DETERMINE;
+	bool layerShadow = false;
+	bool layerBody = true;
+	bool layerOverlay = false;
 
 	ImageLocator() = default;
 	ImageLocator(const AnimationPath & path, int frame, int group);
@@ -28,4 +34,8 @@ struct ImageLocator
 
 	bool operator < (const ImageLocator & other) const;
 	bool empty() const;
+
+	ImageLocator copyFile() const;
+	ImageLocator copyFileTransform() const;
+	ImageLocator copyFileTransformScale() const;
 };

+ 143 - 54
client/renderSDL/CBitmapFont.cpp

@@ -12,7 +12,9 @@
 
 #include "SDL_Extensions.h"
 #include "../CGameInfo.h"
+#include "../gui/CGuiHandler.h"
 #include "../render/Colors.h"
+#include "../render/IScreenHandler.h"
 
 #include "../../lib/Rect.h"
 #include "../../lib/filesystem/Filesystem.h"
@@ -24,7 +26,75 @@
 
 #include <SDL_surface.h>
 
-void CBitmapFont::loadModFont(const std::string & modName, const ResourcePath & resource)
+struct AtlasLayout // result of atlas packing
+{
+	Point dimensions; // size of the atlas for which this layout was computed
+	std::map<int, Rect> images; // area assigned to each image, indexed by same keys as packing input
+};
+
+/// Attempts to pack provided list of images into 2d box of specified size
+/// Images are placed left-to-right into rows, in order of their keys. New row is started once image no longer fits into current one
+/// Returns resulting layout on success and empty optional on failure (image is wider than the box, or box is not tall enough)
+static std::optional<AtlasLayout> tryAtlasPacking(Point dimensions, const std::map<int, Point> & images)
+{
+	// Simple atlas packing algorithm. Can be extended if needed, however optimal solution is NP-complete problem, so 'perfect' solution is too costly
+
+	AtlasLayout result;
+	result.dimensions = dimensions;
+
+	// a little interval to prevent potential 'bleeding' into adjacent symbols
+	// should be unnecessary for base game, but may be needed for upscaled filters
+	constexpr int interval = 1;
+	int currentHeight = 0; // top edge of the row that is currently being filled
+	int nextHeight = 0; // top edge of the following row: bottom of the tallest image placed so far, plus interval
+	int currentWidth = 0; // left edge for the next image in current row
+
+	for (auto const & image : images)
+	{
+		// failure - image is wider than entire atlas and would not fit even into an empty row.
+		// Without this check such image would be placed at x = 0 and extend past the right edge of the atlas
+		if (image.second.x > dimensions.x)
+			return std::nullopt;
+
+		int nextWidth = currentWidth + image.second.x + interval;
+
+		if (nextWidth > dimensions.x)
+		{
+			// no space left in current row - continue from the start of next row
+			currentHeight = nextHeight;
+			currentWidth = 0;
+			nextWidth = currentWidth + image.second.x + interval;
+		}
+
+		nextHeight = std::max(nextHeight, currentHeight + image.second.y + interval);
+		if (nextHeight > dimensions.y)
+			return std::nullopt; // failure - ran out of space
+
+		result.images[image.first] = Rect(Point(currentWidth, currentHeight), image.second);
+
+		currentWidth = nextWidth;
+	}
+
+	return result;
+}
+
+/// Arranges images to fit into texture atlas with automatic selection of image size
+/// Returns images arranged into 2d box
+static AtlasLayout doAtlasPacking(const std::map<int, Point> & images)
+{
+	// initial size of an atlas. Smaller size won't even fit tiniest H3 font
+	Point atlasSize(128, 128);
+
+	while (true)
+	{
+		std::optional<AtlasLayout> layout = tryAtlasPacking(atlasSize, images);
+
+		if (layout.has_value())
+			return layout.value();
+
+		// packing failed - grow the atlas and try again
+		// width and height are doubled in alternating form: (128,128) -> (256,128) -> (256,256) ...
+		if (atlasSize.x <= atlasSize.y)
+			atlasSize.x *= 2;
+		else
+			atlasSize.y *= 2;
+	}
+}
+
+void CBitmapFont::loadModFont(const std::string & modName, const ResourcePath & resource, std::unordered_map<CodePoint, EntryFNT> & loadedChars)
 {
 	if (!CResourceHandler::get(modName)->existsResource(resource))
 	{
@@ -49,7 +119,7 @@ void CBitmapFont::loadModFont(const std::string & modName, const ResourcePath &
 	{
 		CodePoint codepoint = TextOperations::getUnicodeCodepoint(static_cast<char>(charIndex), modEncoding);
 
-		BitmapChar symbol;
+		EntryFNT symbol;
 
 		symbol.leftOffset =  read_le_u32(data.first.get() + baseIndex + charIndex * 12 + 0);
 		symbol.width =       read_le_u32(data.first.get() + baseIndex + charIndex * 12 + 4);
@@ -65,7 +135,7 @@ void CBitmapFont::loadModFont(const std::string & modName, const ResourcePath &
 
 		std::copy_n(pixelData, pixelsCount, symbol.pixels.data() );
 
-		chars[codepoint] = symbol;
+		loadedChars[codepoint] = symbol;
 	}
 }
 
@@ -74,15 +144,72 @@ CBitmapFont::CBitmapFont(const std::string & filename):
 {
 	ResourcePath resource("data/" + filename, EResType::BMP_FONT);
 
-	loadModFont("core", resource);
+	std::unordered_map<CodePoint, EntryFNT> loadedChars;
+	loadModFont("core", resource, loadedChars);
 
 	for(const auto & modName : VLC->modh->getActiveMods())
 	{
 		if (CResourceHandler::get(modName)->existsResource(resource))
-			loadModFont(modName, resource);
+			loadModFont(modName, resource, loadedChars);
+	}
+
+	std::map<int, Point> atlasSymbol;
+	for (auto const & symbol : loadedChars)
+		atlasSymbol[symbol.first] = Point(symbol.second.width, symbol.second.height);
+
+	auto atlas = doAtlasPacking(atlasSymbol);
+
+	atlasImage = SDL_CreateRGBSurface(0, atlas.dimensions.x, atlas.dimensions.y, 8, 0, 0, 0, 0);
+
+	assert(atlasImage->format->palette != nullptr);
+	assert(atlasImage->format->palette->ncolors == 256);
+
+	atlasImage->format->palette->colors[0] = { 0, 255, 255, SDL_ALPHA_OPAQUE }; // transparency
+	atlasImage->format->palette->colors[1] = { 0, 0, 0, SDL_ALPHA_OPAQUE }; // black shadow
+
+	CSDL_Ext::fillSurface(atlasImage, CSDL_Ext::toSDL(Colors::CYAN));
+	CSDL_Ext::setColorKey(atlasImage, CSDL_Ext::toSDL(Colors::CYAN));
+
+	for (size_t i = 2; i < atlasImage->format->palette->ncolors; ++i)
+		atlasImage->format->palette->colors[i] = { 255, 255, 255, SDL_ALPHA_OPAQUE };
+
+	for (auto const	& symbol : loadedChars)
+	{
+		BitmapChar storedEntry;
+
+		storedEntry.leftOffset = symbol.second.leftOffset;
+		storedEntry.rightOffset = symbol.second.rightOffset;
+		storedEntry.positionInAtlas = atlas.images.at(symbol.first);
+
+		// Copy pixel data to atlas
+		uint8_t *dstPixels = static_cast<uint8_t*>(atlasImage->pixels);
+		uint8_t *dstLine   = dstPixels + storedEntry.positionInAtlas.y * atlasImage->pitch;
+		uint8_t *dst = dstLine + storedEntry.positionInAtlas.x;
+
+		for (size_t i = 0; i < storedEntry.positionInAtlas.h; ++i)
+		{
+			const uint8_t *srcPtr = symbol.second.pixels.data() + i * storedEntry.positionInAtlas.w;
+			uint8_t * dstPtr = dst + i * atlasImage->pitch;
+
+			std::copy_n(srcPtr, storedEntry.positionInAtlas.w, dstPtr);
+		}
+
+		chars[symbol.first] = storedEntry;
+	}
+
+	if (GH.screenHandler().getScalingFactor() != 1)
+	{
+		auto scaledSurface = CSDL_Ext::scaleSurfaceIntegerFactor(atlasImage, GH.screenHandler().getScalingFactor());
+		SDL_FreeSurface(atlasImage);
+		atlasImage = scaledSurface;
 	}
 }
 
+/// Releases atlas surface that was created in constructor
+CBitmapFont::~CBitmapFont()
+{
+	SDL_Surface * ownedAtlas = atlasImage;
+	SDL_FreeSurface(ownedAtlas);
+}
+
 size_t CBitmapFont::getLineHeight() const
 {
 	return maxHeight;
@@ -97,7 +224,7 @@ size_t CBitmapFont::getGlyphWidth(const char * data) const
 	if (iter == chars.end())
 		return 0;
 
-	return iter->second.leftOffset + iter->second.width + iter->second.rightOffset;
+	return iter->second.leftOffset + iter->second.positionInAtlas.w + iter->second.rightOffset;
 }
 
 bool CBitmapFont::canRepresentCharacter(const char *data) const
@@ -120,52 +247,21 @@ bool CBitmapFont::canRepresentString(const std::string & data) const
 
 void CBitmapFont::renderCharacter(SDL_Surface * surface, const BitmapChar & character, const ColorRGBA & color, int &posX, int &posY) const
 {
-	Rect clipRect;
-	CSDL_Ext::getClipRect(surface, clipRect);
-
-	posX += character.leftOffset;
-
-	CSDL_Ext::TColorPutter colorPutter = CSDL_Ext::getPutterFor(surface);
+	int scalingFactor = GH.screenHandler().getScalingFactor();
 
-	uint8_t bpp = surface->format->BytesPerPixel;
+	posX += character.leftOffset * scalingFactor; // advance pen by left offset of the glyph, multiplied by scaling factor
 
-	// start of line, may differ from 0 due to end of surface or clipped surface
-	int lineBegin = std::max<int>(0, clipRect.y - posY);
-	int lineEnd   = std::min<int>(character.height, clipRect.y + clipRect.h - posY - 1);
+	auto sdlColor = CSDL_Ext::toSDL(color);
 
-	// start end end of each row, may differ from 0
-	int rowBegin = std::max<int>(0, clipRect.x - posX);
-	int rowEnd   = std::min<int>(character.width, clipRect.x + clipRect.w - posX - 1);
+	if (atlasImage->format->palette)
+		SDL_SetPaletteColors(atlasImage->format->palette, &sdlColor, 255, 1); // overwrite single palette entry at index 255 with requested text color
+	else
+		SDL_SetSurfaceColorMod(atlasImage, color.r, color.g, color.b); // atlas has no palette - NOTE(review): presumably the case after upscaling, confirm
 
-	//for each line in symbol
-	for(int dy = lineBegin; dy <lineEnd; dy++)
-	{
-		uint8_t *dstLine = (uint8_t*)surface->pixels;
-		const uint8_t *srcLine = character.pixels.data();
-
-		// shift source\destination pixels to current position
-		dstLine += (posY+dy) * surface->pitch + posX * bpp;
-		srcLine += dy * character.width;
+	CSDL_Ext::blitSurface(atlasImage, character.positionInAtlas * scalingFactor, surface, Point(posX, posY)); // source area is stored glyph position multiplied by scaling factor
 
-		//for each column in line
-		for(int dx = rowBegin; dx < rowEnd; dx++)
-		{
-			uint8_t* dstPixel = dstLine + dx*bpp;
-			switch(srcLine[dx])
-			{
-			case 1: //black "shadow"
-				colorPutter(dstPixel, 0, 0, 0);
-				break;
-			case 255: //text colour
-				colorPutter(dstPixel, color.r, color.g, color.b);
-				break;
-			default :
-				break; //transparency
-			}
-		}
-	}
-	posX += character.width;
-	posX += character.rightOffset;
+	posX += character.positionInAtlas.w * scalingFactor; // advance pen by width of the glyph ...
+	posX += character.rightOffset * scalingFactor; // ... and by its right offset, so next glyph starts at correct position
 }
 
 void CBitmapFont::renderText(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const
@@ -178,12 +274,6 @@ void CBitmapFont::renderText(SDL_Surface * surface, const std::string & data, co
 	int posX = pos.x;
 	int posY = pos.y;
 
-	// Should be used to detect incorrect text parsing. Disabled right now due to some old UI code (mostly pregame and battles)
-	//assert(data[0] != '{');
-	//assert(data[data.size()-1] != '}');
-
-	SDL_LockSurface(surface);
-
 	for(size_t i=0; i<data.size(); i += TextOperations::getUnicodeCharacterSize(data[i]))
 	{
 		CodePoint codepoint = TextOperations::getUnicodeCodepoint(data.data() + i, data.size() - i);
@@ -193,6 +283,5 @@ void CBitmapFont::renderText(SDL_Surface * surface, const std::string & data, co
 		if (iter != chars.end())
 			renderCharacter(surface, iter->second, color, posX, posY);
 	}
-	SDL_UnlockSurface(surface);
 }
 

+ 16 - 4
client/renderSDL/CBitmapFont.h

@@ -11,32 +11,44 @@
 
 #include "../render/IFont.h"
 
+#include "../../lib/Rect.h"
+
 VCMI_LIB_NAMESPACE_BEGIN
 class ResourcePath;
 VCMI_LIB_NAMESPACE_END
 
-class CBitmapFont : public IFont
+class CBitmapFont final : public IFont
 {
+	SDL_Surface * atlasImage;
+
 	using CodePoint = uint32_t;
 
-	struct BitmapChar
+	struct EntryFNT
 	{
 		int32_t leftOffset;
 		uint32_t width;
 		uint32_t height;
 		int32_t rightOffset;
-		std::vector<uint8_t> pixels; // pixels of this character, part of BitmapFont::data
+		std::vector<uint8_t> pixels;
+	};
+
+	struct BitmapChar
+	{
+		Rect positionInAtlas;
+		int32_t leftOffset;
+		int32_t rightOffset;
 	};
 
 	std::unordered_map<CodePoint, BitmapChar> chars;
 	uint32_t maxHeight;
 
-	void loadModFont(const std::string & modName, const ResourcePath & resource);
+	void loadModFont(const std::string & modName, const ResourcePath & resource, std::unordered_map<CodePoint, EntryFNT> & loadedChars);
 
 	void renderCharacter(SDL_Surface * surface, const BitmapChar & character, const ColorRGBA & color, int &posX, int &posY) const;
 	void renderText(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const override;
 public:
 	explicit CBitmapFont(const std::string & filename);
+	~CBitmapFont();
 
 	size_t getLineHeight() const override;
 	size_t getGlyphWidth(const char * data) const override;

+ 1 - 1
client/renderSDL/CBitmapHanFont.h

@@ -18,7 +18,7 @@ VCMI_LIB_NAMESPACE_END
 class CBitmapFont;
 
 /// supports multi-byte characters for such languages like Chinese
-class CBitmapHanFont : public IFont
+class CBitmapHanFont final : public IFont
 {
 	std::unique_ptr<CBitmapFont> fallback;
 	// data, directly copied from file

+ 7 - 8
client/renderSDL/CTrueTypeFont.cpp

@@ -29,7 +29,9 @@ std::pair<std::unique_ptr<ui8[]>, ui64> CTrueTypeFont::loadData(const JsonNode &
 
 TTF_Font * CTrueTypeFont::loadFont(const JsonNode &config)
 {
-	int pointSize = static_cast<int>(config["size"].Float());
+	int pointSizeBase = static_cast<int>(config["size"].Float());
+	int scalingFactor = getScalingFactor();
+	int pointSize = pointSizeBase * scalingFactor;
 
 	if(!TTF_WasInit() && TTF_Init()==-1)
 		throw std::runtime_error(std::string("Failed to initialize true type support: ") + TTF_GetError() + "\n");
@@ -74,7 +76,7 @@ size_t CTrueTypeFont::getLineHeight() const
 	if (fallbackFont)
 		return fallbackFont->getLineHeight();
 
-	return TTF_FontHeight(font.get());
+	return TTF_FontHeight(font.get()) / getScalingFactor();
 }
 
 size_t CTrueTypeFont::getGlyphWidth(const char *data) const
@@ -83,19 +85,16 @@ size_t CTrueTypeFont::getGlyphWidth(const char *data) const
 		return fallbackFont->getGlyphWidth(data);
 
 	return getStringWidth(std::string(data, TextOperations::getUnicodeCharacterSize(*data)));
-	int advance;
-	TTF_GlyphMetrics(font.get(), *data, nullptr, nullptr, nullptr, nullptr, &advance);
-	return advance;
 }
 
 size_t CTrueTypeFont::getStringWidth(const std::string & data) const
 {
 	if (fallbackFont && fallbackFont->canRepresentString(data))
-		return fallbackFont->getStringWidth(data);
+		return fallbackFont->getStringWidth(data) / getScalingFactor();
 
 	int width;
 	TTF_SizeUTF8(font.get(), data.c_str(), &width, nullptr);
-	return width;
+	return width / getScalingFactor();
 }
 
 void CTrueTypeFont::renderText(SDL_Surface * surface, const std::string & data, const ColorRGBA & color, const Point & pos) const
@@ -107,7 +106,7 @@ void CTrueTypeFont::renderText(SDL_Surface * surface, const std::string & data,
 	}
 
 	if (dropShadow && color.r != 0 && color.g != 0 && color.b != 0) // not black - add shadow
-		renderText(surface, data, Colors::BLACK, pos + Point(1,1));
+		renderText(surface, data, Colors::BLACK, pos + Point(1,1) * getScalingFactor());
 
 	if (!data.empty())
 	{

+ 1 - 1
client/renderSDL/CTrueTypeFont.h

@@ -19,7 +19,7 @@ class CBitmapFont;
 
 using TTF_Font = struct _TTF_Font;
 
-class CTrueTypeFont : public IFont
+class CTrueTypeFont final : public IFont
 {
 	std::unique_ptr<CBitmapFont> fallbackFont;
 	const std::pair<std::unique_ptr<ui8[]>, ui64> data;

+ 2 - 2
client/renderSDL/CursorHardware.cpp

@@ -12,7 +12,7 @@
 #include "CursorHardware.h"
 
 #include "../gui/CGuiHandler.h"
-#include "../renderSDL/ScreenHandler.h"
+#include "../render/IScreenHandler.h"
 #include "../render/Colors.h"
 #include "../render/IImage.h"
 #include "SDL_Extensions.h"
@@ -45,7 +45,7 @@ void CursorHardware::setVisible(bool on)
 
 void CursorHardware::setImage(std::shared_ptr<IImage> image, const Point & pivotOffset)
 {
-	auto cursorSurface = CSDL_Ext::newSurface(image->dimensions().x, image->dimensions().y);
+	auto cursorSurface = CSDL_Ext::newSurface(image->dimensions() * GH.screenHandler().getScalingFactor());
 
 	CSDL_Ext::fillSurface(cursorSurface, CSDL_Ext::toSDL(Colors::TRANSPARENCY));
 

+ 12 - 5
client/renderSDL/CursorSoftware.cpp

@@ -11,6 +11,8 @@
 #include "StdInc.h"
 #include "CursorSoftware.h"
 
+#include "../gui/CGuiHandler.h"
+#include "../render/IScreenHandler.h"
 #include "../render/Colors.h"
 #include "../render/IImage.h"
 #include "../CMT.h"
@@ -30,8 +32,8 @@ void CursorSoftware::render()
 	SDL_Rect destRect;
 	destRect.x = renderPos.x;
 	destRect.y = renderPos.y;
-	destRect.w = 40;
-	destRect.h = 40;
+	destRect.w = cursorSurface->w;
+	destRect.h = cursorSurface->h;
 
 	SDL_RenderCopy(mainRenderer, cursorTexture, nullptr, &destRect);
 }
@@ -44,7 +46,7 @@ void CursorSoftware::createTexture(const Point & dimensions)
 	if (cursorSurface)
 		SDL_FreeSurface(cursorSurface);
 
-	cursorSurface = CSDL_Ext::newSurface(dimensions.x, dimensions.y);
+	cursorSurface = CSDL_Ext::newSurface(dimensions);
 	cursorTexture = SDL_CreateTexture(mainRenderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, dimensions.x, dimensions.y);
 
 	SDL_SetSurfaceBlendMode(cursorSurface, SDL_BLENDMODE_NONE);
@@ -53,8 +55,13 @@ void CursorSoftware::createTexture(const Point & dimensions)
 
 void CursorSoftware::updateTexture()
 {
-	if (!cursorSurface ||  Point(cursorSurface->w, cursorSurface->h) != cursorImage->dimensions())
-		createTexture(cursorImage->dimensions());
+	if (!cursorSurface)
+		createTexture(cursorImage->dimensions() * GH.screenHandler().getScalingFactor());
+
+	Point currentSize = Point(cursorSurface->w, cursorSurface->h);
+
+	if (currentSize != cursorImage->dimensions() * GH.screenHandler().getScalingFactor())
+		createTexture(cursorImage->dimensions() * GH.screenHandler().getScalingFactor());
 
 	CSDL_Ext::fillSurface(cursorSurface, CSDL_Ext::toSDL(Colors::TRANSPARENCY));
 

+ 149 - 0
client/renderSDL/ImageScaled.cpp

@@ -0,0 +1,149 @@
+/*
+ * ImageScaled.cpp, part of VCMI engine
+ *
+ * Authors: listed in file AUTHORS in main folder
+ *
+ * License: GNU General Public License v2.0 or later
+ * Full text of license available in license.txt file, in main folder
+ *
+ */
+#include "StdInc.h"
+#include "ImageScaled.h"
+
+#include "SDLImage.h"
+#include "SDL_Extensions.h"
+
+#include "../gui/CGuiHandler.h"
+#include "../render/IScreenHandler.h"
+#include "../render/Colors.h"
+
+#include "../../lib/constants/EntityIdentifiers.h"
+
+#include <SDL_surface.h>
+
+/// Creates upscaled image for provided locator and unscaled source image.
+/// Only body layer is loaded at this point, shadow and overlay remain disabled
+ImageScaled::ImageScaled(const ImageLocator & inputLocator, const std::shared_ptr<ISharedImage> & source, EImageBlitMode mode)
+	: source(source)
+	, locator(inputLocator)
+	, colorMultiplier(Colors::WHITE_TRUE)
+	, alphaValue(SDL_ALPHA_OPAQUE)
+	, blitMode(mode)
+{
+	const int screenScaling = GH.screenHandler().getScalingFactor();
+
+	// layers are always requested at current screen scaling, whatever caller has put into locator
+	locator.scalingFactor = screenScaling;
+	setBodyEnabled(true);
+}
+
+/// Provides access to upscaled body layer. Result is null if body is disabled
+std::shared_ptr<ISharedImage> ImageScaled::getSharedImage() const
+{
+	std::shared_ptr<ISharedImage> bodyLayer = body;
+	return bodyLayer;
+}
+
+/// Not implemented for this class - any call trips the assertion
+void ImageScaled::scaleInteger(int factor)
+{
+	assert(0);
+}
+
+/// Replaces body layer with its copy scaled to specified size. Does nothing if body is disabled.
+/// Shadow and overlay layers are not touched
+void ImageScaled::scaleTo(const Point & size)
+{
+	if (!body)
+		return;
+
+	// FIXME: adjust for scaling
+	body = body->scaleTo(size, nullptr);
+}
+
+/// Exports image to file. Export is delegated to original, unscaled image
+void ImageScaled::exportBitmap(const boost::filesystem::path &path) const
+{
+	const auto & unscaled = source;
+	unscaled->exportBitmap(path);
+}
+
+/// Tests transparency of a pixel. Query is answered by original, unscaled image
+bool ImageScaled::isTransparent(const Point &coords) const
+{
+	const bool transparent = source->isTransparent(coords);
+	return transparent;
+}
+
+/// Returns dimensions as reported by original, unscaled image
+Point ImageScaled::dimensions() const
+{
+	const auto & unscaled = source;
+	return unscaled->dimensions();
+}
+
+/// Stores alpha value that will be used by subsequent draw() calls
+void ImageScaled::setAlpha(uint8_t value)
+{
+	this->alphaValue = value;
+}
+
+/// Stores blit mode that will be used by subsequent draw() calls and layer loads
+void ImageScaled::setBlitMode(EImageBlitMode mode)
+{
+	this->blitMode = mode;
+}
+
+/// Renders all enabled layers in fixed order: shadow, then body, then overlay
+void ImageScaled::draw(SDL_Surface *where, const Point &pos, const Rect *src) const
+{
+	// shadow and body are rendered without any tint
+	if (shadow)
+		shadow->draw(where, nullptr, pos, src, Colors::WHITE_TRUE, alphaValue, blitMode);
+
+	if (body)
+		body->draw(where, nullptr, pos, src, Colors::WHITE_TRUE, alphaValue, blitMode);
+
+	if (!overlay)
+		return;
+
+	// overlay is tinted with overlay color, its opacity combines alpha of overlay color and alpha of the image
+	const auto overlayAlpha = colorMultiplier.a * alphaValue / 255;
+	overlay->draw(where, nullptr, pos, src, colorMultiplier, overlayAlpha, blitMode);
+}
+
+/// Stores color that draw() applies to overlay layer
+void ImageScaled::setOverlayColor(const ColorRGBA & color)
+{
+	this->colorMultiplier = color;
+}
+
+/// Remembers player color and reloads body layer, if it is currently enabled
+void ImageScaled::playerColored(PlayerColor player)
+{
+	playerColor = player;
+
+	const bool bodyLoaded = body != nullptr;
+	if (bodyLoaded)
+		setBodyEnabled(true); // regenerate
+}
+
+/// Currently a stub - call has no effect on this image
+void ImageScaled::shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove)
+{
+	// TODO: implement
+}
+
+/// Currently a stub - call has no effect on this image
+void ImageScaled::adjustPalette(const ColorFilter &shifter, uint32_t colorsToSkipMask)
+{
+	// TODO: implement
+}
+
+/// Loads (on == true) or drops (on == false) upscaled shadow layer of this image
+void ImageScaled::setShadowEnabled(bool on)
+{
+	if (!on)
+	{
+		shadow = nullptr;
+		return;
+	}
+
+	// point locator at shadow layer only, without player coloring
+	locator.playerColored = PlayerColor::CANNOT_DETERMINE;
+	locator.layerOverlay = false;
+	locator.layerShadow = true;
+	locator.layerBody = false;
+
+	shadow = GH.renderHandler().loadImage(locator, blitMode)->getSharedImage();
+}
+
+/// Loads (on == true) or drops (on == false) upscaled body layer of this image
+void ImageScaled::setBodyEnabled(bool on)
+{
+	if (!on)
+	{
+		body = nullptr;
+		return;
+	}
+
+	// point locator at body layer only, using player color that is currently set
+	locator.playerColored = playerColor;
+	locator.layerOverlay = false;
+	locator.layerShadow = false;
+	locator.layerBody = true;
+
+	body = GH.renderHandler().loadImage(locator, blitMode)->getSharedImage();
+}
+
+
+/// Loads (on == true) or drops (on == false) upscaled overlay layer of this image
+void ImageScaled::setOverlayEnabled(bool on)
+{
+	if (!on)
+	{
+		overlay = nullptr;
+		return;
+	}
+
+	// point locator at overlay layer only, without player coloring
+	locator.playerColored = PlayerColor::CANNOT_DETERMINE;
+	locator.layerOverlay = true;
+	locator.layerShadow = false;
+	locator.layerBody = false;
+
+	overlay = GH.renderHandler().loadImage(locator, blitMode)->getSharedImage();
+}

+ 67 - 0
client/renderSDL/ImageScaled.h

@@ -0,0 +1,67 @@
+/*
+ * ImageScaled.h, part of VCMI engine
+ *
+ * Authors: listed in file AUTHORS in main folder
+ *
+ * License: GNU General Public License v2.0 or later
+ * Full text of license available in license.txt file, in main folder
+ *
+ */
+#pragma once
+
+#include "../render/IImage.h"
+#include "../render/IRenderHandler.h"
+
+#include "../../lib/Color.h"
+#include "../../lib/constants/EntityIdentifiers.h"
+
+struct SDL_Palette;
+
+class SDLImageShared;
+
+// Upscaled image with several mechanisms to emulate H3 palette effects
+class ImageScaled final : public IImage
+{
+private:
+
+	/// Original unscaled image
+	std::shared_ptr<ISharedImage> source;
+
+	/// Upscaled shadow of our image, may be null
+	std::shared_ptr<ISharedImage> shadow;
+
+	/// Upscaled main part of our image, may be null
+	std::shared_ptr<ISharedImage> body;
+
+	/// Upscaled overlay (player color, selection highlight) of our image, may be null
+	std::shared_ptr<ISharedImage> overlay;
+
+	ImageLocator locator; // reused to request individual layers: set*Enabled(true) overwrites its layer flags and playerColored before loading
+
+	ColorRGBA colorMultiplier; // set by setOverlayColor(); tints the overlay layer in draw(), its alpha scales overlay opacity
+	PlayerColor playerColor = PlayerColor::CANNOT_DETERMINE; // set by playerColored(); copied into locator whenever the body layer is loaded
+
+	uint8_t alphaValue; // opacity passed to every layer in draw()
+	EImageBlitMode blitMode; // passed to every layer in draw() and to the render handler when a layer is loaded
+
+public:
+	ImageScaled(const ImageLocator & locator, const std::shared_ptr<ISharedImage> & source, EImageBlitMode mode);
+
+	void scaleInteger(int factor) override;
+	void scaleTo(const Point & size) override;
+	void exportBitmap(const boost::filesystem::path & path) const override;
+	bool isTransparent(const Point & coords) const override;
+	Point dimensions() const override;
+	void setAlpha(uint8_t value) override;
+	void setBlitMode(EImageBlitMode mode) override;
+	void draw(SDL_Surface * where, const Point & pos, const Rect * src) const override; // draws shadow, body and overlay in this order, skipping null layers
+	void setOverlayColor(const ColorRGBA & color) override;
+	void playerColored(PlayerColor player) override; // reloads the body layer if it is currently enabled
+	void shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove) override; // not implemented yet, no-op
+	void adjustPalette(const ColorFilter & shifter, uint32_t colorsToSkipMask) override; // not implemented yet, no-op
+
+	void setShadowEnabled(bool on) override; // true - (re)load the layer via render handler, false - drop it
+	void setBodyEnabled(bool on) override; // true - (re)load the layer via render handler, false - drop it
+	void setOverlayEnabled(bool on) override; // true - (re)load the layer via render handler, false - drop it
+	std::shared_ptr<ISharedImage> getSharedImage() const override;
+};

+ 94 - 31
client/renderSDL/RenderHandler.cpp

@@ -11,9 +11,15 @@
 #include "RenderHandler.h"
 
 #include "SDLImage.h"
+#include "ImageScaled.h"
+
+#include "../gui/CGuiHandler.h"
 
 #include "../render/CAnimation.h"
 #include "../render/CDefFile.h"
+#include "../render/Colors.h"
+#include "../render/ColorFilter.h"
+#include "../render/IScreenHandler.h"
 
 #include "../../lib/json/JsonUtils.h"
 #include "../../lib/filesystem/Filesystem.h"
@@ -125,56 +131,87 @@ RenderHandler::AnimationLayoutMap & RenderHandler::getAnimationLayout(const Anim
 	return animationLayouts[actualPath];
 }
 
-std::shared_ptr<IConstImage> RenderHandler::loadImageFromSingleFile(const ImagePath & path)
+int RenderHandler::getScalingFactor() const
 {
-	auto result = std::make_shared<SDLImageConst>(path);
-	imageFiles[ImageLocator(path)] = result;
-	return result;
+	return GH.screenHandler().getScalingFactor();
+}
+
+std::shared_ptr<IImage> RenderHandler::createImageReference(const ImageLocator & locator, std::shared_ptr<ISharedImage> input, EImageBlitMode mode)
+{
+	// ImageScaled wrapper is used only if screen scaling is active
+	// and the request is for an unscaled image with a non-empty locator
+	const bool wrapInScaledImage = getScalingFactor() != 1 && locator.scalingFactor == 1 && !locator.empty();
+
+	if (wrapInScaledImage)
+		return std::make_shared<ImageScaled>(locator, input, mode);
+
+	return input->createImageReference(mode);
+}
 
-std::shared_ptr<IConstImage> RenderHandler::loadImageFromAnimationFileUncached(const AnimationPath & path, int frame, int group)
+ImageLocator RenderHandler::getLocatorForAnimationFrame(const AnimationPath & path, int frame, int group)
 {
 	const auto & layout = getAnimationLayout(path);
 	if (!layout.count(group))
-		return loadImageFromSingleFile(ImagePath::builtin("DEFAULT"));
+		return ImageLocator(ImagePath::builtin("DEFAULT"));
 
 	if (frame >= layout.at(group).size())
-		return loadImageFromSingleFile(ImagePath::builtin("DEFAULT"));
+		return ImageLocator(ImagePath::builtin("DEFAULT"));
 
 	const auto & locator = layout.at(group).at(frame);
+	if (locator.image || locator.defFile)
+		return locator;
+
+	return ImageLocator(path, frame, group);
+}
+
+std::shared_ptr<ISharedImage> RenderHandler::loadImageImpl(const ImageLocator & locator)
+{
+	// image for this exact locator may have been produced already
+	const auto cachedEntry = imageFiles.find(locator);
+	if (cachedEntry != imageFiles.end())
+		return cachedEntry->second;
+
+	// TODO: order should be different:
+	// 1) try to find correctly scaled image
+	// 2) if fails -> try to find correctly transformed
+	// 3) if also fails -> try to find image from correct file
+	// 4) load missing part of the sequence
+	// TODO: check whether (load -> transform -> scale) or (load -> scale -> transform) order should be used for proper loading of pre-scaled data
+
+	// current sequence: load -> transform -> scale, every step receives its own copy of the locator
+	auto loaded = loadImageFromFile(locator.copyFile());
+	auto transformed = transformImage(locator.copyFileTransform(), loaded);
+	return scaleImage(locator.copyFileTransformScale(), transformed);
+}
+
+std::shared_ptr<ISharedImage> RenderHandler::loadImageFromFileUncached(const ImageLocator & locator)
+{
 	if (locator.image)
 	{
-		return loadImageImpl(locator);
+		// TODO: create EmptySharedImage class that will be instantiated if image does not exist or fails to load
+		return std::make_shared<SDLImageShared>(*locator.image);
 	}
-	else
+
+	if (locator.defFile)
 	{
-		auto defFile = getAnimationFile(path);
-		return std::make_shared<SDLImageConst>(defFile.get(), frame, group);
+		auto defFile = getAnimationFile(*locator.defFile);
+		return std::make_shared<SDLImageShared>(defFile.get(), locator.defFrame, locator.defGroup);
 	}
+
+	throw std::runtime_error("Invalid image locator received!");
 }
 
-std::shared_ptr<IConstImage> RenderHandler::loadImageFromAnimationFile(const AnimationPath & path, int frame, int group)
+std::shared_ptr<ISharedImage> RenderHandler::loadImageFromFile(const ImageLocator & locator)
 {
-	auto result = loadImageFromAnimationFileUncached(path, frame, group);
-	imageFiles[ImageLocator(path, frame, group)] = result;
+	if (imageFiles.count(locator))
+		return imageFiles.at(locator);
+
+	auto result = loadImageFromFileUncached(locator);
+	imageFiles[locator] = result;
 	return result;
 }
 
-std::shared_ptr<IConstImage> RenderHandler::loadImageImpl(const ImageLocator & locator)
+std::shared_ptr<ISharedImage> RenderHandler::transformImage(const ImageLocator & locator, std::shared_ptr<ISharedImage> image)
 {
-	auto it = imageFiles.find(locator);
-	if (it != imageFiles.end())
-		return it->second;
+	if (imageFiles.count(locator))
+		return imageFiles.at(locator);
 
-	std::shared_ptr<IConstImage> result;
-
-	if (locator.image)
-		result = loadImageFromSingleFile(*locator.image);
-	else if (locator.defFile)
-		result = loadImageFromAnimationFile(*locator.defFile, locator.defFrame, locator.defGroup);
-
-	if (!result)
-		result = loadImageFromSingleFile(ImagePath::builtin("DEFAULT"));
+	auto result = image;
 
 	if (locator.verticalFlip)
 		result = result->verticalFlip();
@@ -186,24 +223,50 @@ std::shared_ptr<IConstImage> RenderHandler::loadImageImpl(const ImageLocator & l
 	return result;
 }
 
+std::shared_ptr<ISharedImage> RenderHandler::scaleImage(const ImageLocator & locator, std::shared_ptr<ISharedImage> image)
+{
+	const auto cachedEntry = imageFiles.find(locator);
+	if (cachedEntry != imageFiles.end())
+		return cachedEntry->second;
+
+	auto reference = image->createImageReference(EImageBlitMode::OPAQUE);
+
+	assert(locator.scalingFactor != 1); // should be filtered-out before
+
+	// leave only the layers requested by the locator before scaling
+	reference->setOverlayEnabled(locator.layerOverlay);
+	reference->setBodyEnabled(locator.layerBody);
+	reference->setShadowEnabled(locator.layerShadow);
+
+	// player color is applied only to the body layer
+	const bool playerColorKnown = locator.playerColored != PlayerColor::CANNOT_DETERMINE;
+	if (locator.layerBody && playerColorKnown)
+		reference->playerColored(locator.playerColored);
+
+	reference->scaleInteger(locator.scalingFactor);
+
+	// TODO: try to optimize image size (possibly even before scaling?) - trim image borders if they are completely transparent
+	auto scaledImage = reference->getSharedImage();
+	imageFiles[locator] = scaledImage;
+	return scaledImage;
+}
+
 std::shared_ptr<IImage> RenderHandler::loadImage(const ImageLocator & locator, EImageBlitMode mode)
 {
-	return loadImageImpl(locator)->createImageReference(mode);
+	return createImageReference(locator, loadImageImpl(locator), mode);
 }
 
 std::shared_ptr<IImage> RenderHandler::loadImage(const AnimationPath & path, int frame, int group, EImageBlitMode mode)
 {
-	return loadImageFromAnimationFile(path, frame, group)->createImageReference(mode);
+	auto locator = getLocatorForAnimationFrame(path, frame, group);
+	return loadImage(locator, mode);
 }
 
 std::shared_ptr<IImage> RenderHandler::loadImage(const ImagePath & path, EImageBlitMode mode)
 {
-	return loadImageImpl(ImageLocator(path))->createImageReference(mode);
+	return loadImage(ImageLocator(path), mode);
 }
 
 std::shared_ptr<IImage> RenderHandler::createImage(SDL_Surface * source)
 {
-	return std::make_shared<SDLImageConst>(source)->createImageReference(EImageBlitMode::ALPHA);
+	return createImageReference(ImageLocator(), std::make_shared<SDLImageShared>(source), EImageBlitMode::ALPHA);
 }
 
 std::shared_ptr<CAnimation> RenderHandler::loadAnimation(const AnimationPath & path, EImageBlitMode mode)

+ 16 - 6
client/renderSDL/RenderHandler.h

@@ -16,7 +16,8 @@ class EntityService;
 VCMI_LIB_NAMESPACE_END
 
 class CDefFile;
-class IConstImage;
+class SDLImageShared;
+class ISharedImage;
 
 class RenderHandler : public IRenderHandler
 {
@@ -24,7 +25,7 @@ class RenderHandler : public IRenderHandler
 
 	std::map<AnimationPath, std::shared_ptr<CDefFile>> animationFiles;
 	std::map<AnimationPath, AnimationLayoutMap> animationLayouts;
-	std::map<ImageLocator, std::shared_ptr<IConstImage>> imageFiles;
+	std::map<ImageLocator, std::shared_ptr<ISharedImage>> imageFiles;
 
 	std::shared_ptr<CDefFile> getAnimationFile(const AnimationPath & path);
 	AnimationLayoutMap & getAnimationLayout(const AnimationPath & path);
@@ -33,10 +34,19 @@ class RenderHandler : public IRenderHandler
 	void addImageListEntry(size_t index, size_t group, const std::string & listName, const std::string & imageName);
 	void addImageListEntries(const EntityService * service);
 
-	std::shared_ptr<IConstImage> loadImageFromSingleFile(const ImagePath & path);
-	std::shared_ptr<IConstImage> loadImageFromAnimationFileUncached(const AnimationPath & path, int frame, int group);
-	std::shared_ptr<IConstImage> loadImageFromAnimationFile(const AnimationPath & path, int frame, int group);
-	std::shared_ptr<IConstImage> loadImageImpl(const ImageLocator & config);
+	std::shared_ptr<ISharedImage> loadImageImpl(const ImageLocator & config);
+
+	std::shared_ptr<ISharedImage> loadImageFromFileUncached(const ImageLocator & locator);
+	std::shared_ptr<ISharedImage> loadImageFromFile(const ImageLocator & locator);
+
+	std::shared_ptr<ISharedImage> transformImage(const ImageLocator & locator, std::shared_ptr<ISharedImage> image);
+	std::shared_ptr<ISharedImage> scaleImage(const ImageLocator & locator, std::shared_ptr<ISharedImage> image);
+
+	ImageLocator getLocatorForAnimationFrame(const AnimationPath & path, int frame, int group);
+
+	int getScalingFactor() const;
+
+	std::shared_ptr<IImage> createImageReference(const ImageLocator & locator, std::shared_ptr<ISharedImage> input, EImageBlitMode mode);
 public:
 
 	// IRenderHandler implementation

+ 313 - 47
client/renderSDL/SDLImage.cpp

@@ -14,14 +14,70 @@
 #include "SDL_Extensions.h"
 
 #include "../render/ColorFilter.h"
+#include "../render/Colors.h"
 #include "../render/CBitmapHandler.h"
 #include "../render/CDefFile.h"
 #include "../render/Graphics.h"
+#include "../xBRZ/xbrz.h"
 
+#include <tbb/parallel_for.h>
 #include <SDL_surface.h>
 
 class SDLImageLoader;
 
+//First 8 colors in def palette used for transparency
+static constexpr std::array<SDL_Color, 8> sourcePalette = {{
+	{0,   255, 255, SDL_ALPHA_OPAQUE},
+	{255, 150, 255, SDL_ALPHA_OPAQUE},
+	{255, 100, 255, SDL_ALPHA_OPAQUE},
+	{255, 50,  255, SDL_ALPHA_OPAQUE},
+	{255, 0,   255, SDL_ALPHA_OPAQUE},
+	{255, 255, 0,   SDL_ALPHA_OPAQUE},
+	{180, 0,   255, SDL_ALPHA_OPAQUE},
+	{0,   255, 0,   SDL_ALPHA_OPAQUE}
+}};
+
+static constexpr std::array<ColorRGBA, 8> targetPalette = {{
+	{0, 0, 0, 0  }, // 0 - transparency                  ( used in most images )
+	{0, 0, 0, 64 }, // 1 - shadow border                 ( used in battle, adventure map def's )
+	{0, 0, 0, 64 }, // 2 - shadow border                 ( used in fog-of-war def's )
+	{0, 0, 0, 128}, // 3 - shadow body                   ( used in fog-of-war def's )
+	{0, 0, 0, 128}, // 4 - shadow body                   ( used in battle, adventure map def's )
+	{0, 0, 0, 0  }, // 5 - selection / owner flag        ( used in battle, adventure map def's )
+	{0, 0, 0, 128}, // 6 - shadow body   below selection ( used in battle def's )
+	{0, 0, 0, 64 }  // 7 - shadow border below selection ( used in battle def's )
+}};
+
+/// Blends a single color channel of two colors, with the first color placed on top of the second one
+/// c1, a1 - channel value and alpha of the top color
+/// c2, a2 - channel value and alpha of the bottom color
+/// Alpha is stored in 0..255 range, so products are normalized by 255. Normalizing by 256 loses one unit
+/// on fully opaque input (e.g. channel 255 with alpha 255 would result in 254)
+static ui8 mixChannels(ui8 c1, ui8 c2, ui8 a1, ui8 a2)
+{
+	// operands are promoted to int, largest intermediate value is 255^3, sum never exceeds 255
+	return c1*a1 / 255 + c2*a2*(255 - a1) / 255 / 255;
+}
+
+/// Returns color that is the result of placing color 'over' on top of color 'base'
+/// Fully transparent 'over' leaves alpha of 'base' unchanged, fully opaque 'over' fully replaces 'base'
+static ColorRGBA addColors(const ColorRGBA & base, const ColorRGBA & over)
+{
+	return ColorRGBA(
+		mixChannels(over.r, base.r, over.a, base.a),
+		mixChannels(over.g, base.g, over.a, base.a),
+		mixChannels(over.b, base.b, over.a, base.a),
+		static_cast<ui8>(over.a + base.a * (255 - over.a) / 255)
+		);
+}
+
+static bool colorsSimilar (const SDL_Color & lhs, const SDL_Color & rhs)
+{
+	// it seems that H3 does not require an exact match to replace colors -> (255, 103, 255) gets interpreted as shadow
+	// exact logic is not clear and requires extensive testing with image editing
+	// potential reason is that H3 uses 16-bit color format (565 RGB bits), meaning that 3 least significant bits are lost in red and blue component
+	static const int threshold = 8;
+
+	// channels are compared as int to avoid wrap-around of unsigned 8-bit values
+	const auto channelsClose = [](int first, int second)
+	{
+		return std::abs(first - second) < threshold;
+	};
+
+	return channelsClose(lhs.r, rhs.r)
+		&& channelsClose(lhs.g, rhs.g)
+		&& channelsClose(lhs.b, rhs.b)
+		&& channelsClose(lhs.a, rhs.a);
+}
+
 int IImage::width() const
 {
 	return dimensions().x;
@@ -32,7 +88,7 @@ int IImage::height() const
 	return dimensions().y;
 }
 
-SDLImageConst::SDLImageConst(CDefFile * data, size_t frame, size_t group)
+SDLImageShared::SDLImageShared(const CDefFile * data, size_t frame, size_t group)
 	: surf(nullptr),
 	margins(0, 0),
 	fullSize(0, 0),
@@ -44,7 +100,7 @@ SDLImageConst::SDLImageConst(CDefFile * data, size_t frame, size_t group)
 	savePalette();
 }
 
-SDLImageConst::SDLImageConst(SDL_Surface * from)
+SDLImageShared::SDLImageShared(SDL_Surface * from)
 	: surf(nullptr),
 	margins(0, 0),
 	fullSize(0, 0),
@@ -61,7 +117,7 @@ SDLImageConst::SDLImageConst(SDL_Surface * from)
 	fullSize.y = surf->h;
 }
 
-SDLImageConst::SDLImageConst(const ImagePath & filename)
+SDLImageShared::SDLImageShared(const ImagePath & filename)
 	: surf(nullptr),
 	margins(0, 0),
 	fullSize(0, 0),
@@ -83,7 +139,7 @@ SDLImageConst::SDLImageConst(const ImagePath & filename)
 }
 
 
-void SDLImageConst::draw(SDL_Surface * where, SDL_Palette * palette, const Point & dest, const Rect * src, uint8_t alpha, EImageBlitMode mode) const
+void SDLImageShared::draw(SDL_Surface * where, SDL_Palette * palette, const Point & dest, const Rect * src, const ColorRGBA & colorMultiplier, uint8_t alpha, EImageBlitMode mode) const
 {
 	if (!surf)
 		return;
@@ -109,6 +165,7 @@ void SDLImageConst::draw(SDL_Surface * where, SDL_Palette * palette, const Point
 
 	destShift += dest;
 
+	SDL_SetSurfaceColorMod(surf, colorMultiplier.r, colorMultiplier.g, colorMultiplier.b);
 	SDL_SetSurfaceAlphaMod(surf, alpha);
 
 	if (alpha != SDL_ALPHA_OPAQUE || (mode != EImageBlitMode::OPAQUE && surf->format->Amask != 0))
@@ -127,21 +184,143 @@ void SDLImageConst::draw(SDL_Surface * where, SDL_Palette * palette, const Point
 	{
 		CSDL_Ext::blitSurface(surf, sourceRect, where, destShift);
 	}
+
+	if (surf->format->palette)
+		SDL_SetSurfacePalette(surf, originalPalette);
 }
 
-const SDL_Palette * SDLImageConst::getPalette() const
+void SDLImageShared::optimizeSurface()
 {
-	if (originalPalette == nullptr)
-		throw std::runtime_error("Palette not found!");
+	if (!surf)
+		return;
+
+	int left = surf->w;
+	int top = surf->h;
+	int right = 0;
+	int bottom = 0;
+
+	// locate fully-transparent area around image
+	// H3 handles this on format level, but mods or images scaled in runtime do not
+	if (surf->format->palette)
+	{
+		for (int y = 0; y < surf->h; ++y)
+		{
+			const uint8_t * row = static_cast<uint8_t *>(surf->pixels) + y * surf->pitch;
+			for (int x = 0; x < surf->w; ++x)
+			{
+				if (row[x] != 0)
+				{
+					// opaque or can be opaque (e.g. disabled shadow)
+					top = std::min(top, y);
+					left = std::min(left, x);
+					right = std::max(right, x);
+					bottom = std::max(bottom, y);
+				}
+			}
+		}
+	}
+	else
+	{
+		for (int y = 0; y < surf->h; ++y)
+		{
+			for (int x = 0; x < surf->w; ++x)
+			{
+				ColorRGBA color;
+				SDL_GetRGBA(CSDL_Ext::getPixel(surf, x, y), surf->format, &color.r, &color.g, &color.b, &color.a);
+
+				if (color.a != SDL_ALPHA_TRANSPARENT)
+				{
+					 // opaque
+					top = std::min(top, y);
+					left = std::min(left, x);
+					right = std::max(right, x);
+					bottom = std::max(bottom, y);
+				}
+			}
+		}
+	}
+
+	if (left == surf->w)
+	{
+		// empty image - simply delete it
+		SDL_FreeSurface(surf);
+		surf = nullptr;
+		return;
+	}
+
+	if (left != 0 || top != 0 || right != surf->w - 1 || bottom != surf->h - 1)
+	{
+		// non-zero border found
+		Rect newDimensions(left, top, right - left + 1, bottom - top + 1);
+		SDL_Rect rectSDL = CSDL_Ext::toSDL(newDimensions);
+		auto newSurface = CSDL_Ext::newSurface(newDimensions.dimensions(), surf);
+		SDL_SetSurfaceBlendMode(surf, SDL_BLENDMODE_NONE);
+		SDL_BlitSurface(surf, &rectSDL, newSurface, nullptr);
 
-	return originalPalette;
+		SDL_FreeSurface(surf);
+		surf = newSurface;
+
+		margins.x += left;
+		margins.y += top;
+	}
 }
 
-std::shared_ptr<SDLImageConst> SDLImageConst::scaleFast(const Point & size) const
+std::shared_ptr<ISharedImage> SDLImageShared::scaleInteger(int factor, SDL_Palette * palette) const
+{ // Creates new shared image upscaled by integer factor using xBRZ. Throws std::runtime_error if factor is not positive
+	if (factor <= 0)
+		throw std::runtime_error("Unable to scale by integer value of " + std::to_string(factor));
+
+	if (palette && surf->format->palette) // NOTE(review): surf is dereferenced without a null check, while optimizeSurface() can reset surf to nullptr for empty images - confirm this is never called on such images
+		SDL_SetSurfacePalette(surf, palette); // temporarily use provided palette, so conversion below picks up its colors
+
+	/// Convert current surface to ARGB format suitable for xBRZ
+	/// TODO: skip its creation if its format matches current surface (even if unlikely)
+	SDL_Surface * intermediate = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_ARGB8888, 0);
+	SDL_Surface * scaled = CSDL_Ext::newSurface(Point(surf->w * factor, surf->h * factor), intermediate); // destination is 'factor' times larger in both dimensions
+
+	assert(intermediate->pitch == intermediate->w * 4); // xbrz::scale below receives only width and height, no pitch - rows must have no padding
+	assert(scaled->pitch == scaled->w * 4);
+
+	const uint32_t * srcPixels = static_cast<const uint32_t*>(intermediate->pixels);
+	uint32_t * dstPixels = static_cast<uint32_t*>(scaled->pixels);
+
+	// avoid excessive granulation - xBRZ prefers at least 8-16 lines per task
+	// TODO: compare performance and size of images, recheck values for potentially better parameters
+	const int granulation = std::clamp(surf->h / 64 * 8, 8, 64); // grain size for the row range below, always within 8..64
+
+	tbb::parallel_for(tbb::blocked_range<size_t>(0, intermediate->h, granulation), [factor, srcPixels, dstPixels, intermediate](const tbb::blocked_range<size_t> & r) // source rows [0, h) are split into sub-ranges
+	{
+		xbrz::scale(factor, srcPixels, dstPixels, intermediate->w, intermediate->h, xbrz::ColorFormat::ARGB, {}, r.begin(), r.end()); // each task is given rows [r.begin(), r.end()) of the source
+	});
+
+	SDL_FreeSurface(intermediate);
+
+	auto ret = std::make_shared<SDLImageShared>(scaled);
+
+	ret->fullSize.x = fullSize.x * factor; // logical size and margins grow by the same factor as the surface
+	ret->fullSize.y = fullSize.y * factor;
+
+	ret->margins.x = margins.x * factor;
+	ret->margins.y = margins.y * factor;
+	ret->optimizeSurface(); // trims fully transparent borders, must run after margins are set since it adjusts them
+
+	// erase our own reference - 'ret' is expected to hold its own reference to the surface
+	SDL_FreeSurface(scaled);
+
+	if (surf->format->palette)
+		SDL_SetSurfacePalette(surf, originalPalette); // undo temporary palette replacement from the start of this method
+
+	return ret;
+}
+
+std::shared_ptr<ISharedImage> SDLImageShared::scaleTo(const Point & size, SDL_Palette * palette) const
 {
 	float scaleX = float(size.x) / dimensions().x;
 	float scaleY = float(size.y) / dimensions().y;
 
+	if (palette && surf->format->palette)
+		SDL_SetSurfacePalette(surf, palette);
+
 	auto scaled = CSDL_Ext::scaleSurface(surf, (int)(surf->w * scaleX), (int)(surf->h * scaleY));
 
 	if (scaled->format && scaled->format->palette) // fix color keying, because SDL loses it at this point
@@ -151,7 +330,7 @@ std::shared_ptr<SDLImageConst> SDLImageConst::scaleFast(const Point & size) cons
 	else
 		CSDL_Ext::setDefaultColorKey(scaled);//just in case
 
-	auto ret = std::make_shared<SDLImageConst>(scaled);
+	auto ret = std::make_shared<SDLImageShared>(scaled);
 
 	ret->fullSize.x = (int) round((float)fullSize.x * scaleX);
 	ret->fullSize.y = (int) round((float)fullSize.y * scaleY);
@@ -162,10 +341,13 @@ std::shared_ptr<SDLImageConst> SDLImageConst::scaleFast(const Point & size) cons
 	// erase our own reference
 	SDL_FreeSurface(scaled);
 
+	if (surf->format->palette)
+		SDL_SetSurfacePalette(surf, originalPalette);
+
 	return ret;
 }
 
-void SDLImageConst::exportBitmap(const boost::filesystem::path& path) const
+void SDLImageShared::exportBitmap(const boost::filesystem::path& path) const
 {
 	SDL_SaveBMP(surf, path.string().c_str());
 }
@@ -175,13 +357,7 @@ void SDLImageIndexed::playerColored(PlayerColor player)
 	graphics->setPlayerPalette(currentPalette, player);
 }
 
-void SDLImageIndexed::setFlagColor(PlayerColor player)
-{
-	if(player.isValidPlayer() || player==PlayerColor::NEUTRAL)
-		graphics->setPlayerFlagColor(currentPalette, player);
-}
-
-bool SDLImageConst::isTransparent(const Point & coords) const
+bool SDLImageShared::isTransparent(const Point & coords) const
 {
 	if (surf)
 		return CSDL_Ext::isTransparent(surf, coords.x, coords.y);
@@ -189,23 +365,23 @@ bool SDLImageConst::isTransparent(const Point & coords) const
 		return true;
 }
 
-Point SDLImageConst::dimensions() const
+Point SDLImageShared::dimensions() const
 {
 	return fullSize;
 }
 
-std::shared_ptr<IImage> SDLImageConst::createImageReference(EImageBlitMode mode)
+std::shared_ptr<IImage> SDLImageShared::createImageReference(EImageBlitMode mode)
 {
 	if (surf && surf->format->palette)
-		return std::make_shared<SDLImageIndexed>(shared_from_this(), mode);
+		return std::make_shared<SDLImageIndexed>(shared_from_this(), originalPalette, mode);
 	else
 		return std::make_shared<SDLImageRGB>(shared_from_this(), mode);
 }
 
-std::shared_ptr<IConstImage> SDLImageConst::horizontalFlip() const
+std::shared_ptr<ISharedImage> SDLImageShared::horizontalFlip() const
 {
 	SDL_Surface * flipped = CSDL_Ext::horizontalFlip(surf);
-	auto ret = std::make_shared<SDLImageConst>(flipped);
+	auto ret = std::make_shared<SDLImageShared>(flipped);
 	ret->fullSize = fullSize;
 	ret->margins.x = margins.x;
 	ret->margins.y = fullSize.y - surf->h - margins.y;
@@ -214,10 +390,10 @@ std::shared_ptr<IConstImage> SDLImageConst::horizontalFlip() const
 	return ret;
 }
 
-std::shared_ptr<IConstImage> SDLImageConst::verticalFlip() const
+std::shared_ptr<ISharedImage> SDLImageShared::verticalFlip() const
 {
 	SDL_Surface * flipped = CSDL_Ext::verticalFlip(surf);
-	auto ret = std::make_shared<SDLImageConst>(flipped);
+	auto ret = std::make_shared<SDLImageShared>(flipped);
 	ret->fullSize = fullSize;
 	ret->margins.x = fullSize.x - surf->w - margins.x;
 	ret->margins.y = margins.y;
@@ -227,7 +403,7 @@ std::shared_ptr<IConstImage> SDLImageConst::verticalFlip() const
 }
 
 // Keep the original palette, in order to do color switching operation
-void SDLImageConst::savePalette()
+void SDLImageShared::savePalette()
 {
 	// For some images that don't have palette, skip this
 	if(surf->format->palette == nullptr)
@@ -241,8 +417,6 @@ void SDLImageConst::savePalette()
 
 void SDLImageIndexed::shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove)
 {
-	const SDL_Palette * originalPalette = image->getPalette();
-
 	std::vector<SDL_Color> shifterColors(colorsToMove);
 
 	for(uint32_t i=0; i<colorsToMove; ++i)
@@ -253,11 +427,12 @@ void SDLImageIndexed::shiftPalette(uint32_t firstColorID, uint32_t colorsToMove,
 
 void SDLImageIndexed::adjustPalette(const ColorFilter & shifter, uint32_t colorsToSkipMask)
 {
-	const SDL_Palette * originalPalette = image->getPalette();
-
 	// Note: here we skip first colors in the palette that are predefined in H3 images
 	for(int i = 0; i < currentPalette->ncolors; i++)
 	{
+		if (i < std::size(sourcePalette) && colorsSimilar(sourcePalette[i], originalPalette->colors[i]))
+			continue;
+
 		if(i < std::numeric_limits<uint32_t>::digits && ((colorsToSkipMask >> i) & 1) == 1)
 			continue;
 
@@ -265,13 +440,17 @@ void SDLImageIndexed::adjustPalette(const ColorFilter & shifter, uint32_t colors
 	}
 }
 
-SDLImageIndexed::SDLImageIndexed(const std::shared_ptr<SDLImageConst> & image, EImageBlitMode mode)
+SDLImageIndexed::SDLImageIndexed(const std::shared_ptr<ISharedImage> & image, SDL_Palette * originalPalette, EImageBlitMode mode)
 	:SDLImageBase::SDLImageBase(image, mode)
+	,originalPalette(originalPalette)
 {
-	auto originalPalette = image->getPalette();
 
 	currentPalette = SDL_AllocPalette(originalPalette->ncolors);
 	SDL_SetPaletteColors(currentPalette, originalPalette->colors, 0, originalPalette->ncolors);
+
+	setOverlayColor(Colors::TRANSPARENCY);
+	if (mode == EImageBlitMode::ALPHA)
+		setShadowTransparency(1.0);
 }
 
 SDLImageIndexed::~SDLImageIndexed()
@@ -279,42 +458,117 @@ SDLImageIndexed::~SDLImageIndexed()
 	SDL_FreePalette(currentPalette);
 }
 
-void SDLImageIndexed::setSpecialPalette(const IImage::SpecialPalette & specialPalette, uint32_t colorsToSkipMask)
+void SDLImageIndexed::setShadowTransparency(float factor)
 {
-	size_t last = std::min<size_t>(specialPalette.size(), currentPalette->ncolors);
+	ColorRGBA shadow50(0, 0, 0, 128 * factor);
+	ColorRGBA shadow25(0, 0, 0,  64 * factor);
+
+	std::array<SDL_Color, 5> colorsSDL = {
+		originalPalette->colors[0],
+		originalPalette->colors[1],
+		originalPalette->colors[2],
+		originalPalette->colors[3],
+		originalPalette->colors[4]
+	};
+
+	// seems to be used unconditionally
+	colorsSDL[1] = CSDL_Ext::toSDL(shadow25);
+	colorsSDL[4] = CSDL_Ext::toSDL(shadow50);
+
+	// seems to be used only if color matches
+	if (colorsSimilar(originalPalette->colors[0], sourcePalette[0]))
+		colorsSDL[0] = CSDL_Ext::toSDL(Colors::TRANSPARENCY);
+
+	if (colorsSimilar(originalPalette->colors[2], sourcePalette[2]))
+		colorsSDL[2] = CSDL_Ext::toSDL(shadow25);
+
+	if (colorsSimilar(originalPalette->colors[3], sourcePalette[3]))
+		colorsSDL[3] = CSDL_Ext::toSDL(shadow50);
 
-	for (size_t i = 0; i < last; ++i)
+	SDL_SetPaletteColors(currentPalette, colorsSDL.data(), 0, colorsSDL.size());
+}
+
+void SDLImageIndexed::setOverlayColor(const ColorRGBA & color)
+{
+	for (int i : {5,6,7})
 	{
-		if(i < std::numeric_limits<uint32_t>::digits && ((colorsToSkipMask >> i) & 1) == 1)
-			currentPalette->colors[i] = CSDL_Ext::toSDL(specialPalette[i]);
+		if (colorsSimilar(originalPalette->colors[i], sourcePalette[i]))
+			currentPalette->colors[i] = CSDL_Ext::toSDL(addColors(targetPalette[i], color));
 	}
 }
 
-SDLImageConst::~SDLImageConst()
+void SDLImageIndexed::setShadowEnabled(bool on)
+{
+	// palette is touched only when shadow gets enabled, disabling just updates the flag
+	if (on)
+	{
+		setShadowTransparency(1.0);
+	}
+
+	shadowEnabled = on;
+}
+
+void SDLImageIndexed::setBodyEnabled(bool on)
+{
+	// enabled body uses palette without changes, disabled body gets zero alpha
+	// special colors at the start of the palette are skipped by adjustPalette itself
+	adjustPalette(on ? ColorFilter::genEmptyShifter() : ColorFilter::genAlphaShifter(0), 0);
+
+	bodyEnabled = on;
+}
+
+void SDLImageIndexed::setOverlayEnabled(bool on)
+{
+	// overlay is shown by making overlay palette entries white and hidden by making them transparent
+	setOverlayColor(on ? Colors::WHITE_TRUE : Colors::TRANSPARENCY);
+
+	overlayEnabled = on;
+}
+
+SDLImageShared::~SDLImageShared()
 {
 	SDL_FreeSurface(surf);
 	SDL_FreePalette(originalPalette);
 }
 
-SDLImageBase::SDLImageBase(const std::shared_ptr<SDLImageConst> & image, EImageBlitMode mode)
+SDLImageBase::SDLImageBase(const std::shared_ptr<ISharedImage> & image, EImageBlitMode mode)
 	:image(image)
 	, alphaValue(SDL_ALPHA_OPAQUE)
 	, blitMode(mode)
 {}
 
+std::shared_ptr<ISharedImage> SDLImageBase::getSharedImage() const
+{
+	return image;
+}
+
 void SDLImageRGB::draw(SDL_Surface * where, const Point & pos, const Rect * src) const
 {
-	image->draw(where, nullptr, pos, src, alphaValue, blitMode);
+	image->draw(where, nullptr, pos, src, Colors::WHITE_TRUE, alphaValue, blitMode);
 }
 
 void SDLImageIndexed::draw(SDL_Surface * where, const Point & pos, const Rect * src) const
 {
-	image->draw(where, currentPalette, pos, src, alphaValue, blitMode);
+	image->draw(where, currentPalette, pos, src, Colors::WHITE_TRUE, alphaValue, blitMode);
 }
 
-void SDLImageBase::scaleFast(const Point & size)
+void SDLImageIndexed::scaleTo(const Point & size)
 {
-	image = image->scaleFast(size);
+	image = image->scaleTo(size, currentPalette);
+}
+
+void SDLImageRGB::scaleTo(const Point & size)
+{
+	image = image->scaleTo(size, nullptr);
+}
+
+void SDLImageIndexed::scaleInteger(int factor)
+{
+	image = image->scaleInteger(factor, currentPalette);
+}
+
+void SDLImageRGB::scaleInteger(int factor)
+{
+	image = image->scaleInteger(factor, nullptr);
 }
 
 void SDLImageBase::exportBitmap(const boost::filesystem::path & path) const
@@ -342,13 +596,25 @@ void SDLImageBase::setBlitMode(EImageBlitMode mode)
 	blitMode = mode;
 }
 
-void SDLImageRGB::setSpecialPalette(const SpecialPalette & SpecialPalette, uint32_t colorsToSkipMask)
-{}
+void SDLImageRGB::setShadowEnabled(bool on)
+{
+	// Not supported. Theoretically we can try to extract all pixels of specific colors, but better to use 8-bit images or composite images
+}
 
-void SDLImageRGB::playerColored(PlayerColor player)
+void SDLImageRGB::setBodyEnabled(bool on)
+{
+	// Not supported. Theoretically we can try to extract all pixels of specific colors, but better to use 8-bit images or composite images
+}
+
+void SDLImageRGB::setOverlayEnabled(bool on)
+{
+	// Not supported. Theoretically we can try to extract all pixels of specific colors, but better to use 8-bit images or composite images
+}
+
+void SDLImageRGB::setOverlayColor(const ColorRGBA & color)
 {}
 
-void SDLImageRGB::setFlagColor(PlayerColor player)
+void SDLImageRGB::playerColored(PlayerColor player)
 {}
 
 void SDLImageRGB::shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove)

+ 36 - 19
client/renderSDL/SDLImage.h

@@ -24,7 +24,7 @@ struct SDL_Palette;
 /*
  * Wrapper around SDL_Surface
  */
-class SDLImageConst final : public IConstImage, public std::enable_shared_from_this<SDLImageConst>, boost::noncopyable
+class SDLImageShared final : public ISharedImage, public std::enable_shared_from_this<SDLImageShared>, boost::noncopyable
 {
 	//Surface without empty borders
 	SDL_Surface * surf;
@@ -38,26 +38,27 @@ class SDLImageConst final : public IConstImage, public std::enable_shared_from_t
 	// Keep the original palette, in order to do color switching operation
 	void savePalette();
 
+	void optimizeSurface();
+
 public:
 	//Load image from def file
-	SDLImageConst(CDefFile *data, size_t frame, size_t group=0);
+	SDLImageShared(const CDefFile *data, size_t frame, size_t group=0);
 	//Load from bitmap file
-	SDLImageConst(const ImagePath & filename);
+	SDLImageShared(const ImagePath & filename);
 	//Create using existing surface, extraRef will increase refcount on SDL_Surface
-	SDLImageConst(SDL_Surface * from);
-	~SDLImageConst();
+	SDLImageShared(SDL_Surface * from);
+	~SDLImageShared();
 
-	void draw(SDL_Surface * where, SDL_Palette * palette, const Point & dest, const Rect * src, uint8_t alpha, EImageBlitMode mode) const;
+	void draw(SDL_Surface * where, SDL_Palette * palette, const Point & dest, const Rect * src, const ColorRGBA & colorMultiplier, uint8_t alpha, EImageBlitMode mode) const override;
 
 	void exportBitmap(const boost::filesystem::path & path) const override;
 	Point dimensions() const override;
 	bool isTransparent(const Point & coords) const override;
 	std::shared_ptr<IImage> createImageReference(EImageBlitMode mode) override;
-	std::shared_ptr<IConstImage> horizontalFlip() const override;
-	std::shared_ptr<IConstImage> verticalFlip() const override;
-	std::shared_ptr<SDLImageConst> scaleFast(const Point & size) const;
-
-	const SDL_Palette * getPalette() const;
+	std::shared_ptr<ISharedImage> horizontalFlip() const override;
+	std::shared_ptr<ISharedImage> verticalFlip() const override;
+	std::shared_ptr<ISharedImage> scaleInteger(int factor, SDL_Palette * palette) const override;
+	std::shared_ptr<ISharedImage> scaleTo(const Point & size, SDL_Palette * palette) const override;
 
 	friend class SDLImageLoader;
 };
@@ -65,36 +66,47 @@ public:
 class SDLImageBase : public IImage, boost::noncopyable
 {
 protected:
-	std::shared_ptr<SDLImageConst> image;
+	std::shared_ptr<ISharedImage> image;
 
 	uint8_t alphaValue;
 	EImageBlitMode blitMode;
 
 public:
-	SDLImageBase(const std::shared_ptr<SDLImageConst> & image, EImageBlitMode mode);
+	SDLImageBase(const std::shared_ptr<ISharedImage> & image, EImageBlitMode mode);
 
-	void scaleFast(const Point & size) override;
 	void exportBitmap(const boost::filesystem::path & path) const override;
 	bool isTransparent(const Point & coords) const override;
 	Point dimensions() const override;
 	void setAlpha(uint8_t value) override;
 	void setBlitMode(EImageBlitMode mode) override;
+	std::shared_ptr<ISharedImage> getSharedImage() const override;
 };
 
 class SDLImageIndexed final : public SDLImageBase
 {
 	SDL_Palette * currentPalette = nullptr;
+	SDL_Palette * originalPalette = nullptr;
+
+	bool bodyEnabled = true;
+	bool shadowEnabled = false;
+	bool overlayEnabled = false;
 
+	void setShadowTransparency(float factor);
 public:
-	SDLImageIndexed(const std::shared_ptr<SDLImageConst> & image, EImageBlitMode mode);
+	SDLImageIndexed(const std::shared_ptr<ISharedImage> & image, SDL_Palette * palette, EImageBlitMode mode);
 	~SDLImageIndexed();
 
 	void draw(SDL_Surface * where, const Point & pos, const Rect * src) const override;
-	void setSpecialPalette(const SpecialPalette & SpecialPalette, uint32_t colorsToSkipMask) override;
+	void setOverlayColor(const ColorRGBA & color) override;
 	void playerColored(PlayerColor player) override;
-	void setFlagColor(PlayerColor player) override;
 	void shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove) override;
 	void adjustPalette(const ColorFilter & shifter, uint32_t colorsToSkipMask) override;
+	void scaleInteger(int factor) override;
+	void scaleTo(const Point & size) override;
+
+	void setShadowEnabled(bool on) override;
+	void setBodyEnabled(bool on) override;
+	void setOverlayEnabled(bool on) override;
 };
 
 class SDLImageRGB final : public SDLImageBase
@@ -103,9 +115,14 @@ public:
 	using SDLImageBase::SDLImageBase;
 
 	void draw(SDL_Surface * where, const Point & pos, const Rect * src) const override;
-	void setSpecialPalette(const SpecialPalette & SpecialPalette, uint32_t colorsToSkipMask) override;
+	void setOverlayColor(const ColorRGBA & color) override;
 	void playerColored(PlayerColor player) override;
-	void setFlagColor(PlayerColor player) override;
 	void shiftPalette(uint32_t firstColorID, uint32_t colorsToMove, uint32_t distanceToMove) override;
 	void adjustPalette(const ColorFilter & shifter, uint32_t colorsToSkipMask) override;
+	void scaleInteger(int factor) override;
+	void scaleTo(const Point & size) override;
+
+	void setShadowEnabled(bool on) override;
+	void setBodyEnabled(bool on) override;
+	void setOverlayEnabled(bool on) override;
 };

+ 1 - 1
client/renderSDL/SDLImageLoader.cpp

@@ -17,7 +17,7 @@
 
 #include <SDL_surface.h>
 
-SDLImageLoader::SDLImageLoader(SDLImageConst * Img):
+SDLImageLoader::SDLImageLoader(SDLImageShared * Img):
 	image(Img),
 	lineStart(nullptr),
 	position(nullptr)

+ 3 - 3
client/renderSDL/SDLImageLoader.h

@@ -11,13 +11,13 @@
 
 #include "../render/IImageLoader.h"
 
-class SDLImageConst;
+class SDLImageShared;
 
 class SDLImageLoader : public IImageLoader
 {
 	static constexpr int DEFAULT_PALETTE_COLORS = 256;
 
-	SDLImageConst * image;
+	SDLImageShared * image;
 	ui8 * lineStart;
 	ui8 * position;
 public:
@@ -29,7 +29,7 @@ public:
 	//init image with these sizes and palette
 	void init(Point SpriteSize, Point Margins, Point FullSize, SDL_Color *pal);
 
-	SDLImageLoader(SDLImageConst * Img);
+	SDLImageLoader(SDLImageShared * Img);
 	~SDLImageLoader();
 };
 

+ 68 - 21
client/renderSDL/SDL_Extensions.cpp

@@ -12,12 +12,17 @@
 
 #include "SDL_PixelAccess.h"
 
+#include "../gui/CGuiHandler.h"
 #include "../render/Graphics.h"
+#include "../render/IScreenHandler.h"
 #include "../render/Colors.h"
 #include "../CMT.h"
+#include "../xBRZ/xbrz.h"
 
 #include "../../lib/GameConstants.h"
 
+#include <tbb/parallel_for.h>
+
 #include <SDL_render.h>
 #include <SDL_surface.h>
 #include <SDL_version.h>
@@ -63,21 +68,21 @@ void CSDL_Ext::setAlpha(SDL_Surface * bg, int value)
 	SDL_SetSurfaceAlphaMod(bg, value);
 }
 
-SDL_Surface * CSDL_Ext::newSurface(int w, int h)
+SDL_Surface * CSDL_Ext::newSurface(const Point & dimensions)
 {
-	return newSurface(w, h, screen);
+	return newSurface(dimensions, screen);
 }
 
-SDL_Surface * CSDL_Ext::newSurface(int w, int h, SDL_Surface * mod) //creates new surface, with flags/format same as in surface given
+SDL_Surface * CSDL_Ext::newSurface(const Point & dimensions, SDL_Surface * mod) //creates new surface, with flags/format same as in surface given
 {
-	SDL_Surface * ret = SDL_CreateRGBSurface(0,w,h,mod->format->BitsPerPixel,mod->format->Rmask,mod->format->Gmask,mod->format->Bmask,mod->format->Amask);
+	SDL_Surface * ret = SDL_CreateRGBSurface(0,dimensions.x,dimensions.y,mod->format->BitsPerPixel,mod->format->Rmask,mod->format->Gmask,mod->format->Bmask,mod->format->Amask);
 
 	if(ret == nullptr)
 	{
 		const char * error = SDL_GetError();
 
 		std::string messagePattern = "Failed to create SDL Surface of size %d x %d, %d bpp. Reason: %s";
-		std::string message = boost::str(boost::format(messagePattern) % w % h % mod->format->BitsPerPixel % error);
+		std::string message = boost::str(boost::format(messagePattern) % dimensions.x % dimensions.y % mod->format->BitsPerPixel % error);
 
 		handleFatalError(message, true);
 	}
@@ -448,32 +453,35 @@ static void drawLineY(SDL_Surface * sur, int x1, int y1, int x2, int y2, const S
 	}
 }
 
-void CSDL_Ext::drawLine(SDL_Surface * sur, const Point & from, const Point & dest, const SDL_Color & color1, const SDL_Color & color2)
+void CSDL_Ext::drawLine(SDL_Surface * sur, const Point & from, const Point & dest, const SDL_Color & color1, const SDL_Color & color2, int thickness)
 {
 	//FIXME: duplicated code with drawLineDashed
-	int width  = std::abs(from.x - dest.x);
+	int width = std::abs(from.x - dest.x);
 	int height = std::abs(from.y - dest.y);
 
-	if ( width == 0 && height == 0)
+	if(width == 0 && height == 0)
 	{
-		uint8_t *p = CSDL_Ext::getPxPtr(sur, from.x, from.y);
+		uint8_t * p = CSDL_Ext::getPxPtr(sur, from.x, from.y);
 		ColorPutter<4>::PutColorAlpha(p, color1);
 		return;
 	}
 
-	if (width > height)
-	{
-		if ( from.x < dest.x)
-			drawLineX(sur, from.x, from.y, dest.x, dest.y, color1, color2);
-		else
-			drawLineX(sur, dest.x, dest.y, from.x, from.y, color2, color1);
-	}
-	else
+	for(int i = 0; i < thickness; ++i)
 	{
-		if ( from.y < dest.y)
-			drawLineY(sur, from.x, from.y, dest.x, dest.y, color1, color2);
+		if(width > height)
+		{
+			if(from.x < dest.x)
+				drawLineX(sur, from.x, from.y + i, dest.x, dest.y + i, color1, color2);
+			else
+				drawLineX(sur, dest.x, dest.y + i, from.x, from.y + i, color2, color1);
+		}
 		else
-			drawLineY(sur, dest.x, dest.y, from.x, from.y, color2, color1);
+		{
+			if(from.y < dest.y)
+				drawLineY(sur, from.x + i, from.y, dest.x + i, dest.y, color1, color2);
+			else
+				drawLineY(sur, dest.x + i, dest.y, from.x + i, from.y, color2, color1);
+		}
 	}
 }
 
@@ -630,8 +638,11 @@ SDL_Surface * CSDL_Ext::scaleSurface(SDL_Surface * surf, int width, int height)
 	if(!surf || !width || !height)
 		return nullptr;
 
+	if (surf->w * 2 == width && surf->h * 2 == height)
+		return scaleSurfaceIntegerFactor(surf, 2);
+
 	SDL_Surface * intermediate = SDL_ConvertSurface(surf, screen->format, 0);
-	SDL_Surface * ret = newSurface(width, height, intermediate);
+	SDL_Surface * ret = newSurface(Point(width, height), intermediate);
 
 #if SDL_VERSION_ATLEAST(2,0,16)
 	SDL_SoftStretchLinear(intermediate, nullptr, ret, nullptr);
@@ -643,6 +654,37 @@ SDL_Surface * CSDL_Ext::scaleSurface(SDL_Surface * surf, int width, int height)
 	return ret;
 }
 
+// Upscales given surface by an integer factor using xBRZ filter.
+// Source is first converted to the pixel format of the global `screen` surface.
+// Returns newly created surface, or nullptr if `surf` is null or `factor` is not positive.
+SDL_Surface * CSDL_Ext::scaleSurfaceIntegerFactor(SDL_Surface * surf, int factor)
+{
+	// NOTE(review): xBRZ presumably supports only a limited set of factors - confirm that callers never request factor 1
+	if(surf == nullptr || factor <= 0)
+		return nullptr;
+
+	int newWidth = surf->w * factor;
+	int newHeight = surf->h * factor;
+
+	SDL_Surface * intermediate = SDL_ConvertSurface(surf, screen->format, 0);
+
+	if(intermediate == nullptr)
+	{
+		// newSurface() dereferences format of the surface it is given,
+		// so report failed conversion explicitly instead of crashing on null pointer
+		const char * error = SDL_GetError();
+		handleFatalError(std::string("Failed to convert SDL Surface for upscaling. Reason: ") + error, true);
+		return nullptr;
+	}
+
+	SDL_Surface * ret = newSurface(Point(newWidth, newHeight), intermediate);
+
+	// pixel buffers are passed to xBRZ as plain arrays of 32-bit pixels, so rows must have no padding
+	assert(intermediate->pitch == intermediate->w * 4);
+	assert(ret->pitch == ret->w * 4);
+
+	const uint32_t * srcPixels = static_cast<const uint32_t*>(intermediate->pixels);
+	uint32_t * dstPixels = static_cast<uint32_t*>(ret->pixels);
+
+	// avoid excessive granulation - xBRZ prefers at least 8-16 lines per task
+	// TODO: compare performance and size of images, recheck values for potentially better parameters
+	const int granulation = std::clamp(surf->h / 64 * 8, 8, 64);
+
+	// every task scales its own range of source rows [r.begin(), r.end())
+	tbb::parallel_for(tbb::blocked_range<size_t>(0, intermediate->h, granulation), [factor, srcPixels, dstPixels, intermediate](const tbb::blocked_range<size_t> & r)
+	{
+		xbrz::scale(factor, srcPixels, dstPixels, intermediate->w, intermediate->h, xbrz::ColorFormat::ARGB, {}, r.begin(), r.end());
+	});
+
+	SDL_FreeSurface(intermediate);
+
+	return ret;
+}
+
 void CSDL_Ext::blitSurface(SDL_Surface * src, const Rect & srcRectInput, SDL_Surface * dst, const Point & dstPoint)
 {
 	SDL_Rect srcRect = CSDL_Ext::toSDL(srcRectInput);
@@ -732,5 +774,10 @@ void CSDL_Ext::getClipRect(SDL_Surface * src, Rect & other)
 	other = CSDL_Ext::fromSDL(rect);
 }
 
+// Returns scaling factor reported by the screen handler of global GUI handler
+int CSDL_Ext::CClipRectGuard::getScalingFactor() const
+{
+	const int scalingFactor = GH.screenHandler().getScalingFactor();
+	return scalingFactor;
+}
+
 template SDL_Surface * CSDL_Ext::createSurfaceWithBpp<3>(int, int);
 template SDL_Surface * CSDL_Ext::createSurfaceWithBpp<4>(int, int);

+ 7 - 4
client/renderSDL/SDL_Extensions.h

@@ -78,20 +78,21 @@ using TColorPutterAlpha = void (*)(uint8_t *&, const uint8_t &, const uint8_t &,
 	int blit8bppAlphaTo24bpp(const SDL_Surface * src, const Rect & srcRect, SDL_Surface * dst, const Point & dstPoint, uint8_t alpha); //blits 8 bpp surface with alpha channel to 24 bpp surface
 	uint32_t colorTouint32_t(const SDL_Color * color); //little endian only
 
-	void drawLine(SDL_Surface * sur, const Point & from, const Point & dest, const SDL_Color & color1, const SDL_Color & color2);
+	void drawLine(SDL_Surface * sur, const Point & from, const Point & dest, const SDL_Color & color1, const SDL_Color & color2, int width);
 	void drawLineDashed(SDL_Surface * sur, const Point & from, const Point & dest, const SDL_Color & color);
 
 	void drawBorder(SDL_Surface * sur, int x, int y, int w, int h, const SDL_Color & color, int depth = 1);
 	void drawBorder(SDL_Surface * sur, const Rect & r, const SDL_Color & color, int depth = 1);
 
-	SDL_Surface * newSurface(int w, int h, SDL_Surface * mod); //creates new surface, with flags/format same as in surface given
-	SDL_Surface * newSurface(int w, int h); //creates new surface, with flags/format same as in screen surface
+	SDL_Surface * newSurface(const Point & dimensions, SDL_Surface * mod); //creates new surface, with flags/format same as in surface given
+	SDL_Surface * newSurface(const Point & dimensions); //creates new surface, with flags/format same as in screen surface
 	SDL_Surface * copySurface(SDL_Surface * mod); //returns copy of given surface
 	template<int bpp>
 	SDL_Surface * createSurfaceWithBpp(int width, int height); //create surface with give bits per pixels value
 
 	// bilinear filtering. Always returns rgba surface
 	SDL_Surface * scaleSurface(SDL_Surface * surf, int width, int height);
+	SDL_Surface * scaleSurfaceIntegerFactor(SDL_Surface * surf, int factor);
 
 	template<int bpp>
 	void convertToGrayscaleBpp(SDL_Surface * surf, const Rect & rect);
@@ -110,11 +111,13 @@ using TColorPutterAlpha = void (*)(uint8_t *&, const uint8_t &, const uint8_t &,
 		SDL_Surface * surf;
 		Rect oldRect;
 
+		int getScalingFactor() const;
+
 	public:
 		CClipRectGuard(SDL_Surface * surface, const Rect & rect): surf(surface)
 		{
 			CSDL_Ext::getClipRect(surf, oldRect);
-			CSDL_Ext::setClipRect(surf, rect);
+			CSDL_Ext::setClipRect(surf, rect * getScalingFactor());
 		}
 
 		~CClipRectGuard()

+ 75 - 4
client/renderSDL/ScreenHandler.cpp

@@ -38,13 +38,14 @@ SDL_Surface * screen2 = nullptr; //and hlp surface (used to store not-active int
 SDL_Surface * screenBuf = screen; //points to screen (if only advmapint is present) or screen2 (else) - should be used when updating controls which are not regularly redrawed
 
 static const std::string NAME = GameConstants::VCMI_VERSION; //application name
+static constexpr Point heroes3Resolution = Point(800, 600);
 
 std::tuple<int, int> ScreenHandler::getSupportedScalingRange() const
 {
 	// H3 resolution, any resolution smaller than that is not correctly supported
-	static const Point minResolution = {800, 600};
+	static constexpr Point minResolution = heroes3Resolution;
 	// arbitrary limit on *downscaling*. Allow some downscaling, if requested by user. Should be generally limited to 100+ for all but few devices
-	static const double minimalScaling = 50;
+	static constexpr double minimalScaling = 50;
 
 	Point renderResolution = getRenderResolution();
 	double reservedAreaWidth = settings["video"]["reservedWidth"].Float();
@@ -99,6 +100,24 @@ Point ScreenHandler::getPreferredLogicalResolution() const
 	return logicalResolution;
 }
 
+// Returns integer scaling factor that corresponds to currently selected upscaling filter
+int ScreenHandler::getScalingFactor() const
+{
+	if (upscalingFilter == EUpscalingFilter::NONE)
+		return 1;
+
+	if (upscalingFilter == EUpscalingFilter::XBRZ_2)
+		return 2;
+
+	if (upscalingFilter == EUpscalingFilter::XBRZ_3)
+		return 3;
+
+	if (upscalingFilter == EUpscalingFilter::XBRZ_4)
+		return 4;
+
+	// any other value, including AUTO, has no scaling factor assigned
+	throw std::runtime_error("invalid upscaling filter");
+}
+
+// Returns size of the global `screen` surface divided by current scaling factor
+Point ScreenHandler::getLogicalResolution() const
+{
+	Point scaledResolution(screen->w, screen->h);
+	return scaledResolution / getScalingFactor();
+}
+
 Point ScreenHandler::getRenderResolution() const
 {
 	assert(mainRenderer != nullptr);
@@ -291,12 +310,64 @@ void ScreenHandler::initializeWindow()
 		handleFatalError(message, true);
 	}
 
+	selectUpscalingFilter();
+	selectDownscalingFilter();
+
 	SDL_RendererInfo info;
 	SDL_GetRendererInfo(mainRenderer, &info);
-	SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, settings["video"]["scalingMode"].String().c_str());
 	logGlobal->info("Created renderer %s", info.name);
 }
 
+// Reads "video" / "upscalingFilter" setting and converts it to upscaling filter.
+// Never returns EUpscalingFilter::AUTO: both "auto" and unrecognized names currently resolve to EUpscalingFilter::NONE.
+EUpscalingFilter ScreenHandler::loadUpscalingFilter() const
+{
+	static const std::map<std::string, EUpscalingFilter> upscalingFilterTypes =
+	{
+		{"auto", EUpscalingFilter::AUTO },
+		{"none", EUpscalingFilter::NONE },
+		{"xbrz2", EUpscalingFilter::XBRZ_2 },
+		{"xbrz3", EUpscalingFilter::XBRZ_3 },
+		{"xbrz4", EUpscalingFilter::XBRZ_4 }
+	};
+
+	auto filterName = settings["video"]["upscalingFilter"].String();
+	auto filterIt = upscalingFilterTypes.find(filterName);
+
+	// unknown name must not abort window initialization with std::out_of_range - disable upscaling instead
+	if (filterIt == upscalingFilterTypes.end())
+	{
+		logGlobal->warn("Unknown upscaling filter '%s', upscaling will be disabled", filterName);
+		return EUpscalingFilter::NONE;
+	}
+
+	if (filterIt->second != EUpscalingFilter::AUTO)
+		return filterIt->second;
+
+	// for now - always fallback to no filter
+	return EUpscalingFilter::NONE;
+
+	// else - autoselect
+//	Point outputResolution = getRenderResolution();
+//	Point logicalResolution = getPreferredLogicalResolution();
+//
+//	float scaleX = static_cast<float>(outputResolution.x) / logicalResolution.x;
+//	float scaleY = static_cast<float>(outputResolution.y) / logicalResolution.y;
+//	float scaling = std::min(scaleX, scaleY);
+//
+//	if (scaling <= 1.0f)
+//		return EUpscalingFilter::NONE;
+//	if (scaling <= 2.0f)
+//		return EUpscalingFilter::XBRZ_2;
+//	if (scaling <= 3.0f)
+//		return EUpscalingFilter::XBRZ_3;
+//
+//	return EUpscalingFilter::XBRZ_4;
+}
+
+// Loads upscaling filter from settings, stores it and logs numeric value of the selection
+void ScreenHandler::selectUpscalingFilter()
+{
+	upscalingFilter = loadUpscalingFilter();
+
+	const int filterID = static_cast<int>(upscalingFilter);
+	logGlobal->debug("Selected upscaling filter %d", filterID);
+}
+
+// Passes "video" / "downscalingFilter" setting to SDL as render scale quality hint and logs the selection
+void ScreenHandler::selectDownscalingFilter()
+{
+	const auto & filterName = settings["video"]["downscalingFilter"].String();
+
+	SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, filterName.c_str());
+	logGlobal->debug("Selected downscaling filter %s", filterName);
+}
+
 void ScreenHandler::initializeScreenBuffers()
 {
 #ifdef VCMI_ENDIAN_BIG
@@ -311,7 +382,7 @@ void ScreenHandler::initializeScreenBuffers()
 	int amask = 0xFF000000;
 #endif
 
-	auto logicalSize = getPreferredLogicalResolution();
+	auto logicalSize = getPreferredLogicalResolution() * getScalingFactor();
 	SDL_RenderSetLogicalSize(mainRenderer, logicalSize.x, logicalSize.y);
 
 	screen = SDL_CreateRGBSurface(0, logicalSize.x, logicalSize.y, 32, rmask, gmask, bmask, amask);

+ 23 - 0
client/renderSDL/ScreenHandler.h

@@ -29,9 +29,23 @@ enum class EWindowMode
 	FULLSCREEN_EXCLUSIVE
 };
 
+enum class EUpscalingFilter // determines integer scaling factor returned by ScreenHandler::getScalingFactor()
+{
+	AUTO, // used only for loading from config, replaced with autoselected value on init
+	NONE, // config value "none", scaling factor 1
+	//BILINEAR, // TODO?
+	//BICUBIC, // TODO?
+	XBRZ_2, // config value "xbrz2", scaling factor 2
+	XBRZ_3, // config value "xbrz3", scaling factor 3
+	XBRZ_4, // config value "xbrz4", scaling factor 4
+	// NOTE: xbrz also provides x5 and x6 filters, but those would require high-end gaming PC's due to huge memory usage with no visible gain
+};
+
 /// This class is responsible for management of game window and its main rendering surface
 class ScreenHandler final : public IScreenHandler
 {
+	EUpscalingFilter upscalingFilter = EUpscalingFilter::AUTO;
+
 	/// Dimensions of target surfaces/textures, this value is what game logic views as screen size
 	Point getPreferredLogicalResolution() const;
 
@@ -69,6 +83,11 @@ class ScreenHandler final : public IScreenHandler
 
 	/// Performs validation of settings and updates them to valid values if necessary
 	void validateSettings();
+
+	EUpscalingFilter loadUpscalingFilter() const;
+
+	void selectDownscalingFilter();
+	void selectUpscalingFilter();
 public:
 
 	/// Creates and initializes screen, window and SDL state
@@ -89,6 +108,10 @@ public:
 	/// Window has focus
 	bool hasFocus() final;
 
+	Point getLogicalResolution() const final;
+
+	int getScalingFactor() const final;
+
 	std::vector<Point> getSupportedResolutions() const final;
 	std::vector<Point> getSupportedResolutions(int displayIndex) const;
 	std::tuple<int, int> getSupportedScalingRange() const final;

+ 6 - 4
client/widgets/Images.cpp

@@ -108,7 +108,7 @@ void CPicture::setAlpha(uint8_t value)
 
 void CPicture::scaleTo(Point size)
 {
-	bg->scaleFast(size);
+	bg->scaleTo(size);
 
 	pos.w = bg->width();
 	pos.h = bg->height();
@@ -255,7 +255,7 @@ void CAnimImage::showAll(Canvas & to)
 		if(auto img = anim->getImage(targetFrame, group))
 		{
 			if(isScaled())
-				img->scaleFast(scaledSize);
+				img->scaleTo(scaledSize);
 
 			to.draw(img, pos.topLeft());
 		}
@@ -307,7 +307,7 @@ bool CAnimImage::isPlayerColored() const
 }
 
 CShowableAnim::CShowableAnim(int x, int y, const AnimationPath & name, ui8 Flags, ui32 frameTime, size_t Group, uint8_t alpha):
-	anim(GH.renderHandler().loadAnimation(name, (Flags & PALETTE_ALPHA) ? EImageBlitMode::ALPHA : EImageBlitMode::COLORKEY)),
+	anim(GH.renderHandler().loadAnimation(name, (Flags & CREATURE_MODE) ? EImageBlitMode::ALPHA : EImageBlitMode::COLORKEY)),
 	group(Group),
 	frame(0),
 	first(0),
@@ -420,6 +420,8 @@ void CShowableAnim::blitImage(size_t frame, size_t group, Canvas & to)
 	auto img = anim->getImage(frame, group);
 	if(img)
 	{
+		if (flags & CREATURE_MODE)
+			img->setShadowEnabled(true);
 		img->setAlpha(alpha);
 		to.draw(img, pos.topLeft(), src);
 	}
@@ -440,7 +442,7 @@ void CShowableAnim::setDuration(int durationMs)
 }
 
 CCreatureAnim::CCreatureAnim(int x, int y, const AnimationPath & name, ui8 flags, ECreatureAnimType type):
-	CShowableAnim(x, y, name, flags | PALETTE_ALPHA, 100, size_t(type)) // H3 uses 100 ms per frame, irregardless of battle speed settings
+	CShowableAnim(x, y, name, flags | CREATURE_MODE, 100, size_t(type)) // H3 uses 100 ms per frame, irregardless of battle speed settings
 {
 	xOffset = 0;
 	yOffset = 0;

+ 1 - 1
client/widgets/Images.h

@@ -145,7 +145,7 @@ public:
 		BASE=1,            //base frame will be blitted before current one
 		HORIZONTAL_FLIP=2, //TODO: will be displayed rotated
 		VERTICAL_FLIP=4,   //TODO: will be displayed rotated
-		PALETTE_ALPHA=8,   // use alpha channel for images with palette. Required for creatures in battle and map objects
+		CREATURE_MODE=8,   // use alpha channel for images with palette. Required for creatures in battle and map objects
 		PLAY_ONCE=32       //play animation only once and stop at last frame
 	};
 protected:

+ 1 - 1
client/windows/CCastleInterface.cpp

@@ -885,7 +885,7 @@ void CCastleBuildings::enterCastleGate()
 			if(settings["general"]["enableUiEnhancements"].Bool())
 			{
 				auto image = GH.renderHandler().loadImage(AnimationPath::builtin("ITPA"), t->town->clientInfo.icons[t->hasFort()][false] + 2, 0, EImageBlitMode::OPAQUE);
-				image->scaleFast(Point(35, 23));
+				image->scaleTo(Point(35, 23));
 				images.push_back(image);
 			}
 		}

+ 2 - 2
client/windows/CMapOverview.cpp

@@ -60,7 +60,7 @@ CMapOverview::CMapOverview(const std::string & mapName, const std::string & file
 
 Canvas CMapOverviewWidget::createMinimapForLayer(std::unique_ptr<CMap> & map, int layer) const
 {
-	Canvas canvas = Canvas(Point(map->width, map->height));
+	Canvas canvas = Canvas(Point(map->width, map->height), CanvasScalingPolicy::IGNORE);
 
 	for (int y = 0; y < map->height; ++y)
 		for (int x = 0; x < map->width; ++x)
@@ -139,7 +139,7 @@ std::shared_ptr<CPicture> CMapOverviewWidget::buildDrawMinimap(const JsonNode &
 	double resize = maxSideLengthSrc / maxSideLengthDst;
 	Point newMinimapSize = Point(minimapRect.w / resize, minimapRect.h / resize);
 
-	Canvas canvasScaled = Canvas(Point(rect.w, rect.h));
+	Canvas canvasScaled = Canvas(Point(rect.w, rect.h), CanvasScalingPolicy::AUTO);
 	canvasScaled.drawScaled(minimaps[id], Point((rect.w - newMinimapSize.x) / 2, (rect.h - newMinimapSize.y) / 2), newMinimapSize);
 	std::shared_ptr<IImage> img = GH.renderHandler().createImage(canvasScaled.getInternalSurface());
 

+ 1 - 1
client/windows/CMessage.cpp

@@ -41,7 +41,7 @@ void CMessage::init()
 {
 	for(int i = 0; i < PlayerColor::PLAYER_LIMIT_I; i++)
 	{
-		dialogBorders[i] = GH.renderHandler().loadAnimation(AnimationPath::builtin("DIALGBOX"), EImageBlitMode::OPAQUE);
+		dialogBorders[i] = GH.renderHandler().loadAnimation(AnimationPath::builtin("DIALGBOX"), EImageBlitMode::COLORKEY);
 
 		for(int j = 0; j < dialogBorders[i]->size(0); j++)
 		{

+ 2 - 2
client/windows/CSpellWindow.cpp

@@ -482,13 +482,13 @@ void CSpellWindow::setCurrentPage(int value)
 void CSpellWindow::turnPageLeft()
 {
 	if(settings["video"]["spellbookAnimation"].Bool() && !isBigSpellbook)
-		CCS->videoh->playSpellbookAnimation(VideoPath::builtin("PGTRNLFT.SMK"), pos.topLeft() + Point(13, 15));
+		CCS->videoh->playSpellbookAnimation(VideoPath::builtin("PGTRNLFT.SMK"), pos.topLeft() + Point(13, 14));
 }
 
 void CSpellWindow::turnPageRight()
 {
 	if(settings["video"]["spellbookAnimation"].Bool() && !isBigSpellbook)
-		CCS->videoh->playSpellbookAnimation(VideoPath::builtin("PGTRNRGH.SMK"), pos.topLeft() + Point(13, 15));
+		CCS->videoh->playSpellbookAnimation(VideoPath::builtin("PGTRNRGH.SMK"), pos.topLeft() + Point(13, 14));
 }
 
 void CSpellWindow::keyPressed(EShortcut key)

+ 1 - 1
client/windows/CWindowWithArtifacts.cpp

@@ -238,7 +238,7 @@ void CWindowWithArtifacts::setCursorAnimation(const CArtifactInstance & artInst)
 	{
 		assert(artInst.getScrollSpellID().num >= 0);
 		auto image = GH.renderHandler().loadImage(AnimationPath::builtin("spellscr"), artInst.getScrollSpellID().num, 0, EImageBlitMode::COLORKEY);
-		image->scaleFast(Point(44,34));
+		image->scaleTo(Point(44,34));
 
 		CCS->curh->dragAndDropCursor(image);
 	}

+ 66 - 0
client/xBRZ/Changelog.txt

@@ -0,0 +1,66 @@
+xBRZ 1.8 [2019-11-28]
+---------------------
+Consider ARGB outside area as transparent
+Fixed ARGB scaling issue on image borders
+
+
+xBRZ 1.7 [2019-07-04]
+---------------------
+Fixed asymmetric color distance
+New parameter: "Center direction bias"
+
+
+xBRZ 1.6 [2018-02-27]
+---------------------
+Added bilinear scaling
+Option to skip color buffer creation
+Updated license info
+
+
+xBRZ 1.5 [2017-08-07]
+---------------------
+Added RGB conversion routines
+
+
+xBRZ 1.4 [2015-07-25]
+---------------------
+Added 6xBRZ scaler
+Create color distance buffer lazily
+
+
+xBRZ 1.3 [2015-04-03]
+---------------------
+Improved ARGB performance by 15%
+Fixed alpha channel gradient bug
+
+
+xBRZ 1.2 [2014-11-21]
+---------------------
+Further improved performance by over 30%
+
+
+xBRZ 1.1 [2014-11-02]
+---------------------
+Support images with alpha channel
+Improved color analysis
+
+
+xBRZ 1.0 [2013-02-11]
+---------------------
+Fixed xBRZ scaler compiler issues for GCC
+
+ 
+xBRZ 0.2 [2012-12-11]
+---------------------
+Added 5xBRZ scaler
+Optimized xBRZ scaler performance by factor 3
+Further improved image quality of xBRZ scaler
+
+
+xBRZ 0.1 [2012-09-26]
+---------------------
+Initial release:
+- scale while preserving small image features
+- support multithreading
+- support 64-bit architectures
+- support processing image slices

+ 621 - 0
client/xBRZ/License.txt

@@ -0,0 +1,621 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS

+ 1367 - 0
client/xBRZ/xbrz.cpp

@@ -0,0 +1,1367 @@
+// ****************************************************************************
+// * This file is part of the xBRZ project. It is distributed under           *
+// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0         *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the following libraries            *
+// * (or with modified versions that use the same licenses), and distribute   *
+// * linked combinations including the two: MAME, FreeFileSync, Snes9x, ePSXe *
+// * You must obey the GNU General Public License in all respects for all of  *
+// * the code used other than MAME, FreeFileSync, Snes9x, ePSXe.              *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#include "xbrz.h"
+#include <cassert>
+#include <vector>
+#include <algorithm>
+#include <cmath> //std::sqrt
+#include "xbrz_tools.h"
+
+#if defined _MSC_VER
+#pragma warning(disable:5051)
+#endif
+
+using namespace xbrz;
+
+
+namespace
+{
+//Blend pixFront with opacity M / N over the opaque background pixBack, channel by channel:
+//https://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending
+template <unsigned int M, unsigned int N> inline
+uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack)
+{
+    static_assert(0 < M && M < N && N <= 1000);
+
+    //weighted average of a single 8-bit channel; the two weights M and N - M add up to N
+    auto mixChannel = [](unsigned char front, unsigned char back) -> unsigned char
+    {
+        return (front * M + back * (N - M)) / N;
+    };
+
+    const unsigned char red   = mixChannel(getRed  (pixFront), getRed  (pixBack));
+    const unsigned char green = mixChannel(getGreen(pixFront), getGreen(pixBack));
+    const unsigned char blue  = mixChannel(getBlue (pixFront), getBlue (pixBack));
+
+    return makePixel(red, green, blue);
+}
+
+
+//Find the intermediate color between two colors that both carry an alpha channel (=> NO alpha blending!!!).
+//Each color channel is averaged with the weights "alpha(pixFront) * M" and "alpha(pixBack) * (N - M)".
+template <unsigned int M, unsigned int N> inline
+uint32_t gradientARGB(uint32_t pixFront, uint32_t pixBack)
+{
+    static_assert(0 < M && M < N && N <= 1000);
+
+    const unsigned int wFront = getAlpha(pixFront) * M;
+    const unsigned int wBack  = getAlpha(pixBack) * (N - M);
+    const unsigned int wTotal = wFront + wBack;
+    if (wTotal == 0) //both alpha values are zero: nothing to mix and no division by zero below
+        return 0;
+
+    auto mixChannel = [=](unsigned char front, unsigned char back)
+    {
+        const unsigned int weighted = front * wFront + back * wBack;
+        return static_cast<unsigned char>(weighted / wTotal);
+    };
+
+    const unsigned char alpha = static_cast<unsigned char>(wTotal / N);
+    const unsigned char red   = mixChannel(getRed  (pixFront), getRed  (pixBack));
+    const unsigned char green = mixChannel(getGreen(pixFront), getGreen(pixBack));
+    const unsigned char blue  = mixChannel(getBlue (pixFront), getBlue (pixBack));
+
+    return makePixel(alpha, red, green, blue);
+}
+
+
+//inline
+//double fastSqrt(double n)
+//{
+//    __asm //speeds up xBRZ by about 9% compared to std::sqrt which internally uses the same assembler instructions but adds some "fluff"
+//    {
+//        fld n
+//        fsqrt
+//    }
+//}
+//
+
+
+//FORCE_INLINE: use the compiler-specific "always inline" request where one is known (MSVC, GCC-compatible),
+//otherwise fall back to a plain "inline"
+#ifdef _MSC_VER
+    #define FORCE_INLINE __forceinline
+#elif defined __GNUC__
+    #define FORCE_INLINE __attribute__((always_inline)) inline
+#else
+    #define FORCE_INLINE inline
+#endif
+
+
+//Rotation in steps of 90 degrees, used as a template parameter throughout this file.
+//The enumerators must stay consecutive and start with ROT_0: MatrixRotation computes "rotDeg - 1"
+//to step down to the previous rotation until it reaches the ROT_0 specialization.
+enum RotationDegree //clock-wise
+{
+    ROT_0,
+    ROT_90,
+    ROT_180,
+    ROT_270
+};
+
+//Compile-time calculation of the matrix coordinates a rotated position originates from:
+//(I, J) = (row, col) indices, N = size of the (square) matrix
+template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
+struct MatrixRotation;
+
+//end of recursion: without rotation the coordinates stay as they are
+template <size_t I, size_t J, size_t N>
+struct MatrixRotation<ROT_0, I, J, N>
+{
+    static const size_t I_old = I;
+    static const size_t J_old = J;
+};
+
+//general case: take the result of the previous rotation degree and apply one more 90 degree step
+template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
+struct MatrixRotation
+{
+    using PrevRotation = MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>;
+
+    static const size_t I_old = N - 1 - PrevRotation::J_old; //old coordinates before rotation!
+    static const size_t J_old =         PrevRotation::I_old; //
+};
+
+
+//Access to an N x N pixel area of the target image; the (row, col) indices passed to ref()
+//are translated through MatrixRotation<rotDeg, ...> at compile time.
+template <size_t N, RotationDegree rotDeg>
+class OutputMatrix
+{
+public:
+    //"out" is the top-left position of the matrix area, "outWidth" the width of the image it belongs to
+    OutputMatrix(uint32_t* out, int outWidth) : out_(out), outWidth_(outWidth) {}
+
+    //pixel addressed by row I and column J of the rotated matrix
+    template <size_t I, size_t J>
+    uint32_t& ref() const
+    {
+        using Unrotated = MatrixRotation<rotDeg, I, J, N>;
+
+        static const size_t rowOld = Unrotated::I_old;
+        static const size_t colOld = Unrotated::J_old;
+        return *(out_ + colOld + rowOld * outWidth_);
+    }
+
+private:
+    uint32_t* out_;
+    const int outWidth_;
+};
+
+
+//value multiplied by itself, computed in the caller's type T
+template <class T> inline
+T square(T value)
+{
+    return value * value;
+}
+
+
+#if 0 //disabled: plain RGB distance, excluded from compilation by this "#if 0"
+inline
+double distRGB(uint32_t pix1, uint32_t pix2)
+{
+    //per-channel differences; the cast to int keeps the subtraction signed
+    const double r_diff = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
+    const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
+    const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
+
+    //Euclidean RGB distance
+    return std::sqrt(square(r_diff) + square(g_diff) + square(b_diff));
+}
+#endif
+
+
+//Distance of two pixels in YCbCr color space; the luma difference is scaled by lumaWeight
+//before it enters the Euclidean norm. The alpha channel is not looked at.
+//https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+inline
+double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
+{
+    //the YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
+    //(the division by 255 may be delayed to after the matrix multiplication;
+    // subtraction for int is noticeably faster than for double)
+    const int deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
+    const int deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
+    const int deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
+
+    //ITU-R BT.2020 conversion (ITU-R BT.709 would be: blue 0.0722, red 0.2126)
+    const double weightB = 0.0593;
+    const double weightR = 0.2627;
+    const double weightG = 1 - weightB - weightR;
+
+    const double chromaScaleB = 0.5 / (1 - weightB);
+    const double chromaScaleR = 0.5 / (1 - weightR);
+
+    const double luma    = weightR * deltaR + weightG * deltaG + weightB * deltaB; //[!], analog YCbCr!
+    const double chromaB = chromaScaleB * (deltaB - luma);
+    const double chromaR = chromaScaleR * (deltaR - luma);
+
+    //we skip division by 255 to have similar range like other distance functions
+    return std::sqrt(square(lumaWeight * luma) + square(chromaB) + square(chromaR));
+}
+
+
+//Buffered variant of the YCbCr color distance (no luma weight is applied): the result is looked up
+//in a table that is indexed by the three per-channel differences, each halved and stored in one byte.
+//The table is built once, on first use.
+inline
+double distYCbCrBuffered(uint32_t pix1, uint32_t pix2)
+{
+    //30% perf boost compared to plain distYCbCr()!
+    //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
+    static const std::vector<float> diffToDist = []
+    {
+        const uint32_t entryCount = 256 * 256 * 256; //one entry per possible (r, g, b) byte triple
+
+        std::vector<float> tmp;
+        tmp.reserve(entryCount); //single allocation instead of repeated re-allocation + copying while growing
+
+        const double k_b = 0.0593; //ITU-R BT.2020 conversion
+        const double k_r = 0.2627; //
+        const double k_g = 1 - k_b - k_r;
+
+        const double scale_b = 0.5 / (1 - k_b);
+        const double scale_r = 0.5 / (1 - k_r);
+
+        for (uint32_t i = 0; i < entryCount; ++i) //upstream measured a startup time of 114 ms on Intel Core i5 (four cores)
+        {
+            //each index byte holds a signed, halved channel difference => reinterpret as signed and undo the division by 2
+            const int r_diff = static_cast<signed char>(getByte<2>(i)) * 2;
+            const int g_diff = static_cast<signed char>(getByte<1>(i)) * 2;
+            const int b_diff = static_cast<signed char>(getByte<0>(i)) * 2;
+
+            const double y   = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
+            const double c_b = scale_b * (b_diff - y);
+            const double c_r = scale_r * (r_diff - y);
+
+            tmp.push_back(static_cast<float>(std::sqrt(square(y) + square(c_b) + square(c_r))));
+        }
+        return tmp;
+    }();
+
+    //if (pix1 == pix2) -> 8% perf degradation!
+    //    return 0;
+    //if (pix1 < pix2)
+    //    std::swap(pix1, pix2); -> 30% perf degradation!!!
+
+    const int r_diff = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
+    const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
+    const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
+
+    const size_t index = (static_cast<unsigned char>(r_diff / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
+                         (static_cast<unsigned char>(g_diff / 2) <<  8) |
+                         (static_cast<unsigned char>(b_diff / 2));
+
+#if 0 //attention: the following calculation creates an asymmetric color distance!!! (e.g. r_diff=46 will be unpacked as 45, but r_diff=-46 unpacks to -47
+    const size_t index = (((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
+                         (((g_diff + 0xFF) / 2) <<  8) |
+                         (( b_diff + 0xFF) / 2);
+#endif
+    return diffToDist[index];
+}
+
+
+//MSVC debug builds only: scaleImage() sets breakIntoDebugger while it processes the source pixel
+//(debugPixelX, debugPixelY); preProcessCorners() and blendPixel() call __debugbreak() while it is set
+#if defined _MSC_VER && !defined NDEBUG
+    const int debugPixelX = -1;
+    const int debugPixelY = 58;
+
+    thread_local bool breakIntoDebugger = false;
+#endif
+
+
+//Blend decision for a single pixel corner; four of these are packed into one byte (2 bits each)
+enum BlendType
+{
+    BLEND_NONE = 0, //must be 0: blendingNeeded() tests the packed byte against 0
+    BLEND_NORMAL,   //a normal indication to blend
+    BLEND_DOMINANT, //a strong indication to blend
+    //attention: BlendType must fit into the value range of 2 bit!!!
+};
+
+//Result of preProcessCorners(): one blend decision for each of the kernel positions F, G, J, K
+struct BlendResult
+{
+    BlendType
+    /**/blend_f, blend_g,
+    /**/blend_j, blend_k;
+};
+
+
+//3x3 pixel neighborhood in row-major order; blendPixel() treats "e" as the input pixel
+struct Kernel_3x3
+{
+    uint32_t
+    a, b, c,
+    d, e, f,
+    g, h, i;
+};
+
+//4x4 pixel neighborhood. The member order is deliberate and must not be changed: the first nine members
+//(a, b, c / e, f, g / i, j, k) line up with Kernel_3x3's a ... i, which scaleImage() relies on when it
+//reinterpret_casts a Kernel_4x4 to a Kernel_3x3; the remaining members follow afterwards.
+struct Kernel_4x4 //kernel for preprocessing step
+{
+    uint32_t
+    a, b, c, //
+    e, f, g, // support reinterpret_cast from Kernel_4x4 => Kernel_3x3
+    i, j, k, //
+    m, n, o,
+    d, h, l, p;
+};
+
+/* input kernel area naming convention:
+-----------------
+| A | B | C | D |
+|---|---|---|---|
+| E | F | G | H |   evaluate the four corners between F, G, J, K
+|---|---|---|---|   input pixel is at position F
+| I | J | K | L |
+|---|---|---|---|
+| M | N | O | P |
+-----------------
+*/
+//Compares the accumulated color distances along the J-G direction with those along the F-K direction
+//and marks the pixels on the direction with the smaller sum for blending.
+//ColorDistance must provide a static dist(pix1, pix2, luminanceWeight).
+template <class ColorDistance>
+FORCE_INLINE //detect blend direction
+BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: blend type for each of the F, G, J, K corners
+{
+#if defined _MSC_VER && !defined NDEBUG
+    if (breakIntoDebugger)
+        __debugbreak(); //__asm int 3;
+#endif
+
+    BlendResult result = {}; //value-initialized: all four corners start as BLEND_NONE (= 0)
+
+    //the 2x2 block consists of two identical rows or two identical columns: nothing to blend
+    if ((ker.f == ker.g &&
+         ker.j == ker.k) ||
+        (ker.f == ker.j &&
+         ker.g == ker.k))
+        return result;
+
+    auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight); };
+
+    //sum of color distances along each diagonal direction; the center pair is weighted by cfg.centerDirectionBias
+    double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + cfg.centerDirectionBias * dist(ker.j, ker.g);
+    double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + cfg.centerDirectionBias * dist(ker.f, ker.k);
+
+    if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
+    {
+        const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
+        //F and K are only marked if they differ from both of their neighbors inside the 2x2 block
+        if (ker.f != ker.g && ker.f != ker.j)
+            result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+
+        if (ker.k != ker.j && ker.k != ker.g)
+            result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+    }
+    else if (fk < jg)
+    {
+        const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
+        //same rule for J and G
+        if (ker.j != ker.f && ker.j != ker.k)
+            result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+
+        if (ker.g != ker.f && ker.g != ker.k)
+            result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
+    }
+    //jg == fk: no direction wins, all corners stay BLEND_NONE
+    return result;
+}
+
+//get_x<rotDeg>(ker): element of the 3x3 kernel that is found at position "x" when the kernel is viewed rotated by rotDeg.
+//The primary templates below return ker.x unchanged (used for ROT_0); the explicit specializations that follow
+//map every position to its source member for ROT_90, ROT_180 and ROT_270. DEF_GETTER is re-defined for each group.
+#define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
+//we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
+DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)
+DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f)
+DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i)
+#undef DEF_GETTER
+
+//ROT_90: DEF_GETTER(x, y) makes position x read member y
+#define DEF_GETTER(x, y)  template <> [[maybe_unused]] inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(a, g) DEF_GETTER(b, d) DEF_GETTER(c, a)
+DEF_GETTER(d, h) DEF_GETTER(e, e) DEF_GETTER(f, b)
+DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)
+#undef DEF_GETTER
+
+//ROT_180
+#define DEF_GETTER(x, y) template <> [[maybe_unused]] inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(a, i) DEF_GETTER(b, h) DEF_GETTER(c, g)
+DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)
+DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)
+#undef DEF_GETTER
+
+//ROT_270
+#define DEF_GETTER(x, y) template <> [[maybe_unused]] inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
+DEF_GETTER(a, c) DEF_GETTER(b, f) DEF_GETTER(c, i)
+DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)
+DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g)
+#undef DEF_GETTER
+
+
+//Four blend types are compressed into a single byte, 2 bits per corner:
+//bits 0-1 = top-left, bits 2-3 = top-right, bits 4-5 = bottom-right, bits 6-7 = bottom-left
+//inline BlendType getTopL   (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
+inline BlendType getTopR(unsigned char b)
+{
+    return static_cast<BlendType>((b >> 2) & 0x3);
+}
+
+inline BlendType getBottomR(unsigned char b)
+{
+    return static_cast<BlendType>((b >> 4) & 0x3);
+}
+
+inline BlendType getBottomL(unsigned char b)
+{
+    return static_cast<BlendType>((b >> 6) & 0x3);
+}
+
+//overwrites the whole byte: top-left is set, the other three corners are cleared
+inline void clearAddTopL(unsigned char& b, BlendType bt)
+{
+    b = static_cast<unsigned char>(bt);
+}
+
+//the add*() functions only OR their bits in: the buffer is assumed to be initialized
+//before preprocessing, e.g. via clearAddTopL()
+inline void addTopR(unsigned char& b, BlendType bt)
+{
+    b = static_cast<unsigned char>(b | (bt << 2));
+}
+
+inline void addBottomR(unsigned char& b, BlendType bt)
+{
+    b = static_cast<unsigned char>(b | (bt << 4));
+}
+
+inline void addBottomL(unsigned char& b, BlendType bt)
+{
+    b = static_cast<unsigned char>(b | (bt << 6));
+}
+
+//true if at least one of the four corners packed into "b" carries a blend type other than BLEND_NONE
+inline bool blendingNeeded(unsigned char b)
+{
+    static_assert(BLEND_NONE == 0); //the comparison against 0 below depends on this
+    return b != 0;
+}
+
+//Rotate the packed blend info (2 bits per corner) to the left by one corner slot per 90 degree step;
+//bits shifted out at the top re-enter at the bottom.
+template <RotationDegree rotDeg> inline
+unsigned char rotateBlendInfo(unsigned char b)
+{
+    return b; //ROT_0: nothing to rotate
+}
+
+template <> inline
+unsigned char rotateBlendInfo<ROT_90>(unsigned char b)
+{
+    return static_cast<unsigned char>((b << 2) | (b >> 6));
+}
+
+template <> inline
+unsigned char rotateBlendInfo<ROT_180>(unsigned char b)
+{
+    return static_cast<unsigned char>((b << 4) | (b >> 4));
+}
+
+template <> inline
+unsigned char rotateBlendInfo<ROT_270>(unsigned char b)
+{
+    return static_cast<unsigned char>((b << 6) | (b >> 2));
+}
+
+
+/* input kernel area naming convention:
+-------------
+| A | B | C |
+|---|---|---|
+| D | E | F | input pixel is at position E
+|---|---|---|
+| G | H | I |
+-------------
+*/
+//Handles one rotation of the output block of input pixel "e": if the (rotated) blend info requests blending
+//for the bottom-right corner, the more similar neighbor color (f or h) is blended into the block at "target"
+//using one of the Scaler's blend*() functions. scaleImage() calls this once per rotation degree.
+//  target / trgWidth: top-left of the output block and width of the target image
+//  blendInfo:         packed blend types of all four corners (see getTopR() and friends)
+template <class Scaler, class ColorDistance, RotationDegree rotDeg>
+FORCE_INLINE //perf: quite worth it!
+void blendPixel(const Kernel_3x3& ker,
+                uint32_t* target, int trgWidth,
+                unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
+                const xbrz::ScalerCfg& cfg)
+{
+    //the kernel letters below resolve to the rotated getters, so the code can be written for ROT_0 only
+    //("a" is not needed by this function)
+    //#define a get_a<rotDeg>(ker)
+#define b get_b<rotDeg>(ker)
+#define c get_c<rotDeg>(ker)
+#define d get_d<rotDeg>(ker)
+#define e get_e<rotDeg>(ker)
+#define f get_f<rotDeg>(ker)
+#define g get_g<rotDeg>(ker)
+#define h get_h<rotDeg>(ker)
+#define i get_i<rotDeg>(ker)
+
+#if defined _MSC_VER && !defined NDEBUG
+    if (breakIntoDebugger)
+        __debugbreak(); //__asm int 3;
+#endif
+
+    const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
+
+    if (getBottomR(blend) >= BLEND_NORMAL)
+    {
+        auto eq   = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight) < cfg.equalColorTolerance; };
+        auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight); };
+
+        //decide between blending along a line (true) and blending the corner only (false)
+        const bool doLineBlend = [&]() -> bool
+        {
+            if (getBottomR(blend) >= BLEND_DOMINANT)
+                return true;
+
+            //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
+            if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90° corners
+                return false;
+            if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
+                return false;
+
+            //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
+            if (!eq(e, i) && eq(g, h) && eq(h, i) && eq(i, f) && eq(f, c))
+                return false;
+
+            return true;
+        }();
+
+        const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
+
+        OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
+
+        if (doLineBlend)
+        {
+            const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
+            const double hc = dist(h, c); //
+
+            const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
+            const bool haveSteepLine   = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
+
+            if (haveShallowLine)
+            {
+                if (haveSteepLine)
+                    Scaler::blendLineSteepAndShallow(px, out);
+                else
+                    Scaler::blendLineShallow(px, out);
+            }
+            else
+            {
+                if (haveSteepLine)
+                    Scaler::blendLineSteep(px, out);
+                else
+                    Scaler::blendLineDiagonal(px, out);
+            }
+        }
+        else
+            Scaler::blendCorner(px, out);
+    }
+
+    //remove the kernel letter macros again so that they cannot leak into the code that follows
+    //#undef a
+#undef b
+#undef c
+#undef d
+#undef e
+#undef f
+#undef g
+#undef h
+#undef i
+}
+
+
+//Out-of-bounds policy for scaleImage(): every pixel outside of the source image is read as 0
+class OobReaderTransparent
+{
+public:
+    //set up the four source rows y - 1 ... y + 2; rows outside of [0, srcHeight) are represented by nullptr
+    OobReaderTransparent(const uint32_t* src, int srcWidth, int srcHeight, int y) :
+        s_m1(rowOrNull(src, srcWidth, srcHeight, y - 1)),
+        s_0 (rowOrNull(src, srcWidth, srcHeight, y    )),
+        s_p1(rowOrNull(src, srcWidth, srcHeight, y + 1)),
+        s_p2(rowOrNull(src, srcWidth, srcHeight, y + 2)),
+        srcWidth_(srcWidth) {}
+
+    //fill kernel members d, h, l, p from source column x + 2
+    void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F
+    {
+        const int x_p2 = x + 2;
+
+        [[likely]] if (0 <= x_p2 && x_p2 < srcWidth_)
+        {
+            ker.d = s_m1 ? s_m1[x_p2] : 0;
+            ker.h = s_0  ? s_0 [x_p2] : 0;
+            ker.l = s_p1 ? s_p1[x_p2] : 0;
+            ker.p = s_p2 ? s_p2[x_p2] : 0;
+        }
+        else //column is outside of the image
+            ker.d = ker.h = ker.l = ker.p = 0;
+    }
+
+private:
+    //start of source row "row", or nullptr if that row does not exist
+    static const uint32_t* rowOrNull(const uint32_t* src, int srcWidth, int srcHeight, int row)
+    {
+        if (row < 0 || row >= srcHeight)
+            return nullptr;
+        return src + srcWidth * row;
+    }
+
+    const uint32_t* const s_m1;
+    const uint32_t* const s_0;
+    const uint32_t* const s_p1;
+    const uint32_t* const s_p2;
+    const int srcWidth_;
+};
+
+
+//Out-of-bounds policy for scaleImage(): coordinates outside of the source image are clamped
+//to the nearest valid row/column, i.e. the border pixels are duplicated
+class OobReaderDuplicate
+{
+public:
+    //set up the four source rows y - 1 ... y + 2, each clamped to [0, srcHeight - 1]
+    OobReaderDuplicate(const uint32_t* src, int srcWidth, int srcHeight, int y) :
+        s_m1(clampedRow(src, srcWidth, srcHeight, y - 1)),
+        s_0 (clampedRow(src, srcWidth, srcHeight, y    )),
+        s_p1(clampedRow(src, srcWidth, srcHeight, y + 1)),
+        s_p2(clampedRow(src, srcWidth, srcHeight, y + 2)),
+        srcWidth_(srcWidth) {}
+
+    //fill kernel members d, h, l, p from source column x + 2 (clamped to [0, srcWidth - 1])
+    void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F
+    {
+        const int col = std::clamp(x + 2, 0, srcWidth_ - 1);
+
+        ker.d = s_m1[col];
+        ker.h = s_0 [col];
+        ker.l = s_p1[col];
+        ker.p = s_p2[col];
+    }
+
+private:
+    //start of the source row closest to "row"
+    static const uint32_t* clampedRow(const uint32_t* src, int srcWidth, int srcHeight, int row)
+    {
+        return src + srcWidth * std::clamp(row, 0, srcHeight - 1);
+    }
+
+    const uint32_t* const s_m1;
+    const uint32_t* const s_0;
+    const uint32_t* const s_p1;
+    const uint32_t* const s_p2;
+    const int srcWidth_;
+};
+
+
+//Scale the source rows [yFirst, yLast) of "src" (srcWidth x srcHeight pixels) into "trg", whose rows are
+//srcWidth * Scaler::scale pixels wide. yFirst/yLast are clamped to [0, srcHeight]; nothing is done for an
+//empty row range or srcWidth <= 0.
+//  Scaler:        provides "scale" and the blend*() functions used by blendPixel()
+//  ColorDistance: provides dist(pix1, pix2, luminanceWeight)
+//  OobReader:     decides what is read for kernel pixels outside of the source image
+//Processing per source pixel: finish the blend info of its four corners (results for the next row are kept
+//in a temporary buffer of srcWidth bytes located at the end of the stripe's target area), fill the
+//scale x scale output block with the source color, then blend all four rotations if needed.
+template <class Scaler, class ColorDistance, class OobReader> //scaler policy: see "Scaler2x" reference implementation
+void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
+{
+    yFirst = std::max(yFirst, 0);
+    yLast  = std::min(yLast, srcHeight);
+    if (yFirst >= yLast || srcWidth <= 0)
+        return;
+
+    const int trgWidth = srcWidth * Scaler::scale;
+
+    //(ab)use space of "sizeof(uint32_t) * srcWidth * Scaler::scale" at the end of the image as temporary
+    //buffer for "on the fly preprocessing" without risk of accidental overwriting before accessing
+    unsigned char* const preProcBuf = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - srcWidth;
+
+    //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
+    //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
+    {
+        const OobReader oobReader(src, srcWidth, srcHeight, yFirst - 1);
+
+        //initialize at position x = -1
+        Kernel_4x4 ker4 = {};
+        oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1
+        ker4.a = ker4.d;
+        ker4.e = ker4.h;
+        ker4.i = ker4.l;
+        ker4.m = ker4.p;
+
+        oobReader.readDhlp(ker4, -3);
+        ker4.b = ker4.d;
+        ker4.f = ker4.h;
+        ker4.j = ker4.l;
+        ker4.n = ker4.p;
+
+        oobReader.readDhlp(ker4, -2);
+        ker4.c = ker4.d;
+        ker4.g = ker4.h;
+        ker4.k = ker4.l;
+        ker4.o = ker4.p;
+
+        oobReader.readDhlp(ker4, -1);
+
+        {
+            const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+            clearAddTopL(preProcBuf[0], res.blend_k); //set 1st known corner for (0, yFirst)
+        }
+
+        for (int x = 0; x < srcWidth; ++x)
+        {
+            ker4.a = ker4.b;    //shift previous kernel to the left
+            ker4.e = ker4.f;    // -----------------
+            ker4.i = ker4.j;    // | A | B | C | D |
+            ker4.m = ker4.n;    // |---|---|---|---|
+            /**/                // | E | F | G | H | (x, yFirst - 1) is at position F
+            ker4.b = ker4.c;    // |---|---|---|---|
+            ker4.f = ker4.g;    // | I | J | K | L |
+            ker4.j = ker4.k;    // |---|---|---|---|
+            ker4.n = ker4.o;    // | M | N | O | P |
+            /**/                // -----------------
+            ker4.c = ker4.d;
+            ker4.g = ker4.h;
+            ker4.k = ker4.l;
+            ker4.o = ker4.p;
+
+            oobReader.readDhlp(ker4, x);
+
+            /*  preprocessing blend result:
+                ---------
+                | F | G |   evaluate corner between F, G, J, K
+                |---+---|   current input pixel is at position F
+                | J | K |
+                ---------                                        */
+            const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+            addTopR(preProcBuf[x], res.blend_j); //set 2nd known corner for (x, yFirst)
+
+            if (x + 1 < srcWidth)
+                clearAddTopL(preProcBuf[x + 1], res.blend_k); //set 1st known corner for (x + 1, yFirst)
+        }
+    }
+    //------------------------------------------------------------------------------------
+
+    for (int y = yFirst; y < yLast; ++y)
+    {
+        uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
+
+        const OobReader oobReader(src, srcWidth, srcHeight, y);
+
+        //initialize at position x = -1
+        Kernel_4x4 ker4 = {};
+        oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1
+        ker4.a = ker4.d;
+        ker4.e = ker4.h;
+        ker4.i = ker4.l;
+        ker4.m = ker4.p;
+
+        oobReader.readDhlp(ker4, -3);
+        ker4.b = ker4.d;
+        ker4.f = ker4.h;
+        ker4.j = ker4.l;
+        ker4.n = ker4.p;
+
+        oobReader.readDhlp(ker4, -2);
+        ker4.c = ker4.d;
+        ker4.g = ker4.h;
+        ker4.k = ker4.l;
+        ker4.o = ker4.p;
+
+        oobReader.readDhlp(ker4, -1);
+
+        unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
+        {
+            const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+            clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (0, y + 1) and buffer for use on next column
+
+            addBottomL(preProcBuf[0], res.blend_g); //set 3rd known corner for (0, y)
+        }
+
+        for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
+        {
+#if defined _MSC_VER && !defined NDEBUG
+            breakIntoDebugger = debugPixelX == x && debugPixelY == y;
+#endif
+            ker4.a = ker4.b;    //shift previous kernel to the left
+            ker4.e = ker4.f;    // -----------------
+            ker4.i = ker4.j;    // | A | B | C | D |
+            ker4.m = ker4.n;    // |---|---|---|---|
+            /**/                // | E | F | G | H | (x, y) is at position F
+            ker4.b = ker4.c;    // |---|---|---|---|
+            ker4.f = ker4.g;    // | I | J | K | L |
+            ker4.j = ker4.k;    // |---|---|---|---|
+            ker4.n = ker4.o;    // | M | N | O | P |
+            /**/                // -----------------
+            ker4.c = ker4.d;
+            ker4.g = ker4.h;
+            ker4.k = ker4.l;
+            ker4.o = ker4.p;
+
+            oobReader.readDhlp(ker4, x);
+
+            //evaluate the four corners on bottom-right of current pixel
+            unsigned char blend_xy = preProcBuf[x]; //for current (x, y) position
+            {
+                /*  preprocessing blend result:
+                    ---------
+                    | F | G |   evaluate corner between F, G, J, K
+                    |---+---|   current input pixel is at position F
+                    | J | K |
+                    ---------                                        */
+                const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+                addBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
+
+                addTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
+                preProcBuf[x] = blend_xy1; //store on current buffer position for use on next row
+
+                [[likely]] if (x + 1 < srcWidth)
+                {
+                    //blend_xy1 -> blend_x1y1
+                    clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
+
+                    addBottomL(preProcBuf[x + 1], res.blend_g); //set 3rd known corner for (x + 1, y)
+                }
+            }
+
+            //fill block of size scale * scale with the given color
+            fillBlock(out, trgWidth * sizeof(uint32_t), ker4.f, Scaler::scale, Scaler::scale);
+            //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
+
+            //blend all four corners of current pixel
+            if (blendingNeeded(blend_xy))
+            {
+#ifndef _MSC_VER
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
+                //relies on the member order of Kernel_4x4: its first nine members are read as Kernel_3x3's a ... i
+                const auto& ker3 = reinterpret_cast<const Kernel_3x3&>(ker4); //"The Things We Do for Perf"
+                blendPixel<Scaler, ColorDistance, ROT_0  >(ker3, out, trgWidth, blend_xy, cfg);
+                blendPixel<Scaler, ColorDistance, ROT_90 >(ker3, out, trgWidth, blend_xy, cfg);
+                blendPixel<Scaler, ColorDistance, ROT_180>(ker3, out, trgWidth, blend_xy, cfg);
+                blendPixel<Scaler, ColorDistance, ROT_270>(ker3, out, trgWidth, blend_xy, cfg);
+#ifndef _MSC_VER
+#pragma GCC diagnostic pop
+#endif
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------------
+
+//Scaler policy for 2x magnification. Each blend*() function mixes color "col" into selected pixels of the
+//(rotated) 2x2 output block "out" through ColorGradient::alphaGrad<M, N>().
+//NOTE(review): alphaGrad is defined by the ColorGradient base, not in this struct; M / N is presumably the
+//share of "col" in the result - confirm against the gradient classes.
+template <class ColorGradient>
+struct Scaler2x : public ColorGradient
+{
+    static const int scale = 2;
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    //used by blendPixel() when only a shallow line was detected
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+    }
+
+    //used by blendPixel() when only a steep line was detected
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+    }
+
+    //used by blendPixel() when both a steep and a shallow line were detected
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 1>(), col);
+        alphaGrad<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
+    }
+
+    //used by blendPixel() for line blending when neither a steep nor a shallow line was detected
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 2>(out.template ref<1, 1>(), col);
+    }
+
+    //used by blendPixel() when no line blending is done
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
+    }
+};
+
+
+//Scaler policy for 3x magnification. Each blend*() function mixes color "col" into selected pixels of the
+//(rotated) 3x3 output block "out" through ColorGradient::alphaGrad<M, N>(), or assigns "col" directly.
+//NOTE(review): alphaGrad is defined by the ColorGradient base, not in this struct; M / N is presumably the
+//share of "col" in the result - confirm against the gradient classes.
+template <class ColorGradient>
+struct Scaler3x : public ColorGradient
+{
+    static const int scale = 3;
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+
+    //used by blendPixel() when only a shallow line was detected
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        out.template ref<scale - 1, 2>() = col;
+    }
+
+    //used by blendPixel() when only a steep line was detected
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        out.template ref<2, scale - 1>() = col;
+    }
+
+    //used by blendPixel() when both a steep and a shallow line were detected
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 4>(out.template ref<2, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<2, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, 2>(), col);
+        out.template ref<2, 2>() = col;
+    }
+
+    //used by blendPixel() for line blending when neither a steep nor a shallow line was detected
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
+    {
+        alphaGrad<1, 8>(out.template ref<1, 2>(), col); //conflict with other rotations for this odd scale
+        alphaGrad<1, 8>(out.template ref<2, 1>(), col);
+        alphaGrad<7, 8>(out.template ref<2, 2>(), col); //
+    }
+
+    //used by blendPixel() when no line blending is done
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out)
+    {
+        //model a round corner
+        alphaGrad<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
+        //alphaGrad<7, 256>(out.template ref<2, 1>(), col); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
+        //alphaGrad<7, 256>(out.template ref<1, 2>(), col); //0.02826017254
+    }
+};
+
+
+template <class ColorGradient> //ColorGradient: policy providing "template alphaGrad<M, N>(uint32_t&, uint32_t)", e.g. ColorGradientRGB or ColorGradientARGB
+struct Scaler4x : public ColorGradient //blend patterns used by xbrz::scale() for factor 4
+{
+    static const int scale = 4; //upscaling factor of this policy; all ref<> indices below lie in 0 .. scale - 1
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+    //each blend*() below mixes "col" into a fixed set of out.template ref<I, J>() pixels, either via alphaGrad<M, N>() or by plain assignment
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out) //shallow-line pattern
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //steep-line pattern: blendLineShallow with the two ref<> indices exchanged
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out) //combined pattern; the pixel set is symmetric in the two ref<> indices
+    {
+        alphaGrad<3, 4>(out.template ref<3, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<1, 3>(), col);
+        alphaGrad<1, 4>(out.template ref<3, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<0, 3>(), col);
+
+        alphaGrad<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
+
+        out.template ref<3, 3>() = col;
+        out.template ref<3, 2>() = col;
+        out.template ref<2, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out) //diagonal-line pattern: two alphaGrad<1, 2> pixels plus the overwritten ref<scale - 1, scale - 1>
+    {
+        alphaGrad<1, 2>(out.template ref<scale - 1, scale / 2    >(), col);
+        alphaGrad<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
+        out.template ref<scale - 1, scale - 1>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out) //corner pattern: M/100 template arguments are the rounded "exact" fractions noted per line
+    {
+        //model a round corner
+        alphaGrad<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
+        alphaGrad< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
+        alphaGrad< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
+    }
+};
+
+
+template <class ColorGradient> //ColorGradient: policy providing "template alphaGrad<M, N>(uint32_t&, uint32_t)", e.g. ColorGradientRGB or ColorGradientARGB
+struct Scaler5x : public ColorGradient //blend patterns used by xbrz::scale() for factor 5
+{
+    static const int scale = 5; //upscaling factor of this policy; all ref<> indices below lie in 0 .. scale - 1
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+    //each blend*() below mixes "col" into a fixed set of out.template ref<I, J>() pixels, either via alphaGrad<M, N>() or by plain assignment
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out) //shallow-line pattern
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 3, 4>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+        out.template ref<scale - 1, 4>() = col;
+        out.template ref<scale - 2, 4>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //steep-line pattern: blendLineShallow with the two ref<> indices exchanged
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<1, 4>(out.template ref<4, scale - 3>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<4, scale - 2>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out) //combined pattern; the pixel set is symmetric in the two ref<> indices
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        //same three blends with the ref<> indices exchanged
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+
+        alphaGrad<2, 3>(out.template ref<3, 3>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out) //diagonal-line pattern: three alphaGrad<1, 8> pixels, two alphaGrad<7, 8> pixels, ref<4, 4> overwritten
+    {
+        alphaGrad<1, 8>(out.template ref<scale - 1, scale / 2    >(), col); //conflict with other rotations for this odd scale
+        alphaGrad<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
+        alphaGrad<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col); //
+
+        alphaGrad<7, 8>(out.template ref<4, 3>(), col);
+        alphaGrad<7, 8>(out.template ref<3, 4>(), col);
+
+        out.template ref<4, 4>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out) //corner pattern: M/100 template arguments are the rounded "exact" fractions noted per line
+    {
+        //model a round corner
+        alphaGrad<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
+        alphaGrad<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
+        alphaGrad<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
+        //alphaGrad<1, 64>(out.template ref<4, 2>(), col); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
+        //alphaGrad<1, 64>(out.template ref<2, 4>(), col); //0.01676812367
+    }
+};
+
+
+template <class ColorGradient> //ColorGradient: policy providing "template alphaGrad<M, N>(uint32_t&, uint32_t)", e.g. ColorGradientRGB or ColorGradientARGB
+struct Scaler6x : public ColorGradient //blend patterns used by xbrz::scale() for factor 6
+{
+    static const int scale = 6; //upscaling factor of this policy; all ref<> indices below lie in 0 .. scale - 1
+
+    template <unsigned int M, unsigned int N> //bring template function into scope for GCC
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); }
+
+    //each blend*() below mixes "col" into a fixed set of out.template ref<I, J>() pixels, either via alphaGrad<M, N>() or by plain assignment
+    template <class OutputMatrix>
+    static void blendLineShallow(uint32_t col, OutputMatrix& out) //shallow-line pattern
+    {
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 3, 4>(), col);
+
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 3, 5>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+        out.template ref<scale - 1, 4>() = col;
+        out.template ref<scale - 1, 5>() = col;
+
+        out.template ref<scale - 2, 4>() = col;
+        out.template ref<scale - 2, 5>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteep(uint32_t col, OutputMatrix& out) //steep-line pattern: blendLineShallow with the two ref<> indices exchanged
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<1, 4>(out.template ref<4, scale - 3>(), col);
+
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<5, scale - 3>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<5, scale - 1>() = col;
+
+        out.template ref<4, scale - 2>() = col;
+        out.template ref<5, scale - 2>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out) //combined pattern; the pixel set is symmetric in the two ref<> indices
+    {
+        alphaGrad<1, 4>(out.template ref<0, scale - 1>(), col);
+        alphaGrad<1, 4>(out.template ref<2, scale - 2>(), col);
+        alphaGrad<3, 4>(out.template ref<1, scale - 1>(), col);
+        alphaGrad<3, 4>(out.template ref<3, scale - 2>(), col);
+        //same four blends with the ref<> indices exchanged
+        alphaGrad<1, 4>(out.template ref<scale - 1, 0>(), col);
+        alphaGrad<1, 4>(out.template ref<scale - 2, 2>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 1, 1>(), col);
+        alphaGrad<3, 4>(out.template ref<scale - 2, 3>(), col);
+        //remaining pixels of the pattern are overwritten with col
+        out.template ref<2, scale - 1>() = col;
+        out.template ref<3, scale - 1>() = col;
+        out.template ref<4, scale - 1>() = col;
+        out.template ref<5, scale - 1>() = col;
+
+        out.template ref<4, scale - 2>() = col;
+        out.template ref<5, scale - 2>() = col;
+
+        out.template ref<scale - 1, 2>() = col;
+        out.template ref<scale - 1, 3>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendLineDiagonal(uint32_t col, OutputMatrix& out) //diagonal-line pattern: three alphaGrad<1, 2> pixels, three overwritten pixels
+    {
+        alphaGrad<1, 2>(out.template ref<scale - 1, scale / 2    >(), col);
+        alphaGrad<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
+        alphaGrad<1, 2>(out.template ref<scale - 3, scale / 2 + 2>(), col);
+
+        out.template ref<scale - 2, scale - 1>() = col;
+        out.template ref<scale - 1, scale - 1>() = col;
+        out.template ref<scale - 1, scale - 2>() = col;
+    }
+
+    template <class OutputMatrix>
+    static void blendCorner(uint32_t col, OutputMatrix& out) //corner pattern: M/100 template arguments are the rounded "exact" fractions noted per line
+    {
+        //model a round corner
+        alphaGrad<97, 100>(out.template ref<5, 5>(), col); //exact: 0.9711013910
+        alphaGrad<42, 100>(out.template ref<4, 5>(), col); //0.4236372243
+        alphaGrad<42, 100>(out.template ref<5, 4>(), col); //0.4236372243
+        alphaGrad< 6, 100>(out.template ref<5, 3>(), col); //0.05652034508
+        alphaGrad< 6, 100>(out.template ref<3, 5>(), col); //0.05652034508
+    }
+};
+
+//------------------------------------------------------------------------------------
+
+struct ColorDistanceRGB
+{
+    //Color distance policy for ColorFormat::RGB: the result is whatever distYCbCrBuffered() reports for the two pixels.
+    //luminanceWeight is part of the common dist() signature but is not read on this path.
+    //Unbuffered alternative considered by the author: "pix1 == pix2 ? 0 : distYCbCr(pix1, pix2, luminanceWeight)" (equality shortcut: about 4% perf boost)
+    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
+    {
+        const double bufferedDistance = distYCbCrBuffered(pix1, pix2);
+        return bufferedDistance;
+    }
+};
+
+struct ColorDistanceARGB
+{
+    /*
+    Color distance policy for ColorFormat::ARGB. With the alpha values a1, a2 normalized to [0, 1] the result is
+
+        min(a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * |a1 - a2|
+
+    Requirements the author lists for an alpha-aware distance:
+        1. if a1 = a2, distance should be: a1 * distYCbCr()
+        2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
+        3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
+
+    Alternative noted by the author: std::sqrt(a1 * a2 * square(distYCbCrBuffered(pix1, pix2)) + square(255 * (a1 - a2)))
+    luminanceWeight is not read on this (buffered) path.
+    */
+    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
+    {
+        const double alpha1 = getAlpha(pix1) / 255.0;
+        const double alpha2 = getAlpha(pix2) / 255.0;
+        const double colorDist = distYCbCrBuffered(pix1, pix2);
+
+        //one comparison decides which alpha is the smaller one; the author measured a comparison-based form as 15% faster than std::min() + abs()
+        const bool firstIsLower = alpha1 < alpha2;
+        const double alphaLow  = firstIsLower ? alpha1 : alpha2;
+        const double alphaHigh = firstIsLower ? alpha2 : alpha1;
+
+        return alphaLow * colorDist + 255 * (alphaHigh - alphaLow);
+    }
+};
+
+
+struct ColorDistanceUnbufferedARGB
+{
+    //Color distance policy for ColorFormat::ARGB_UNBUFFERED; with alpha values a1, a2 normalized to [0, 1]:
+    //    min(a1, a2) * distYCbCr(pix1, pix2, luminanceWeight) + 255 * |a1 - a2|
+    static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
+    {
+        const double alpha1 = getAlpha(pix1) / 255.0;
+        const double alpha2 = getAlpha(pix2) / 255.0;
+        const double colorDist = distYCbCr(pix1, pix2, luminanceWeight);
+
+        //pick the smaller/larger alpha with a single comparison
+        const bool firstIsLower = alpha1 < alpha2;
+        const double alphaLow  = firstIsLower ? alpha1 : alpha2;
+        const double alphaHigh = firstIsLower ? alpha2 : alpha1;
+
+        return alphaLow * colorDist + 255 * (alphaHigh - alphaLow);
+    }
+};
+
+
+struct ColorGradientRGB
+{
+    //Blend policy paired with ColorFormat::RGB in xbrz::scale(): pixBack is replaced by gradientRGB<M, N>(pixFront, pixBack)
+    template <unsigned int M, unsigned int N>
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront)
+    {
+        const uint32_t blended = gradientRGB<M, N>(pixFront, pixBack);
+        pixBack = blended;
+    }
+};
+
+struct ColorGradientARGB
+{
+    //Blend policy paired with the ARGB formats in xbrz::scale(): pixBack is replaced by gradientARGB<M, N>(pixFront, pixBack)
+    template <unsigned int M, unsigned int N>
+    static void alphaGrad(uint32_t& pixBack, uint32_t pixFront)
+    {
+        const uint32_t blended = gradientARGB<M, N>(pixFront, pixBack);
+        pixBack = blended;
+    }
+};
+}
+
+
+//Upscale the source rows [yFirst, yLast) by "factor" and store them at the matching rows of the target image.
+//  factor:        1 (plain copy of the selected rows) or 2 - SCALE_FACTOR_MAX (xBRZ)
+//  src, trg:      source image (srcWidth x srcHeight) and target image (factor * srcWidth x factor * srcHeight)
+//  colFmt:        selects the blend policy, the color distance policy and the out-of-bounds reader
+//  cfg:           tuning parameters, forwarded to scaleImage()
+//  yFirst, yLast: half-open slice of source rows; the declaration defaults them to 0 and INT_MAX
+//An unsupported factor or color format ends in assert(false).
+void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, ColorFormat colFmt, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
+{
+    if (factor == 1)
+    {
+        //yLast defaults to INT_MAX: clamp the slice to the image first, otherwise "yLast * srcWidth" overflows and the copy reads far beyond the source buffer
+        yFirst = std::max(yFirst, 0);
+        yLast  = std::min(yLast, srcHeight);
+        if (yFirst >= yLast || srcWidth <= 0)
+            return;
+
+        //a slice belongs at its own rows of the target image (that is what allows disjoint slices of one image to be processed independently) => offset trg like src
+        const size_t posFirst = static_cast<size_t>(yFirst) * srcWidth;
+        const size_t posLast  = static_cast<size_t>(yLast ) * srcWidth;
+        std::copy(src + posFirst, src + posLast, trg + posFirst);
+        return;
+    }
+
+    static_assert(SCALE_FACTOR_MAX == 6); //the factor switches below must be extended when the maximum changes
+    switch (colFmt)
+    {
+        case ColorFormat::RGB: //opaque pixels: buffered color distance, out-of-bounds reads handled by OobReaderDuplicate
+            switch (factor)
+            {
+                case 2:
+                    return scaleImage<Scaler2x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 3:
+                    return scaleImage<Scaler3x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 4:
+                    return scaleImage<Scaler4x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 5:
+                    return scaleImage<Scaler5x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 6:
+                    return scaleImage<Scaler6x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            }
+            break;
+
+        case ColorFormat::ARGB: //alpha-aware: buffered color distance, out-of-bounds reads handled by OobReaderTransparent
+            switch (factor)
+            {
+                case 2:
+                    return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 3:
+                    return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 4:
+                    return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 5:
+                    return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 6:
+                    return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            }
+            break;
+
+        case ColorFormat::ARGB_UNBUFFERED: //like ARGB, but with the unbuffered color distance
+            switch (factor)
+            {
+                case 2:
+                    return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 3:
+                    return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 4:
+                    return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 5:
+                    return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+                case 6:
+                    return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+            }
+            break;
+    }
+    assert(false); //unsupported factor or color format
+}
+
+
+//Parameter tuning helper: reports whether the color distance of col1 and col2, measured with the policy that belongs to colFmt, is strictly below equalColorTolerance.
+bool xbrz::equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance)
+{
+    const auto isBelowTolerance = [equalColorTolerance](double distance) { return distance < equalColorTolerance; };
+
+    switch (colFmt)
+    {
+        case ColorFormat::RGB:
+            return isBelowTolerance(ColorDistanceRGB::dist(col1, col2, luminanceWeight));
+
+        case ColorFormat::ARGB:
+            return isBelowTolerance(ColorDistanceARGB::dist(col1, col2, luminanceWeight));
+
+        case ColorFormat::ARGB_UNBUFFERED:
+            return isBelowTolerance(ColorDistanceUnbufferedARGB::dist(col1, col2, luminanceWeight));
+    }
+
+    assert(false); //colFmt holds a value outside of the enumeration
+    return false;
+}
+
+
+//Convenience overload: forwards to the general bilinearScale() for two buffers of uint32_t pixels, covering all target rows and leaving the pixel values unconverted.
+void xbrz::bilinearScale(const uint32_t* src, int srcWidth, int srcHeight,
+                         /**/  uint32_t* trg, int trgWidth, int trgHeight)
+{
+    //byte size of one row of each buffer
+    const auto srcRowBytes = srcWidth * sizeof(uint32_t);
+    const auto trgRowBytes = trgWidth * sizeof(uint32_t);
+
+    const auto keepPixel = [](uint32_t pix) { return pix; };
+
+    bilinearScale(src, srcWidth, srcHeight, srcRowBytes,
+                  trg, trgWidth, trgHeight, trgRowBytes,
+                  0, trgHeight, keepPixel);
+}
+
+
+//Convenience overload: forwards to the general nearestNeighborScale() for two buffers of uint32_t pixels, covering all target rows and leaving the pixel values unconverted.
+void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight,
+                                /**/  uint32_t* trg, int trgWidth, int trgHeight)
+{
+    //byte size of one row of each buffer
+    const auto srcRowBytes = srcWidth * sizeof(uint32_t);
+    const auto trgRowBytes = trgWidth * sizeof(uint32_t);
+
+    const auto keepPixel = [](uint32_t pix) { return pix; };
+
+    nearestNeighborScale(src, srcWidth, srcHeight, srcRowBytes,
+                         trg, trgWidth, trgHeight, trgRowBytes,
+                         0, trgHeight, keepPixel);
+}
+
+
+#if 0
+//#include <ppl.h>
+void bilinearScaleCpu(const uint32_t* src, int srcWidth, int srcHeight, //disabled (#if 0) experiment: runs xbrz::bilinearScale() chunk-wise on a concurrency::task_group
+                      /**/  uint32_t* trg, int trgWidth, int trgHeight)
+{
+    const int TASK_GRANULARITY = 16; //number of target rows handed to each task
+
+    concurrency::task_group tg;
+
+    for (int i = 0; i < trgHeight; i += TASK_GRANULARITY) //one task per chunk of target rows [i, iLast)
+        tg.run([=]
+    {
+        const int iLast = std::min(i + TASK_GRANULARITY, trgHeight); //the final chunk may be shorter
+        xbrz::bilinearScale(src, srcWidth, srcHeight, srcWidth * sizeof(uint32_t),
+                            trg, trgWidth, trgHeight, trgWidth * sizeof(uint32_t),
+        i, iLast, [](uint32_t pix) { return pix; });
+    });
+    tg.wait(); //block until every chunk has been processed
+}
+
+
+//Perf: AMP vs CPU: merely ~10% shorter runtime (scaling 1280x800 -> 1920x1080)
+//#include <amp.h>
+void bilinearScaleAmp(const uint32_t* src, int srcWidth, int srcHeight, //throw concurrency::runtime_exception
+                      /**/  uint32_t* trg, int trgWidth, int trgHeight) //disabled (#if 0) experiment: bilinear resize of tightly packed uint32_t images via C++ AMP
+{
+    //C++ AMP reference:       https://msdn.microsoft.com/en-us/library/hh289390.aspx
+    //introduction to C++ AMP: https://msdn.microsoft.com/en-us/magazine/hh882446.aspx
+    using namespace concurrency;
+    //TODO: pitch
+
+    if (srcHeight <= 0 || srcWidth <= 0) return; //nothing to sample from; also avoids dividing by zero below
+
+    const float scaleX = static_cast<float>(trgWidth ) / srcWidth;
+    const float scaleY = static_cast<float>(trgHeight) / srcHeight;
+
+    array_view<const uint32_t, 2> srcView(srcHeight, srcWidth, src);
+    array_view<      uint32_t, 2> trgView(trgHeight, trgWidth, trg);
+    trgView.discard_data();
+
+    parallel_for_each(trgView.extent, [=](index<2> idx) restrict(amp) //throw ?
+    {
+        const int y = idx[0]; //target row
+        const int x = idx[1]; //target column
+        //Perf notes:
+        //    -> float-based calculation is (almost) 2x as fast as double!
+        //    -> no noticeable improvement via tiling: https://msdn.microsoft.com/en-us/magazine/hh882447.aspx
+        //    -> no noticeable improvement with restrict(amp,cpu)
+        //    -> iterating over y-axis only is significantly slower!
+        //    -> pre-calculating x,y-dependent variables in a buffer + array_view<> is ~ 20 % slower!
+        const int y1 = srcHeight * y / trgHeight; //upper source row
+        int y2 = y1 + 1; //lower source row, kept inside the image
+        if (y2 == srcHeight) --y2;
+
+        const float yy1 = y / scaleY - y1; //weight of row y2
+        const float y2y = 1 - yy1; //weight of row y1
+        //-------------------------------------
+        const int x1 = srcWidth * x / trgWidth; //left source column
+        int x2 = x1 + 1; //right source column, kept inside the image
+        if (x2 == srcWidth) --x2;
+
+        const float xx1 = x / scaleX - x1; //weight of column x2
+        const float x2x = 1 - xx1; //weight of column x1
+        //-------------------------------------
+        const float x2xy2y = x2x * y2y; //weights of the four neighbours (y1, x1), (y1, x2), (y2, x1), (y2, x2)
+        const float xx1y2y = xx1 * y2y;
+        const float x2xyy1 = x2x * yy1;
+        const float xx1yy1 = xx1 * yy1;
+
+        auto interpolate = [=](int offset) //offset: index of the 8-bit channel, counted from the lowest byte
+        {
+            /*
+                https://en.wikipedia.org/wiki/Bilinear_interpolation
+                (c11(x2 - x) + c21(x - x1)) * (y2 - y ) +
+                (c12(x2 - x) + c22(x - x1)) * (y  - y1)
+            */
+            const auto c11 = (srcView(y1, x1) >> (8 * offset)) & 0xff;
+            const auto c21 = (srcView(y1, x2) >> (8 * offset)) & 0xff;
+            const auto c12 = (srcView(y2, x1) >> (8 * offset)) & 0xff;
+            const auto c22 = (srcView(y2, x2) >> (8 * offset)) & 0xff;
+
+            return c11 * x2xy2y + c21 * xx1y2y +
+                   c12 * x2xyy1 + c22 * xx1yy1;
+        };
+
+        const float bi = interpolate(0);
+        const float gi = interpolate(1);
+        const float ri = interpolate(2);
+        const float ai = interpolate(3);
+        //round each channel to the nearest integer
+        const auto b = static_cast<uint32_t>(bi + 0.5f);
+        const auto g = static_cast<uint32_t>(gi + 0.5f);
+        const auto r = static_cast<uint32_t>(ri + 0.5f);
+        const auto a = static_cast<uint32_t>(ai + 0.5f);
+
+        trgView(y, x) = (a << 24) | (r << 16) | (g << 8) | b;
+    });
+    trgView.synchronize(); //throw ?
+}
+#endif

+ 79 - 0
client/xBRZ/xbrz.h

@@ -0,0 +1,79 @@
+// ****************************************************************************
+// * This file is part of the xBRZ project. It is distributed under           *
+// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0         *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the following libraries            *
+// * (or with modified versions that use the same licenses), and distribute   *
+// * linked combinations including the two: MAME, FreeFileSync, Snes9x, ePSXe *
+// * You must obey the GNU General Public License in all respects for all of  *
+// * the code used other than MAME, FreeFileSync, Snes9x, ePSXe.              *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#ifndef XBRZ_HEADER_3847894708239054
+#define XBRZ_HEADER_3847894708239054
+
+#include <cstddef> //size_t
+#include <cstdint> //uint32_t
+#include <limits>
+#include "xbrz_config.h"
+
+
+namespace xbrz
+{
+/*
+-------------------------------------------------------------------------
+| xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju |
+-------------------------------------------------------------------------
+using a modified approach of xBR:
+http://board.byuu.org/viewtopic.php?f=10&t=2248
+- new rule set preserving small image features
+- highly optimized for performance
+- support alpha channel
+- support multithreading
+- support 64-bit architectures
+- support processing image slices
+- support scaling up to 6xBRZ
+*/
+
+enum class ColorFormat //from high bits -> low bits, 8 bit per channel; selects the blend and color distance policies used by scale() and equalColorTest()
+{
+    RGB,  //8 bit for each red, green, blue, upper 8 bits unused
+    ARGB, //including alpha channel, BGRA byte order on little-endian machines
+    ARGB_UNBUFFERED, //like ARGB, but without the one-time buffer creation overhead (ca. 100 - 300 ms) at the expense of a slightly slower scaling time
+};
+
+const int SCALE_FACTOR_MAX = 6; //largest factor supported by scale(); the implementation static_asserts on this value
+
+/*
+-> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only
+-> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image:
+   Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis)
+   CAVEAT: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition
+   in the target image data if you are using multiple threads for processing each enlarged slice!
+
+THREAD-SAFETY: - parts of the same image may be scaled by multiple threads as long as the [yFirst, yLast) ranges do not overlap!
+               - there is a minor inefficiency for the first row of a slice, so avoid processing single rows only; suggestion: process at least 8-16 rows
+*/
+void scale(size_t factor, //valid range: 2 - SCALE_FACTOR_MAX; the implementation additionally accepts 1 and then copies the selected source rows unchanged
+           const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
+           ColorFormat colFmt,
+           const ScalerCfg& cfg = ScalerCfg(),
+           int yFirst = 0, int yLast = std::numeric_limits<int>::max()); //slice of source image
+
+void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, //bilinear resize; both buffers are addressed as tightly packed rows of uint32_t pixels
+                   /**/  uint32_t* trg, int trgWidth, int trgHeight);
+
+void nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, //nearest-neighbor resize; both buffers are addressed as tightly packed rows of uint32_t pixels
+                          /**/  uint32_t* trg, int trgWidth, int trgHeight);
+
+
+//parameter tuning: returns true if the colFmt-specific color distance between col1 and col2 is strictly below equalColorTolerance
+bool equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance);
+}
+
+#endif

+ 35 - 0
client/xBRZ/xbrz_config.h

@@ -0,0 +1,35 @@
+// ****************************************************************************
+// * This file is part of the xBRZ project. It is distributed under           *
+// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0         *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the following libraries            *
+// * (or with modified versions that use the same licenses), and distribute   *
+// * linked combinations including the two: MAME, FreeFileSync, Snes9x, ePSXe *
+// * You must obey the GNU General Public License in all respects for all of  *
+// * the code used other than MAME, FreeFileSync, Snes9x, ePSXe.              *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#ifndef XBRZ_CONFIG_HEADER_284578425345
+#define XBRZ_CONFIG_HEADER_284578425345
+
+//do NOT include any headers here! used by xBRZ_dll!!!
+
+namespace xbrz
+{
+struct ScalerCfg //tuning parameters with their default values; passed to xbrz::scale(), which default-constructs one if none is given
+{
+    double luminanceWeight            = 1; //presumably the "luminanceWeight" argument of the color distance functions - TODO confirm in xbrz.cpp
+    double equalColorTolerance        = 30; //presumably: color distances below this value count as equal (cf. xbrz::equalColorTest) - TODO confirm
+    double centerDirectionBias        = 4; //NOTE(review): consumed by the analysis code in xbrz.cpp; exact meaning to be confirmed there
+    double dominantDirectionThreshold = 3.6; //NOTE(review): consumed by the analysis code in xbrz.cpp; exact meaning to be confirmed there
+    double steepDirectionThreshold    = 2.2; //NOTE(review): consumed by the analysis code in xbrz.cpp; exact meaning to be confirmed there
+    double newTestAttribute           = 0; //unused; test new parameters
+};
+}
+
+#endif

+ 266 - 0
client/xBRZ/xbrz_tools.h

@@ -0,0 +1,266 @@
+// ****************************************************************************
+// * This file is part of the xBRZ project. It is distributed under           *
+// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0         *
+// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *
+// *                                                                          *
+// * Additionally and as a special exception, the author gives permission     *
+// * to link the code of this program with the following libraries            *
+// * (or with modified versions that use the same licenses), and distribute   *
+// * linked combinations including the two: MAME, FreeFileSync, Snes9x, ePSXe *
+// * You must obey the GNU General Public License in all respects for all of  *
+// * the code used other than MAME, FreeFileSync, Snes9x, ePSXe.              *
+// * If you modify this file, you may extend this exception to your version   *
+// * of the file, but you are not obligated to do so. If you do not wish to   *
+// * do so, delete this exception statement from your version.                *
+// ****************************************************************************
+
+#ifndef XBRZ_TOOLS_H_825480175091875
+#define XBRZ_TOOLS_H_825480175091875
+
+#include <cassert>
+#include <cstdint>
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+
+namespace xbrz
+{
+//return byte number N of val, where N = 0 is the least significant byte
+template <uint32_t N> inline
+unsigned char getByte(uint32_t val) { return static_cast<unsigned char>((val >> (8 * N)) & 0xff); }
+
+//channel accessors for a 32-bit pixel laid out as 0xAARRGGBB (alpha = byte 3 ... blue = byte 0)
+inline unsigned char getAlpha(uint32_t pix) { return getByte<3>(pix); }
+inline unsigned char getRed  (uint32_t pix) { return getByte<2>(pix); }
+inline unsigned char getGreen(uint32_t pix) { return getByte<1>(pix); }
+inline unsigned char getBlue (uint32_t pix) { return getByte<0>(pix); }
+
+//assemble a 32-bit pixel laid out as 0xAARRGGBB from 8-bit channels
+//each channel is widened to uint32_t BEFORE shifting: an unsigned char is promoted to (signed) int,
+//so a plain "a << 24" would shift into the sign bit for a >= 0x80
+inline uint32_t makePixel(unsigned char a, unsigned char r, unsigned char g, unsigned char b)
+{
+    return (static_cast<uint32_t>(a) << 24) |
+           (static_cast<uint32_t>(r) << 16) |
+           (static_cast<uint32_t>(g) <<  8) |
+            static_cast<uint32_t>(b);
+}
+
+//same as above without an alpha channel: the alpha byte of the result is 0
+inline uint32_t makePixel(unsigned char r, unsigned char g, unsigned char b)
+{
+    return (static_cast<uint32_t>(r) << 16) |
+           (static_cast<uint32_t>(g) <<  8) |
+            static_cast<uint32_t>(b);
+}
+
+//expand a 15-bit (5-5-5) or 16-bit (5-6-5) RGB pixel to 0x00RRGGBB
+//each channel is moved into the most significant bits of its byte; the remaining low bits and the alpha byte stay 0
+inline uint32_t rgb555to888(uint16_t pix) { return ((pix & 0x7C00) << 9) | ((pix & 0x03E0) << 6) | ((pix & 0x001F) << 3); }
+inline uint32_t rgb565to888(uint16_t pix) { return ((pix & 0xF800) << 8) | ((pix & 0x07E0) << 5) | ((pix & 0x001F) << 3); }
+
+//reduce a 0x..RRGGBB pixel to 5-5-5 or 5-6-5 by keeping only the most significant bits of each channel; bits 24-31 are ignored
+inline uint16_t rgb888to555(uint32_t pix) { return static_cast<uint16_t>(((pix & 0xF80000) >> 9) | ((pix & 0x00F800) >> 6) | ((pix & 0x0000F8) >> 3)); }
+inline uint16_t rgb888to565(uint32_t pix) { return static_cast<uint16_t>(((pix & 0xF80000) >> 8) | ((pix & 0x00FC00) >> 5) | ((pix & 0x0000F8) >> 3)); }
+
+
+//move a pixel pointer by a number of BYTES (not elements), e.g. to step from one image row to the next by its pitch
+//returns a pointer of the same type as ptr
+template <class Pix> inline
+Pix* byteAdvance(Pix* ptr, int bytes)
+{
+    using PixNonConst = typename std::remove_cv<Pix>::type;
+    //do the arithmetic through "char*" when Pix is not cv-qualified, otherwise through "const char*"
+    using PixByte     = typename std::conditional<std::is_same<Pix, PixNonConst>::value, char, const char>::type;
+
+    static_assert(std::is_integral<PixNonConst>::value, "Pix* is expected to be cast-able to char*");
+
+    return reinterpret_cast<Pix*>(reinterpret_cast<PixByte*>(ptr) + bytes);
+}
+
+
+//fill a block of blockWidth x blockHeight pixels with the given color
+//  trg:   first (top-left) pixel of the block
+//  pitch: distance in bytes between the starts of two consecutive rows
+template <class Pix> inline
+void fillBlock(Pix* trg, int pitch /*[bytes]*/, Pix col, int blockWidth, int blockHeight)
+{
+    //alternative formulation using std::fill, kept for reference:
+    //for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
+    //    std::fill(trg, trg + blockWidth, col);
+
+    for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
+        for (int x = 0; x < blockWidth; ++x)
+            trg[x] = col;
+}
+
+
+//nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
+//  src, trg:           first pixel of the source/target image
+//  srcPitch, trgPitch: distance in bytes between the starts of two consecutive rows
+//  yFirst, yLast:      half-open range of TARGET rows to write; clamped to [0, trgHeight)
+//  pixCvrt:            callable converting one PixSrc into one PixTrg (return type enforced by static_assert)
+//if a pitch is smaller than one row of pixels, the function hits assert(false) and returns without writing anything
+template <class PixSrc, class PixTrg, class PixConverter>
+void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
+                          /**/  PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
+                          int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
+{
+    static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
+    static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
+
+    static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");
+
+    //a row cannot be wider than the pitch that separates it from the next row
+    if (srcPitch < srcWidth * static_cast<int>(sizeof(PixSrc))  ||
+        trgPitch < trgWidth * static_cast<int>(sizeof(PixTrg)))
+    {
+        assert(false);
+        return;
+    }
+
+    yFirst = std::max(yFirst, 0);
+    yLast  = std::min(yLast, trgHeight);
+    //nothing to do for an empty row range or an empty source image (this also rules out trgHeight <= 0 before dividing by it)
+    if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
+
+    for (int y = yFirst; y < yLast; ++y)
+    {
+        //integer division: target row y reads source row floor(srcHeight * y / trgHeight)
+        const int ySrc = srcHeight * y / trgHeight;
+        const PixSrc* const srcLine = byteAdvance(src, ySrc * srcPitch);
+        PixTrg*       const trgLine = byteAdvance(trg, y    * trgPitch);
+
+        for (int x = 0; x < trgWidth; ++x)
+        {
+            //same mapping horizontally: target column x reads source column floor(srcWidth * x / trgWidth)
+            const int xSrc = srcWidth * x / trgWidth;
+            trgLine[x] = pixCvrt(srcLine[xSrc]);
+        }
+    }
+}
+
+
+//nearest-neighbor (going over source image - fast for upscaling, since source is read only once)
+//  src, trg:           first pixel of the source/target image
+//  srcPitch, trgPitch: distance in bytes between the starts of two consecutive rows
+//  yFirst, yLast:      half-open range of SOURCE rows to process; clamped to [0, srcHeight)
+//  pixCvrt:            callable converting one PixSrc into one PixTrg (return type enforced by static_assert)
+//every processed source pixel is converted once and written to the rectangular block of target pixels that maps back to it
+//if a pitch is smaller than one row of pixels, the function hits assert(false) and returns without writing anything
+template <class PixSrc, class PixTrg, class PixConverter>
+void nearestNeighborScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
+                                    /**/  PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
+                                    int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
+{
+    static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
+    static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
+
+    static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");
+
+    //a row cannot be wider than the pitch that separates it from the next row
+    if (srcPitch < srcWidth * static_cast<int>(sizeof(PixSrc))  ||
+        trgPitch < trgWidth * static_cast<int>(sizeof(PixTrg)))
+    {
+        assert(false);
+        return;
+    }
+
+    yFirst = std::max(yFirst, 0);
+    yLast  = std::min(yLast, srcHeight);
+    //nothing to do for an empty row range or an empty target image (this also rules out srcHeight <= 0 before dividing by it)
+    if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return;
+
+    for (int y = yFirst; y < yLast; ++y)
+    {
+        //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
+        // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
+
+        //keep within for loop to support MT input slices!
+        const int yTrgFirst = ( y      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
+        const int yTrgLast  = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
+        const int blockHeight = yTrgLast - yTrgFirst;
+
+        //blockHeight == 0: no target row maps to this source row (happens when downscaling)
+        if (blockHeight > 0)
+        {
+            const PixSrc* srcLine = byteAdvance(src, y         * srcPitch);
+            /**/  PixTrg* trgLine = byteAdvance(trg, yTrgFirst * trgPitch);
+            int xTrgFirst = 0;
+
+            for (int x = 0; x < srcWidth; ++x)
+            {
+                //same ceil-based mapping horizontally; xTrgFirst carries the end of the previous non-empty block
+                const int xTrgLast = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
+                const int blockWidth = xTrgLast - xTrgFirst;
+                if (blockWidth > 0)
+                {
+                    xTrgFirst = xTrgLast;
+
+                    const auto trgPix = pixCvrt(srcLine[x]);
+                    fillBlock(trgLine, trgPitch, trgPix, blockWidth, blockHeight);
+                    trgLine += blockWidth; //advance to the left edge of the next block
+                }
+            }
+        }
+    }
+}
+
+
+//bilinear scaling of a 32-bit source image (four 8-bit channels per pixel, interpolated independently)
+//  src, trg:           first pixel of the source/target image
+//  srcPitch, trgPitch: distance in bytes between the starts of two consecutive rows
+//  yFirst, yLast:      half-open range of TARGET rows to write; clamped to [0, trgHeight)
+//  pixCvrt:            callable converting the interpolated uint32_t pixel into one PixTrg (return type enforced by static_assert)
+//if a pitch is smaller than one row of pixels, the function hits assert(false) and returns without writing anything
+//an empty row range or an empty/negative-sized source or target image is a no-op
+template <class PixTrg, class PixConverter>
+void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
+                   /**/    PixTrg* trg, int trgWidth, int trgHeight, int trgPitch,
+                   int yFirst, int yLast, PixConverter pixCvrt /*convert uint32_t to PixTrg*/)
+{
+    static_assert(std::is_integral<PixTrg>::value,                            "PixTrg* is expected to be cast-able to char*");
+    static_assert(std::is_same<decltype(pixCvrt(uint32_t())), PixTrg>::value, "PixConverter returning wrong pixel format");
+
+    //a row cannot be wider than the pitch that separates it from the next row
+    if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
+        trgPitch < trgWidth * static_cast<int>(sizeof(PixTrg)))
+    {
+        assert(false);
+        return;
+    }
+
+    yFirst = std::max(yFirst, 0);
+    yLast  = std::min(yLast, trgHeight);
+    //trgWidth must be checked as well: a negative value would be converted to a huge size_t by the std::vector constructor below
+    if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0 || trgWidth <= 0) return;
+
+    const double scaleX = static_cast<double>(trgWidth ) / srcWidth;
+    const double scaleY = static_cast<double>(trgHeight) / srcHeight;
+
+    //perf notes:
+    //    -> double-based calculation is (slightly) faster than float
+    //    -> pre-calculation gives significant boost; std::vector<> memory allocation is negligible!
+    struct CoeffsX
+    {
+        int     x1 = 0; //left  source column
+        int     x2 = 0; //right source column (== x1 at the right image border)
+        double xx1 = 0; //weight of the right column: x - x1
+        double x2x = 0; //weight of the left  column: x2 - x
+    };
+    std::vector<CoeffsX> buf(trgWidth);
+    for (int x = 0; x < trgWidth; ++x)
+    {
+        const int x1 = srcWidth * x / trgWidth;
+        int x2 = x1 + 1;
+        if (x2 == srcWidth) --x2; //clamp at the right border
+
+        const double xx1 = x / scaleX - x1;
+        const double x2x = 1 - xx1;
+
+        buf[x] = { x1, x2, xx1, x2x };
+    }
+
+    for (int y = yFirst; y < yLast; ++y)
+    {
+        const int y1 = srcHeight * y / trgHeight;
+        int y2 = y1 + 1;
+        if (y2 == srcHeight) --y2; //clamp at the bottom border
+
+        const double yy1 = y / scaleY - y1;
+        const double y2y = 1 - yy1;
+
+        const uint32_t* const srcLine     = byteAdvance(src, y1 * srcPitch);
+        const uint32_t* const srcLineNext = byteAdvance(src, y2 * srcPitch);
+        PixTrg*         const trgLine     = byteAdvance(trg, y  * trgPitch);
+
+        for (int x = 0; x < trgWidth; ++x)
+        {
+            //perf: do NOT "simplify" the variable layout without measurement!
+            const int     x1 = buf[x].x1;
+            const int     x2 = buf[x].x2;
+            const double xx1 = buf[x].xx1;
+            const double x2x = buf[x].x2x;
+
+            //weights of the four neighboring source pixels
+            const double x2xy2y = x2x * y2y;
+            const double xx1y2y = xx1 * y2y;
+            const double x2xyy1 = x2x * yy1;
+            const double xx1yy1 = xx1 * yy1;
+
+            //interpolate one 8-bit channel; offset selects the byte (0 = least significant)
+            auto interpolate = [=](int offset)
+            {
+                /* https://en.wikipedia.org/wiki/Bilinear_interpolation
+                     (c11(x2 - x) + c21(x - x1)) * (y2 - y ) +
+                     (c12(x2 - x) + c22(x - x1)) * (y  - y1)                          */
+                const auto c11 = (srcLine    [x1] >> (8 * offset)) & 0xff;
+                const auto c21 = (srcLine    [x2] >> (8 * offset)) & 0xff;
+                const auto c12 = (srcLineNext[x1] >> (8 * offset)) & 0xff;
+                const auto c22 = (srcLineNext[x2] >> (8 * offset)) & 0xff;
+
+                return c11 * x2xy2y + c21 * xx1y2y +
+                       c12 * x2xyy1 + c22 * xx1yy1;
+            };
+
+            const double bi = interpolate(0);
+            const double gi = interpolate(1);
+            const double ri = interpolate(2);
+            const double ai = interpolate(3);
+
+            //round to nearest integer
+            const auto b = static_cast<uint32_t>(bi + 0.5);
+            const auto g = static_cast<uint32_t>(gi + 0.5);
+            const auto r = static_cast<uint32_t>(ri + 0.5);
+            const auto a = static_cast<uint32_t>(ai + 0.5);
+
+            const uint32_t trgPix = (a << 24) | (r << 16) | (g << 8) | b;
+
+            trgLine[x] = pixCvrt(trgPix);
+        }
+    }
+}
+}
+
+#endif //XBRZ_TOOLS_H_825480175091875

+ 8 - 2
config/schemas/settings.json

@@ -166,7 +166,8 @@
 				"showfps",
 				"targetfps",
 				"vsync",
-				"scalingMode"
+				"upscalingFilter",
+				"downscalingFilter"
 			],
 			"properties" : {
 				"resolution" : {
@@ -230,7 +231,12 @@
 					"type" : "boolean",
 					"default" : true
 				},
-				"scalingMode" : {
+				"upscalingFilter" : {
+					"type" : "string",
+					"enum" : [ "auto", "none", "xbrz2", "xbrz3", "xbrz4" ],
+					"default" : "auto"
+				},
+				"downscalingFilter" : {
 					"type" : "string",
 					"enum" : [ "nearest", "linear", "best" ],
 					"default" : "best"

+ 21 - 2
launcher/settingsView/csettingsview_moc.cpp

@@ -39,6 +39,15 @@ static constexpr std::array cursorTypesList =
 };
 
+// Setting values for "video" / "upscalingFilter", indexed by the current item of comboBoxUpscalingFilter:
+// the order here must match both the item order of that combo box and the enum in config/schemas/settings.json
 static constexpr std::array upscalingFilterTypes =
+{
+	"auto",
+	"none",
+	"xbrz2",
+	"xbrz3",
+	"xbrz4"
+};
+
+static constexpr std::array downscalingFilterTypes =
 {
 	"nearest",
 	"linear",
@@ -138,10 +147,14 @@ void CSettingsView::loadSettings()
 	Languages::fillLanguages(ui->comboBoxLanguage, false);
 	fillValidRenderers();
 
-	std::string upscalingFilter = settings["video"]["scalingMode"].String();
+	std::string upscalingFilter = settings["video"]["upscalingFilter"].String();
 	int upscalingFilterIndex = vstd::find_pos(upscalingFilterTypes, upscalingFilter);
 	ui->comboBoxUpscalingFilter->setCurrentIndex(upscalingFilterIndex);
 
+	std::string downscalingFilter = settings["video"]["downscalingFilter"].String();
+	int downscalingFilterIndex = vstd::find_pos(downscalingFilterTypes, downscalingFilter);
+	ui->comboBoxDownscalingFilter->setCurrentIndex(downscalingFilterIndex);
+
 	ui->sliderMusicVolume->setValue(settings["general"]["music"].Integer());
 	ui->sliderSoundVolume->setValue(settings["general"]["sound"].Integer());
 	ui->sliderRelativeCursorSpeed->setValue(settings["general"]["relativePointerSpeedMultiplier"].Integer());
@@ -645,10 +658,16 @@ void CSettingsView::on_buttonIgnoreSslErrors_clicked(bool checked)
 
 void CSettingsView::on_comboBoxUpscalingFilter_currentIndexChanged(int index)
 {
-	Settings node = settings.write["video"]["scalingMode"];
+	Settings node = settings.write["video"]["upscalingFilter"];
 	node->String() = upscalingFilterTypes[index];
 }
 
+void CSettingsView::on_comboBoxDownscalingFilter_currentIndexChanged(int index)
+{
+	// Stores the setting value that corresponds to the selected combo box item.
+	// Qt passes -1 when the combo box has no current item (e.g. after setCurrentIndex(-1)),
+	// so the index must be validated before it is used as an array subscript.
+	if(index < 0 || index >= static_cast<int>(downscalingFilterTypes.size()))
+		return;
+
+	Settings node = settings.write["video"]["downscalingFilter"];
+	node->String() = downscalingFilterTypes[index];
+}
+
 void CSettingsView::on_sliderMusicVolume_valueChanged(int value)
 {
 	Settings node = settings.write["general"]["music"];

+ 1 - 0
launcher/settingsView/csettingsview_moc.h

@@ -67,6 +67,7 @@ private slots:
 
 	void on_buttonIgnoreSslErrors_clicked(bool checked);
 	void on_comboBoxUpscalingFilter_currentIndexChanged(int index);
+	void on_comboBoxDownscalingFilter_currentIndexChanged(int index);
 	void on_sliderMusicVolume_valueChanged(int value);
 	void on_sliderSoundVolume_valueChanged(int value);
 	void on_buttonRelativeCursorMode_toggled(bool value);

+ 42 - 14
launcher/settingsView/csettingsview_moc.ui

@@ -48,8 +48,8 @@
        <rect>
         <x>0</x>
         <y>0</y>
-        <width>730</width>
-        <height>1691</height>
+        <width>729</width>
+        <height>1396</height>
        </rect>
       </property>
       <layout class="QGridLayout" name="gridLayout" columnstretch="2,0,1,1,1">
@@ -57,7 +57,6 @@
         <widget class="QLabel" name="labelGeneral">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -301,7 +300,6 @@
         <widget class="QLabel" name="labelArtificialIntelligence">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -516,7 +514,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelVideo">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -532,7 +529,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelAudio">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -606,7 +602,7 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         </widget>
        </item>
        <item row="23" column="1" colspan="4">
-        <widget class="QComboBox" name="comboBoxUpscalingFilter">
+        <widget class="QComboBox" name="comboBoxDownscalingFilter">
          <item>
           <property name="text">
            <string>Nearest</string>
@@ -619,7 +615,7 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
          </item>
          <item>
           <property name="text">
-           <string>Best (Linear)</string>
+           <string>Automatic (Linear)</string>
           </property>
          </item>
         </widget>
@@ -747,7 +743,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelInputMouse_2">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -852,7 +847,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelNetwork">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -894,9 +888,9 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         </widget>
        </item>
        <item row="23" column="0">
-        <widget class="QLabel" name="labelUpscalingFilter">
+        <widget class="QLabel" name="labelDownscalingFilter">
          <property name="text">
-          <string>Upscaling Filter</string>
+          <string>Downscaling Filter</string>
          </property>
         </widget>
        </item>
@@ -942,7 +936,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelInputMouse">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -1020,7 +1013,6 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
         <widget class="QLabel" name="labelInputMouse_3">
          <property name="font">
           <font>
-           <weight>75</weight>
            <bold>true</bold>
           </font>
          </property>
@@ -1160,6 +1152,42 @@ Fullscreen Exclusive Mode - game will cover entirety of your screen and will use
          </property>
         </widget>
        </item>
+       <item row="22" column="0">
+        <widget class="QLabel" name="labelUpscalingFilter">
+         <property name="text">
+          <string>Upscaling Filter</string>
+         </property>
+        </widget>
+       </item>
+       <item row="22" column="1" colspan="4">
+        <widget class="QComboBox" name="comboBoxUpscalingFilter">
+         <item>
+          <property name="text">
+           <string>Automatic</string>
+          </property>
+         </item>
+         <item>
+          <property name="text">
+           <string>None</string>
+          </property>
+         </item>
+         <item>
+          <property name="text">
+           <string>xBRZ x2</string>
+          </property>
+         </item>
+         <item>
+          <property name="text">
+           <string>xBRZ x3</string>
+          </property>
+         </item>
+         <item>
+          <property name="text">
+           <string>xBRZ x4</string>
+          </property>
+         </item>
+        </widget>
+       </item>
       </layout>
      </widget>
     </widget>

+ 3 - 1
lib/CMakeLists.txt

@@ -731,7 +731,7 @@ endif()
 
 set_target_properties(vcmi PROPERTIES COMPILE_DEFINITIONS "VCMI_DLL=1")
 target_link_libraries(vcmi PUBLIC
-	minizip::minizip ZLIB::ZLIB
+	minizip::minizip ZLIB::ZLIB TBB::tbb
 	${SYSTEM_LIBS} Boost::boost Boost::thread Boost::filesystem Boost::program_options Boost::locale Boost::date_time
 )
 
@@ -796,6 +796,8 @@ if(NOT ENABLE_STATIC_LIBS)
 endif()
 
 if(APPLE_IOS AND NOT USING_CONAN)
+	install(IMPORTED_RUNTIME_ARTIFACTS TBB::tbb LIBRARY DESTINATION ${LIB_DIR}) # CMake 3.21+
+
 	get_target_property(LINKED_LIBS vcmi LINK_LIBRARIES)
 	foreach(LINKED_LIB IN LISTS LINKED_LIBS)
 		if(NOT TARGET ${LINKED_LIB})

+ 7 - 0
lib/Rect.h

@@ -119,6 +119,13 @@ public:
 		return Rect(x-p.x,y-p.y,w,h);
 	}
 
+	/// Returns a new Rect whose x, y, w and h are those of this rectangle, each multiplied by mul.
+	/// NOTE(review): for a non-integral T the products are presumably converted back to the
+	/// coordinate type by the Rect constructor - confirm the intended rounding with callers
+	template<typename T>
+	Rect operator*(const T &mul) const
+	{
+		return Rect(x*mul, y*mul, w*mul, h*mul);
+	}
+
+
 	Rect& operator=(const Rect &p)
 	{
 		x = p.x;