Просмотр исходного кода

Merge pull request #4686 from jpark37/fast-image-source

Premultiply image sources on load
Jim 4 года назад
Родитель
Коммит
055bc1d1ea

+ 0 - 16
libobs/data/default.effect

@@ -62,13 +62,6 @@ float4 PSDrawNonlinearAlpha(VertInOut vert_in) : TARGET
 	return rgba;
 }
 
-float4 PSDrawAlphaBlend(VertInOut vert_in) : TARGET
-{
-	float4 rgba = image.Sample(def_sampler, vert_in.uv);
-	rgba.rgb *= rgba.a;
-	return rgba;
-}
-
 technique Draw
 {
 	pass
@@ -95,12 +88,3 @@ technique DrawNonlinearAlpha
 		pixel_shader  = PSDrawNonlinearAlpha(vert_in);
 	}
 }
-
-technique DrawAlphaBlend
-{
-	pass
-	{
-		vertex_shader = VSDefault(vert_in);
-		pixel_shader  = PSDrawAlphaBlend(vert_in);
-	}
-}

+ 140 - 37
libobs/graphics/graphics-ffmpeg.c

@@ -6,6 +6,7 @@
 #include <libswscale/swscale.h>
 
 #include "../obs-ffmpeg-compat.h"
+#include "srgb.h"
 
 struct ffmpeg_image {
 	const char *file;
@@ -121,58 +122,129 @@ fail:
 #define obs_bswap16(v) __builtin_bswap16(v)
 #endif
 
+static void *ffmpeg_image_copy_data_straight(struct ffmpeg_image *info,
+					     AVFrame *frame)
+{
+	const size_t linesize = (size_t)info->cx * 4;
+	const size_t totalsize = info->cy * linesize;
+	void *data = bmalloc(totalsize);
+
+	const size_t src_linesize = frame->linesize[0];
+	if (linesize != src_linesize) {
+		const size_t min_line = linesize < src_linesize ? linesize
+								: src_linesize;
+
+		uint8_t *dst = data;
+		const uint8_t *src = frame->data[0];
+		for (int y = 0; y < info->cy; y++) {
+			memcpy(dst, src, min_line);
+			dst += linesize;
+			src += src_linesize;
+		}
+	} else {
+		memcpy(data, frame->data[0], totalsize);
+	}
+
+	return data;
+}
+
 static void *ffmpeg_image_reformat_frame(struct ffmpeg_image *info,
-					 AVFrame *frame)
+					 AVFrame *frame,
+					 enum gs_image_alpha_mode alpha_mode)
 {
 	struct SwsContext *sws_ctx = NULL;
 	void *data = NULL;
 	int ret = 0;
 
-	if (info->format == AV_PIX_FMT_RGBA ||
-	    info->format == AV_PIX_FMT_BGRA ||
-	    info->format == AV_PIX_FMT_BGR0) {
-		const size_t linesize = (size_t)info->cx * 4;
-		const size_t totalsize = info->cy * linesize;
-		data = bmalloc(totalsize);
-
-		const size_t src_linesize = frame->linesize[0];
-		if (linesize != src_linesize) {
+	if (info->format == AV_PIX_FMT_BGR0) {
+		data = ffmpeg_image_copy_data_straight(info, frame);
+	} else if (info->format == AV_PIX_FMT_RGBA ||
+		   info->format == AV_PIX_FMT_BGRA) {
+		if (alpha_mode == GS_IMAGE_ALPHA_STRAIGHT) {
+			data = ffmpeg_image_copy_data_straight(info, frame);
+		} else {
+			const size_t linesize = (size_t)info->cx * 4;
+			const size_t totalsize = info->cy * linesize;
+			data = bmalloc(totalsize);
+			const size_t src_linesize = frame->linesize[0];
 			const size_t min_line = linesize < src_linesize
 							? linesize
 							: src_linesize;
-
 			uint8_t *dst = data;
 			const uint8_t *src = frame->data[0];
-			for (int y = 0; y < info->cy; y++) {
-				memcpy(dst, src, min_line);
-				dst += linesize;
-				src += src_linesize;
+			const size_t row_elements = min_line >> 2;
+			if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) {
+				for (int y = 0; y < info->cy; y++) {
+					gs_premultiply_xyza_srgb_loop_restrict(
+						dst, src, row_elements);
+					dst += linesize;
+					src += src_linesize;
+				}
+			} else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) {
+				for (int y = 0; y < info->cy; y++) {
+					gs_premultiply_xyza_loop_restrict(
+						dst, src, row_elements);
+					dst += linesize;
+					src += src_linesize;
+				}
 			}
-		} else {
-			memcpy(data, frame->data[0], totalsize);
 		}
 	} else if (info->format == AV_PIX_FMT_RGBA64BE) {
-		const size_t linesize = (size_t)info->cx * 8;
-		data = bmalloc(info->cy * linesize);
-
+		const size_t dst_linesize = (size_t)info->cx * 4;
+		data = bmalloc(info->cy * dst_linesize);
 		const size_t src_linesize = frame->linesize[0];
-		const size_t min_line = linesize < src_linesize ? linesize
-								: src_linesize;
-		const size_t pairs = min_line >> 1;
-
+		const size_t src_min_line = (dst_linesize * 2) < src_linesize
+						    ? (dst_linesize * 2)
+						    : src_linesize;
+		const size_t row_elements = src_min_line >> 3;
+		uint8_t *dst = data;
 		const uint8_t *src = frame->data[0];
-		uint16_t *dst = data;
-		for (int y = 0; y < info->cy; y++) {
-			for (size_t x = 0; x < pairs; ++x) {
-				uint16_t value;
-				memcpy(&value, src, sizeof(value));
-				*dst = obs_bswap16(value);
-				++dst;
-				src += sizeof(value);
+		uint16_t value[4];
+		float f[4];
+		if (alpha_mode == GS_IMAGE_ALPHA_STRAIGHT) {
+			for (int y = 0; y < info->cy; y++) {
+				for (size_t x = 0; x < row_elements; ++x) {
+					memcpy(value, src, sizeof(value));
+					f[0] = (float)obs_bswap16(value[0]) /
+					       65535.0f;
+					f[1] = (float)obs_bswap16(value[1]) /
+					       65535.0f;
+					f[2] = (float)obs_bswap16(value[2]) /
+					       65535.0f;
+					f[3] = (float)obs_bswap16(value[3]) /
+					       65535.0f;
+					gs_float3_srgb_linear_to_nonlinear(f);
+					gs_float4_to_u8x4(dst, f);
+					dst += sizeof(*dst) * 4;
+					src += sizeof(value);
+				}
+
+				src += src_linesize - src_min_line;
+			}
+		} else {
+			for (int y = 0; y < info->cy; y++) {
+				for (size_t x = 0; x < row_elements; ++x) {
+					memcpy(value, src, sizeof(value));
+					f[0] = (float)obs_bswap16(value[0]) /
+					       65535.0f;
+					f[1] = (float)obs_bswap16(value[1]) /
+					       65535.0f;
+					f[2] = (float)obs_bswap16(value[2]) /
+					       65535.0f;
+					f[3] = (float)obs_bswap16(value[3]) /
+					       65535.0f;
+					gs_premultiply_float4(f);
+					gs_float3_srgb_linear_to_nonlinear(f);
+					gs_float4_to_u8x4(dst, f);
+					dst += sizeof(*dst) * 4;
+					src += sizeof(value);
+				}
+
+				src += src_linesize - src_min_line;
 			}
-
-			src += src_linesize - min_line;
 		}
+
+		info->format = AV_PIX_FMT_RGBA;
 	} else {
 		static const enum AVPixelFormat format = AV_PIX_FMT_BGRA;
 
@@ -222,6 +294,14 @@ static void *ffmpeg_image_reformat_frame(struct ffmpeg_image *info,
 
 		av_freep(pointers);
 
+		if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) {
+			gs_premultiply_xyza_srgb_loop(data, (size_t)info->cx *
+								    info->cy);
+		} else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) {
+			gs_premultiply_xyza_loop(data,
+						 (size_t)info->cx * info->cy);
+		}
+
 		info->format = format;
 	}
 
@@ -229,7 +309,8 @@ fail:
 	return data;
 }
 
-static void *ffmpeg_image_decode(struct ffmpeg_image *info)
+static void *ffmpeg_image_decode(struct ffmpeg_image *info,
+				 enum gs_image_alpha_mode alpha_mode)
 {
 	AVPacket packet = {0};
 	void *data = NULL;
@@ -271,7 +352,7 @@ static void *ffmpeg_image_decode(struct ffmpeg_image *info)
 		}
 	}
 
-	data = ffmpeg_image_reformat_frame(info, frame);
+	data = ffmpeg_image_reformat_frame(info, frame, alpha_mode);
 
 fail:
 	av_packet_unref(&packet);
@@ -312,7 +393,29 @@ uint8_t *gs_create_texture_file_data(const char *file,
 	uint8_t *data = NULL;
 
 	if (ffmpeg_image_init(&image, file)) {
-		data = ffmpeg_image_decode(&image);
+		data = ffmpeg_image_decode(&image, GS_IMAGE_ALPHA_STRAIGHT);
+		if (data) {
+			*format = convert_format(image.format);
+			*cx_out = (uint32_t)image.cx;
+			*cy_out = (uint32_t)image.cy;
+		}
+
+		ffmpeg_image_free(&image);
+	}
+
+	return data;
+}
+
+uint8_t *gs_create_texture_file_data2(const char *file,
+				      enum gs_image_alpha_mode alpha_mode,
+				      enum gs_color_format *format,
+				      uint32_t *cx_out, uint32_t *cy_out)
+{
+	struct ffmpeg_image image;
+	uint8_t *data = NULL;
+
+	if (ffmpeg_image_init(&image, file)) {
+		data = ffmpeg_image_decode(&image, alpha_mode);
 		if (data) {
 			*format = convert_format(image.format);
 			*cx_out = (uint32_t)image.cx;

+ 9 - 0
libobs/graphics/graphics.h

@@ -559,10 +559,19 @@ EXPORT gs_shader_t *gs_vertexshader_create_from_file(const char *file,
 EXPORT gs_shader_t *gs_pixelshader_create_from_file(const char *file,
 						    char **error_string);
 
+enum gs_image_alpha_mode {
+	GS_IMAGE_ALPHA_STRAIGHT,
+	GS_IMAGE_ALPHA_PREMULTIPLY_SRGB,
+	GS_IMAGE_ALPHA_PREMULTIPLY,
+};
+
 EXPORT gs_texture_t *gs_texture_create_from_file(const char *file);
 EXPORT uint8_t *gs_create_texture_file_data(const char *file,
 					    enum gs_color_format *format,
 					    uint32_t *cx, uint32_t *cy);
+EXPORT uint8_t *gs_create_texture_file_data2(
+	const char *file, enum gs_image_alpha_mode alpha_mode,
+	enum gs_color_format *format, uint32_t *cx, uint32_t *cy);
 
 #define GS_FLIP_U (1 << 0)
 #define GS_FLIP_V (1 << 1)

+ 85 - 18
libobs/graphics/image-file.c

@@ -18,13 +18,14 @@
 #include "image-file.h"
 #include "../util/base.h"
 #include "../util/platform.h"
+#include "vec4.h"
 
 #define blog(level, format, ...) \
 	blog(level, "%s: " format, __FUNCTION__, __VA_ARGS__)
 
 static void *bi_def_bitmap_create(int width, int height)
 {
-	return bmalloc(width * height * 4);
+	return bmalloc((size_t)4 * width * height);
 }
 
 static void bi_def_bitmap_set_opaque(void *bitmap, bool opaque)
@@ -71,7 +72,8 @@ static inline void *alloc_mem(gs_image_file_t *image, uint64_t *mem_usage,
 }
 
 static bool init_animated_gif(gs_image_file_t *image, const char *path,
-			      uint64_t *mem_usage)
+			      uint64_t *mem_usage,
+			      enum gs_image_alpha_mode alpha_mode)
 {
 	bool is_animated_gif = true;
 	gif_result result;
@@ -156,9 +158,18 @@ static bool init_animated_gif(gs_image_file_t *image, const char *path,
 		image->format = GS_RGBA;
 
 		if (mem_usage) {
-			*mem_usage += image->cx * image->cy * 4;
+			*mem_usage += (size_t)4 * image->cx * image->cy;
 			*mem_usage += size;
 		}
+
+		if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) {
+			gs_premultiply_xyza_srgb_loop(image->gif.frame_image,
+						      (size_t)image->cx *
+							      image->cy);
+		} else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) {
+			gs_premultiply_xyza_loop(image->gif.frame_image,
+						 (size_t)image->cx * image->cy);
+		}
 	} else {
 		gif_finalise(&image->gif);
 		bfree(image->gif_data);
@@ -180,7 +191,8 @@ not_animated:
 }
 
 static void gs_image_file_init_internal(gs_image_file_t *image,
-					const char *file, uint64_t *mem_usage)
+					const char *file, uint64_t *mem_usage,
+					enum gs_image_alpha_mode alpha_mode)
 {
 	size_t len;
 
@@ -195,12 +207,13 @@ static void gs_image_file_init_internal(gs_image_file_t *image,
 	len = strlen(file);
 
 	if (len > 4 && strcmp(file + len - 4, ".gif") == 0) {
-		if (init_animated_gif(image, file, mem_usage))
+		if (init_animated_gif(image, file, mem_usage, alpha_mode)) {
 			return;
+		}
 	}
 
-	image->texture_data = gs_create_texture_file_data(
-		file, &image->format, &image->cx, &image->cy);
+	image->texture_data = gs_create_texture_file_data2(
+		file, alpha_mode, &image->format, &image->cx, &image->cy);
 
 	if (mem_usage) {
 		*mem_usage += image->cx * image->cy *
@@ -216,7 +229,7 @@ static void gs_image_file_init_internal(gs_image_file_t *image,
 
 void gs_image_file_init(gs_image_file_t *image, const char *file)
 {
-	gs_image_file_init_internal(image, file, NULL);
+	gs_image_file_init_internal(image, file, NULL, GS_IMAGE_ALPHA_STRAIGHT);
 }
 
 void gs_image_file_free(gs_image_file_t *image)
@@ -241,7 +254,16 @@ void gs_image_file_free(gs_image_file_t *image)
 
 void gs_image_file2_init(gs_image_file2_t *if2, const char *file)
 {
-	gs_image_file_init_internal(&if2->image, file, &if2->mem_usage);
+	gs_image_file_init_internal(&if2->image, file, &if2->mem_usage,
+				    GS_IMAGE_ALPHA_STRAIGHT);
+}
+
+void gs_image_file3_init(gs_image_file3_t *if3, const char *file,
+			 enum gs_image_alpha_mode alpha_mode)
+{
+	gs_image_file_init_internal(&if3->image2.image, file,
+				    &if3->image2.mem_usage, alpha_mode);
+	if3->alpha_mode = alpha_mode;
 }
 
 void gs_image_file_init_texture(gs_image_file_t *image)
@@ -296,7 +318,8 @@ static inline int calculate_new_frame(gs_image_file_t *image,
 	return new_frame;
 }
 
-static void decode_new_frame(gs_image_file_t *image, int new_frame)
+static void decode_new_frame(gs_image_file_t *image, int new_frame,
+			     enum gs_image_alpha_mode alpha_mode)
 {
 	if (!image->animation_frame_cache[new_frame]) {
 		int last_frame;
@@ -314,14 +337,22 @@ static void decode_new_frame(gs_image_file_t *image, int new_frame)
 
 		/* decode actual desired frame */
 		if (gif_decode_frame(&image->gif, new_frame) == GIF_OK) {
-			size_t pos = new_frame * image->gif.width *
-				     image->gif.height * 4;
+			const size_t area =
+				(size_t)image->gif.width * image->gif.height;
+			size_t pos = new_frame * area * 4;
 			image->animation_frame_cache[new_frame] =
 				image->animation_frame_data + pos;
 
+			if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY_SRGB) {
+				gs_premultiply_xyza_srgb_loop(
+					image->gif.frame_image, area);
+			} else if (alpha_mode == GS_IMAGE_ALPHA_PREMULTIPLY) {
+				gs_premultiply_xyza_loop(image->gif.frame_image,
+							 area);
+			}
+
 			memcpy(image->animation_frame_cache[new_frame],
-			       image->gif.frame_image,
-			       image->gif.width * image->gif.height * 4);
+			       image->gif.frame_image, area * 4);
 
 			image->last_decoded_frame = new_frame;
 		}
@@ -330,7 +361,9 @@ static void decode_new_frame(gs_image_file_t *image, int new_frame)
 	image->cur_frame = new_frame;
 }
 
-bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns)
+static bool gs_image_file_tick_internal(gs_image_file_t *image,
+					uint64_t elapsed_time_ns,
+					enum gs_image_alpha_mode alpha_mode)
 {
 	int loops;
 
@@ -346,7 +379,7 @@ bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns)
 			calculate_new_frame(image, elapsed_time_ns, loops);
 
 		if (new_frame != image->cur_frame) {
-			decode_new_frame(image, new_frame);
+			decode_new_frame(image, new_frame, alpha_mode);
 			return true;
 		}
 	}
@@ -354,15 +387,49 @@ bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns)
 	return false;
 }
 
-void gs_image_file_update_texture(gs_image_file_t *image)
+bool gs_image_file_tick(gs_image_file_t *image, uint64_t elapsed_time_ns)
+{
+	return gs_image_file_tick_internal(image, elapsed_time_ns, false);
+}
+
+bool gs_image_file2_tick(gs_image_file2_t *if2, uint64_t elapsed_time_ns)
+{
+	return gs_image_file_tick_internal(&if2->image, elapsed_time_ns, false);
+}
+
+bool gs_image_file3_tick(gs_image_file3_t *if3, uint64_t elapsed_time_ns)
+{
+	return gs_image_file_tick_internal(&if3->image2.image, elapsed_time_ns,
+					   if3->alpha_mode);
+}
+
+static void
+gs_image_file_update_texture_internal(gs_image_file_t *image,
+				      enum gs_image_alpha_mode alpha_mode)
 {
 	if (!image->is_animated_gif || !image->loaded)
 		return;
 
 	if (!image->animation_frame_cache[image->cur_frame])
-		decode_new_frame(image, image->cur_frame);
+		decode_new_frame(image, image->cur_frame, alpha_mode);
 
 	gs_texture_set_image(image->texture,
 			     image->animation_frame_cache[image->cur_frame],
 			     image->gif.width * 4, false);
 }
+
+void gs_image_file_update_texture(gs_image_file_t *image)
+{
+	gs_image_file_update_texture_internal(image, false);
+}
+
+void gs_image_file2_update_texture(gs_image_file2_t *if2)
+{
+	gs_image_file_update_texture_internal(&if2->image, false);
+}
+
+void gs_image_file3_update_texture(gs_image_file3_t *if3)
+{
+	gs_image_file_update_texture_internal(&if3->image2.image,
+					      if3->alpha_mode);
+}

+ 22 - 6
libobs/graphics/image-file.h

@@ -51,8 +51,14 @@ struct gs_image_file2 {
 	uint64_t mem_usage;
 };
 
+struct gs_image_file3 {
+	struct gs_image_file2 image2;
+	enum gs_image_alpha_mode alpha_mode;
+};
+
 typedef struct gs_image_file gs_image_file_t;
 typedef struct gs_image_file2 gs_image_file2_t;
+typedef struct gs_image_file3 gs_image_file3_t;
 
 EXPORT void gs_image_file_init(gs_image_file_t *image, const char *file);
 EXPORT void gs_image_file_free(gs_image_file_t *image);
@@ -64,26 +70,36 @@ EXPORT void gs_image_file_update_texture(gs_image_file_t *image);
 
 EXPORT void gs_image_file2_init(gs_image_file2_t *if2, const char *file);
 
+EXPORT bool gs_image_file2_tick(gs_image_file2_t *if2,
+				uint64_t elapsed_time_ns);
+EXPORT void gs_image_file2_update_texture(gs_image_file2_t *if2);
+
+EXPORT void gs_image_file3_init(gs_image_file3_t *if3, const char *file,
+				enum gs_image_alpha_mode alpha_mode);
+
+EXPORT bool gs_image_file3_tick(gs_image_file3_t *if3,
+				uint64_t elapsed_time_ns);
+EXPORT void gs_image_file3_update_texture(gs_image_file3_t *if3);
+
 static void gs_image_file2_free(gs_image_file2_t *if2)
 {
 	gs_image_file_free(&if2->image);
 	if2->mem_usage = 0;
 }
 
-static inline void gs_image_file2_init_texture(gs_image_file2_t *if2)
+static void gs_image_file2_init_texture(gs_image_file2_t *if2)
 {
 	gs_image_file_init_texture(&if2->image);
 }
 
-static inline bool gs_image_file2_tick(gs_image_file2_t *if2,
-				       uint64_t elapsed_time_ns)
+static void gs_image_file3_free(gs_image_file3_t *if3)
 {
-	return gs_image_file_tick(&if2->image, elapsed_time_ns);
+	gs_image_file2_free(&if3->image2);
 }
 
-static inline void gs_image_file2_update_texture(gs_image_file2_t *if2)
+static void gs_image_file3_init_texture(gs_image_file3_t *if3)
 {
-	gs_image_file_update_texture(&if2->image);
+	gs_image_file2_init_texture(&if3->image2);
 }
 
 #ifdef __cplusplus

+ 187 - 0
libobs/graphics/srgb.h

@@ -0,0 +1,187 @@
+/******************************************************************************
+    Copyright (C) 2021 by Hugh Bailey <[email protected]>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#pragma once
+
+#include <math.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline float gs_srgb_nonlinear_to_linear(float u)
+{
+	return (u <= 0.04045f) ? (u / 12.92f)
+			       : powf((u + 0.055f) / 1.055f, 2.4f);
+}
+
+static inline float gs_srgb_linear_to_nonlinear(float u)
+{
+	return (u <= 0.0031308f) ? (12.92f * u)
+				 : ((1.055f * powf(u, 1.0f / 2.4f)) - 0.055f);
+}
+
+static inline float gs_u8_to_float(uint8_t u)
+{
+	return (float)u / 255.0f;
+}
+
+static inline void gs_u8x4_to_float4(float *f, const uint8_t *u)
+{
+	f[0] = gs_u8_to_float(u[0]);
+	f[1] = gs_u8_to_float(u[1]);
+	f[2] = gs_u8_to_float(u[2]);
+	f[3] = gs_u8_to_float(u[3]);
+}
+
+static inline uint8_t gs_float_to_u8(float f)
+{
+	return (uint8_t)(f * 255.0f + 0.5f);
+}
+
+static inline void gs_premultiply_float4(float *f)
+{
+	f[0] *= f[3];
+	f[1] *= f[3];
+	f[2] *= f[3];
+}
+
+static inline void gs_float3_to_u8x3(uint8_t *u, const float *f)
+{
+	u[0] = gs_float_to_u8(f[0]);
+	u[1] = gs_float_to_u8(f[1]);
+	u[2] = gs_float_to_u8(f[2]);
+}
+
+static inline void gs_float4_to_u8x4(uint8_t *u, const float *f)
+{
+	u[0] = gs_float_to_u8(f[0]);
+	u[1] = gs_float_to_u8(f[1]);
+	u[2] = gs_float_to_u8(f[2]);
+	u[3] = gs_float_to_u8(f[3]);
+}
+
+static inline void gs_float3_srgb_nonlinear_to_linear(float *f)
+{
+	f[0] = gs_srgb_nonlinear_to_linear(f[0]);
+	f[1] = gs_srgb_nonlinear_to_linear(f[1]);
+	f[2] = gs_srgb_nonlinear_to_linear(f[2]);
+}
+
+static inline void gs_float3_srgb_linear_to_nonlinear(float *f)
+{
+	f[0] = gs_srgb_linear_to_nonlinear(f[0]);
+	f[1] = gs_srgb_linear_to_nonlinear(f[1]);
+	f[2] = gs_srgb_linear_to_nonlinear(f[2]);
+}
+
+static inline void gs_premultiply_xyza(uint8_t *data)
+{
+	uint8_t u[4];
+	float f[4];
+	memcpy(&u, data, sizeof(u));
+	gs_u8x4_to_float4(f, u);
+	gs_premultiply_float4(f);
+	gs_float3_to_u8x3(u, f);
+	memcpy(data, &u, sizeof(u));
+}
+
+static inline void gs_premultiply_xyza_srgb(uint8_t *data)
+{
+	uint8_t u[4];
+	float f[4];
+	memcpy(&u, data, sizeof(u));
+	gs_u8x4_to_float4(f, u);
+	gs_float3_srgb_nonlinear_to_linear(f);
+	gs_premultiply_float4(f);
+	gs_float3_srgb_linear_to_nonlinear(f);
+	gs_float3_to_u8x3(u, f);
+	memcpy(data, &u, sizeof(u));
+}
+
+static inline void gs_premultiply_xyza_restrict(uint8_t *__restrict dst,
+						const uint8_t *__restrict src)
+{
+	uint8_t u[4];
+	float f[4];
+	memcpy(&u, src, sizeof(u));
+	gs_u8x4_to_float4(f, u);
+	gs_premultiply_float4(f);
+	gs_float3_to_u8x3(u, f);
+	memcpy(dst, &u, sizeof(u));
+}
+
+static inline void
+gs_premultiply_xyza_srgb_restrict(uint8_t *__restrict dst,
+				  const uint8_t *__restrict src)
+{
+	uint8_t u[4];
+	float f[4];
+	memcpy(&u, src, sizeof(u));
+	gs_u8x4_to_float4(f, u);
+	gs_float3_srgb_nonlinear_to_linear(f);
+	gs_premultiply_float4(f);
+	gs_float3_srgb_linear_to_nonlinear(f);
+	gs_float3_to_u8x3(u, f);
+	memcpy(dst, &u, sizeof(u));
+}
+
+static inline void gs_premultiply_xyza_loop(uint8_t *data, size_t texel_count)
+{
+	for (size_t i = 0; i < texel_count; ++i) {
+		gs_premultiply_xyza(data);
+		data += 4;
+	}
+}
+
+static inline void gs_premultiply_xyza_srgb_loop(uint8_t *data,
+						 size_t texel_count)
+{
+	for (size_t i = 0; i < texel_count; ++i) {
+		gs_premultiply_xyza_srgb(data);
+		data += 4;
+	}
+}
+
+static inline void
+gs_premultiply_xyza_loop_restrict(uint8_t *__restrict dst,
+				  const uint8_t *__restrict src,
+				  size_t texel_count)
+{
+	for (size_t i = 0; i < texel_count; ++i) {
+		gs_premultiply_xyza_restrict(dst, src);
+		dst += 4;
+		src += 4;
+	}
+}
+
+static inline void
+gs_premultiply_xyza_srgb_loop_restrict(uint8_t *__restrict dst,
+				       const uint8_t *__restrict src,
+				       size_t texel_count)
+{
+	for (size_t i = 0; i < texel_count; ++i) {
+		gs_premultiply_xyza_srgb_restrict(dst, src);
+		dst += 4;
+		src += 4;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif

+ 25 - 101
libobs/graphics/vec4.h

@@ -18,6 +18,7 @@
 #pragma once
 
 #include "math-defs.h"
+#include "srgb.h"
 
 #include "../util/sse-intrin.h"
 
@@ -198,127 +199,50 @@ static inline void vec4_ceil(struct vec4 *dst, const struct vec4 *v)
 
 static inline uint32_t vec4_to_rgba(const struct vec4 *src)
 {
+	float f[4];
+	memcpy(f, src->ptr, sizeof(f));
+	uint8_t u[4];
+	gs_float4_to_u8x4(u, f);
 	uint32_t val;
-	val = (uint32_t)((src->x * 255.0f) + 0.5f);
-	val |= (uint32_t)((src->y * 255.0f) + 0.5f) << 8;
-	val |= (uint32_t)((src->z * 255.0f) + 0.5f) << 16;
-	val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24;
+	memcpy(&val, u, sizeof(val));
 	return val;
 }
 
 static inline uint32_t vec4_to_bgra(const struct vec4 *src)
 {
+	float f[4];
+	memcpy(f, src->ptr, sizeof(f));
+	uint8_t u[4];
+	gs_float4_to_u8x4(u, f);
+	uint8_t temp = u[0];
+	u[0] = u[2];
+	u[2] = temp;
 	uint32_t val;
-	val = (uint32_t)((src->z * 255.0f) + 0.5f);
-	val |= (uint32_t)((src->y * 255.0f) + 0.5f) << 8;
-	val |= (uint32_t)((src->x * 255.0f) + 0.5f) << 16;
-	val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24;
+	memcpy(&val, u, sizeof(val));
 	return val;
 }
 
 static inline void vec4_from_rgba(struct vec4 *dst, uint32_t rgba)
 {
-	dst->x = (float)(rgba & 0xFF) / 255.0f;
-	rgba >>= 8;
-	dst->y = (float)(rgba & 0xFF) / 255.0f;
-	rgba >>= 8;
-	dst->z = (float)(rgba & 0xFF) / 255.0f;
-	rgba >>= 8;
-	dst->w = (float)rgba / 255.0f;
+	uint8_t u[4];
+	memcpy(u, &rgba, sizeof(u));
+	gs_u8x4_to_float4(dst->ptr, u);
 }
 
 static inline void vec4_from_bgra(struct vec4 *dst, uint32_t bgra)
 {
-	dst->z = (float)(bgra & 0xFF) / 255.0f;
-	bgra >>= 8;
-	dst->y = (float)(bgra & 0xFF) / 255.0f;
-	bgra >>= 8;
-	dst->x = (float)(bgra & 0xFF) / 255.0f;
-	bgra >>= 8;
-	dst->w = (float)bgra / 255.0f;
-}
-
-static inline float srgb_nonlinear_to_linear(float u)
-{
-	return (u <= 0.04045f) ? (u / 12.92f)
-			       : powf((u + 0.055f) / 1.055f, 2.4f);
+	uint8_t u[4];
+	memcpy(u, &bgra, sizeof(u));
+	uint8_t temp = u[0];
+	u[0] = u[2];
+	u[2] = temp;
+	gs_u8x4_to_float4(dst->ptr, u);
 }
 
 static inline void vec4_from_rgba_srgb(struct vec4 *dst, uint32_t rgba)
 {
-	dst->x = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f);
-	rgba >>= 8;
-	dst->y = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f);
-	rgba >>= 8;
-	dst->z = srgb_nonlinear_to_linear((float)(rgba & 0xFF) / 255.0f);
-	rgba >>= 8;
-	dst->w = (float)rgba / 255.0f;
-}
-
-static inline void vec4_from_bgra_srgb(struct vec4 *dst, uint32_t bgra)
-{
-	dst->z = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f);
-	bgra >>= 8;
-	dst->y = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f);
-	bgra >>= 8;
-	dst->x = srgb_nonlinear_to_linear((float)(bgra & 0xFF) / 255.0f);
-	bgra >>= 8;
-	dst->w = (float)bgra / 255.0f;
-}
-
-static inline void vec4_from_rgba_srgb_premultiply(struct vec4 *dst,
-						   uint32_t rgba)
-{
-	vec4_from_rgba_srgb(dst, rgba);
-	dst->x *= dst->w;
-	dst->y *= dst->w;
-	dst->z *= dst->w;
-}
-
-static inline void vec4_from_bgra_srgb_premultiply(struct vec4 *dst,
-						   uint32_t bgra)
-{
-	vec4_from_bgra_srgb(dst, bgra);
-	dst->x *= dst->w;
-	dst->y *= dst->w;
-	dst->z *= dst->w;
-}
-
-static inline float srgb_linear_to_nonlinear(float u)
-{
-	return (u <= 0.0031308f) ? (12.92f * u)
-				 : ((1.055f * powf(u, 1.0f / 2.4f)) - 0.055f);
-}
-
-static inline uint32_t vec4_to_rgba_srgb(const struct vec4 *src)
-{
-	uint32_t val;
-	val = (uint32_t)((srgb_linear_to_nonlinear(src->x) * 255.0f) + 0.5f);
-	val |= (uint32_t)((srgb_linear_to_nonlinear(src->y) * 255.0f) + 0.5f)
-	       << 8;
-	val |= (uint32_t)((srgb_linear_to_nonlinear(src->z) * 255.0f) + 0.5f)
-	       << 16;
-	val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24;
-	return val;
-}
-
-static inline uint32_t vec4_to_bgra_srgb(const struct vec4 *src)
-{
-	uint32_t val;
-	val = (uint32_t)((srgb_linear_to_nonlinear(src->z) * 255.0f) + 0.5f);
-	val |= (uint32_t)((srgb_linear_to_nonlinear(src->y) * 255.0f) + 0.5f)
-	       << 8;
-	val |= (uint32_t)((srgb_linear_to_nonlinear(src->x) * 255.0f) + 0.5f)
-	       << 16;
-	val |= (uint32_t)((src->w * 255.0f) + 0.5f) << 24;
-	return val;
-}
-
-static inline void vec4_srgb_linear_to_nonlinear(struct vec4 *dst)
-{
-	dst->x = srgb_linear_to_nonlinear(dst->x);
-	dst->y = srgb_linear_to_nonlinear(dst->y);
-	dst->y = srgb_linear_to_nonlinear(dst->y);
+	vec4_from_rgba(dst, rgba);
+	gs_float3_srgb_nonlinear_to_linear(dst->ptr);
 }
 
 EXPORT void vec4_transform(struct vec4 *dst, const struct vec4 *v,

+ 1 - 1
libobs/obs-source.c

@@ -3684,7 +3684,7 @@ static inline bool can_bypass(obs_source_t *target, obs_source_t *parent,
 	       ((parent_flags & OBS_SOURCE_CUSTOM_DRAW) == 0) &&
 	       ((parent_flags & OBS_SOURCE_ASYNC) == 0) &&
 	       (((filter_flags & OBS_SOURCE_SRGB) == 0) ||
-		((parent_flags & OBS_SOURCE_SRGB) == 0));
+		((parent_flags & OBS_SOURCE_SRGB) != 0));
 }
 
 bool obs_source_process_filter_begin(obs_source_t *filter,

+ 27 - 38
plugins/image-source/image-source.c

@@ -23,7 +23,7 @@ struct image_source {
 	uint64_t last_time;
 	bool active;
 
-	gs_image_file2_t if2;
+	gs_image_file3_t if3;
 };
 
 static time_t get_modified_timestamp(const char *filename)
@@ -45,20 +45,23 @@ static void image_source_load(struct image_source *context)
 	char *file = context->file;
 
 	obs_enter_graphics();
-	gs_image_file2_free(&context->if2);
+	gs_image_file3_free(&context->if3);
 	obs_leave_graphics();
 
 	if (file && *file) {
 		debug("loading texture '%s'", file);
 		context->file_timestamp = get_modified_timestamp(file);
-		gs_image_file2_init(&context->if2, file);
+		gs_image_file3_init(&context->if3, file,
+				    context->linear_alpha
+					    ? GS_IMAGE_ALPHA_PREMULTIPLY_SRGB
+					    : GS_IMAGE_ALPHA_PREMULTIPLY);
 		context->update_time_elapsed = 0;
 
 		obs_enter_graphics();
-		gs_image_file2_init_texture(&context->if2);
+		gs_image_file3_init_texture(&context->if3);
 		obs_leave_graphics();
 
-		if (!context->if2.image.loaded)
+		if (!context->if3.image2.image.loaded)
 			warn("failed to load texture '%s'", file);
 	}
 }
@@ -66,7 +69,7 @@ static void image_source_load(struct image_source *context)
 static void image_source_unload(struct image_source *context)
 {
 	obs_enter_graphics();
-	gs_image_file2_free(&context->if2);
+	gs_image_file3_free(&context->if3);
 	obs_leave_graphics();
 }
 
@@ -135,28 +138,22 @@ static void image_source_destroy(void *data)
 static uint32_t image_source_getwidth(void *data)
 {
 	struct image_source *context = data;
-	return context->if2.image.cx;
+	return context->if3.image2.image.cx;
 }
 
 static uint32_t image_source_getheight(void *data)
 {
 	struct image_source *context = data;
-	return context->if2.image.cy;
+	return context->if3.image2.image.cy;
 }
 
 static void image_source_render(void *data, gs_effect_t *effect)
 {
 	struct image_source *context = data;
 
-	if (!context->if2.image.texture)
+	if (!context->if3.image2.image.texture)
 		return;
 
-	const char *tech_name = context->linear_alpha ? "DrawAlphaBlend"
-						      : "DrawNonlinearAlpha";
-
-	effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
-	gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
-
 	const bool previous = gs_framebuffer_srgb_enabled();
 	gs_enable_framebuffer_srgb(true);
 
@@ -164,18 +161,11 @@ static void image_source_render(void *data, gs_effect_t *effect)
 	gs_blend_function(GS_BLEND_ONE, GS_BLEND_INVSRCALPHA);
 
 	gs_eparam_t *const param = gs_effect_get_param_by_name(effect, "image");
-	gs_effect_set_texture_srgb(param, context->if2.image.texture);
-
-	size_t passes = gs_technique_begin(tech);
-	for (size_t i = 0; i < passes; i++) {
-		gs_technique_begin_pass(tech, i);
+	gs_effect_set_texture_srgb(param, context->if3.image2.image.texture);
 
-		gs_draw_sprite(context->if2.image.texture, 0,
-			       context->if2.image.cx, context->if2.image.cy);
-
-		gs_technique_end_pass(tech);
-	}
-	gs_technique_end(tech);
+	gs_draw_sprite(context->if3.image2.image.texture, 0,
+		       context->if3.image2.image.cx,
+		       context->if3.image2.image.cy);
 
 	gs_blend_state_pop();
 
@@ -202,20 +192,20 @@ static void image_source_tick(void *data, float seconds)
 
 	if (obs_source_active(context->source)) {
 		if (!context->active) {
-			if (context->if2.image.is_animated_gif)
+			if (context->if3.image2.image.is_animated_gif)
 				context->last_time = frame_time;
 			context->active = true;
 		}
 
 	} else {
 		if (context->active) {
-			if (context->if2.image.is_animated_gif) {
-				context->if2.image.cur_frame = 0;
-				context->if2.image.cur_loop = 0;
-				context->if2.image.cur_time = 0;
+			if (context->if3.image2.image.is_animated_gif) {
+				context->if3.image2.image.cur_frame = 0;
+				context->if3.image2.image.cur_loop = 0;
+				context->if3.image2.image.cur_time = 0;
 
 				obs_enter_graphics();
-				gs_image_file2_update_texture(&context->if2);
+				gs_image_file3_update_texture(&context->if3);
 				obs_leave_graphics();
 			}
 
@@ -225,13 +215,13 @@ static void image_source_tick(void *data, float seconds)
 		return;
 	}
 
-	if (context->last_time && context->if2.image.is_animated_gif) {
+	if (context->last_time && context->if3.image2.image.is_animated_gif) {
 		uint64_t elapsed = frame_time - context->last_time;
-		bool updated = gs_image_file2_tick(&context->if2, elapsed);
+		bool updated = gs_image_file3_tick(&context->if3, elapsed);
 
 		if (updated) {
 			obs_enter_graphics();
-			gs_image_file2_update_texture(&context->if2);
+			gs_image_file3_update_texture(&context->if3);
 			obs_leave_graphics();
 		}
 	}
@@ -281,7 +271,7 @@ static obs_properties_t *image_source_properties(void *data)
 uint64_t image_source_get_memory_usage(void *data)
 {
 	struct image_source *s = data;
-	return s->if2.mem_usage;
+	return s->if3.image2.mem_usage;
 }
 
 static void missing_file_callback(void *src, const char *new_path, void *data)
@@ -318,8 +308,7 @@ static obs_missing_files_t *image_source_missingfiles(void *data)
 static struct obs_source_info image_source_info = {
 	.id = "image_source",
 	.type = OBS_SOURCE_TYPE_INPUT,
-	.output_flags = OBS_SOURCE_VIDEO | OBS_SOURCE_CUSTOM_DRAW |
-			OBS_SOURCE_SRGB,
+	.output_flags = OBS_SOURCE_VIDEO | OBS_SOURCE_SRGB,
 	.get_name = image_source_get_name,
 	.create = image_source_create,
 	.destroy = image_source_destroy,