Browse Source

obs-filters: Use volume texture for LUT

Simplifies shader calculations. Not much perf gain, but GPA shows
bottleneck shift from Shader Execution to Sampler as expected.
jpark37 6 years ago
parent
commit
e4d8eaa1bc

+ 68 - 2
plugins/obs-filters/color-grade-filter.c

@@ -12,6 +12,8 @@
 
 /* clang-format on */
 
+static const uint32_t LUT_WIDTH = 64;
+
 struct lut_filter_data {
 	obs_source_t *context;
 	gs_effect_t *effect;
@@ -20,6 +22,8 @@ struct lut_filter_data {
 
 	char *file;
 	float clut_amount;
+	float clut_scale;
+	float clut_offset;
 };
 
 static const char *color_grade_filter_get_name(void *unused)
@@ -28,6 +32,54 @@ static const char *color_grade_filter_get_name(void *unused)
 	return obs_module_text("ColorGradeFilter");
 }
 
+/*
+ * Repacks a 2D CLUT image into a LUT_WIDTH^3 volume texture.
+ *
+ * The image is treated as a grid of LUT_WIDTH x LUT_WIDTH tiles laid out in
+ * row-major order; tile number z becomes depth slice z of the volume.
+ *
+ * format        pixel format of `data`; must use a whole, non-zero number
+ *               of bytes per pixel
+ * image_width   image width in pixels; must be a multiple of LUT_WIDTH
+ * image_height  image height in pixels; must be a multiple of LUT_WIDTH
+ * data          tightly packed pixels, image_width * image_height entries
+ *
+ * image_width * image_height must equal LUT_WIDTH^3. Returns NULL when the
+ * input is rejected; otherwise returns whatever gs_voltexture_create()
+ * returns. The temporary repacking buffer is always freed before returning.
+ */
+static gs_texture_t *make_clut_texture(const enum gs_color_format format,
+				       const uint32_t image_width,
+				       const uint32_t image_height,
+				       const uint8_t *data)
+{
+	if (!data)
+		return NULL;
+
+	if (image_width % LUT_WIDTH != 0 || image_height % LUT_WIDTH != 0)
+		return NULL;
+
+	/* Compare in 64 bits so an oversized image cannot wrap around and
+	 * slip past the size check. */
+	const uint32_t pixel_count = LUT_WIDTH * LUT_WIDTH * LUT_WIDTH;
+	if ((uint64_t)image_width * image_height != pixel_count)
+		return NULL;
+
+	const uint32_t bpp = gs_get_format_bpp(format);
+	if (bpp == 0 || bpp % 8 != 0)
+		return NULL;
+
+	const size_t pixel_size = bpp / 8;
+	const size_t row_size = pixel_size * LUT_WIDTH;
+	uint8_t *const buffer = bmalloc(pixel_size * pixel_count);
+
+	/* Number of tiles per image row. Both the column and the row of
+	 * tile z are derived from this count; dividing by the number of
+	 * tile rows instead only works for a square grid (8x8 tiles in a
+	 * 512x512 image) and reads the wrong pixels for any other layout. */
+	const uint32_t macro_width = image_width / LUT_WIDTH;
+	uint8_t *cursor = buffer;
+	for (uint32_t z = 0; z < LUT_WIDTH; ++z) {
+		const uint32_t z_x = (z % macro_width) * LUT_WIDTH;
+		const uint32_t z_y = (z / macro_width) * LUT_WIDTH;
+		for (uint32_t y = 0; y < LUT_WIDTH; ++y) {
+			/* A tile row is contiguous in the source image, so
+			 * it can be copied with a single call. */
+			const size_t index =
+				(size_t)image_width * (z_y + y) + z_x;
+			memcpy(cursor, &data[pixel_size * index], row_size);
+			cursor += row_size;
+		}
+	}
+
+	gs_texture_t *const texture =
+		gs_voltexture_create(LUT_WIDTH, LUT_WIDTH, LUT_WIDTH, format, 1,
+				     (const uint8_t **)&buffer, 0);
+	bfree(buffer);
+
+	return texture;
+}
+
 static void color_grade_filter_update(void *data, obs_data_t *settings)
 {
 	struct lut_filter_data *filter = data;
@@ -49,10 +101,17 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 
 	obs_enter_graphics();
 
-	gs_image_file_init_texture(&filter->image);
+	gs_voltexture_destroy(filter->target);
+	if (filter->image.loaded) {
+		filter->target = make_clut_texture(filter->image.format,
+						   filter->image.cx,
+						   filter->image.cy,
+						   filter->image.texture_data);
+	}
 
-	filter->target = filter->image.texture;
 	filter->clut_amount = (float)clut_amount;
+	filter->clut_scale = (float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
+	filter->clut_offset = 0.5f / (float)LUT_WIDTH;
 
 	char *effect_path = obs_module_file("color_grade_filter.effect");
 	gs_effect_destroy(filter->effect);
@@ -121,6 +180,7 @@ static void color_grade_filter_destroy(void *data)
 
 	obs_enter_graphics();
 	gs_effect_destroy(filter->effect);
+	gs_voltexture_destroy(filter->target);
 	gs_image_file_free(&filter->image);
 	obs_leave_graphics();
 
@@ -149,6 +209,12 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
 	param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
 	gs_effect_set_float(param, filter->clut_amount);
 
+	param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
+	gs_effect_set_float(param, filter->clut_scale);
+
+	param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
+	gs_effect_set_float(param, filter->clut_offset);
+
 	obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
 
 	UNUSED_PARAMETER(effect);

+ 7 - 22
plugins/obs-filters/data/color_grade_filter.effect

@@ -1,13 +1,16 @@
 uniform float4x4 ViewProj;
 uniform texture2d image;
 
-uniform texture2d clut;
+uniform texture3d clut;
 uniform float clut_amount;
+uniform float clut_scale;
+uniform float clut_offset;
 
 sampler_state textureSampler {
 	Filter    = Linear;
 	AddressU  = Clamp;
 	AddressV  = Clamp;
+	AddressW  = Clamp;
 };
 
 struct VertDataIn {
@@ -31,29 +34,11 @@ VertDataOut VSDefault(VertDataIn v_in)
 float4 LUT(VertDataOut v_in) : TARGET
 {
 	float4 textureColor = image.Sample(textureSampler, v_in.uv);
-	float blueColor = textureColor.b * 63.0;
 
-	float2 quad1;
-	quad1.y = floor(floor(blueColor) / 8.0);
-	quad1.x = floor(blueColor) - (quad1.y * 8.0);
+	float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset; // source RGB, scaled and offset by the host-supplied uniforms, used as the 3D LUT coordinate
+	float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb; // one volume-texture sample; replaces the two 2D samples and manual lerp on the removed lines
 
-	float2 quad2;
-	quad2.y = floor(ceil(blueColor) / 8.0);
-	quad2.x = ceil(blueColor) - (quad2.y * 8.0);
-
-	float2 texPos1;
-	texPos1.x = (quad1.x * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.r);
-	texPos1.y = (quad1.y * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.g);
-
-	float2 texPos2;
-	texPos2.x = (quad2.x * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.r);
-	texPos2.y = (quad2.y * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.g);
-
-	float4 newColor1 = clut.Sample(textureSampler, texPos1);
-	float4 newColor2 = clut.Sample(textureSampler, texPos2);
-	float4 luttedColor = lerp(newColor1, newColor2, frac(blueColor));
-
-	float4 final_color = lerp(textureColor, luttedColor, clut_amount);
+	float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount); // blend original and graded color by clut_amount; the source alpha is returned unchanged below
 	return float4(final_color.rgb, textureColor.a);
 }