Browse Source

Merge pull request #2551 from jpark37/cube-lut-enhance

Implement CUBE LUT domain properly, tetrahedral interpolation for 3D LUTs
Jim 5 years ago
parent
commit
04e6a39de9

+ 3 - 1
libobs-opengl/gl-shaderparser.c

@@ -427,7 +427,9 @@ static bool gl_write_texture_code(struct gl_shader_parser *glsp,
 	else if (cf_token_is(cfp, "Load")) {
 		written = gl_write_texture_call(glsp, var, "texelFetch", false);
 		dstr_cat(&glsp->gl_string, "(");
-		function_end = ").xy, 0)";
+		function_end = (strcmp(var->type, "texture3d") == 0)
+				       ? ").xyz, 0)"
+				       : ").xy, 0)";
 	}
 
 	if (!written)

+ 102 - 61
plugins/obs-filters/color-grade-filter.c

@@ -16,6 +16,11 @@
 
 static const uint32_t LUT_WIDTH = 64;
 
+enum clut_dimension {
+	CLUT_1D,
+	CLUT_3D,
+};
+
 struct lut_filter_data {
 	obs_source_t *context;
 	gs_effect_t *effect;
@@ -28,8 +33,11 @@ struct lut_filter_data {
 
 	char *file;
 	float clut_amount;
-	float clut_scale;
-	float clut_offset;
+	enum clut_dimension clut_dim;
+	struct vec3 clut_scale;
+	struct vec3 clut_offset;
+	struct vec3 domain_min;
+	struct vec3 domain_max;
 };
 
 static const char *color_grade_filter_get_name(void *unused)
@@ -109,6 +117,7 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
 		4 * width * width * width * sizeof(struct half);
 	struct half *values = bmalloc(data_size);
 
+	size_t offset = 0;
 	bool data_found = true;
 	for (uint32_t index = 0; index < width; ++index) {
 		if (!data_found) {
@@ -117,38 +126,10 @@ static void *load_1d_lut(FILE *const file, const uint32_t width, float red,
 			break;
 		}
 
-		for (uint32_t z = 0; z < width; ++z) {
-			const uint32_t z_offset = z * width * width;
-			for (uint32_t y = 0; y < width; ++y) {
-				const uint32_t y_offset = y * width;
-				const uint32_t offset =
-					4 * (index + y_offset + z_offset);
-				values[offset] = half_from_float(red);
-				values[offset + 3] =
-					half_from_bits(0x3C00); // 1.0
-			}
-		}
-
-		for (uint32_t z = 0; z < width; ++z) {
-			const uint32_t z_offset = z * width * width;
-			for (uint32_t x = 0; x < width; ++x) {
-				const uint32_t offset =
-					4 * (x + (index * width) + z_offset) +
-					1;
-				values[offset] = half_from_float(green);
-			}
-		}
-
-		for (uint32_t y = 0; y < width; ++y) {
-			const uint32_t y_offset = y * width;
-			for (uint32_t x = 0; x < width; ++x) {
-				const uint32_t offset =
-					4 * (x + y_offset +
-					     (index * width * width)) +
-					2;
-				values[offset] = half_from_float(blue);
-			}
-		}
+		values[offset++] = half_from_float(red);
+		values[offset++] = half_from_float(green);
+		values[offset++] = half_from_float(blue);
+		values[offset++] = half_from_bits(0x3c00); // 1.0
 
 		data_found = get_cube_entry(file, &red, &green, &blue);
 	}
@@ -189,14 +170,14 @@ static void *load_3d_lut(FILE *const file, const uint32_t width, float red,
 	return values;
 }
 
-static void *load_cube_file(const char *const path, uint32_t *const width)
+static void *load_cube_file(const char *const path, uint32_t *const width,
+			    struct vec3 *domain_min, struct vec3 *domain_max,
+			    enum clut_dimension *dim)
 {
 	void *data = NULL;
 
 	FILE *const file = os_fopen(path, "rb");
 	if (file) {
-		float min_value[] = {0.0f, 0.0f, 0.0f};
-		float max_value[] = {1.0f, 1.0f, 1.0f};
 		float red, green, blue;
 		unsigned width_1d = 0;
 		unsigned width_3d = 0;
@@ -214,14 +195,10 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
 				break;
 			} else if (sscanf(line, "DOMAIN_MIN %f %f %f", &f[0],
 					  &f[1], &f[2]) == 3) {
-				min_value[0] = f[0];
-				min_value[1] = f[1];
-				min_value[2] = f[2];
+				vec3_set(domain_min, f[0], f[1], f[2]);
 			} else if (sscanf(line, "DOMAIN_MAX %f %f %f", &f[0],
 					  &f[1], &f[2]) == 3) {
-				max_value[0] = f[0];
-				max_value[1] = f[1];
-				max_value[2] = f[2];
+				vec3_set(domain_max, f[0], f[1], f[2]);
 			} else if (sscanf(line, "LUT_1D_SIZE %u", &u) == 1) {
 				width_1d = u;
 			} else if (sscanf(line, "LUT_3D_SIZE %u", &u) == 1) {
@@ -229,17 +206,28 @@ static void *load_cube_file(const char *const path, uint32_t *const width)
 			}
 		}
 
-		if (data_found) {
+		if (domain_min->x >= domain_max->x ||
+		    domain_min->y >= domain_max->y ||
+		    domain_min->z >= domain_max->z) {
+			blog(LOG_WARNING,
+			     "Invalid CUBE LUT domain: [%f, %f], [%f, %f], [%f, %f]",
+			     domain_min->x, domain_max->x, domain_min->y,
+			     domain_max->y, domain_min->z, domain_max->z);
+		} else if (data_found) {
 			if (width_1d > 0) {
 				data = load_1d_lut(file, width_1d, red, green,
 						   blue);
-				if (data)
+				if (data) {
 					*width = width_1d;
+					*dim = CLUT_1D;
+				}
 			} else if (width_3d > 0) {
 				data = load_3d_lut(file, width_3d, red, green,
 						   blue);
-				if (data)
+				if (data) {
 					*width = width_3d;
+					*dim = CLUT_3D;
+				}
 			}
 		}
 
@@ -276,12 +264,18 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 	obs_leave_graphics();
 
 	if (path) {
+		vec3_set(&filter->domain_min, 0.0f, 0.0f, 0.0f);
+		vec3_set(&filter->domain_max, 1.0f, 1.0f, 1.0f);
+
 		const char *const ext = os_get_path_extension(path);
 		if (ext && astrcmpi(ext, ".cube") == 0) {
-			filter->cube_data =
-				load_cube_file(path, &filter->cube_width);
+			filter->cube_data = load_cube_file(
+				path, &filter->cube_width, &filter->domain_min,
+				&filter->domain_max, &filter->clut_dim);
 		} else {
 			gs_image_file_init(&filter->image, path);
+			filter->cube_width = LUT_WIDTH;
+			filter->clut_dim = CLUT_3D;
 		}
 	}
 
@@ -292,16 +286,48 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 			filter->target = make_clut_texture_png(
 				filter->image.format, filter->image.cx,
 				filter->image.cy, filter->image.texture_data);
-			filter->clut_scale =
-				(float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
-			filter->clut_offset = 0.5f / (float)LUT_WIDTH;
+			const float clut_scale = (float)(LUT_WIDTH - 1);
+			vec3_set(&filter->clut_scale, clut_scale, clut_scale,
+				 clut_scale);
+			vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f);
 		} else if (filter->cube_data) {
 			const uint32_t width = filter->cube_width;
-			filter->target = gs_voltexture_create(
-				width, width, width, GS_RGBA16F, 1,
-				(uint8_t **)&filter->cube_data, 0);
-			filter->clut_scale = (float)(width - 1) / (float)width;
-			filter->clut_offset = 0.5f / (float)width;
+			if (filter->clut_dim == CLUT_1D) {
+				filter->target = gs_texture_create(
+					width, 1, GS_RGBA16F, 1,
+					(const uint8_t **)&filter->cube_data,
+					0);
+			} else {
+				filter->target = gs_voltexture_create(
+					width, width, width, GS_RGBA16F, 1,
+					(const uint8_t **)&filter->cube_data,
+					0);
+			}
+
+			struct vec3 domain_scale;
+			vec3_sub(&domain_scale, &filter->domain_max,
+				 &filter->domain_min);
+
+			const float width_minus_one = (float)(width - 1);
+			vec3_set(&filter->clut_scale, width_minus_one,
+				 width_minus_one, width_minus_one);
+			vec3_div(&filter->clut_scale, &filter->clut_scale,
+				 &domain_scale);
+
+			vec3_neg(&filter->clut_offset, &filter->domain_min);
+			vec3_mul(&filter->clut_offset, &filter->clut_offset,
+				 &filter->clut_scale);
+
+			/* 1D shader wants normalized UVW */
+			if (filter->clut_dim == CLUT_1D) {
+				vec3_divf(&filter->clut_scale,
+					  &filter->clut_scale, (float)width);
+
+				vec3_addf(&filter->clut_offset,
+					  &filter->clut_offset, 0.5f);
+				vec3_divf(&filter->clut_offset,
+					  &filter->clut_offset, (float)width);
+			}
 		}
 	}
 
@@ -398,21 +424,36 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
 					     OBS_ALLOW_DIRECT_RENDERING))
 		return;
 
-	param = gs_effect_get_param_by_name(filter->effect, "clut");
+	const char *clut_texture_name = "clut_3d";
+	const char *tech_name = "Draw3D";
+	if (filter->clut_dim == CLUT_1D) {
+		clut_texture_name = "clut_1d";
+		tech_name = "Draw1D";
+	}
+
+	param = gs_effect_get_param_by_name(filter->effect, clut_texture_name);
 	gs_effect_set_texture(param, filter->target);
 
 	param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
 	gs_effect_set_float(param, filter->clut_amount);
 
 	param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
-	gs_effect_set_float(param, filter->clut_scale);
+	gs_effect_set_vec3(param, &filter->clut_scale);
 
 	param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
-	gs_effect_set_float(param, filter->clut_offset);
+	gs_effect_set_vec3(param, &filter->clut_offset);
+
+	param = gs_effect_get_param_by_name(filter->effect, "domain_min");
+	gs_effect_set_vec3(param, &filter->domain_min);
+
+	param = gs_effect_get_param_by_name(filter->effect, "domain_max");
+	gs_effect_set_vec3(param, &filter->domain_max);
 
-	obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
+	param = gs_effect_get_param_by_name(filter->effect, "cube_width_i");
+	gs_effect_set_float(param, 1.0f / filter->cube_width);
 
-	UNUSED_PARAMETER(effect);
+	obs_source_process_filter_tech_end(filter->context, filter->effect, 0,
+					   0, tech_name);
 }
 
 struct obs_source_info color_grade_filter = {

+ 125 - 10
plugins/obs-filters/data/color_grade_filter.effect

@@ -1,10 +1,14 @@
 uniform float4x4 ViewProj;
 uniform texture2d image;
 
-uniform texture3d clut;
+uniform texture2d clut_1d;
+uniform texture3d clut_3d;
 uniform float clut_amount;
-uniform float clut_scale;
-uniform float clut_offset;
+uniform float3 clut_scale;
+uniform float3 clut_offset;
+uniform float3 domain_min;
+uniform float3 domain_max;
+uniform float cube_width_i;
 
 sampler_state textureSampler {
 	Filter    = Linear;
@@ -31,22 +35,133 @@ VertDataOut VSDefault(VertDataIn v_in)
 	return vert_out;
 }
 
-float4 LUT(VertDataOut v_in) : TARGET
+float4 LUT1D(VertDataOut v_in) : TARGET
 {
 	float4 textureColor = image.Sample(textureSampler, v_in.uv);
 
-	float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset;
-	float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb;
+	if (textureColor.r >= domain_min.r && textureColor.r <= domain_max.r) {
+		float u = textureColor.r * clut_scale.r + clut_offset.r;
+		float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).r;
+		textureColor.r = lerp(textureColor.r, channel, clut_amount);
+	}
+
+	if (textureColor.g >= domain_min.g && textureColor.g <= domain_max.g) {
+		float u = textureColor.g * clut_scale.g + clut_offset.g;
+		float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).g;
+		textureColor.g = lerp(textureColor.g, channel, clut_amount);
+	}
+
+	if (textureColor.b >= domain_min.b && textureColor.b <= domain_max.b) {
+		float u = textureColor.b * clut_scale.b + clut_offset.b;
+		float channel = clut_1d.Sample(textureSampler, float2(u, 0.5)).b;
+		textureColor.b = lerp(textureColor.b, channel, clut_amount);
+	}
+
+	return textureColor;
+}
+
+float4 LUT3D(VertDataOut v_in) : TARGET
+{
+	float4 textureColor = image.Sample(textureSampler, v_in.uv);
+	float r = textureColor.r;
+	float g = textureColor.g;
+	float b = textureColor.b;
+	if (r >= domain_min.r && r <= domain_max.r &&
+		g >= domain_min.g && g <= domain_max.g &&
+		b >= domain_min.b && b <= domain_max.b)
+	{
+		float3 clut_pos = textureColor.rgb * clut_scale + clut_offset;
+		float3 floor_pos = floor(clut_pos);
+
+		float3 fracRGB = clut_pos - floor_pos;
+
+		float3 uvw0 = (floor_pos + 0.5) * cube_width_i;
+		float3 uvw3 = (floor_pos + 1.5) * cube_width_i;
 
-	float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount);
-	return float4(final_color.rgb, textureColor.a);
+		float fracL, fracM, fracS;
+		float3 uvw1, uvw2;
+		if (fracRGB.r < fracRGB.g) {
+			if (fracRGB.r < fracRGB.b) {
+				if (fracRGB.g < fracRGB.b) {
+					// f(R) < f(G) < f(B)
+					fracL = fracRGB.b;
+					fracM = fracRGB.g;
+					fracS = fracRGB.r;
+					uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
+					uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
+				} else {
+					// f(R) < f(B) <= f(G)
+					fracL = fracRGB.g;
+					fracM = fracRGB.b;
+					fracS = fracRGB.r;
+					uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
+					uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
+				}
+			} else {
+				// f(B) <= f(R) < f(G)
+				fracL = fracRGB.g;
+				fracM = fracRGB.r;
+				fracS = fracRGB.b;
+				uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
+				uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
+			}
+		} else if (fracRGB.r < fracRGB.b) {
+			// f(G) <= f(R) < f(B)
+			fracL = fracRGB.b;
+			fracM = fracRGB.r;
+			fracS = fracRGB.g;
+			uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
+			uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
+		} else if (fracRGB.g < fracRGB.b) {
+			// f(G) < f(B) <= f(R)
+			fracL = fracRGB.r;
+			fracM = fracRGB.b;
+			fracS = fracRGB.g;
+			uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
+			uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
+		} else {
+			// f(B) <= f(G) <= f(R)
+			fracL = fracRGB.r;
+			fracM = fracRGB.g;
+			fracS = fracRGB.b;
+			uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
+			uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
+		}
+
+		/* use filtering to collapse 4 taps to 2 */
+		/* use max to kill potential zero-divide NaN */
+
+		float coeff01 = (1.0 - fracM);
+		float weight01 = max((fracL - fracM) / coeff01, 0.0);
+		float3 uvw01 = lerp(uvw0, uvw1, weight01);
+		float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb;
+
+		float coeff23 = fracM;
+		float weight23 = max(fracS / coeff23, 0.0);
+		float3 uvw23 = lerp(uvw2, uvw3, weight23);
+		float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb;
+
+		float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23);
+		textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount);
+	}
+
+	return textureColor;
+}
+
+technique Draw1D
+{
+	pass
+	{
+		vertex_shader = VSDefault(v_in);
+		pixel_shader  = LUT1D(v_in);
+	}
 }
 
-technique Draw
+technique Draw3D
 {
 	pass
 	{
 		vertex_shader = VSDefault(v_in);
-		pixel_shader  = LUT(v_in);
+		pixel_shader  = LUT3D(v_in);
 	}
 }