浏览代码

obs-filters: 3D LUT tetrahedral interpolation

As requested by Adobe CUBE spec.
jpark37 5 年之前
父节点
当前提交
34d70c6832
共有 2 个文件被更改,包括 110 次插入和 33 次删除
  1. 31 27
      plugins/obs-filters/color-grade-filter.c
  2. 79 6
      plugins/obs-filters/data/color_grade_filter.effect

+ 31 - 27
plugins/obs-filters/color-grade-filter.c

@@ -274,6 +274,7 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 				&filter->domain_max, &filter->clut_dim);
 		} else {
 			gs_image_file_init(&filter->image, path);
+			filter->cube_width = LUT_WIDTH;
 			filter->clut_dim = CLUT_3D;
 		}
 	}
@@ -285,48 +286,48 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 			filter->target = make_clut_texture_png(
 				filter->image.format, filter->image.cx,
 				filter->image.cy, filter->image.texture_data);
-			const float clut_scale =
-				(float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
+			const float clut_scale = (float)(LUT_WIDTH - 1);
 			vec3_set(&filter->clut_scale, clut_scale, clut_scale,
 				 clut_scale);
-			const float clut_offset = 0.5f / (float)LUT_WIDTH;
-			vec3_set(&filter->clut_offset, clut_offset, clut_offset,
-				 clut_offset);
+			vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f);
 		} else if (filter->cube_data) {
 			const uint32_t width = filter->cube_width;
 			if (filter->clut_dim == CLUT_1D) {
 				filter->target = gs_texture_create(
 					width, 1, GS_RGBA16F, 1,
-					(uint8_t **)&filter->cube_data, 0);
+					(const uint8_t **)&filter->cube_data,
+					0);
 			} else {
 				filter->target = gs_voltexture_create(
 					width, width, width, GS_RGBA16F, 1,
-					(uint8_t **)&filter->cube_data, 0);
+					(const uint8_t **)&filter->cube_data,
+					0);
 			}
 
 			struct vec3 domain_scale;
 			vec3_sub(&domain_scale, &filter->domain_max,
 				 &filter->domain_min);
-			const float clut_scale0 =
-				(width - 1) / (width * domain_scale.x);
-			const float clut_scale1 =
-				(width - 1) / (width * domain_scale.y);
-			const float clut_scale2 =
-				(width - 1) / (width * domain_scale.z);
-			vec3_set(&filter->clut_scale, clut_scale0, clut_scale1,
-				 clut_scale2);
-
-			const float clut_offset0 =
-				0.5f / width -
-				filter->domain_min.x * clut_scale0;
-			const float clut_offset1 =
-				0.5f / width -
-				filter->domain_min.y * clut_scale1;
-			const float clut_offset2 =
-				0.5f / width -
-				filter->domain_min.z * clut_scale2;
-			vec3_set(&filter->clut_offset, clut_offset0,
-				 clut_offset1, clut_offset2);
+
+			const float width_minus_one = (float)(width - 1);
+			vec3_set(&filter->clut_scale, width_minus_one,
+				 width_minus_one, width_minus_one);
+			vec3_div(&filter->clut_scale, &filter->clut_scale,
+				 &domain_scale);
+
+			vec3_neg(&filter->clut_offset, &filter->domain_min);
+			vec3_mul(&filter->clut_offset, &filter->clut_offset,
+				 &filter->clut_scale);
+
+			/* 1D shader wants normalized UVW */
+			if (filter->clut_dim == CLUT_1D) {
+				vec3_divf(&filter->clut_scale,
+					  &filter->clut_scale, (float)width);
+
+				vec3_addf(&filter->clut_offset,
+					  &filter->clut_offset, 0.5f);
+				vec3_divf(&filter->clut_offset,
+					  &filter->clut_offset, (float)width);
+			}
 		}
 	}
 
@@ -448,6 +449,9 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
 	param = gs_effect_get_param_by_name(filter->effect, "domain_max");
 	gs_effect_set_vec3(param, &filter->domain_max);
 
+	param = gs_effect_get_param_by_name(filter->effect, "cube_width_i");
+	gs_effect_set_float(param, 1.0f / filter->cube_width);
+
 	obs_source_process_filter_tech_end(filter->context, filter->effect, 0,
 					   0, tech_name);
 }

+ 79 - 6
plugins/obs-filters/data/color_grade_filter.effect

@@ -8,6 +8,7 @@ uniform float3 clut_scale;
 uniform float3 clut_offset;
 uniform float3 domain_min;
 uniform float3 domain_max;
+uniform float cube_width_i;
 
 sampler_state textureSampler {
 	Filter    = Linear;
@@ -62,13 +63,85 @@ float4 LUT1D(VertDataOut v_in) : TARGET
 float4 LUT3D(VertDataOut v_in) : TARGET
 {
 	float4 textureColor = image.Sample(textureSampler, v_in.uv);
-
-	if (textureColor.r >= domain_min.r && textureColor.r <= domain_max.r &&
-		textureColor.g >= domain_min.g && textureColor.g <= domain_max.g &&
-		textureColor.b >= domain_min.b && textureColor.b <= domain_max.b)
+	float r = textureColor.r;
+	float g = textureColor.g;
+	float b = textureColor.b;
+	if (r >= domain_min.r && r <= domain_max.r &&
+		g >= domain_min.g && g <= domain_max.g &&
+		b >= domain_min.b && b <= domain_max.b)
 	{
-		float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset;
-		float3 luttedColor = clut_3d.Sample(textureSampler, clut_uvw).rgb;
+		float3 clut_pos = textureColor.rgb * clut_scale + clut_offset;
+		float3 floor_pos = floor(clut_pos);
+
+		float3 fracRGB = clut_pos - floor_pos;
+
+		float3 uvw0 = (floor_pos + 0.5) * cube_width_i;
+		float3 uvw3 = (floor_pos + 1.5) * cube_width_i;
+
+		float fracL, fracM, fracS;
+		float3 uvw1, uvw2;
+		if (fracRGB.r < fracRGB.g) {
+			if (fracRGB.r < fracRGB.b) {
+				if (fracRGB.g < fracRGB.b) {
+					// f(R) < f(G) < f(B)
+					fracL = fracRGB.b;
+					fracM = fracRGB.g;
+					fracS = fracRGB.r;
+					uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
+					uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
+				} else {
+					// f(R) < f(B) <= f(G)
+					fracL = fracRGB.g;
+					fracM = fracRGB.b;
+					fracS = fracRGB.r;
+					uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
+					uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
+				}
+			} else {
+				// f(B) <= f(R) < f(G)
+				fracL = fracRGB.g;
+				fracM = fracRGB.r;
+				fracS = fracRGB.b;
+				uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
+				uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
+			}
+		} else if (fracRGB.r < fracRGB.b) {
+			// f(G) <= f(R) < f(B)
+			fracL = fracRGB.b;
+			fracM = fracRGB.r;
+			fracS = fracRGB.g;
+			uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
+			uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
+		} else if (fracRGB.g < fracRGB.b) {
+			// f(G) < f(B) <= f(R)
+			fracL = fracRGB.r;
+			fracM = fracRGB.b;
+			fracS = fracRGB.g;
+			uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
+			uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
+		} else {
+			// f(B) <= f(G) <= f(R)
+			fracL = fracRGB.r;
+			fracM = fracRGB.g;
+			fracS = fracRGB.b;
+			uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
+			uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
+		}
+
+		/* use filtering to collapse 4 taps to 2 */
+		/* use max to kill potential zero-divide NaN */
+
+		float coeff01 = (1.0 - fracM);
+		float weight01 = max((fracL - fracM) / coeff01, 0.0);
+		float3 uvw01 = lerp(uvw0, uvw1, weight01);
+		float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb;
+
+		float coeff23 = fracM;
+		float weight23 = max(fracS / coeff23, 0.0);
+		float3 uvw23 = lerp(uvw2, uvw3, weight23);
+		float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb;
+
+		float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23);
 		textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount);
 	}