Browse Source

libobs: obs-filters: Area upscale shader

Add a separate shader for area upscaling to take advantage of bilinear
filtering. Iterating over texels is unnecessary in the upscale case
because a target pixel can only overlap 1 or 2 texels in each of the X
and Y directions. When only one texel is overlapped, adjust the UVs to
sample the texel center so that no filtering occurs.

Also add a "base_dimension" uniform to avoid an unnecessary division.

Intel HD Graphics 530, 644x478 -> 1323x1080: ~836 us -> ~232 us
jpark37 6 years ago
parent
commit
85cc7c84bc
4 changed files with 138 additions and 44 deletions
  1. 98 38
      libobs/data/area.effect
  2. 16 3
      libobs/obs-scene.c
  3. 6 1
      libobs/obs-video.c
  4. 18 2
      plugins/obs-filters/scale-filter.c

+ 98 - 38
libobs/data/area.effect

@@ -1,13 +1,29 @@
 uniform float4x4 ViewProj;
+uniform float2 base_dimension;
 uniform float2 base_dimension_i;
 uniform texture2d image;
 
-struct VertInOut {
+sampler_state textureSampler {
+	Filter    = Linear;
+	AddressU  = Clamp;
+	AddressV  = Clamp;
+};
+
+struct VertData {
 	float4 pos : POSITION;
 	float2 uv  : TEXCOORD0;
 };
 
-VertInOut VSDefault(VertInOut vert_in)
+struct VertInOut {
+	float2 uv  : TEXCOORD0;
+	float4 pos : POSITION;
+};
+
+struct FragData {
+	float2 uv : TEXCOORD0;
+};
+
+VertInOut VSDefault(VertData vert_in)
 {
 	VertInOut vert_out;
 	vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
@@ -15,50 +31,85 @@ VertInOut VSDefault(VertInOut vert_in)
 	return vert_out;
 }
 
-float4 PSDrawAreaRGBA(VertInOut vert_in) : TARGET
+float4 PSDrawAreaRGBA(FragData frag_in) : TARGET
 {
-	float4 totalcolor = float4(0.0, 0.0, 0.0, 0.0);
-
-	float2 uv = vert_in.uv;
-	float2 uvdelta = float2(ddx(uv.x), ddy(uv.y));
+	float2 uv = frag_in.uv;
+	float2 uv_delta = float2(ddx(uv.x), ddy(uv.y));
 
 	// Handle potential OpenGL flip.
-	uvdelta.y = abs(uvdelta.y);
+	if (obs_glsl_compile)
+		uv_delta.y = abs(uv_delta.y);
 
-	float2 uvhalfdelta = 0.5 * uvdelta;
-	float2 uvmin = uv - uvhalfdelta;
-	float2 uvmax = uv + uvhalfdelta;
+	float2 uv_min = uv - 0.5 * uv_delta;
+	float2 uv_max = uv_min + uv_delta;
 
-	float2 imagesize = 1.0 / base_dimension_i;
-	float2 loadindexmin = floor(uvmin * imagesize);
-	float2 loadindexmax = floor(uvmax * imagesize);
+	float2 load_index_begin = floor(uv_min * base_dimension);
+	float2 load_index_end = ceil(uv_max * base_dimension);
 
-	float2 targetsize = 1.0 / uvdelta;
-	float2 targetpos = uv * targetsize;
-	float2 targetposmin = targetpos - 0.5;
-	float2 targetposmax = targetpos + 0.5;
-	float2 scale = base_dimension_i * targetsize;
+	float2 target_dimension = 1.0 / uv_delta;
+	float2 target_pos = uv * target_dimension;
+	float2 target_pos_min = target_pos - 0.5;
+	float2 target_pos_max = target_pos + 0.5;
+	float2 scale = base_dimension_i * target_dimension;
 
-	float loadindexy = loadindexmin.y;
+	float4 total_color = float4(0.0, 0.0, 0.0, 0.0);
+
+	float load_index_y = load_index_begin.y;
 	do {
-		float loadindexx = loadindexmin.x;
+		float source_y_min = load_index_y * scale.y;
+		float source_y_max = source_y_min + scale.y;
+		float y_min = max(source_y_min, target_pos_min.y);
+		float y_max = min(source_y_max, target_pos_max.y);
+		float height = y_max - y_min;
+
+		float load_index_x = load_index_begin.x;
 		do {
-			float2 loadindex = float2(loadindexx, loadindexy);
-			float2 potentialtargetmin = loadindex * scale;
-			float2 potentialtargetmax = potentialtargetmin + scale;
-			float2 targetmin = max(potentialtargetmin, targetposmin);
-			float2 targetmax = min(potentialtargetmax, targetposmax);
-			float area = (targetmax.x - targetmin.x) * (targetmax.y - targetmin.y);
-			float4 sample = image.Load(int3(loadindex, 0));
-			totalcolor += area * sample;
-
-			++loadindexx;
-		} while (loadindexx <= loadindexmax.x);
-
-		++loadindexy;
-	} while (loadindexy <= loadindexmax.y);
-
-	return totalcolor;
+			float source_x_min = load_index_x * scale.x;
+			float source_x_max = source_x_min + scale.x;
+			float x_min = max(source_x_min, target_pos_min.x);
+			float x_max = min(source_x_max, target_pos_max.x);
+			float width = x_max - x_min;
+			float area = width * height;
+
+			float4 color = image.Load(int3(load_index_x, load_index_y, 0));
+			total_color += area * color;
+
+			++load_index_x;
+		} while (load_index_x < load_index_end.x);
+
+		++load_index_y;
+	} while (load_index_y < load_index_end.y);
+
+	return total_color;
+}
+
+float4 PSDrawAreaUpscaleRGBA(FragData frag_in) : TARGET // Area filter for the upscale case: one bilinear sample replaces the per-texel loop.
+{
+	float2 uv = frag_in.uv;
+	float2 uv_delta = float2(ddx(uv.x), ddy(uv.y)); // UV extent covered by one target pixel
+
+	// Handle potential OpenGL flip (presumably ddy(uv.y) can be negative there, so keep its magnitude).
+	if (obs_glsl_compile)
+		uv_delta.y = abs(uv_delta.y);
+
+	float2 uv_min = uv - 0.5 * uv_delta; // lower UV bound of the target pixel footprint
+	float2 uv_max = uv_min + uv_delta; // upper UV bound of the target pixel footprint
+
+	float2 load_index_first = floor(uv_min * base_dimension); // first texel index touched, per axis
+	float2 load_index_last = ceil(uv_max * base_dimension) - 1.0; // last texel index touched, per axis
+
+	if (load_index_first.x < load_index_last.x) { // footprint straddles two texels in X
+		float uv_boundary_x = load_index_last.x * base_dimension_i.x; // UV of the edge shared by the two texels
+		uv.x = ((uv.x - uv_boundary_x) / uv_delta.x) * base_dimension_i.x + uv_boundary_x; // offset from the edge converted from target-pixel units to texel units, so bilinear weights match the area overlap
+	} else
+		uv.x = (load_index_first.x + 0.5) * base_dimension_i.x; // single texel in X: sample its center so no blending occurs
+	if (load_index_first.y < load_index_last.y) { // footprint straddles two texels in Y
+		float uv_boundary_y = load_index_last.y * base_dimension_i.y; // UV of the edge shared by the two texels
+		uv.y = ((uv.y - uv_boundary_y) / uv_delta.y) * base_dimension_i.y + uv_boundary_y; // same remapping as for X
+	} else
+		uv.y = (load_index_first.y + 0.5) * base_dimension_i.y; // single texel in Y: sample its center so no blending occurs
+
+	return image.Sample(textureSampler, uv); // textureSampler is declared with Linear filtering and Clamp addressing
 }
 
 technique Draw
@@ -66,6 +117,15 @@ technique Draw
 	pass
 	{
 		vertex_shader = VSDefault(vert_in);
-		pixel_shader  = PSDrawAreaRGBA(vert_in);
+		pixel_shader  = PSDrawAreaRGBA(frag_in);
+	}
+}
+
+technique DrawUpscale
+{
+	pass
+	{
+		vertex_shader = VSDefault(vert_in);
+		pixel_shader  = PSDrawAreaUpscaleRGBA(frag_in); // single-sample area filter; the C callers in this change pick this technique when the output is at least as large as the source in both axes
 	}
 }

+ 16 - 3
libobs/obs-scene.c

@@ -470,6 +470,7 @@ static void render_item_texture(struct obs_scene_item *item)
 	enum obs_scale_type type = item->scale_filter;
 	uint32_t cx = gs_texture_get_width(tex);
 	uint32_t cy = gs_texture_get_height(tex);
+	const char *tech = "Draw";
 
 	if (type != OBS_SCALE_DISABLE) {
 		if (type == OBS_SCALE_POINT) {
@@ -481,6 +482,7 @@ static void render_item_texture(struct obs_scene_item *item)
 		} else if (!close_float(item->output_scale.x, 1.0f, EPSILON) ||
 			   !close_float(item->output_scale.y, 1.0f, EPSILON)) {
 			gs_eparam_t *scale_param;
+			gs_eparam_t *scale_i_param;
 
 			if (item->output_scale.x < 0.5f ||
 			    item->output_scale.y < 0.5f) {
@@ -491,15 +493,26 @@ static void render_item_texture(struct obs_scene_item *item)
 				effect = obs->video.lanczos_effect;
 			} else if (type == OBS_SCALE_AREA) {
 				effect = obs->video.area_effect;
+				if ((item->output_scale.x >= 1.0f) &&
+				    (item->output_scale.y >= 1.0f))
+					tech = "DrawUpscale";
 			}
 
 			scale_param = gs_effect_get_param_by_name(
-				effect, "base_dimension_i");
+				effect, "base_dimension");
 			if (scale_param) {
+				struct vec2 base_res_i = {(float)cx, (float)cy};
+
+				gs_effect_set_vec2(scale_param, &base_res_i);
+			}
+
+			scale_i_param = gs_effect_get_param_by_name(
+				effect, "base_dimension_i");
+			if (scale_i_param) {
 				struct vec2 base_res_i = {1.0f / (float)cx,
 							  1.0f / (float)cy};
 
-				gs_effect_set_vec2(scale_param, &base_res_i);
+				gs_effect_set_vec2(scale_i_param, &base_res_i);
 			}
 		}
 	}
@@ -507,7 +520,7 @@ static void render_item_texture(struct obs_scene_item *item)
 	gs_blend_state_push();
 	gs_blend_function(GS_BLEND_ONE, GS_BLEND_INVSRCALPHA);
 
-	while (gs_effect_loop(effect, "Draw"))
+	while (gs_effect_loop(effect, tech))
 		obs_source_draw(tex, 0, 0, 0, 0, 0);
 
 	gs_blend_state_pop();

+ 6 - 1
libobs/obs-video.c

@@ -208,8 +208,9 @@ static inline void render_output_texture(struct obs_core_video *video)
 	gs_texture_t *target = video->output_texture;
 	uint32_t width = gs_texture_get_width(target);
 	uint32_t height = gs_texture_get_height(target);
-	struct vec2 base_i;
+	struct vec2 base, base_i;
 
+	vec2_set(&base, (float)video->base_width, (float)video->base_height);
 	vec2_set(&base_i, 1.0f / (float)video->base_width,
 		 1.0f / (float)video->base_height);
 
@@ -225,6 +226,8 @@ static inline void render_output_texture(struct obs_core_video *video)
 	gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
 	gs_eparam_t *matrix =
 		gs_effect_get_param_by_name(effect, "color_matrix");
+	gs_eparam_t *bres =
+		gs_effect_get_param_by_name(effect, "base_dimension");
 	gs_eparam_t *bres_i =
 		gs_effect_get_param_by_name(effect, "base_dimension_i");
 	size_t passes, i;
@@ -232,6 +235,8 @@ static inline void render_output_texture(struct obs_core_video *video)
 	gs_set_render_target(target, NULL);
 	set_render_size(width, height);
 
+	if (bres)
+		gs_effect_set_vec2(bres, &base);
 	if (bres_i)
 		gs_effect_set_vec2(bres_i, &base_i);
 

+ 18 - 2
plugins/obs-filters/scale-filter.c

@@ -36,7 +36,9 @@ struct scale_filter_data {
 	gs_effect_t *effect;
 	gs_eparam_t *image_param;
 	gs_eparam_t *dimension_param;
+	gs_eparam_t *dimension_i_param;
 	gs_eparam_t *undistort_factor_param;
+	struct vec2 dimension;
 	struct vec2 dimension_i;
 	double undistort_factor;
 	int cx_in;
@@ -49,6 +51,7 @@ struct scale_filter_data {
 	bool target_valid;
 	bool valid;
 	bool undistort;
+	bool upscale;
 	bool base_canvas_resolution;
 };
 
@@ -203,6 +206,7 @@ static void scale_filter_tick(void *data, float seconds)
 		filter->cy_out = filter->cy_in;
 	}
 
+	vec2_set(&filter->dimension, (float)cx, (float)cy);
 	vec2_set(&filter->dimension_i, 1.0f / (float)cx, 1.0f / (float)cy);
 
 	if (filter->undistort) {
@@ -211,6 +215,8 @@ static void scale_filter_tick(void *data, float seconds)
 		filter->undistort_factor = 1.0;
 	}
 
+	filter->upscale = false;
+
 	/* ------------------------- */
 
 	lower_than_2x = filter->cx_out < cx / 2 || filter->cy_out < cy / 2;
@@ -232,6 +238,8 @@ static void scale_filter_tick(void *data, float seconds)
 			break;
 		case OBS_SCALE_AREA:
 			type = OBS_EFFECT_AREA;
+			if ((filter->cx_out >= cx) && (filter->cy_out >= cy))
+				filter->upscale = true;
 			break;
 		}
 	}
@@ -242,9 +250,12 @@ static void scale_filter_tick(void *data, float seconds)
 
 	if (type != OBS_EFFECT_DEFAULT) {
 		filter->dimension_param = gs_effect_get_param_by_name(
+			filter->effect, "base_dimension");
+		filter->dimension_i_param = gs_effect_get_param_by_name(
 			filter->effect, "base_dimension_i");
 	} else {
 		filter->dimension_param = NULL;
+		filter->dimension_i_param = NULL;
 	}
 
 	if (type == OBS_EFFECT_BICUBIC || type == OBS_EFFECT_LANCZOS) {
@@ -260,7 +271,9 @@ static void scale_filter_tick(void *data, float seconds)
 static void scale_filter_render(void *data, gs_effect_t *effect)
 {
 	struct scale_filter_data *filter = data;
-	const char *technique = filter->undistort ? "DrawUndistort" : "Draw";
+	const char *technique =
+		filter->undistort ? "DrawUndistort"
+				  : (filter->upscale ? "DrawUpscale" : "Draw");
 
 	if (!filter->valid || !filter->target_valid) {
 		obs_source_skip_video_filter(filter->context);
@@ -272,7 +285,10 @@ static void scale_filter_render(void *data, gs_effect_t *effect)
 		return;
 
 	if (filter->dimension_param)
-		gs_effect_set_vec2(filter->dimension_param,
+		gs_effect_set_vec2(filter->dimension_param, &filter->dimension);
+
+	if (filter->dimension_i_param)
+		gs_effect_set_vec2(filter->dimension_i_param,
 				   &filter->dimension_i);
 
 	if (filter->undistort_factor_param)