浏览代码

libobs: NV12 textures only for active GPU encoders

Intel GPUs in particular are slow to copy NV12/P010 textures. We can use
ordinary UNORM textures for CPU encoders.
jpark37 3 年之前
父节点
当前提交
c4fb34897e
共有 3 个文件被更改，包括 256 次插入和 183 次删除
  1. 6 1
      libobs/obs-internal.h
  2. 105 104
      libobs/obs-video.c
  3. 145 78
      libobs/obs.c

+ 6 - 1
libobs/obs-internal.h

@@ -246,10 +246,15 @@ struct obs_task_info {
 
 struct obs_core_video {
 	graphics_t *graphics;
+	gs_stagesurf_t *active_copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
 	gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
+	gs_texture_t *convert_textures[NUM_CHANNELS];
+#ifdef _WIN32
+	gs_stagesurf_t *copy_surfaces_encode[NUM_TEXTURES];
+	gs_texture_t *convert_textures_encode[NUM_CHANNELS];
+#endif
 	gs_texture_t *render_texture;
 	gs_texture_t *output_texture;
-	gs_texture_t *convert_textures[NUM_CHANNELS];
 	bool texture_rendered;
 	bool textures_copied[NUM_TEXTURES];
 	bool texture_converted;

+ 105 - 104
libobs/obs-video.c

@@ -298,6 +298,7 @@ static void render_convert_plane(gs_effect_t *effect, gs_texture_t *target,
 
 static const char *render_convert_texture_name = "render_convert_texture";
 static void render_convert_texture(struct obs_core_video *video,
+				   gs_texture_t *const *const convert_textures,
 				   gs_texture_t *texture)
 {
 	profile_start(render_convert_texture_name);
@@ -322,28 +323,28 @@ static void render_convert_texture(struct obs_core_video *video,
 
 	gs_enable_blending(false);
 
-	if (video->convert_textures[0]) {
+	if (convert_textures[0]) {
 		gs_effect_set_texture(image, texture);
 		gs_effect_set_vec4(color_vec0, &vec0);
-		render_convert_plane(effect, video->convert_textures[0],
+		render_convert_plane(effect, convert_textures[0],
 				     video->conversion_techs[0]);
 
-		if (video->convert_textures[1]) {
+		if (convert_textures[1]) {
 			gs_effect_set_texture(image, texture);
 			gs_effect_set_vec4(color_vec1, &vec1);
-			if (!video->convert_textures[2])
+			if (!convert_textures[2])
 				gs_effect_set_vec4(color_vec2, &vec2);
 			gs_effect_set_float(width_i, video->conversion_width_i);
-			render_convert_plane(effect, video->convert_textures[1],
+			render_convert_plane(effect, convert_textures[1],
 					     video->conversion_techs[1]);
 
-			if (video->convert_textures[2]) {
+			if (convert_textures[2]) {
 				gs_effect_set_texture(image, texture);
 				gs_effect_set_vec4(color_vec2, &vec2);
 				gs_effect_set_float(width_i,
 						    video->conversion_width_i);
 				render_convert_plane(
-					effect, video->convert_textures[2],
+					effect, convert_textures[2],
 					video->conversion_techs[2]);
 			}
 		}
@@ -357,26 +358,32 @@ static void render_convert_texture(struct obs_core_video *video,
 }
 
 static const char *stage_output_texture_name = "stage_output_texture";
-static inline void stage_output_texture(struct obs_core_video *video,
-					int cur_texture)
+static inline void
+stage_output_texture(struct obs_core_video *video, int cur_texture,
+		     gs_texture_t *const *const convert_textures,
+		     gs_stagesurf_t *const *const copy_surfaces,
+		     size_t channel_count)
 {
 	profile_start(stage_output_texture_name);
 
 	unmap_last_surface(video);
 
 	if (!video->gpu_conversion) {
-		gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0];
-		if (copy)
+		gs_stagesurf_t *copy = copy_surfaces[0];
+		if (copy) {
 			gs_stage_texture(copy, video->output_texture);
+			video->active_copy_surfaces[cur_texture][0] = copy;
+		}
 
 		video->textures_copied[cur_texture] = true;
 	} else if (video->texture_converted) {
-		for (int i = 0; i < NUM_CHANNELS; i++) {
-			gs_stagesurf_t *copy =
-				video->copy_surfaces[cur_texture][i];
-			if (copy)
-				gs_stage_texture(copy,
-						 video->convert_textures[i]);
+		for (int i = 0; i < channel_count; i++) {
+			gs_stagesurf_t *copy = copy_surfaces[i];
+			if (copy) {
+				gs_stage_texture(copy, convert_textures[i]);
+				video->active_copy_surfaces[cur_texture][i] =
+					copy;
+			}
 		}
 
 		video->textures_copied[cur_texture] = true;
@@ -421,13 +428,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
 	 * reason.  otherwise, it goes to the 'duplicate' case above, which
 	 * will ensure better performance. */
 	if (raw_active || vframe_info->count > 1) {
-		gs_copy_texture(tf.tex, video->convert_textures[0]);
+		gs_copy_texture(tf.tex, video->convert_textures_encode[0]);
 	} else {
-		gs_texture_t *tex = video->convert_textures[0];
-		gs_texture_t *tex_uv = video->convert_textures[1];
+		gs_texture_t *tex = video->convert_textures_encode[0];
+		gs_texture_t *tex_uv = video->convert_textures_encode[1];
 
-		video->convert_textures[0] = tf.tex;
-		video->convert_textures[1] = tf.tex_uv;
+		video->convert_textures_encode[0] = tf.tex;
+		video->convert_textures_encode[1] = tf.tex_uv;
 
 		tf.tex = tex;
 		tf.tex_uv = tex_uv;
@@ -489,15 +496,24 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
 	render_main_texture(video);
 
 	if (raw_active || gpu_active) {
+		gs_texture_t *const *convert_textures = video->convert_textures;
+		gs_stagesurf_t *const *copy_surfaces =
+			video->copy_surfaces[cur_texture];
+		size_t channel_count = NUM_CHANNELS;
 		gs_texture_t *texture = render_output_texture(video);
 
 #ifdef _WIN32
-		if (gpu_active)
+		if (gpu_active) {
+			convert_textures = video->convert_textures_encode;
+			copy_surfaces = video->copy_surfaces_encode;
+			channel_count = 1;
 			gs_flush();
+		}
 #endif
 
 		if (video->gpu_conversion)
-			render_convert_texture(video, texture);
+			render_convert_texture(video, convert_textures,
+					       texture);
 
 #ifdef _WIN32
 		if (gpu_active) {
@@ -507,7 +523,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
 #endif
 
 		if (raw_active)
-			stage_output_texture(video, cur_texture);
+			stage_output_texture(video, cur_texture,
+					     convert_textures, copy_surfaces,
+					     channel_count);
 	}
 
 	gs_set_render_target(NULL, NULL);
@@ -524,7 +542,7 @@ static inline bool download_frame(struct obs_core_video *video,
 
 	for (int channel = 0; channel < NUM_CHANNELS; ++channel) {
 		gs_stagesurf_t *surface =
-			video->copy_surfaces[prev_texture][channel];
+			video->active_copy_surfaces[prev_texture][channel];
 		if (surface) {
 			if (!gs_stagesurface_map(surface, &frame->data[channel],
 						 &frame->linesize[channel]))
@@ -561,108 +579,91 @@ static void set_gpu_converted_data(struct obs_core_video *video,
 				   const struct video_data *input,
 				   const struct video_output_info *info)
 {
-	if (video->using_nv12_tex) {
+	switch (info->format) {
+	case VIDEO_FORMAT_I420: {
 		const uint32_t width = info->width;
 		const uint32_t height = info->height;
 
-		const uint8_t *const in_uv = set_gpu_converted_plane(
-			width, height, input->linesize[0], output->linesize[0],
-			input->data[0], output->data[0]);
+		set_gpu_converted_plane(width, height, input->linesize[0],
+					output->linesize[0], input->data[0],
+					output->data[0]);
 
+		const uint32_t width_d2 = width / 2;
 		const uint32_t height_d2 = height / 2;
-		set_gpu_converted_plane(width, height_d2, input->linesize[0],
-					output->linesize[1], in_uv,
-					output->data[1]);
-	} else {
-		switch (info->format) {
-		case VIDEO_FORMAT_I420: {
-			const uint32_t width = info->width;
-			const uint32_t height = info->height;
-
-			set_gpu_converted_plane(width, height,
-						input->linesize[0],
-						output->linesize[0],
-						input->data[0],
-						output->data[0]);
-
-			const uint32_t width_d2 = width / 2;
-			const uint32_t height_d2 = height / 2;
 
-			set_gpu_converted_plane(width_d2, height_d2,
-						input->linesize[1],
-						output->linesize[1],
-						input->data[1],
-						output->data[1]);
-
-			set_gpu_converted_plane(width_d2, height_d2,
-						input->linesize[2],
-						output->linesize[2],
-						input->data[2],
-						output->data[2]);
+		set_gpu_converted_plane(width_d2, height_d2, input->linesize[1],
+					output->linesize[1], input->data[1],
+					output->data[1]);
 
-			break;
-		}
-		case VIDEO_FORMAT_NV12: {
-			const uint32_t width = info->width;
-			const uint32_t height = info->height;
+		set_gpu_converted_plane(width_d2, height_d2, input->linesize[2],
+					output->linesize[2], input->data[2],
+					output->data[2]);
 
+		break;
+	}
+	case VIDEO_FORMAT_NV12: {
+		const uint32_t width = info->width;
+		const uint32_t height = info->height;
+		const uint32_t height_d2 = height / 2;
+		if (input->linesize[1]) {
 			set_gpu_converted_plane(width, height,
 						input->linesize[0],
 						output->linesize[0],
 						input->data[0],
 						output->data[0]);
-
-			const uint32_t height_d2 = height / 2;
 			set_gpu_converted_plane(width, height_d2,
 						input->linesize[1],
 						output->linesize[1],
 						input->data[1],
 						output->data[1]);
-
-			break;
+		} else {
+			const uint8_t *const in_uv = set_gpu_converted_plane(
+				width, height, input->linesize[0],
+				output->linesize[0], input->data[0],
+				output->data[0]);
+			set_gpu_converted_plane(width, height_d2,
+						input->linesize[0],
+						output->linesize[1], in_uv,
+						output->data[1]);
 		}
-		case VIDEO_FORMAT_I444: {
-			const uint32_t width = info->width;
-			const uint32_t height = info->height;
 
-			set_gpu_converted_plane(width, height,
-						input->linesize[0],
-						output->linesize[0],
-						input->data[0],
-						output->data[0]);
+		break;
+	}
+	case VIDEO_FORMAT_I444: {
+		const uint32_t width = info->width;
+		const uint32_t height = info->height;
 
-			set_gpu_converted_plane(width, height,
-						input->linesize[1],
-						output->linesize[1],
-						input->data[1],
-						output->data[1]);
+		set_gpu_converted_plane(width, height, input->linesize[0],
+					output->linesize[0], input->data[0],
+					output->data[0]);
 
-			set_gpu_converted_plane(width, height,
-						input->linesize[2],
-						output->linesize[2],
-						input->data[2],
-						output->data[2]);
+		set_gpu_converted_plane(width, height, input->linesize[1],
+					output->linesize[1], input->data[1],
+					output->data[1]);
 
-			break;
-		}
+		set_gpu_converted_plane(width, height, input->linesize[2],
+					output->linesize[2], input->data[2],
+					output->data[2]);
 
-		case VIDEO_FORMAT_NONE:
-		case VIDEO_FORMAT_YVYU:
-		case VIDEO_FORMAT_YUY2:
-		case VIDEO_FORMAT_UYVY:
-		case VIDEO_FORMAT_RGBA:
-		case VIDEO_FORMAT_BGRA:
-		case VIDEO_FORMAT_BGRX:
-		case VIDEO_FORMAT_Y800:
-		case VIDEO_FORMAT_BGR3:
-		case VIDEO_FORMAT_I422:
-		case VIDEO_FORMAT_I40A:
-		case VIDEO_FORMAT_I42A:
-		case VIDEO_FORMAT_YUVA:
-		case VIDEO_FORMAT_AYUV:
-			/* unimplemented */
-			;
-		}
+		break;
+	}
+
+	case VIDEO_FORMAT_NONE:
+	case VIDEO_FORMAT_YVYU:
+	case VIDEO_FORMAT_YUY2:
+	case VIDEO_FORMAT_UYVY:
+	case VIDEO_FORMAT_RGBA:
+	case VIDEO_FORMAT_BGRA:
+	case VIDEO_FORMAT_BGRX:
+	case VIDEO_FORMAT_Y800:
+	case VIDEO_FORMAT_BGR3:
+	case VIDEO_FORMAT_I422:
+	case VIDEO_FORMAT_I40A:
+	case VIDEO_FORMAT_I42A:
+	case VIDEO_FORMAT_YUVA:
+	case VIDEO_FORMAT_AYUV:
+		/* unimplemented */
+		;
 	}
 }
 

+ 145 - 78
libobs/obs.c

@@ -101,74 +101,99 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
 	else
 		blog(LOG_INFO, "NV12 texture support not available");
 
+	video->convert_textures[0] = NULL;
+	video->convert_textures[1] = NULL;
+	video->convert_textures[2] = NULL;
 #ifdef _WIN32
+	video->convert_textures_encode[0] = NULL;
+	video->convert_textures_encode[1] = NULL;
+	video->convert_textures_encode[2] = NULL;
 	if (video->using_nv12_tex) {
-		gs_texture_create_nv12(&video->convert_textures[0],
-				       &video->convert_textures[1],
-				       ovi->output_width, ovi->output_height,
-				       GS_RENDER_TARGET | GS_SHARED_KM_TEX);
-	} else {
+		if (!gs_texture_create_nv12(
+			    &video->convert_textures_encode[0],
+			    &video->convert_textures_encode[1],
+			    ovi->output_width, ovi->output_height,
+			    GS_RENDER_TARGET | GS_SHARED_KM_TEX)) {
+			return false;
+		}
+	}
 #endif
+
+	bool success = true;
+
+	const struct video_output_info *info =
+		video_output_get_info(video->video);
+	switch (info->format) {
+	case VIDEO_FORMAT_I420:
+		video->convert_textures[0] =
+			gs_texture_create(ovi->output_width, ovi->output_height,
+					  GS_R8, 1, NULL, GS_RENDER_TARGET);
+		video->convert_textures[1] = gs_texture_create(
+			ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1,
+			NULL, GS_RENDER_TARGET);
+		video->convert_textures[2] = gs_texture_create(
+			ovi->output_width / 2, ovi->output_height / 2, GS_R8, 1,
+			NULL, GS_RENDER_TARGET);
+		if (!video->convert_textures[0] ||
+		    !video->convert_textures[1] || !video->convert_textures[2])
+			success = false;
+		break;
+	case VIDEO_FORMAT_NV12:
+		video->convert_textures[0] =
+			gs_texture_create(ovi->output_width, ovi->output_height,
+					  GS_R8, 1, NULL, GS_RENDER_TARGET);
+		video->convert_textures[1] = gs_texture_create(
+			ovi->output_width / 2, ovi->output_height / 2, GS_R8G8,
+			1, NULL, GS_RENDER_TARGET);
+		if (!video->convert_textures[0] || !video->convert_textures[1])
+			success = false;
+		break;
+	case VIDEO_FORMAT_I444:
 		video->convert_textures[0] =
 			gs_texture_create(ovi->output_width, ovi->output_height,
 					  GS_R8, 1, NULL, GS_RENDER_TARGET);
+		video->convert_textures[1] =
+			gs_texture_create(ovi->output_width, ovi->output_height,
+					  GS_R8, 1, NULL, GS_RENDER_TARGET);
+		video->convert_textures[2] =
+			gs_texture_create(ovi->output_width, ovi->output_height,
+					  GS_R8, 1, NULL, GS_RENDER_TARGET);
+		if (!video->convert_textures[0] ||
+		    !video->convert_textures[1] || !video->convert_textures[2])
+			success = false;
+	}
 
-		const struct video_output_info *info =
-			video_output_get_info(video->video);
-		switch (info->format) {
-		case VIDEO_FORMAT_I420:
-			video->convert_textures[1] = gs_texture_create(
-				ovi->output_width / 2, ovi->output_height / 2,
-				GS_R8, 1, NULL, GS_RENDER_TARGET);
-			video->convert_textures[2] = gs_texture_create(
-				ovi->output_width / 2, ovi->output_height / 2,
-				GS_R8, 1, NULL, GS_RENDER_TARGET);
-			if (!video->convert_textures[2])
-				return false;
-			break;
-		case VIDEO_FORMAT_NV12:
-			video->convert_textures[1] = gs_texture_create(
-				ovi->output_width / 2, ovi->output_height / 2,
-				GS_R8G8, 1, NULL, GS_RENDER_TARGET);
-			break;
-		case VIDEO_FORMAT_I444:
-			video->convert_textures[1] = gs_texture_create(
-				ovi->output_width, ovi->output_height, GS_R8, 1,
-				NULL, GS_RENDER_TARGET);
-			video->convert_textures[2] = gs_texture_create(
-				ovi->output_width, ovi->output_height, GS_R8, 1,
-				NULL, GS_RENDER_TARGET);
-			if (!video->convert_textures[2])
-				return false;
-			break;
-		default:
-			break;
-		}
+	if (!success) {
+		for (size_t c = 0; c < NUM_CHANNELS; c++) {
+			if (video->convert_textures[c]) {
+				gs_texture_destroy(video->convert_textures[c]);
+				video->convert_textures[c] = NULL;
+			}
 #ifdef _WIN32
-	}
+			if (video->convert_textures_encode[c]) {
+				gs_texture_destroy(
+					video->convert_textures_encode[c]);
+				video->convert_textures_encode[c] = NULL;
+			}
 #endif
+		}
+	}
 
-	if (!video->convert_textures[0])
-		return false;
-	if (!video->convert_textures[1])
-		return false;
-
-	return true;
+	return success;
 }
 
 static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
 {
 	struct obs_core_video *video = &obs->video;
 
-	video->copy_surfaces[i][0] = gs_stagesurface_create(
-		ovi->output_width, ovi->output_height, GS_R8);
-	if (!video->copy_surfaces[i][0])
-		return false;
-
 	const struct video_output_info *info =
 		video_output_get_info(video->video);
 	switch (info->format) {
 	case VIDEO_FORMAT_I420:
+		video->copy_surfaces[i][0] = gs_stagesurface_create(
+			ovi->output_width, ovi->output_height, GS_R8);
+		if (!video->copy_surfaces[i][0])
+			return false;
 		video->copy_surfaces[i][1] = gs_stagesurface_create(
 			ovi->output_width / 2, ovi->output_height / 2, GS_R8);
 		if (!video->copy_surfaces[i][1])
@@ -179,12 +204,20 @@ static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
 			return false;
 		break;
 	case VIDEO_FORMAT_NV12:
+		video->copy_surfaces[i][0] = gs_stagesurface_create(
+			ovi->output_width, ovi->output_height, GS_R8);
+		if (!video->copy_surfaces[i][0])
+			return false;
 		video->copy_surfaces[i][1] = gs_stagesurface_create(
 			ovi->output_width / 2, ovi->output_height / 2, GS_R8G8);
 		if (!video->copy_surfaces[i][1])
 			return false;
 		break;
 	case VIDEO_FORMAT_I444:
+		video->copy_surfaces[i][0] = gs_stagesurface_create(
+			ovi->output_width, ovi->output_height, GS_R8);
+		if (!video->copy_surfaces[i][0])
+			return false;
 		video->copy_surfaces[i][1] = gs_stagesurface_create(
 			ovi->output_width, ovi->output_height, GS_R8);
 		if (!video->copy_surfaces[i][1])
@@ -205,48 +238,78 @@ static bool obs_init_textures(struct obs_video_info *ovi)
 {
 	struct obs_core_video *video = &obs->video;
 
+	bool success = true;
+
 	for (size_t i = 0; i < NUM_TEXTURES; i++) {
 #ifdef _WIN32
 		if (video->using_nv12_tex) {
-			video->copy_surfaces[i][0] =
+			video->copy_surfaces_encode[i] =
 				gs_stagesurface_create_nv12(ovi->output_width,
 							    ovi->output_height);
-			if (!video->copy_surfaces[i][0])
-				return false;
+			if (!video->copy_surfaces_encode[i]) {
+				success = false;
+				break;
+			}
+		}
+#endif
 
+		if (video->gpu_conversion) {
+			if (!obs_init_gpu_copy_surfaces(ovi, i)) {
+				success = false;
+				break;
+			}
 		} else {
-#endif
-			if (video->gpu_conversion) {
-				if (!obs_init_gpu_copy_surfaces(ovi, i))
-					return false;
-			} else {
-				video->copy_surfaces[i][0] =
-					gs_stagesurface_create(
-						ovi->output_width,
-						ovi->output_height, GS_RGBA);
-				if (!video->copy_surfaces[i][0])
-					return false;
+			video->copy_surfaces[i][0] = gs_stagesurface_create(
+				ovi->output_width, ovi->output_height, GS_RGBA);
+			if (!video->copy_surfaces[i][0]) {
+				success = false;
+				break;
 			}
-#ifdef _WIN32
 		}
-#endif
 	}
 
 	video->render_texture = gs_texture_create(ovi->base_width,
 						  ovi->base_height, GS_RGBA, 1,
 						  NULL, GS_RENDER_TARGET);
-
 	if (!video->render_texture)
-		return false;
+		success = false;
 
 	video->output_texture = gs_texture_create(ovi->output_width,
 						  ovi->output_height, GS_RGBA,
 						  1, NULL, GS_RENDER_TARGET);
-
 	if (!video->output_texture)
-		return false;
+		success = false;
 
-	return true;
+	if (!success) {
+		for (size_t i = 0; i < NUM_TEXTURES; i++) {
+			for (size_t c = 0; c < NUM_CHANNELS; c++) {
+				if (video->copy_surfaces[i][c]) {
+					gs_stagesurface_destroy(
+						video->copy_surfaces[i][c]);
+					video->copy_surfaces[i][c] = NULL;
+				}
+			}
+#ifdef _WIN32
+			if (video->copy_surfaces_encode[i]) {
+				gs_stagesurface_destroy(
+					video->copy_surfaces_encode[i]);
+				video->copy_surfaces_encode[i] = NULL;
+			}
+#endif
+		}
+
+		if (video->render_texture) {
+			gs_texture_destroy(video->render_texture);
+			video->render_texture = NULL;
+		}
+
+		if (video->output_texture) {
+			gs_texture_destroy(video->output_texture);
+			video->output_texture = NULL;
+		}
+	}
+
+	return success;
 }
 
 gs_effect_t *obs_load_effect(gs_effect_t **effect, const char *file)
@@ -484,6 +547,13 @@ static void obs_free_video(void)
 					video->copy_surfaces[i][c] = NULL;
 				}
 			}
+#ifdef _WIN32
+			if (video->copy_surfaces_encode[i]) {
+				gs_stagesurface_destroy(
+					video->copy_surfaces_encode[i]);
+				video->copy_surfaces_encode[i] = NULL;
+			}
+#endif
 		}
 
 		gs_texture_destroy(video->render_texture);
@@ -493,16 +563,13 @@ static void obs_free_video(void)
 				gs_texture_destroy(video->convert_textures[c]);
 				video->convert_textures[c] = NULL;
 			}
-		}
-
-		for (size_t i = 0; i < NUM_TEXTURES; i++) {
-			for (size_t c = 0; c < NUM_CHANNELS; c++) {
-				if (video->copy_surfaces[i][c]) {
-					gs_stagesurface_destroy(
-						video->copy_surfaces[i][c]);
-					video->copy_surfaces[i][c] = NULL;
-				}
+#ifdef _WIN32
+			if (video->convert_textures_encode[c]) {
+				gs_texture_destroy(
+					video->convert_textures_encode[c]);
+				video->convert_textures_encode[c] = NULL;
 			}
+#endif
 		}
 
 		gs_texture_destroy(video->output_texture);