Browse Source

libobs: Add encoder packet timing support

Introduce support for the `encoder_packet_time` struct
to capture timing information for each frame, starting
from the composition of each frame, through the encoder,
to the queueing of the frame data to each output_t.

Timestamps for each of the following events are based on
`os_gettime_ns()`:

CTS: Composition time stamp (in the encoder render threads)
FER: Frame encode request
FERC: Frame encode request complete
PIR: Packet interleave request (`send_interleaved()`)

Frame times are forwarded through encoder callbacks in the
context that runs on the relevant encoder thread, ensuring
that no race conditions occur when accessing the per-encoder array.
All per-output processing happens on data that is owned by
the output.

Co-authored-by: Ruwen Hahn <[email protected]>
Alex Luccisano 1 year ago
parent
commit
6a53b8928f
7 changed files with 268 additions and 47 deletions
  1. 77 24
      libobs/obs-encoder.c
  2. 50 0
      libobs/obs-encoder.h
  3. 16 10
      libobs/obs-internal.h
  4. 13 4
      libobs/obs-output-delay.c
  5. 84 9
      libobs/obs-output.c
  6. 1 0
      libobs/obs-output.h
  7. 27 0
      libobs/obs-video-gpu-encode.c

+ 77 - 24
libobs/obs-encoder.c

@@ -417,6 +417,7 @@ void obs_encoder_destroy(obs_encoder_t *encoder)
 			encoder->info.destroy(encoder->context.data);
 		da_free(encoder->callbacks);
 		da_free(encoder->roi);
+		da_free(encoder->encoder_packet_times);
 		pthread_mutex_destroy(&encoder->init_mutex);
 		pthread_mutex_destroy(&encoder->callbacks_mutex);
 		pthread_mutex_destroy(&encoder->outputs_mutex);
@@ -713,10 +714,9 @@ void obs_encoder_shutdown(obs_encoder_t *encoder)
 	pthread_mutex_unlock(&encoder->init_mutex);
 }
 
-static inline size_t
-get_callback_idx(const struct obs_encoder *encoder,
-		 void (*new_packet)(void *param, struct encoder_packet *packet),
-		 void *param)
+static inline size_t get_callback_idx(const struct obs_encoder *encoder,
+				      encoded_callback_t new_packet,
+				      void *param)
 {
 	for (size_t i = 0; i < encoder->callbacks.num; i++) {
 		struct encoder_callback *cb = encoder->callbacks.array + i;
@@ -738,10 +738,9 @@ void pause_reset(struct pause_data *pause)
 	pthread_mutex_unlock(&pause->mutex);
 }
 
-static inline void obs_encoder_start_internal(
-	obs_encoder_t *encoder,
-	void (*new_packet)(void *param, struct encoder_packet *packet),
-	void *param)
+static inline void obs_encoder_start_internal(obs_encoder_t *encoder,
+					      encoded_callback_t new_packet,
+					      void *param)
 {
 	struct encoder_callback cb = {false, new_packet, param};
 	bool first = false;
@@ -768,9 +767,7 @@ static inline void obs_encoder_start_internal(
 	}
 }
 
-void obs_encoder_start(obs_encoder_t *encoder,
-		       void (*new_packet)(void *param,
-					  struct encoder_packet *packet),
+void obs_encoder_start(obs_encoder_t *encoder, encoded_callback_t new_packet,
 		       void *param)
 {
 	if (!obs_encoder_valid(encoder, "obs_encoder_start"))
@@ -783,9 +780,7 @@ void obs_encoder_start(obs_encoder_t *encoder,
 	pthread_mutex_unlock(&encoder->init_mutex);
 }
 
-void obs_encoder_stop(obs_encoder_t *encoder,
-		      void (*new_packet)(void *param,
-					 struct encoder_packet *packet),
+void obs_encoder_stop(obs_encoder_t *encoder, encoded_callback_t new_packet,
 		      void *param)
 {
 	bool last = false;
@@ -807,6 +802,8 @@ void obs_encoder_stop(obs_encoder_t *encoder,
 
 	pthread_mutex_unlock(&encoder->callbacks_mutex);
 
+	encoder->encoder_packet_times.num = 0;
+
 	if (last) {
 		remove_connection(encoder, true);
 		pthread_mutex_unlock(&encoder->init_mutex);
@@ -1263,7 +1260,8 @@ static inline bool get_sei(const struct obs_encoder *encoder, uint8_t **sei,
 
 static void send_first_video_packet(struct obs_encoder *encoder,
 				    struct encoder_callback *cb,
-				    struct encoder_packet *packet)
+				    struct encoder_packet *packet,
+				    struct encoder_packet_time *packet_time)
 {
 	struct encoder_packet first_packet;
 	DARRAY(uint8_t) data;
@@ -1277,7 +1275,7 @@ static void send_first_video_packet(struct obs_encoder *encoder,
 	da_init(data);
 
 	if (!get_sei(encoder, &sei, &size) || !sei || !size) {
-		cb->new_packet(cb->param, packet);
+		cb->new_packet(cb->param, packet, packet_time);
 		cb->sent_first_packet = true;
 		return;
 	}
@@ -1289,7 +1287,7 @@ static void send_first_video_packet(struct obs_encoder *encoder,
 	first_packet.data = data.array;
 	first_packet.size = data.num;
 
-	cb->new_packet(cb->param, &first_packet);
+	cb->new_packet(cb->param, &first_packet, packet_time);
 	cb->sent_first_packet = true;
 
 	da_free(data);
@@ -1298,14 +1296,15 @@ static void send_first_video_packet(struct obs_encoder *encoder,
 static const char *send_packet_name = "send_packet";
 static inline void send_packet(struct obs_encoder *encoder,
 			       struct encoder_callback *cb,
-			       struct encoder_packet *packet)
+			       struct encoder_packet *packet,
+			       struct encoder_packet_time *packet_time)
 {
 	profile_start(send_packet_name);
 	/* include SEI in first video packet */
 	if (encoder->info.type == OBS_ENCODER_VIDEO && !cb->sent_first_packet)
-		send_first_video_packet(encoder, cb, packet);
+		send_first_video_packet(encoder, cb, packet, packet_time);
 	else
-		cb->new_packet(cb->param, packet);
+		cb->new_packet(cb->param, packet, packet_time);
 	profile_end(send_packet_name);
 }
 
@@ -1357,12 +1356,40 @@ void send_off_encoder_packet(obs_encoder_t *encoder, bool success,
 		pkt->sys_dts_usec += encoder->pause.ts_offset / 1000;
 		pthread_mutex_unlock(&encoder->pause.mutex);
 
+		/* Find the encoder packet timing entry in the encoder
+		 * timing array with the corresponding PTS value, then remove
+		 * the entry from the array to ensure it doesn't continuously fill.
+		 */
+		struct encoder_packet_time ept_local;
+		struct encoder_packet_time *ept = NULL;
+		bool found_ept = false;
+		if (pkt->type == OBS_ENCODER_VIDEO) {
+			for (size_t i = encoder->encoder_packet_times.num;
+			     i > 0; i--) {
+				ept = &encoder->encoder_packet_times
+					       .array[i - 1];
+				if (ept->pts == pkt->pts) {
+					ept_local = *ept;
+					da_erase(encoder->encoder_packet_times,
+						 i - 1);
+					found_ept = true;
+					break;
+				}
+			}
+			if (!found_ept)
+				blog(LOG_DEBUG,
+				     "%s: Encoder packet timing for PTS %" PRId64
+				     " not found",
+				     __FUNCTION__, pkt->pts);
+		}
+
 		pthread_mutex_lock(&encoder->callbacks_mutex);
 
 		for (size_t i = encoder->callbacks.num; i > 0; i--) {
 			struct encoder_callback *cb;
 			cb = encoder->callbacks.array + (i - 1);
-			send_packet(encoder, cb, pkt);
+			send_packet(encoder, cb, pkt,
+				    found_ept ? &ept_local : NULL);
 		}
 
 		pthread_mutex_unlock(&encoder->callbacks_mutex);
@@ -1370,7 +1397,8 @@ void send_off_encoder_packet(obs_encoder_t *encoder, bool success,
 }
 
 static const char *do_encode_name = "do_encode";
-bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame)
+bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame,
+	       const uint64_t *frame_cts)
 {
 	profile_start(do_encode_name);
 	if (!encoder->profile_encoder_encode_name)
@@ -1381,6 +1409,7 @@ bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame)
 	struct encoder_packet pkt = {0};
 	bool received = false;
 	bool success;
+	uint64_t fer_ts = 0;
 
 	if (encoder->reconfigure_requested) {
 		encoder->reconfigure_requested = false;
@@ -1392,10 +1421,34 @@ bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame)
 	pkt.timebase_den = encoder->timebase_den;
 	pkt.encoder = encoder;
 
+	/* Get the frame encode request timestamp. This
+	 * needs to be read just before the encode request.
+	 */
+	fer_ts = os_gettime_ns();
+
 	profile_start(encoder->profile_encoder_encode_name);
 	success = encoder->info.encode(encoder->context.data, frame, &pkt,
 				       &received);
 	profile_end(encoder->profile_encoder_encode_name);
+
+	/* Generate and enqueue the frame timing metrics, namely
+	 * the CTS (composition time), FER (frame encode request), FERC
+	 * (frame encode request complete) and current PTS. PTS is used to
+	 * associate the frame timing data with the encode packet. */
+	if (frame_cts) {
+		struct encoder_packet_time *ept =
+			da_push_back_new(encoder->encoder_packet_times);
+		// Get the frame encode request complete timestamp
+		if (success) {
+			ept->ferc = os_gettime_ns();
+		} else {
+			// Encode had error, set ferc to 0
+			ept->ferc = 0;
+		}
+		ept->pts = frame->pts;
+		ept->cts = *frame_cts;
+		ept->fer = fer_ts;
+	}
 	send_off_encoder_packet(encoder, success, received, &pkt);
 
 	profile_end(do_encode_name);
@@ -1484,7 +1537,7 @@ static void receive_video(void *param, struct video_data *frame)
 	enc_frame.frames = 1;
 	enc_frame.pts = encoder->cur_pts;
 
-	if (do_encode(encoder, &enc_frame))
+	if (do_encode(encoder, &enc_frame, &frame->timestamp))
 		encoder->cur_pts +=
 			encoder->timebase_num * encoder->frame_rate_divisor;
 
@@ -1622,7 +1675,7 @@ static bool send_audio_data(struct obs_encoder *encoder)
 	enc_frame.frames = (uint32_t)encoder->framesize;
 	enc_frame.pts = encoder->cur_pts;
 
-	if (!do_encode(encoder, &enc_frame))
+	if (!do_encode(encoder, &enc_frame, NULL))
 		return false;
 
 	encoder->cur_pts += encoder->framesize;

+ 50 - 0
libobs/obs-encoder.h

@@ -45,6 +45,56 @@ enum obs_encoder_type {
 	OBS_ENCODER_VIDEO  /**< The encoder provides a video codec */
 };
 
+/* encoder_packet_time is used for timestamping events associated
+ * with each video frame. This is useful for deriving absolute
+ * timestamps (i.e. wall-clock based formats) and measuring latency.
+ *
+ * For each frame, there are four events of interest, described in
+ * the encoder_packet_time struct, namely cts, fer, ferc, and pir.
+ * The timebase of these four events is os_gettime_ns(), which provides
+ * very high resolution timestamping, and the ability to convert the
+ * timing to any other time format.
+ *
+ * Each frame follows a timeline in the following temporal order:
+ *   CTS, FER, FERC, PIR
+ *
+ * PTS is the integer-based monotonically increasing value that is used
+ * to associate an encoder_packet_time entry with a specific encoder_packet.
+ */
+struct encoder_packet_time {
+	/* PTS used to associate uncompressed frames with encoded packets. */
+	int64_t pts;
+
+	/* CTS (Composition Timestamp) is when the frame was rendered,
+	 * captured via os_gettime_ns().
+	 */
+	uint64_t cts;
+
+	/* FER (Frame Encode Request) is when the frame was
+	 * submitted to the encoder for encoding via the encode
+	 * callback (e.g. encode_texture2()), captured via os_gettime_ns().
+	 */
+	uint64_t fer;
+
+	/* FERC (Frame Encode Request Complete) is when
+	 * the associated FER event completed. If the encode
+	 * is synchronous with the call, this means FERC - FER
+	 * measures the actual encode time, otherwise if the
+	 * encode is asynchronous, it measures the pipeline
+	 * delay between encode request and encode complete.
+	 * FERC is also captured via os_gettime_ns().
+	 */
+	uint64_t ferc;
+
+	/* PIR (Packet Interleave Request) is when the encoded packet
+	 * is interleaved with the stream. PIR is captured via
+	 * os_gettime_ns(). The difference between PIR and CTS gives
+	 * the total latency between frame rendering
+	 * and packet interleaving.
+	 */
+	uint64_t pir;
+};
+
 /** Encoder output packet */
 struct encoder_packet {
 	uint8_t *data; /**< Packet data */

+ 16 - 10
libobs/obs-internal.h

@@ -26,6 +26,7 @@
 #include "util/profiler.h"
 #include "util/task.h"
 #include "util/uthash.h"
+#include "util/array-serializer.h"
 #include "callback/signal.h"
 #include "callback/proc.h"
 
@@ -1066,9 +1067,12 @@ struct delay_data {
 	enum delay_msg msg;
 	uint64_t ts;
 	struct encoder_packet packet;
+	bool packet_time_valid;
+	struct encoder_packet_time packet_time;
 };
 
-typedef void (*encoded_callback_t)(void *data, struct encoder_packet *packet);
+typedef void (*encoded_callback_t)(void *data, struct encoder_packet *packet,
+				   struct encoder_packet_time *frame_time);
 
 struct obs_weak_output {
 	struct obs_weak_ref ref;
@@ -1186,6 +1190,8 @@ struct obs_output {
 	// captions are output per track
 	struct caption_track_data *caption_tracks[MAX_OUTPUT_VIDEO_ENCODERS];
 
+	DARRAY(struct encoder_packet_time) encoder_packet_times[MAX_OUTPUT_VIDEO_ENCODERS];
+
 	bool valid;
 
 	uint64_t active_delay_ns;
@@ -1213,7 +1219,8 @@ static inline void do_output_signal(struct obs_output *output,
 	calldata_free(&params);
 }
 
-extern void process_delay(void *data, struct encoder_packet *packet);
+extern void process_delay(void *data, struct encoder_packet *packet,
+			  struct encoder_packet_time *packet_time);
 extern void obs_output_cleanup_delay(obs_output_t *output);
 extern bool obs_output_delay_start(obs_output_t *output);
 extern void obs_output_delay_stop(obs_output_t *output);
@@ -1241,7 +1248,7 @@ struct obs_weak_encoder {
 
 struct encoder_callback {
 	bool sent_first_packet;
-	void (*new_packet)(void *param, struct encoder_packet *packet);
+	encoded_callback_t new_packet;
 	void *param;
 };
 
@@ -1333,6 +1340,8 @@ struct obs_encoder {
 	pthread_mutex_t callbacks_mutex;
 	DARRAY(struct encoder_callback) callbacks;
 
+	DARRAY(struct encoder_packet_time) encoder_packet_times;
+
 	struct pause_data pause;
 
 	const char *profile_encoder_encode_name;
@@ -1348,13 +1357,9 @@ extern bool obs_encoder_initialize(obs_encoder_t *encoder);
 extern void obs_encoder_shutdown(obs_encoder_t *encoder);
 
 extern void obs_encoder_start(obs_encoder_t *encoder,
-			      void (*new_packet)(void *param,
-						 struct encoder_packet *packet),
-			      void *param);
+			      encoded_callback_t new_packet, void *param);
 extern void obs_encoder_stop(obs_encoder_t *encoder,
-			     void (*new_packet)(void *param,
-						struct encoder_packet *packet),
-			     void *param);
+			     encoded_callback_t new_packet, void *param);
 
 extern void obs_encoder_add_output(struct obs_encoder *encoder,
 				   struct obs_output *output);
@@ -1364,7 +1369,8 @@ extern void obs_encoder_remove_output(struct obs_encoder *encoder,
 extern bool start_gpu_encode(obs_encoder_t *encoder);
 extern void stop_gpu_encode(obs_encoder_t *encoder);
 
-extern bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame);
+extern bool do_encode(struct obs_encoder *encoder, struct encoder_frame *frame,
+		      const uint64_t *frame_cts);
 extern void send_off_encoder_packet(obs_encoder_t *encoder, bool success,
 				    bool received, struct encoder_packet *pkt);
 

+ 13 - 4
libobs/obs-output-delay.c

@@ -45,12 +45,17 @@ static inline bool log_flag_encoded(const struct obs_output *output,
 }
 
 static inline void push_packet(struct obs_output *output,
-			       struct encoder_packet *packet, uint64_t t)
+			       struct encoder_packet *packet,
+			       struct encoder_packet_time *packet_time,
+			       uint64_t t)
 {
 	struct delay_data dd;
 
 	dd.msg = DELAY_MSG_PACKET;
 	dd.ts = t;
+	dd.packet_time_valid = packet_time != NULL;
+	if (packet_time != NULL)
+		dd.packet_time = *packet_time;
 	obs_encoder_packet_create_instance(&dd.packet, packet);
 
 	pthread_mutex_lock(&output->delay_mutex);
@@ -66,7 +71,10 @@ static inline void process_delay_data(struct obs_output *output,
 		if (!delay_active(output) || !delay_capturing(output))
 			obs_encoder_packet_release(&dd->packet);
 		else
-			output->delay_callback(output, &dd->packet);
+			output->delay_callback(output, &dd->packet,
+					       dd->packet_time_valid
+						       ? &dd->packet_time
+						       : NULL);
 		break;
 	case DELAY_MSG_START:
 		obs_output_actual_start(output);
@@ -128,11 +136,12 @@ static inline bool pop_packet(struct obs_output *output, uint64_t t)
 	return popped;
 }
 
-void process_delay(void *data, struct encoder_packet *packet)
+void process_delay(void *data, struct encoder_packet *packet,
+		   struct encoder_packet_time *packet_time)
 {
 	struct obs_output *output = data;
 	uint64_t t = os_gettime_ns();
-	push_packet(output, packet, t);
+	push_packet(output, packet, packet_time, t);
 	while (pop_packet(output, t))
 		;
 }

+ 84 - 9
libobs/obs-output.c

@@ -18,6 +18,7 @@
 #include <inttypes.h>
 #include "util/platform.h"
 #include "util/util_uint64.h"
+#include "util/array-serializer.h"
 #include "graphics/math-extra.h"
 #include "obs.h"
 #include "obs-internal.h"
@@ -308,6 +309,9 @@ void obs_output_destroy(obs_output_t *output)
 
 		da_free(output->keyframe_group_tracking);
 
+		for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++)
+			da_free(output->encoder_packet_times[i]);
+
 		clear_raw_audio_buffers(output);
 
 		os_event_destroy(output->stopping_event);
@@ -365,6 +369,7 @@ bool obs_output_actual_start(obs_output_t *output)
 		deque_init(&ctrack->caption_data);
 		pthread_mutex_unlock(&ctrack->caption_mutex);
 	}
+
 	return success;
 }
 
@@ -1476,8 +1481,10 @@ static inline void check_received(struct obs_output *output,
 	}
 }
 
-static inline void apply_interleaved_packet_offset(struct obs_output *output,
-						   struct encoder_packet *out)
+static inline void
+apply_interleaved_packet_offset(struct obs_output *output,
+				struct encoder_packet *out,
+				struct encoder_packet_time *packet_time)
 {
 	int64_t offset;
 
@@ -1491,6 +1498,8 @@ static inline void apply_interleaved_packet_offset(struct obs_output *output,
 
 	out->dts -= offset;
 	out->pts -= offset;
+	if (packet_time)
+		packet_time->pts -= offset;
 
 	/* convert the newly adjusted dts to relative dts time to ensure proper
 	 * interleaving.  if we're using an audio encoder that's already been
@@ -1526,7 +1535,7 @@ static size_t extract_itut_t35_buffer_from_sei(sei_t *sei, uint8_t **data_out)
 	if (!sei || !sei->head) {
 		return 0;
 	}
-	/* We should only need to get one payload, because the SEI that was 
+	/* We should only need to get one payload, because the SEI that was
 	 * generated should only have one message, so no need to iterate. If
 	 * we did iterate, we would need to generate multiple OBUs. */
 	sei_message_t *msg = sei_message_head(sei);
@@ -1712,8 +1721,8 @@ static bool add_caption(struct obs_output *output, struct encoder_packet *out)
 			uint8_t *obu_buffer = NULL;
 			size_t obu_buffer_size = 0;
 			size = extract_itut_t35_buffer_from_sei(&sei, &data);
-			metadata_obu_itu_t35(data, size, &obu_buffer,
-					     &obu_buffer_size);
+			metadata_obu_itut_t35(data, size, &obu_buffer,
+					      &obu_buffer_size);
 			if (obu_buffer) {
 				da_push_back_array(out_data, obu_buffer,
 						   obu_buffer_size);
@@ -1736,6 +1745,8 @@ static bool add_caption(struct obs_output *output, struct encoder_packet *out)
 static inline void send_interleaved(struct obs_output *output)
 {
 	struct encoder_packet out = output->interleaved_packets.array[0];
+	struct encoder_packet_time ept_local = {0};
+	bool found_ept = false;
 
 	/* do not send an interleaved packet if there's no packet of the
 	 * opposing type of a higher timestamp in the interleave buffer.
@@ -1779,6 +1790,39 @@ static inline void send_interleaved(struct obs_output *output)
 			}
 		}
 		pthread_mutex_unlock(&ctrack->caption_mutex);
+
+		/* Iterate the array of encoder packet times to
+		 * find a matching PTS entry, and drain the array.
+		 * Packet timing currently applies to video only.
+		 */
+		struct encoder_packet_time *ept = NULL;
+		size_t num_ept =
+			output->encoder_packet_times[out.track_idx].num;
+		if (num_ept) {
+			for (size_t i = 0; i < num_ept; i++) {
+				ept = &output->encoder_packet_times[out.track_idx]
+					       .array[i];
+				if (ept->pts == out.pts) {
+					ept_local = *ept;
+					da_erase(output->encoder_packet_times
+							 [out.track_idx],
+						 i);
+					found_ept = true;
+					break;
+				}
+			}
+			if (found_ept == false) {
+				blog(LOG_DEBUG,
+				     "%s: Track %lu encoder packet timing for PTS%" PRId64
+				     " not found.",
+				     __FUNCTION__, out.track_idx, out.pts);
+			}
+		} else {
+			// encoder_packet_times should not be empty; log if so.
+			blog(LOG_DEBUG,
+			     "%s: Track %lu encoder packet timing array empty.",
+			     __FUNCTION__, out.track_idx);
+		}
 	}
 
 	output->info.encoded_packet(output->context.data, &out);
@@ -1908,6 +1952,10 @@ static void discard_to_idx(struct obs_output *output, size_t idx)
 	for (size_t i = 0; i < idx; i++) {
 		struct encoder_packet *packet =
 			&output->interleaved_packets.array[i];
+		if (packet->type == OBS_ENCODER_VIDEO) {
+			da_pop_front(
+				output->encoder_packet_times[packet->track_idx]);
+		}
 		obs_encoder_packet_release(packet);
 	}
 
@@ -2095,7 +2143,7 @@ static bool initialize_interleaved_packets(struct obs_output *output)
 	for (size_t i = 0; i < output->interleaved_packets.num; i++) {
 		struct encoder_packet *packet =
 			&output->interleaved_packets.array[i];
-		apply_interleaved_packet_offset(output, packet);
+		apply_interleaved_packet_offset(output, packet, NULL);
 	}
 
 	return true;
@@ -2242,12 +2290,25 @@ check_encoder_group_keyframe_alignment(obs_output_t *output,
 	da_insert(output->keyframe_group_tracking, idx, &insert_data);
 }
 
-static void interleave_packets(void *data, struct encoder_packet *packet)
+static void apply_ept_offsets(struct obs_output *output)
+{ /* Subtract each track's video offset from the PTS of every timing entry stored for that track. */
+	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) { /* one timing array per video encoder slot */
+		for (size_t j = 0; j < output->encoder_packet_times[i].num;
+		     j++) {
+			output->encoder_packet_times[i].array[j].pts -=
+				output->video_offsets[i]; /* same per-track offset for every entry */
+		}
+	}
+}
+
+static void interleave_packets(void *data, struct encoder_packet *packet,
+			       struct encoder_packet_time *packet_time)
 {
 	struct obs_output *output = data;
 	struct encoder_packet out;
 	bool was_started;
 	bool received_video;
+	struct encoder_packet_time *output_packet_time = NULL;
 
 	if (!active(output))
 		return;
@@ -2283,8 +2344,15 @@ static void interleave_packets(void *data, struct encoder_packet *packet)
 	else
 		obs_encoder_packet_create_instance(&out, packet);
 
+	if (packet_time) {
+		output_packet_time = da_push_back_new(
+			output->encoder_packet_times[packet->track_idx]);
+		*output_packet_time = *packet_time;
+	}
+
 	if (was_started)
-		apply_interleaved_packet_offset(output, &out);
+		apply_interleaved_packet_offset(output, &out,
+						output_packet_time);
 	else
 		check_received(output, packet);
 
@@ -2304,6 +2372,7 @@ static void interleave_packets(void *data, struct encoder_packet *packet)
 			if (prune_interleaved_packets(output)) {
 				if (initialize_interleaved_packets(output)) {
 					resort_interleaved_packets(output);
+					apply_ept_offsets(output);
 					send_interleaved(output);
 				}
 			}
@@ -2317,8 +2386,10 @@ static void interleave_packets(void *data, struct encoder_packet *packet)
 	pthread_mutex_unlock(&output->interleaved_mutex);
 }
 
-static void default_encoded_callback(void *param, struct encoder_packet *packet)
+static void default_encoded_callback(void *param, struct encoder_packet *packet,
+				     struct encoder_packet_time *packet_time)
 {
+	UNUSED_PARAMETER(packet_time);
 	struct obs_output *output = param;
 
 	if (data_active(output)) {
@@ -2490,6 +2561,10 @@ static void reset_packet_data(obs_output_t *output)
 	output->received_audio = false;
 	output->highest_audio_ts = 0;
 
+	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
+		output->encoder_packet_times[i].num = 0;
+	}
+
 	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
 		output->received_video[i] = false;
 		output->video_offsets[i] = 0;

+ 1 - 0
libobs/obs-output.h

@@ -21,6 +21,7 @@
 extern "C" {
 #endif
 
+/* obs_output_info.flags definitions */
 #define OBS_OUTPUT_VIDEO (1 << 0)
 #define OBS_OUTPUT_AUDIO (1 << 1)
 #define OBS_OUTPUT_AV (OBS_OUTPUT_VIDEO | OBS_OUTPUT_AUDIO)

+ 27 - 0
libobs/obs-video-gpu-encode.c

@@ -40,6 +40,7 @@ static void *gpu_encode_thread(void *data)
 		uint64_t lock_key;
 		uint64_t next_key;
 		size_t lock_count = 0;
+		uint64_t fer_ts = 0;
 
 		if (os_atomic_load_bool(&video->gpu_encode_stop))
 			break;
@@ -153,6 +154,11 @@ static void *gpu_encode_thread(void *data)
 			else
 				next_key++;
 
+			/* Get the frame encode request timestamp. This
+			 * needs to be read just before the encode request.
+			 */
+			fer_ts = os_gettime_ns();
+
 			profile_start(gpu_encode_frame_name);
 			if (encoder->info.encode_texture2) {
 				struct encoder_texture tex = {0};
@@ -173,6 +179,27 @@ static void *gpu_encode_thread(void *data)
 			}
 			profile_end(gpu_encode_frame_name);
 
+			/* Generate and enqueue the frame timing metrics, namely
+			 * the CTS (composition time), FER (frame encode request), FERC
+			 * (frame encode request complete) and current PTS. PTS is used to
+			 * associate the frame timing data with the encode packet. */
+			if (tf.timestamp) {
+				struct encoder_packet_time *ept =
+					da_push_back_new(
+						encoder->encoder_packet_times);
+				// Get the frame encode request complete timestamp
+				if (success) {
+					ept->ferc = os_gettime_ns();
+				} else {
+					// Encode had error, set ferc to 0
+					ept->ferc = 0;
+				}
+
+				ept->pts = encoder->cur_pts;
+				ept->cts = tf.timestamp;
+				ept->fer = fer_ts;
+			}
+
 			send_off_encoder_packet(encoder, success, received,
 						&pkt);