Browse Source

obs-outputs: Add eFLV multitrack audio support

Ruwen Hahn 1 year ago
parent
commit
82193970f4
3 changed files with 162 additions and 3 deletions
  1. 97 0
      plugins/obs-outputs/flv-mux.c
  2. 18 0
      plugins/obs-outputs/flv-mux.h
  3. 47 3
      plugins/obs-outputs/flv-output.c

+ 97 - 0
plugins/obs-outputs/flv-mux.c

@@ -31,6 +31,7 @@
 
 #define AUDIODATA_AAC 10.0
 
+#define AUDIO_FRAMETYPE_OFFSET 4
 #define VIDEO_FRAMETYPE_OFFSET 4
 
 enum video_frametype_t {
@@ -39,6 +40,14 @@ enum video_frametype_t {
 };
 
 // Y2023 spec
+const uint8_t AUDIO_HEADER_EX = 9 << AUDIO_FRAMETYPE_OFFSET; // marker OR-ed with a packet type to form the first byte after the tag header of an extended audio tag
+enum audio_packet_type_t { // packet types combined with AUDIO_HEADER_EX
+	AUDIO_PACKETTYPE_SEQ_START = 0, // passed by flv_packet_audio_start()
+	AUDIO_PACKETTYPE_FRAMES = 1, // passed by flv_packet_audio_frames()
+	AUDIO_PACKETTYPE_MULTICHANNEL_CONFIG = 4, // NOTE(review): no writer in this file section emits it - confirm whether it is used elsewhere
+	AUDIO_PACKETTYPE_MULTITRACK = 5, // written in place of the payload type when the track index is above 0
+};
+
 const uint8_t FRAME_HEADER_EX = 8 << VIDEO_FRAMETYPE_OFFSET;
 enum packet_type_t {
 	PACKETTYPE_SEQ_START = 0,
@@ -63,6 +72,22 @@ enum datatype_t {
 	DATA_TYPE_OBJECT_END = 9,
 };
 
+/*
+ * Writes the four-character code identifying audio codec `id` to `s`,
+ * one byte at a time. AUDIO_CODEC_NONE has no code and trips an assert.
+ */
+static void s_wa4cc(struct serializer *s, enum audio_id_t id)
+{
+	static const char aac_fourcc[4] = {'m', 'p', '4', 'a'};
+
+	switch (id) {
+	case AUDIO_CODEC_NONE:
+		assert(0 && "Tried to serialize AUDIO_CODEC_NONE");
+		break;
+
+	case AUDIO_CODEC_AAC:
+		for (size_t pos = 0; pos < sizeof(aac_fourcc); pos++)
+			s_w8(s, aac_fourcc[pos]);
+		break;
+	}
+}
+
 static void s_w4cc(struct serializer *s, enum video_id_t id)
 {
 	switch (id) {
@@ -376,6 +401,62 @@ void flv_packet_mux(struct encoder_packet *packet, int32_t dts_offset,
 	*size = data.bytes.num;
 }
 
+/*
+ * Builds one extended (Y2023 spec) FLV audio tag for `packet`.
+ *
+ * Bytes written, in order: the tag type, the 24-bit payload size, the low
+ * 24 bits of the timestamp, the upper timestamp bits, a zero 24-bit field,
+ * the extended audio header, the encoder payload and finally the
+ * previous-tag-size trailer.
+ *
+ * packet      encoded audio packet; asserted to be OBS_ENCODER_AUDIO
+ * codec_id    codec whose FourCC is written into the extended header
+ * dts_offset  subtracted from the packet's millisecond timestamp
+ * output      receives the serialized tag buffer
+ * size        receives the length of *output in bytes
+ * type        audio_packet_type_t value describing the payload
+ * idx         audio track index; any index above 0 selects the multitrack
+ *             header form, which additionally carries the track id
+ *
+ * If the packet carries no payload, nothing is serialized and *output and
+ * *size are set to NULL and 0 so the caller never reads indeterminate
+ * values.
+ */
+void flv_packet_audio_ex(struct encoder_packet *packet,
+			 enum audio_id_t codec_id, int32_t dts_offset,
+			 uint8_t **output, size_t *size, int type, size_t idx)
+{
+	struct array_output_data data;
+	struct serializer s;
+
+	assert(packet->type == OBS_ENCODER_AUDIO);
+
+	/* Reject empty packets before the serializer is set up, and hand
+	 * back well-defined outputs instead of leaving them untouched. */
+	if (!packet->data || !packet->size) {
+		*output = NULL;
+		*size = 0;
+		return;
+	}
+
+	array_output_serializer_init(&s, &data);
+
+	int32_t time_ms = get_ms_time(packet, packet->dts) - dts_offset;
+
+	bool is_multitrack = idx > 0;
+
+	uint32_t header_metadata_size = 5; // w8+wa4cc
+	if (is_multitrack)
+		header_metadata_size += 2; // w8 + w8
+
+	s_w8(&s, RTMP_PACKET_TYPE_AUDIO);
+
+#ifdef DEBUG_TIMESTAMPS
+	/* time_ms is a signed 32-bit value; print it with a matching
+	 * conversion rather than %lu. */
+	blog(LOG_DEBUG, "Audio: %ld", (long)time_ms);
+
+	if (last_time > time_ms)
+		blog(LOG_DEBUG, "Non-monotonic");
+
+	last_time = time_ms;
+#endif
+
+	s_wb24(&s, (uint32_t)packet->size + header_metadata_size);
+	s_wb24(&s, (uint32_t)time_ms);
+	s_w8(&s, (time_ms >> 24) & 0x7F);
+	s_wb24(&s, 0);
+
+	/* For multitrack the first byte announces MULTITRACK and the real
+	 * payload type moves into the following byte. */
+	s_w8(&s, AUDIO_HEADER_EX |
+			 (is_multitrack ? AUDIO_PACKETTYPE_MULTITRACK : type));
+	if (is_multitrack) {
+		s_w8(&s, MULTITRACKTYPE_ONE_TRACK | type);
+		s_wa4cc(&s, codec_id);
+		s_w8(&s, (uint8_t)idx);
+	} else {
+		s_wa4cc(&s, codec_id);
+	}
+
+	s_write(&s, packet->data, packet->size);
+
+	write_previous_tag_size(&s);
+
+	*output = data.bytes.array;
+	*size = data.bytes.num;
+}
+
 // Y2023 spec
 void flv_packet_ex(struct encoder_packet *packet, enum video_id_t codec_id,
 		   int32_t dts_offset, uint8_t **output, size_t *size, int type,
@@ -466,6 +547,22 @@ void flv_packet_end(struct encoder_packet *packet, enum video_id_t codec,
 	flv_packet_ex(packet, codec, 0, output, size, PACKETTYPE_SEQ_END, idx);
 }
 
+/*
+ * Builds the sequence-start audio tag for track `idx`. The tag is always
+ * produced with a dts offset of zero.
+ */
+void flv_packet_audio_start(struct encoder_packet *packet,
+			    enum audio_id_t codec, uint8_t **output,
+			    size_t *size, size_t idx)
+{
+	const int32_t no_dts_offset = 0;
+
+	flv_packet_audio_ex(packet, codec, no_dts_offset, output, size,
+			    AUDIO_PACKETTYPE_SEQ_START, idx);
+}
+
+/*
+ * Builds a coded-frames audio tag for track `idx`, with `dts_offset`
+ * subtracted from the packet timestamp.
+ */
+void flv_packet_audio_frames(struct encoder_packet *packet,
+			     enum audio_id_t codec, int32_t dts_offset,
+			     uint8_t **output, size_t *size, size_t idx)
+{
+	const int packet_type = AUDIO_PACKETTYPE_FRAMES;
+
+	flv_packet_audio_ex(packet, codec, dts_offset, output, size,
+			    packet_type, idx);
+}
+
 void flv_packet_metadata(enum video_id_t codec_id, uint8_t **output,
 			 size_t *size, int bits_per_raw_sample,
 			 uint8_t color_primaries, int color_trc,

+ 18 - 0
plugins/obs-outputs/flv-mux.h

@@ -21,6 +21,11 @@
 
 #define MILLISECOND_DEN 1000
 
+enum audio_id_t { // audio codecs the FLV muxer distinguishes
+	AUDIO_CODEC_NONE = 0, // result of to_audio_type() for any name other than "aac"
+	AUDIO_CODEC_AAC = 1, // result of to_audio_type() for "aac"
+};
+
 enum video_id_t {
 	CODEC_NONE = 0, // not valid in rtmp
 	CODEC_H264 = 1, // legacy & Y2023 spec
@@ -28,6 +33,13 @@ enum video_id_t {
 	CODEC_HEVC,
 };
 
+/*
+ * Translates a codec name into an audio_id_t. Only the exact string "aac"
+ * is recognized; every other name maps to AUDIO_CODEC_NONE.
+ */
+static enum audio_id_t to_audio_type(const char *codec)
+{
+	return strcmp(codec, "aac") != 0 ? AUDIO_CODEC_NONE : AUDIO_CODEC_AAC;
+}
+
 static enum video_id_t to_video_type(const char *codec)
 {
 	if (strcmp(codec, "h264") == 0)
@@ -72,3 +84,9 @@ extern void flv_packet_metadata(enum video_id_t codec, uint8_t **output,
 				uint8_t color_primaries, int color_trc,
 				int color_space, int min_luminance,
 				int max_luminance, size_t idx);
+extern void flv_packet_audio_start(struct encoder_packet *packet,
+				   enum audio_id_t codec, uint8_t **output,
+				   size_t *size, size_t idx); // sequence-start audio tag for track idx, built with no dts offset
+extern void flv_packet_audio_frames(struct encoder_packet *packet,
+				    enum audio_id_t codec, int32_t dts_offset,
+				    uint8_t **output, size_t *size, size_t idx); // coded-frames audio tag for track idx, timestamp reduced by dts_offset

+ 47 - 3
plugins/obs-outputs/flv-output.c

@@ -46,6 +46,7 @@ struct flv_output {
 	bool sent_headers;
 	int64_t last_packet_ts;
 
+	enum audio_id_t audio_codec[MAX_OUTPUT_AUDIO_ENCODERS];
 	enum video_id_t video_codec[MAX_OUTPUT_VIDEO_ENCODERS];
 
 	pthread_mutex_t mutex;
@@ -226,6 +227,29 @@ static int write_packet_ex(struct flv_output *stream,
 	return ret;
 }
 
+/*
+ * Serializes `packet` as an extended FLV audio tag for track `idx` and
+ * appends it to the stream's output file.
+ *
+ * stream     output state; supplies the per-track codec, the start dts
+ *            offset and the destination file
+ * packet     encoded audio packet (or encoder extra data when is_header)
+ * is_header  true to emit a sequence-start tag, false for a frames tag
+ * idx        audio track index, also used to look up the track's codec
+ *
+ * Returns 0. NOTE(review): the fwrite result is not inspected, so a short
+ * write is not reported - confirm whether callers should learn about it.
+ */
+static int write_audio_packet_ex(struct flv_output *stream,
+				 struct encoder_packet *packet, bool is_header,
+				 size_t idx)
+{
+	/* The muxer returns early on an empty packet, so start from a
+	 * defined "no buffer" state rather than an indeterminate pointer. */
+	uint8_t *data = NULL;
+	size_t size = 0;
+	int ret = 0;
+
+	if (is_header) {
+		flv_packet_audio_start(packet, stream->audio_codec[idx], &data,
+				       &size, idx);
+	} else {
+		flv_packet_audio_frames(packet, stream->audio_codec[idx],
+					stream->start_dts_offset, &data, &size,
+					idx);
+	}
+
+	/* Nothing was serialized: there is nothing to write or free. */
+	if (!data)
+		return ret;
+
+	fwrite(data, 1, size, stream->file);
+	bfree(data);
+
+	return ret;
+}
+
 static void write_meta_data(struct flv_output *stream)
 {
 	uint8_t *meta_data;
@@ -247,8 +271,13 @@ static bool write_audio_header(struct flv_output *stream, size_t idx)
 	if (!aencoder)
 		return false;
 
-	if (obs_encoder_get_extra_data(aencoder, &packet.data, &packet.size))
-		write_packet(stream, &packet, true);
+	if (obs_encoder_get_extra_data(aencoder, &packet.data, &packet.size)) {
+		if (idx == 0) {
+			write_packet(stream, &packet, true);
+		} else {
+			write_audio_packet_ex(stream, &packet, true, idx);
+		}
+	}
 
 	return true;
 }
@@ -394,6 +423,16 @@ static bool write_video_metadata(struct flv_output *stream, size_t idx)
 
 static void write_headers(struct flv_output *stream)
 {
+	for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
+		obs_encoder_t *enc =
+			obs_output_get_audio_encoder(stream->output, i);
+		if (!enc)
+			break;
+
+		const char *codec = obs_encoder_get_codec(enc);
+		stream->audio_codec[i] = to_audio_type(codec);
+	}
+
 	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
 		obs_encoder_t *enc =
 			obs_output_get_video_encoder2(stream->output, i);
@@ -577,7 +616,12 @@ static void flv_output_data(void *data, struct encoder_packet *packet)
 		}
 		obs_encoder_packet_release(&parsed_packet);
 	} else {
-		write_packet(stream, packet, false);
+		if (packet->track_idx != 0) {
+			write_audio_packet_ex(stream, packet, false,
+					      packet->track_idx);
+		} else {
+			write_packet(stream, packet, false);
+		}
 	}
 
 unlock: