Преглед на файлове

obs-outputs: Add support for Hybrid MOV

derrod преди 9 месеца
родител
ревизия
5f2a4e976f

+ 1 - 0
plugins/obs-outputs/data/locale/en-US.ini

@@ -11,6 +11,7 @@ MP4Output="MP4 File Output"
 MP4Output.FilePath="File Path"
 MP4Output.StartChapter="Start"
 MP4Output.UnnamedChapter="Unnamed"
+MOVOutput="MOV File Output"
 
 IPFamily="IP Address Family"
 IPFamily.Both="IPv4 and IPv6 (Default)"

+ 83 - 9
plugins/obs-outputs/mp4-mux-internal.h

@@ -23,13 +23,6 @@
 #include <util/deque.h>
 #include <util/serializer.h>
 
-/* Flavour for target compatibility */
-enum mp4_flavour {
-	MP4,  /* ISO/IEC 14496-12 */
-	MOV,  /* Apple QuickTime */
-	CMAF, /* ISO/IEC 23000-19 */
-};
-
 enum mp4_track_type {
 	TRACK_UNKNOWN,
 	TRACK_VIDEO,
@@ -44,6 +37,7 @@ enum mp4_codec {
 	CODEC_H264,
 	CODEC_HEVC,
 	CODEC_AV1,
+	CODEC_PRORES,
 
 	/* Audio Codecs */
 	CODEC_AAC,
@@ -97,7 +91,7 @@ struct mp4_track {
 	/* Time Base (1/FPS for video, 1/sample rate for audio) */
 	uint32_t timebase_num;
 	uint32_t timebase_den;
-	/* Output timescale calculated from time base (Video only) */
+	/* Output timescale calculated from time base */
 	uint32_t timescale;
 
 	/* First PTS this track has seen (in track timescale) */
@@ -133,7 +127,7 @@ struct mp4_mux {
 	struct serializer *serializer;
 
 	/* Target format compatibility */
-	enum mp4_flavour mode;
+	enum mp4_flavor flavor;
 
 	/* Flags */
 	enum mp4_mux_flags flags;
@@ -340,3 +334,83 @@ static const char CHAPTER_PKT_FOOTER[12] = {
 	0x00, 0x00, 0x01, 0x00
 };
 /* clang-format on */
+
+/** QTFF/MOV specifics **/
+
+/* https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2#LPCM-flag-values */
+enum lpcm_flags { /* LPCM format flag bits, combined by get_lpcm_flags() */
+	kAudioFormatFlagIsFloat = (1 << 0),
+	kAudioFormatFlagIsSignedInteger = (1 << 2),
+	kAudioFormatFlagIsPacked = (1 << 3),
+	kLinearPCMFormatFlagIsFloat = kAudioFormatFlagIsFloat, /* alias of the kAudioFormat* value */
+	kLinearPCMFormatFlagIsSignedInteger = kAudioFormatFlagIsSignedInteger, /* alias of the kAudioFormat* value */
+	kLinearPCMFormatFlagIsPacked = kAudioFormatFlagIsPacked, /* alias of the kAudioFormat* value */
+};
+
+/* Maps a codec to its LPCM flag bits: float+packed for 32-bit float PCM, signed-integer+packed
+ * for 16/24-bit integer PCM, and 0 for every other codec. */
+static inline uint32_t get_lpcm_flags(enum mp4_codec codec)
+{
+	switch (codec) {
+	case CODEC_PCM_F32:
+		return kLinearPCMFormatFlagIsPacked | kLinearPCMFormatFlagIsFloat;
+	case CODEC_PCM_I16:
+	case CODEC_PCM_I24:
+		return kLinearPCMFormatFlagIsPacked | kLinearPCMFormatFlagIsSignedInteger;
+	default:
+		return 0;
+	}
+}
+
+enum channel_map_bits { /* single-bit masks OR-ed together by get_mov_channel_bitmap() */
+	FL = 1 << 0,
+	FR = 1 << 1,
+	FC = 1 << 2,
+	LFE = 1 << 3,
+	RL = 1 << 4,
+	RR = 1 << 5,
+	RC = 1 << 8, /* bits 6 and 7 are not assigned in this enum */
+	SL = 1 << 9,
+	SR = 1 << 10,
+};
+
+/* Builds the channel bitmap for a speaker layout out of channel_map_bits masks.
+ * SPEAKERS_UNKNOWN (and any value not listed) yields 0. */
+static uint32_t get_mov_channel_bitmap(enum speaker_layout layout)
+{
+	uint32_t bits = 0;
+
+	switch (layout) {
+	case SPEAKERS_MONO:
+		bits = FC;
+		break;
+	case SPEAKERS_STEREO:
+		bits = FL | FR;
+		break;
+	case SPEAKERS_2POINT1:
+		bits = FL | FR | LFE;
+		break;
+	case SPEAKERS_4POINT0:
+		bits = FL | FR | FC | RC;
+		break;
+	case SPEAKERS_4POINT1:
+		bits = FL | FR | FC | LFE | RC;
+		break;
+	case SPEAKERS_5POINT1:
+		bits = FL | FR | FC | LFE | RL | RR;
+		break;
+	case SPEAKERS_7POINT1:
+		bits = FL | FR | FC | LFE | RL | RR | SL | SR;
+		break;
+	case SPEAKERS_UNKNOWN:
+		break;
+	}
+
+	return bits;
+}
+
+enum coreaudio_layout { /* values written as mChannelLayoutTag by mp4_write_chan() */
+	kAudioChannelLayoutTag_UseChannelBitmap = (1 << 16) | 0, /* layout is given by the channel bitmap instead */
+	kAudioChannelLayoutTag_Mono = (100 << 16) | 1,
+	kAudioChannelLayoutTag_Stereo = (101 << 16) | 2,
+	kAudioChannelLayoutTag_DVD_4 = (133 << 16) | 3, // 2.1 (AAC Only)
+};
+
+/* Picks the channel layout tag for a codec/speaker layout pair. Mono and stereo have dedicated
+ * tags, 2.1 has one for AAC only, and everything else falls back to the channel bitmap tag. */
+static enum coreaudio_layout get_mov_channel_layout(enum mp4_codec codec, enum speaker_layout layout)
+{
+	if (layout == SPEAKERS_MONO)
+		return kAudioChannelLayoutTag_Mono;
+
+	if (layout == SPEAKERS_STEREO)
+		return kAudioChannelLayoutTag_Stereo;
+
+	/* The dedicated 2.1 tag is only supported for AAC. */
+	if (layout == SPEAKERS_2POINT1 && codec == CODEC_AAC)
+		return kAudioChannelLayoutTag_DVD_4;
+
+	return kAudioChannelLayoutTag_UseChannelBitmap;
+}

+ 351 - 89
plugins/obs-outputs/mp4-mux.c

@@ -37,8 +37,9 @@
  * Standard identifier is included if not referring to ISO/IEC 14496-12.
  */
 
-#define do_log(level, format, ...) \
-	blog(level, "[mp4 muxer: '%s'] " format, obs_output_get_name(mux->output), ##__VA_ARGS__)
+#define do_log(level, format, ...)                                                          \
+	blog(level, "[%s muxer: '%s'] " format, mux->flavor == FLAVOR_MOV ? "mov" : "mp4", \
+	     obs_output_get_name(mux->output), ##__VA_ARGS__) /* needs a `mux` in the expanding scope; prefix is "mov" for FLAVOR_MOV, "mp4" for any other flavor */
 
 #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
 #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
@@ -86,44 +87,49 @@ static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
 
 	write_box(s, 0, "ftyp");
 
-	const char *major_brand = "isom";
-	/* Following FFmpeg's example, when using negative CTS the major brand
-	 * needs to be either iso4 or iso6 depending on whether the file is
-	 * currently fragmented. */
-	if (mux->flags & MP4_USE_NEGATIVE_CTS)
-		major_brand = fragmented ? "iso6" : "iso4";
-
-	s_write(s, major_brand, 4); // major brand
-	s_wb32(s, 512);             // minor version
-
-	// minor brands (first one matches major brand)
-	s_write(s, major_brand, 4);
-
-	/* Write isom base brand if it's not the major brand */
-	if (strcmp(major_brand, "isom") != 0)
-		s_write(s, "isom", 4);
-
-	/* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
-	 * as a placeholder to maintain ftyp box size. */
-	if (fragmented && strcmp(major_brand, "iso6") != 0)
-		s_write(s, "iso6", 4);
-	else
-		s_write(s, "obs1", 4);
-
-	s_write(s, "iso2", 4);
-
-	/* Include H.264 brand if used */
-	for (size_t i = 0; i < mux->tracks.num; i++) {
-		struct mp4_track *track = &mux->tracks.array[i];
-		if (track->type == TRACK_VIDEO) {
-			if (track->codec == CODEC_H264)
-				s_write(s, "avc1", 4);
-			break;
+	if (mux->flavor == FLAVOR_MOV) {
+		/* For MOV, the brand is just "qt" followed by two spaces. */
+		s_write(s, "qt  ", 4); // major brand
+		s_wb32(s, 0x20140200); // minor version (BCD YYYYMM00 per QTFF spec)
+		s_write(s, "qt  ", 4); // minor brand
+	} else {
+		const char *major_brand = "isom";
+		/* Following FFmpeg's example, when using negative CTS the major brand
+		 * needs to be either iso4 or iso6 depending on whether the file is
+		 * currently fragmented. */
+		if (mux->flags & MP4_USE_NEGATIVE_CTS)
+			major_brand = fragmented ? "iso6" : "iso4";
+
+		s_write(s, major_brand, 4); // major brand
+		s_wb32(s, 0);               // minor version
+		s_write(s, major_brand, 4); // minor brands (first one matches major brand)
+
+		/* Write isom base brand if it's not the major brand */
+		if (strcmp(major_brand, "isom") != 0)
+			s_write(s, "isom", 4);
+
+		/* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
+		 * as a placeholder to maintain ftyp box size. */
+		if (fragmented && strcmp(major_brand, "iso6") != 0)
+			s_write(s, "iso6", 4);
+		else
+			s_write(s, "obs1", 4);
+
+		s_write(s, "iso2", 4);
+
+		/* Include H.264 brand if used */
+		for (size_t i = 0; i < mux->tracks.num; i++) {
+			struct mp4_track *track = &mux->tracks.array[i];
+			if (track->type == TRACK_VIDEO) {
+				if (track->codec == CODEC_H264)
+					s_write(s, "avc1", 4);
+				break;
+			}
 		}
-	}
 
-	/* General MP4 brannd */
-	s_write(s, "mp41", 4);
+		/* General MP4 brand */
+		s_write(s, "mp41", 4);
+	}
 
 	return write_box_size(s, start);
 }
@@ -136,7 +142,7 @@ static size_t mp4_write_free(struct mp4_mux *mux)
 	/* Write a 16-byte free box, so it can be replaced with a 64-bit size
 	 * box header (u32 + char[4] + u64) */
 	s_wb32(s, 16);
-	s_write(s, "free", 4);
+	s_write(s, mux->flavor == FLAVOR_MOV ? "wide" : "free", 4);
 	s_wb64(s, 0);
 
 	return 16;
@@ -269,6 +275,11 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
 
 	/* use 64-bit duration if necessary */
 	if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
+		if (mux->flavor == FLAVOR_MOV) {
+			/* QTFF does not specify how to handle 32-bit overflow for duration/timestamps. */
+			warn("Duration too large for MOV, this file may be unplayable in QuickTime!");
+		}
+
 		size = 44;
 		version = 1;
 	}
@@ -287,8 +298,8 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
 		s_wb32(s, (uint32_t)duration);           // duration
 	}
 
-	s_wb16(s, 21956); // language (undefined)
-	s_wb16(s, 0);     // pre_defined
+	s_wb16(s, mux->flavor == FLAVOR_MOV ? 32767 : 21956); // language (undefined)
+	s_wb16(s, 0);                                         // pre_defined
 
 	return size;
 }
@@ -301,10 +312,15 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
 
 	write_fullbox(s, 0, "hdlr", 0, 0);
 
-	s_wb32(s, 0); // pre_defined
+	if (mux->flavor == FLAVOR_MOV)
+		s_write(s, track ? "mhlr" : "dhlr", 4);
+	else
+		s_wb32(s, 0); // pre_defined
 
 	// handler_type
-	if (track->type == TRACK_VIDEO)
+	if (!track)
+		s_write(s, "url ", 4);
+	else if (track->type == TRACK_VIDEO)
 		s_write(s, "vide", 4);
 	else if (track->type == TRACK_CHAPTERS)
 		s_write(s, "text", 4);
@@ -315,13 +331,25 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
 	s_wb32(s, 0); // reserved
 	s_wb32(s, 0); // reserved
 
-	// name (utf-8 string, null terminated)
-	if (track->type == TRACK_VIDEO)
-		s_write(s, "OBS Video Handler", 18);
+	const char *handler_name;
+	if (!track)
+		handler_name = "OBS Data Handler";
+	else if (track->type == TRACK_VIDEO)
+		handler_name = "OBS Video Handler";
 	else if (track->type == TRACK_CHAPTERS)
-		s_write(s, "OBS Chapter Handler", 20);
+		handler_name = "OBS Chapter Handler";
 	else
-		s_write(s, "OBS Audio Handler", 18);
+		handler_name = "OBS Audio Handler";
+
+	// name (null-terminated for MP4, pascal string for MOV)
+	size_t handler_len = strlen(handler_name);
+	if (mux->flavor == FLAVOR_MOV) {
+		s_w8(s, (uint8_t)handler_len);
+		s_write(s, handler_name, handler_len);
+	} else {
+		s_write(s, handler_name, handler_len);
+		s_w8(s, 0); // NULL terminator
+	}
 
 	return write_box_size(s, start);
 }
@@ -538,9 +566,16 @@ static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux, obs_encode
 	// VisualSampleEntry Box
 	s_wb16(s, 0); // pre_defined
 	s_wb16(s, 0); // reserved
-	s_wb32(s, 0); // pre_defined
-	s_wb32(s, 0); // pre_defined
-	s_wb32(s, 0); // pre_defined
+
+	if (mux->flavor == FLAVOR_MOV) {
+		s_write(s, "OBSS", 4); // vendor
+		s_wb32(s, 0x200);      // temporal quality (codecNormalQuality = 512)
+		s_wb32(s, 0x200);      // spatial quality (codecNormalQuality)
+	} else {
+		s_wb32(s, 0); // pre_defined
+		s_wb32(s, 0); // pre_defined
+		s_wb32(s, 0); // pre_defined
+	}
 
 	s_wb16(s, (uint16_t)obs_encoder_get_width(enc));  // width
 	s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
@@ -705,6 +740,47 @@ static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
 	return write_box_size(s, start);
 }
 
+/// (QTFF/Apple) Video Sample Description
+///
+/// Writes the ProRes sample entry. The box type is taken from the integer "codec_type" encoder
+/// setting and is followed by the visual sample entry and the colr, clli, mdcv and pasp boxes.
+/// Returns the value of write_box_size() for the box.
+static size_t mp4_write_prores(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	/* Codec tag varies for ProRes depending on configuration, so we need to get it from the encoder. */
+	obs_data_t *settings = obs_encoder_get_settings(enc);
+	uint32_t codec_type = (uint32_t)obs_data_get_int(settings, "codec_type");
+	obs_data_release(settings);
+
+	/* We get the tag as an int, but need it as a char[4] with the most significant byte first.
+	 * Extracting the bytes with shifts yields the same result on every host byte order and does
+	 * not depend on the non-standard __BYTE_ORDER/__LITTLE_ENDIAN macros being defined. */
+	const char codec_tag[4] = {
+		(char)((codec_type >> 24) & 0xFF),
+		(char)((codec_type >> 16) & 0xFF),
+		(char)((codec_type >> 8) & 0xFF),
+		(char)(codec_type & 0xFF),
+	};
+
+	write_box(s, 0, codec_tag);
+
+	mp4_write_visual_sample_entry(mux, enc);
+
+	// colr
+	mp4_write_colr(mux, enc);
+
+	// clli
+	mp4_write_clli(mux, enc);
+
+	// mdcv
+	mp4_write_mdcv(mux, enc);
+
+	// pasp
+	mp4_write_pasp(mux);
+
+	return write_box_size(s, start);
+}
+
 static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
 {
 	int i = 3;
@@ -776,6 +852,8 @@ static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
 static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
 {
 	struct serializer *s = mux->serializer;
+	bool is_mov = mux->flavor == FLAVOR_MOV;
+	bool is_pcm = track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || track->codec == CODEC_PCM_F32;
 
 	// SampleEntry Box
 	s_w8(s, 0); // reserved
@@ -788,33 +866,69 @@ static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_
 	s_wb16(s, 1); // data_reference_index
 
 	// AudioSampleEntry Box
-	if (version == 1) {
-		s_wb16(s, 1); // entry_version
-		s_wb16(s, 0); // reserved
-		s_wb16(s, 0); // reserved
-		s_wb16(s, 0); // reserved
-	} else {
-		s_wb32(s, 0); // reserved
-		s_wb32(s, 0); // reserved
-	}
+	s_wb16(s, version); // entry_version
+	s_wb16(s, 0);       // reserved
+	s_wb16(s, 0);       // reserved
+	s_wb16(s, 0);       // reserved
 
 	audio_t *audio = obs_encoder_audio(track->encoder);
-	size_t channels = audio_output_get_channels(audio);
+	uint32_t channels = (uint32_t)audio_output_get_channels(audio);
 	uint32_t sample_rate = track->timescale;
 	bool alac = track->codec == CODEC_ALAC;
 
-	s_wb16(s, (uint32_t)channels); // channelcount
-
-	/* OBS FLAC is currently always 16 bit, ALAC always 24, this may change
-	 * in the futrure and should be handled differently then.
-	 * That being said thoes codecs are self-describing so in most cases it
-	 * shouldn't matter either way. */
-	s_wb16(s, alac ? 24 : 16); // samplesize
+	/* MOV specific version: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2 */
+	if (version == 2) {
+		// We need to get the raw float bytes, union seems to be the easiest way to do that.
+		union rate {
+			uint64_t u;
+			double f;
+		} rate;
+		rate.f = (double)sample_rate;
+
+		s_wb16(s, 3);          // always3
+		s_wb16(s, 16);         // always16
+		s_wb16(s, 0xfffe);     // alwaysMinus2
+		s_wb16(s, 0);          // always0
+		s_wb32(s, 0x00010000); // always65536
+		s_wb32(s, 72);         // sizeOfStructOnly (start of containing box to constLPCMFramesPerAudioPacket)
+		s_wb64(s, rate.u);     // audioSampleRate
+		s_wb32(s, channels);   // numAudioChannels
+		s_wb32(s, 0x7F000000); // always7F000000
+		s_wb32(s, is_pcm ? track->sample_size / channels * 8 : 0); // constBitsPerChannel
+		s_wb32(s, get_lpcm_flags(track->codec));                   // formatSpecificFlags
+		s_wb32(s, is_pcm ? track->sample_size : 0);                // constBytesPerAudioPacket
+		s_wb32(s, is_pcm ? 1 : 0);                                 // constLPCMFramesPerAudioPacket
+	} else {
+		s_wb16(s, channels); // channelcount
+
+		/* OBS FLAC is currently always 16-bit, ALAC always 24, this may change in the future and should be
+		 * handled differently then.
+		 * That being said those codecs are self-describing, so in most cases it shouldn't actually matter. */
+		s_wb16(s, !is_mov && alac ? 24 : 16); // samplesize
+
+		s_wb16(s, is_mov && !is_pcm ? -2 : 0); // pre_defined (compression ID in MOV)
+		s_wb16(s, 0);                          // reserved
+
+		/* The sample rate field is limited to 16-bits. Technically version 1 supports a "srat" box which
+		 * provides 32-bits, but this is not supported by most software (including FFmpeg and Chromium).
+		 * For encoded codecs (AAC etc.), the sample rate can be read from the encoded data itself.
+		 * For PCM FFmpeg will try to use the timescale as sample rate. */
+		if (sample_rate > UINT16_MAX) {
+			warn("Sample rate too high for MP4, file may not play back correctly.");
+			sample_rate = 0;
+		}
 
-	s_wb16(s, 0); // pre_defined
-	s_wb16(s, 0); // reserved
+		s_wb32(s, sample_rate << 16); // samplerate
 
-	s_wb32(s, sample_rate << 16); // samplerate
+		/* MOV-only data: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_1 */
+		if (is_mov && version == 1) {
+			size_t frame_size = obs_encoder_get_frame_size(track->encoder);
+			s_wb32(s, is_pcm ? 1 : (uint32_t)frame_size);          // frame size
+			s_wb32(s, is_pcm ? track->sample_size / channels : 0); // bytes per packet
+			s_wb32(s, is_pcm ? track->sample_size : 0);            // bytes per frame
+			s_wb32(s, 2); // bytes per sample, 2 for anything but 8-bit
+		}
+	}
 }
 
 /// 12.2.4 Channel layout
@@ -1059,6 +1173,103 @@ static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track, uint8
 	return write_box_size(s, start);
 }
 
+/// (QTFF/Apple) siDecompressionParam Atom ('wave')
+///
+/// Contents, in order: a 'frma' atom repeating the codec tag, the codec-specific decoder
+/// configuration (esds for AAC, encoder extra data for ALAC, nothing otherwise), and a
+/// terminator atom.
+static size_t mp4_write_wave(struct mp4_mux *mux, struct mp4_track *track, const char tag[4])
+{
+	struct serializer *s = mux->serializer;
+	int64_t box_start = serializer_get_pos(s);
+
+	write_box(s, 0, "wave");
+
+	/* frma atom (12 bytes in total) containing the codec tag (again) */
+	s_wb32(s, 12);
+	s_write(s, "frma", 4);
+	s_write(s, tag, 4);
+
+	switch (track->codec) {
+	case CODEC_AAC:
+		mp4_write_esds(mux, track);
+		break;
+	case CODEC_ALAC: {
+		uint8_t *cookie;
+		size_t cookie_size;
+
+		/* Apple Lossless Magic Cookie, copied as-is from the encoder's extra data */
+		if (obs_encoder_get_extra_data(track->encoder, &cookie, &cookie_size))
+			s_write(s, cookie, cookie_size);
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* Terminator atom */
+	s_wb32(s, 8); // size
+	s_wb32(s, 0); // NULL name
+
+	return write_box_size(s, box_start);
+}
+
+/// (QTFF/Apple) Audio Channel Layout Atom ('chan')
+///
+/// Writes the layout tag for the track's speaker layout, plus a channel bitmap when the tag is
+/// "use channel bitmap". Writes nothing and returns 0 if no bitmap exists for the layout.
+static size_t mp4_write_chan(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t box_start = serializer_get_pos(s);
+
+	audio_t *audio = obs_encoder_audio(track->encoder);
+	const struct audio_output_info *aoi = audio_output_get_info(audio);
+	uint32_t layout = get_mov_channel_layout(track->codec, aoi->speakers);
+	uint32_t bitmap = 0;
+
+	/* The bitmap is only looked up when no dedicated layout tag applies. */
+	if (layout == kAudioChannelLayoutTag_UseChannelBitmap) {
+		bitmap = get_mov_channel_bitmap(aoi->speakers);
+		if (!bitmap) {
+			warn("No valid speaker layout found, not writing chan box. File may not play back correctly!");
+			return 0;
+		}
+	}
+
+	write_fullbox(s, 0, "chan", 0, 0);
+	/* AudioChannelLayout from CoreAudioTypes.h */
+	s_wb32(s, layout); // mChannelLayoutTag
+	s_wb32(s, bitmap); // mChannelBitmap
+	s_wb32(s, 0);      // mNumberChannelDescriptions
+
+	return write_box_size(s, box_start);
+}
+
+/// (QTFF/Apple) Sound Sample Description (v1 and v2)
+///
+/// Writes the MOV sample description for an audio track: the audio sample entry, the 'wave' atom
+/// carrying the decoder configuration of compressed codecs, and the 'chan' atom.
+/// Returns 0 without writing anything when the track's codec has no tag assigned here.
+static size_t mp4_write_mov_audio_tag(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	const char *tag = NULL;
+	audio_t *audio = obs_encoder_audio(track->encoder);
+	uint32_t sample_rate = audio_output_get_sample_rate(audio);
+	size_t channels = audio_output_get_channels(audio);
+	bool is_pcm = track->codec == CODEC_PCM_F32 || track->codec == CODEC_PCM_I16 ||
+		      track->codec == CODEC_PCM_I24;
+	/* More than 2 channels or sample rates above 65535 Hz require v2 */
+	uint8_t version = (channels > 2 || sample_rate > UINT16_MAX) ? 2 : 1;
+
+	if (is_pcm) {
+		tag = "lpcm";
+		version = 2; /* lpcm also requires v2 */
+	} else if (track->codec == CODEC_AAC) {
+		tag = "mp4a";
+	} else if (track->codec == CODEC_ALAC) {
+		tag = "alac";
+	}
+
+	/* Unsupported/Unknown codec */
+	if (!tag)
+		return 0;
+
+	write_box(s, 0, tag);
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	/* wave: this is the only place the AAC esds / ALAC magic cookie is written for MOV, so it
+	 * is emitted for every compressed codec regardless of the sample description version.
+	 * Gating it on version 1 left multichannel AAC/ALAC without any decoder configuration. */
+	if (!is_pcm)
+		mp4_write_wave(mux, track, tag);
+
+	// chan
+	mp4_write_chan(mux, track);
+
+	return write_box_size(s, start);
+}
+
 /// 8.5.2 Sample Description Box
 static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
 {
@@ -1069,7 +1280,7 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
 	 * but in practice that doesn't appear to matter. */
 	uint8_t version = 0;
 
-	if (track->type == TRACK_AUDIO) {
+	if (track->type == TRACK_AUDIO && mux->flavor != FLAVOR_MOV) {
 		audio_t *audio = obs_encoder_audio(track->encoder);
 		version = audio_output_get_channels(audio) > 2 ? 1 : 0;
 	}
@@ -1086,18 +1297,24 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
 			mp4_write_hvc1(mux, track->encoder);
 		else if (track->codec == CODEC_AV1)
 			mp4_write_av01(mux, track->encoder);
+		else if (track->codec == CODEC_PRORES)
+			mp4_write_prores(mux, track->encoder);
 	} else if (track->type == TRACK_AUDIO) {
-		if (track->codec == CODEC_AAC)
-			mp4_write_mp4a(mux, track, version);
-		else if (track->codec == CODEC_OPUS)
-			mp4_write_Opus(mux, track, version);
-		else if (track->codec == CODEC_FLAC)
-			mp4_write_fLaC(mux, track, version);
-		else if (track->codec == CODEC_ALAC)
-			mp4_write_alac(mux, track, version);
-		else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 ||
-			 track->codec == CODEC_PCM_F32)
-			mp4_write_xpcm(mux, track, version);
+		if (mux->flavor == FLAVOR_MOV) {
+			mp4_write_mov_audio_tag(mux, track);
+		} else {
+			if (track->codec == CODEC_AAC)
+				mp4_write_mp4a(mux, track, version);
+			else if (track->codec == CODEC_OPUS)
+				mp4_write_Opus(mux, track, version);
+			else if (track->codec == CODEC_FLAC)
+				mp4_write_fLaC(mux, track, version);
+			else if (track->codec == CODEC_ALAC)
+				mp4_write_alac(mux, track, version);
+			else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 ||
+				 track->codec == CODEC_PCM_F32)
+				mp4_write_xpcm(mux, track, version);
+		}
 	} else if (track->type == TRACK_CHAPTERS) {
 		mp4_write_text(mux);
 	}
@@ -1419,8 +1636,8 @@ static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track, bool
 	// stts
 	mp4_write_stts(mux, track, fragmented);
 
-	// stss (non-fragmented only)
-	if (track->type == TRACK_VIDEO && !fragmented)
+	// stss (non-fragmented/non-prores only)
+	if (track->type == TRACK_VIDEO && !fragmented && track->codec != CODEC_PRORES)
 		mp4_write_stss(mux, track);
 
 	// ctts (non-fragmented only)
@@ -1510,6 +1727,10 @@ static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track, bool
 	else
 		mp4_write_smhd(mux);
 
+	// hdlr for dinf, required in MOV only
+	if (mux->flavor == FLAVOR_MOV)
+		mp4_write_hdlr(mux, NULL);
+
 	// dinf, unnecessary but mandatory
 	mp4_write_dinf(mux);
 
@@ -1764,6 +1985,22 @@ static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
 	return size;
 }
 
+/// (QTFF/Apple) String atom
+///
+/// Layout: box header with the given name, 16-bit string length, 16-bit language code, then the
+/// string bytes. The length is the strlen() of `data` cast to 16 bits.
+static size_t mp4_write_string_data_atom(struct mp4_mux *mux, const char name[4], const char *data)
+{
+	struct serializer *s = mux->serializer;
+	const int64_t box_start = serializer_get_pos(s);
+	const uint16_t text_len = (uint16_t)strlen(data);
+
+	write_box(s, 0, name);
+
+	s_wb16(s, text_len);        // String length
+	s_write(s, "\x55\xC4", 2);  // language code, just using undefined
+	s_write(s, data, text_len); // Note: No NULL terminator
+
+	return write_box_size(s, box_start);
+}
+
 /// (QTFF/Apple) Metadata item atom
 static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4], const char *value)
 {
@@ -1939,8 +2176,21 @@ static size_t mp4_write_udta(struct mp4_mux *mux)
 	/* Normally metadata would be directly in the moov, but since this is
 	 * Apple/QTFF format metadata it is inside udta. */
 
-	// meta
-	mp4_write_meta(mux);
+	if (mux->flavor == FLAVOR_MOV && !(mux->flags & MP4_USE_MDTA_KEY_VALUE)) {
+		// keys directly in udta atom
+		struct dstr value = {0};
+
+		/* Encoder name */
+		dstr_cat(&value, "OBS Studio (");
+		dstr_cat(&value, obs_get_version_string());
+		dstr_cat(&value, ")");
+		mp4_write_string_data_atom(mux, "\251swr", value.array);
+
+		dstr_free(&value);
+	} else {
+		// meta
+		mp4_write_meta(mux);
+	}
 
 	return write_box_size(s, start);
 }
@@ -2489,6 +2739,8 @@ static inline enum mp4_codec get_codec(obs_encoder_t *enc)
 		return CODEC_HEVC;
 	if (strcmp(codec, "av1") == 0)
 		return CODEC_AV1;
+	if (strcmp(codec, "prores") == 0)
+		return CODEC_PRORES;
 	if (strcmp(codec, "aac") == 0)
 		return CODEC_AAC;
 	if (strcmp(codec, "opus") == 0)
@@ -2582,16 +2834,24 @@ static inline void free_track(struct mp4_track *track)
 /* ===========================================================================*/
 /* API */
 
-struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags)
+struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags,
+			       enum mp4_flavor flavor)
 {
 	struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));
 
 	mux->output = output;
 	mux->serializer = serializer;
 	mux->flags = flags;
+	mux->flavor = flavor;
 	/* Timestamp is based on 1904 rather than 1970. */
 	mux->creation_time = time(NULL) + 0x7C25B080;
 
+	if (flavor == FLAVOR_MOV && mux->creation_time > UINT32_MAX) {
+		/* This will only happen in 2040 but better safe than sorry! */
+		warn("Creation time too large for MOV, setting to 0 (unset).");
+		mux->creation_time = 0;
+	}
+
 	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
 		obs_encoder_t *enc = obs_output_get_video_encoder2(output, i);
 		if (!enc)
@@ -2657,6 +2917,8 @@ bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
 			obs_parse_hevc_packet(&parsed_packet, pkt);
 		else if (track->codec == CODEC_AV1)
 			obs_parse_av1_packet(&parsed_packet, pkt);
+		else if (track->codec == CODEC_PRORES)
+			obs_encoder_packet_ref(&parsed_packet, pkt);
 
 		/* Set fragmentation PTS if packet is keyframe and PTS > 0 */
 		if (parsed_packet.keyframe && parsed_packet.pts > 0) {
@@ -2705,7 +2967,7 @@ bool mp4_mux_finalise(struct mp4_mux *mux)
 	info("Number of fragments: %u", mux->fragments_written);
 
 	if (mux->flags & MP4_SKIP_FINALISATION) {
-		warn("Skipping MP4 finalization!");
+		warn("Skipping finalization!");
 		return true;
 	}
 

+ 9 - 1
plugins/obs-outputs/mp4-mux.h

@@ -22,6 +22,13 @@
 
 struct mp4_mux;
 
+/* Flavor for target compatibility; passed by the caller to mp4_mux_create() */
+enum mp4_flavor {
+	FLAVOR_MP4,  /* ISO/IEC 14496-12 */
+	FLAVOR_MOV,  /* Apple QuickTime */
+	FLAVOR_CMAF, /* ISO/IEC 23000-19 (not yet implemented) */
+};
+
 enum mp4_mux_flags {
 	/* Uses mdta key/value list for metadata instead of QuickTime keys */
 	MP4_USE_MDTA_KEY_VALUE = 1 << 0,
@@ -33,7 +40,8 @@ enum mp4_mux_flags {
 	MP4_USE_NEGATIVE_CTS = 1 << 3,
 };
 
-struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags);
+struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags,
+			       enum mp4_flavor flavor);
 void mp4_mux_destroy(struct mp4_mux *mux);
 bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt);
 bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec, const char *name);

+ 45 - 11
plugins/obs-outputs/mp4-output.c

@@ -28,8 +28,9 @@
 
 #include <opts-parser.h>
 
-#define do_log(level, format, ...) \
-	blog(level, "[mp4 output: '%s'] " format, obs_output_get_name(out->output), ##__VA_ARGS__)
+#define do_log(level, format, ...)                                                                 \
+	blog(level, "[%s output: '%s'] " format, out->muxer_flavor == FLAVOR_MOV ? "mov" : "mp4", \
+	     obs_output_get_name(out->output), ##__VA_ARGS__) /* needs an `out` in the expanding scope; prefix is "mov" for FLAVOR_MOV, "mp4" for any other flavor */
 
 #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
 #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
@@ -58,6 +59,7 @@ struct mp4_output {
 	pthread_mutex_t mutex;
 
 	struct mp4_mux *muxer;
+	enum mp4_flavor muxer_flavor;
 	int flags;
 
 	size_t chapter_ctr;
@@ -140,6 +142,12 @@ static const char *mp4_output_name(void *unused)
 	return obs_module_text("MP4Output");
 }
 
+static const char *mov_output_name(void *unused)
+{
+	UNUSED_PARAMETER(unused); /* the argument is intentionally ignored */
+	return obs_module_text("MOVOutput"); /* presumably resolves the "MOVOutput" locale key; confirm against obs_module_text */
+}
+
 static void mp4_clear_chapters(struct mp4_output *out)
 {
 	while (out->chapters.size) {
@@ -233,10 +241,11 @@ static void split_file_proc(void *data, calldata_t *cd)
 	os_atomic_set_bool(&out->manual_split, true);
 }
 
-static void *mp4_output_create(obs_data_t *settings, obs_output_t *output)
+static void *mp4_output_create_internal(obs_data_t *settings, obs_output_t *output, enum mp4_flavor flavor)
 {
 	struct mp4_output *out = bzalloc(sizeof(struct mp4_output));
 	out->output = output;
+	out->muxer_flavor = flavor;
 	pthread_mutex_init(&out->mutex, NULL);
 
 	signal_handler_t *sh = obs_output_get_signal_handler(output);
@@ -250,6 +259,16 @@ static void *mp4_output_create(obs_data_t *settings, obs_output_t *output)
 	return out;
 }
 
+static void *mp4_output_create(obs_data_t *settings, obs_output_t *output)
+{
+	return mp4_output_create_internal(settings, output, FLAVOR_MP4); /* MP4 entry point: same creation path, flavor fixed to FLAVOR_MP4 */
+}
+
+static void *mov_output_create(obs_data_t *settings, obs_output_t *output)
+{
+	return mp4_output_create_internal(settings, output, FLAVOR_MOV); /* MOV entry point: same creation path, flavor fixed to FLAVOR_MOV */
+}
+
 static inline void apply_flag(int *flags, const char *value, int flag_value)
 {
 	if (atoi(value))
@@ -325,7 +344,7 @@ static bool mp4_output_start(void *data)
 	obs_data_release(settings);
 
 	if (!buffered_file_serializer_init(&out->serializer, out->path.array, out->buffer_size, out->chunk_size)) {
-		warn("Unable to open MP4 file '%s'", out->path.array);
+		warn("Unable to open file '%s'", out->path.array);
 		return false;
 	}
 
@@ -333,11 +352,11 @@ static bool mp4_output_start(void *data)
 	obs_output_add_packet_callback(out->output, mp4_pkt_callback, (void *)out);
 
 	/* Initialise muxer and start capture */
-	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags);
+	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavor);
 	os_atomic_set_bool(&out->active, true);
 	obs_output_begin_data_capture(out->output, 0);
 
-	info("Writing Hybrid MP4 file '%s'...", out->path.array);
+	info("Writing Hybrid MP4/MOV file '%s'...", out->path.array);
 	return true;
 }
 
@@ -436,18 +455,18 @@ static bool change_file(struct mp4_output *out, struct encoder_packet *pkt)
 	mp4_mux_destroy(out->muxer);
 	mp4_clear_chapters(out);
 
-	info("MP4 file split complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000);
+	info("File split complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000);
 
 	/* open new file */
 	generate_filename(out, &out->path, out->allow_overwrite);
 	info("Changing output file to '%s'", out->path.array);
 
 	if (!buffered_file_serializer_init(&out->serializer, out->path.array, out->buffer_size, out->chunk_size)) {
-		warn("Unable to open MP4 file '%s'", out->path.array);
+		warn("Unable to open file '%s'", out->path.array);
 		return false;
 	}
 
-	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags);
+	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavor);
 
 	calldata_t cd = {0};
 	signal_handler_t *sh = obs_output_get_signal_handler(out->output);
@@ -500,7 +519,7 @@ static void mp4_output_actual_stop(struct mp4_output *out, int code)
 	/* Clear chapter data */
 	mp4_clear_chapters(out);
 
-	info("MP4 file output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000);
+	info("File output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000);
 }
 
 static void push_back_packet(struct mp4_output *out, struct encoder_packet *packet)
@@ -624,7 +643,7 @@ struct obs_output_info mp4_output_info = {
 	.id = "mp4_output",
 	.flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE,
 	.encoded_video_codecs = "h264;hevc;av1",
-	.encoded_audio_codecs = "aac",
+	.encoded_audio_codecs = "aac;alac;flac;opus",
 	.get_name = mp4_output_name,
 	.create = mp4_output_create,
 	.destroy = mp4_output_destroy,
@@ -634,3 +653,18 @@ struct obs_output_info mp4_output_info = {
 	.get_properties = mp4_output_properties,
 	.get_total_bytes = mp4_output_total_bytes,
 };
+
+struct obs_output_info mov_output_info = { /* output registration for the MOV flavor */
+	.id = "mov_output",
+	.flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE,
+	.encoded_video_codecs = "h264;hevc;prores",
+	.encoded_audio_codecs = "aac;alac",
+	.get_name = mov_output_name, /* MOV-specific */
+	.create = mov_output_create, /* MOV-specific: creates the output with FLAVOR_MOV */
+	.destroy = mp4_output_destroy, /* this and the callbacks below are the mp4_output_* functions */
+	.start = mp4_output_start,
+	.stop = mp4_output_stop,
+	.encoded_packet = mp4_output_packet,
+	.get_properties = mp4_output_properties,
+	.get_total_bytes = mp4_output_total_bytes,
+};

+ 2 - 0
plugins/obs-outputs/obs-outputs.c

@@ -16,6 +16,7 @@ extern struct obs_output_info rtmp_output_info;
 extern struct obs_output_info null_output_info;
 extern struct obs_output_info flv_output_info;
 extern struct obs_output_info mp4_output_info;
+extern struct obs_output_info mov_output_info;
 
 #if defined(_WIN32) && defined(MBEDTLS_THREADING_ALT)
 void mbed_mutex_init(mbedtls_threading_mutex_t *m)
@@ -63,6 +64,7 @@ bool obs_module_load(void)
 	obs_register_output(&null_output_info);
 	obs_register_output(&flv_output_info);
 	obs_register_output(&mp4_output_info);
+	obs_register_output(&mov_output_info);
 	return true;
 }