Просмотр исходного кода

obs-outputs: Add native MP4 muxer

derrod 1 год назад
Родитель
Сommit
df2a75fe4b

+ 5 - 0
plugins/obs-outputs/CMakeLists.txt

@@ -36,6 +36,10 @@ target_sources(
           librtmp/rtmp.c
           librtmp/rtmp.h
           librtmp/rtmp_sys.h
+          mp4-mux-internal.h
+          mp4-mux.c
+          mp4-mux.h
+          mp4-output.c
           net-if.c
           net-if.h
           null-output.c
@@ -60,6 +64,7 @@ target_link_libraries(
   obs-outputs
   PRIVATE OBS::libobs
           OBS::happy-eyeballs
+          OBS::opts-parser
           MbedTLS::MbedTLS
           ZLIB::ZLIB
           $<$<PLATFORM_ID:Windows>:OBS::w32-pthreads>

+ 5 - 0
plugins/obs-outputs/data/locale/en-US.ini

@@ -7,6 +7,11 @@ FLVOutput="FLV File Output"
 FLVOutput.FilePath="File Path"
 Default="Default"
 
+MP4Output="MP4 File Output"
+MP4Output.FilePath="File Path"
+MP4Output.StartChapter="Start"
+MP4Output.UnnamedChapter="Unnamed"
+
 IPFamily="IP Address Family"
 IPFamily.Both="IPv4 and IPv6 (Default)"
 IPFamily.V4Only="IPv4 Only"

+ 345 - 0
plugins/obs-outputs/mp4-mux-internal.h

@@ -0,0 +1,345 @@
+/******************************************************************************
+    Copyright (C) 2024 by Dennis Sädtler <[email protected]>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#pragma once
+
+#include "mp4-mux.h"
+
+#include <util/darray.h>
+#include <util/deque.h>
+#include <util/serializer.h>
+
+/* Flavour for target compatibility */
+enum mp4_flavour {
+	MP4,  /* ISO/IEC 14496-12 */
+	MOV,  /* Apple QuickTime */
+	CMAF, /* ISO/IEC 23000-19 */
+};
+
+enum mp4_track_type {
+	TRACK_UNKNOWN,
+	TRACK_VIDEO,
+	TRACK_AUDIO,
+	TRACK_CHAPTERS,
+};
+
+enum mp4_codec {
+	CODEC_UNKNOWN,
+
+	/* Video Codecs */
+	CODEC_H264,
+	CODEC_HEVC,
+	CODEC_AV1,
+
+	/* Audio Codecs */
+	CODEC_AAC,
+	CODEC_OPUS,
+	CODEC_FLAC,
+	CODEC_ALAC,
+	CODEC_PCM_I16,
+	CODEC_PCM_I24,
+	CODEC_PCM_F32,
+
+	/* Text/Chapter trakcs */
+	CODEC_TEXT,
+};
+
+struct chunk {
+	uint64_t offset;
+	uint32_t size;
+	uint32_t samples;
+};
+
+struct sample_delta {
+	uint32_t count;
+	uint32_t delta;
+};
+
+struct sample_offset {
+	uint32_t count;
+	int32_t offset;
+};
+
+struct fragment_sample {
+	uint32_t size;
+	int32_t offset;
+	uint32_t duration;
+};
+
+struct mp4_track {
+	enum mp4_track_type type;
+	enum mp4_codec codec;
+
+	/* Track ID in container */
+	uint8_t track_id;
+	/* Number of samples for this track  */
+	uint64_t samples;
+	/* Duration for this track  */
+	uint64_t duration;
+
+	/* Encoder associated with this track */
+	obs_encoder_t *encoder;
+
+	/* Time Base (1/FPS for video, 1/sample rate for audio) */
+	uint32_t timebase_num;
+	uint32_t timebase_den;
+	/* Output timescale calculated from time base (Video only) */
+	uint32_t timescale;
+
+	/* First PTS this track has seen (in track timescale) */
+	int64_t first_pts;
+	/* Highest PTS this track has seen (in usec) */
+	int64_t last_pts_usec;
+
+	/* deque of encoder_packet belonging to this track */
+	struct deque packets;
+
+	/* Sample sizes (fixed for PCM) */
+	uint32_t sample_size;
+	DARRAY(uint32_t) sample_sizes;
+	/* Data chunks in file containing samples for this track */
+	DARRAY(struct chunk) chunks;
+	/* Time delta between samples */
+	DARRAY(struct sample_delta) deltas;
+
+	/* Sample CT-DT offset, i.e. DTS-PTS offset (Video only) */
+	bool needs_ctts;
+	int32_t dts_offset;
+	DARRAY(struct sample_offset) offsets;
+	/* Sync samples, i.e. keyframes (Video only) */
+	DARRAY(uint32_t) sync_samples;
+
+	/* Temporary array with information about the samples to be included
+	 * in the next fragment. */
+	DARRAY(struct fragment_sample) fragment_samples;
+};
+
+struct mp4_mux {
+	obs_output_t *output;
+	struct serializer *serializer;
+
+	/* Target format compatibility */
+	enum mp4_flavour mode;
+
+	/* Flags */
+	enum mp4_mux_flags flags;
+
+	uint32_t fragments_written;
+	/* PTS where next fragmentation should take place */
+	int64_t next_frag_pts;
+
+	/* Creation time (seconds since Jan 1 1904) */
+	uint64_t creation_time;
+
+	/* Offset of placeholder atom/box to contain final mdat header */
+	size_t placeholder_offset;
+
+	uint8_t track_ctr;
+	/* Audio/Video tracks */
+	DARRAY(struct mp4_track) tracks;
+	/* Special tracks */
+	struct mp4_track *chapter_track;
+};
+
+/* clang-format off */
+// Defined in ISO/IEC 14496-12:2015 Section 8.2.2.1
+const int32_t UNITY_MATRIX[9] = {
+	0x00010000,	0,		0,
+	0,		0x00010000,	0,
+	0,		0,		0x40000000
+};
+/* clang-format on */
+
+enum tfhd_flags {
+	BASE_DATA_OFFSET_PRESENT = 0x000001,
+	SAMPLE_DESCRIPTION_INDEX_PRESENT = 0x000002,
+	DEFAULT_SAMPLE_DURATION_PRESENT = 0x000008,
+	DEFAULT_SAMPLE_SIZE_PRESENT = 0x000010,
+	DEFAULT_SAMPLE_FLAGS_PRESENT = 0x000020,
+	DURATION_IS_EMPTY = 0x010000,
+	DEFAULT_BASE_IS_MOOF = 0x020000,
+};
+
+enum trun_flags {
+	DATA_OFFSET_PRESENT = 0x000001,
+	FIRST_SAMPLE_FLAGS_PRESENT = 0x000004,
+	SAMPLE_DURATION_PRESENT = 0x000100,
+	SAMPLE_SIZE_PRESENT = 0x000200,
+	SAMPLE_FLAGS_PRESENT = 0x000400,
+	SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT = 0x000800,
+};
+
+/*
+ * ISO Standard structure (big endian so we can't easily use it):
+ *
+ * struct sample_flags {
+ * 	uint32_t reserved : 4;
+ * 	uint32_t is_leading : 2;
+ *	uint32_t sample_depends_on : 2;
+ *	uint32_t sample_is_depended_on : 2;
+ *	uint32_t sample_has_redundancy : 2;
+ *	uint32_t sample_padding_value : 3;
+ *	uint32_t sample_is_non_sync_sample : 1;
+ *	uint32_t sample_degradation_priority : 16;
+};
+*/
+
+enum sample_flags {
+	SAMPLE_FLAG_IS_NON_SYNC = 0x00010000,
+	SAMPLE_FLAG_DEPENDS_YES = 0x01000000,
+	SAMPLE_FLAG_DEPENDS_NO = 0x02000000,
+};
+
+#ifndef _WIN32
+static inline size_t min(size_t a, size_t b)
+{
+	return a < b ? a : b;
+}
+#endif
+
+static inline void get_speaker_positions(enum speaker_layout layout,
+					 uint8_t *arr, uint8_t *size,
+					 uint8_t *iso_layout)
+{
+	switch (layout) {
+	case SPEAKERS_MONO:
+		arr[0] = 2; // FC
+		*size = 1;
+		*iso_layout = 1;
+		break;
+	case SPEAKERS_UNKNOWN:
+	case SPEAKERS_STEREO:
+		arr[0] = 0; // FL
+		arr[1] = 1; // FR
+		*size = 2;
+		*iso_layout = 2;
+		break;
+	case SPEAKERS_2POINT1:
+		arr[0] = 0; // FL
+		arr[1] = 1; // FR
+		arr[2] = 3; // LFE
+		*size = 3;
+		break;
+	case SPEAKERS_4POINT0:
+		arr[0] = 0;  // FL
+		arr[1] = 1;  // FR
+		arr[2] = 2;  // FC
+		arr[3] = 10; // RC
+		*size = 4;
+		*iso_layout = 4;
+		break;
+	case SPEAKERS_4POINT1:
+		arr[0] = 0;  // FL
+		arr[1] = 1;  // FR
+		arr[2] = 2;  // FC
+		arr[3] = 3;  // LFE
+		arr[4] = 10; // RC
+		*size = 5;
+		break;
+	case SPEAKERS_5POINT1:
+		arr[0] = 0; // FL
+		arr[1] = 1; // FR
+		arr[2] = 2; // FC
+		arr[3] = 3; // LFE
+		arr[4] = 8; // RL
+		arr[5] = 9; // RR
+		*size = 6;
+		break;
+	case SPEAKERS_7POINT1:
+		arr[0] = 0;  // FL
+		arr[1] = 1;  // FR
+		arr[2] = 2;  // FC
+		arr[3] = 3;  // LFE
+		arr[4] = 8;  // RL
+		arr[5] = 9;  // RR
+		arr[6] = 13; // SL
+		arr[7] = 14; // SR
+		*size = 8;
+		*iso_layout = 12;
+		break;
+	}
+}
+
+static inline void get_colour_information(obs_encoder_t *enc, uint16_t *pri,
+					  uint16_t *trc, uint16_t *spc,
+					  uint8_t *full_range)
+{
+	video_t *video = obs_encoder_video(enc);
+	const struct video_output_info *info = video_output_get_info(video);
+
+	*full_range = info->range == VIDEO_RANGE_FULL ? 1 : 0;
+
+	switch (info->colorspace) {
+	case VIDEO_CS_601:
+		*pri = 6; // OBSCOL_PRI_SMPTE170M
+		*trc = 6;
+		*spc = 6;
+		break;
+	case VIDEO_CS_DEFAULT:
+	case VIDEO_CS_709:
+		*pri = 1; // OBSCOL_PRI_BT709
+		*trc = 1;
+		*spc = 1;
+		break;
+	case VIDEO_CS_SRGB:
+		*pri = 1;  // OBSCOL_PRI_BT709
+		*trc = 13; // OBSCOL_TRC_IEC61966_2_1
+		*spc = 1;  // OBSCOL_PRI_BT709
+		break;
+	case VIDEO_CS_2100_PQ:
+		*pri = 9;  // OBSCOL_PRI_BT2020
+		*trc = 16; // OBSCOL_TRC_SMPTE2084
+		*spc = 9;  // OBSCOL_SPC_BT2020_NCL
+		break;
+	case VIDEO_CS_2100_HLG:
+		*pri = 9;  // OBSCOL_PRI_BT2020
+		*trc = 18; // OBSCOL_TRC_ARIB_STD_B67
+		*spc = 9;  // OBSCOL_SPC_BT2020_NCL
+	}
+}
+
+/* Chapter stubs (from libavformat/movenc.c) */
+
+static const uint8_t TEXT_STUB_HEADER[] = {
+	// TextSampleEntry
+	0x00, 0x00, 0x00, 0x01, // displayFlags
+	0x00, 0x00,             // horizontal + vertical justification
+	0x00, 0x00, 0x00, 0x00, // bgColourRed/Green/Blue/Alpha
+	// BoxRecord
+	0x00, 0x00, 0x00, 0x00, // defTextBoxTop/Left
+	0x00, 0x00, 0x00, 0x00, // defTextBoxBottom/Right
+	// StyleRecord
+	0x00, 0x00, 0x00, 0x00, // startChar + endChar
+	0x00, 0x01,             // fontID
+	0x00, 0x00,             // fontStyleFlags + fontSize
+	0x00, 0x00, 0x00, 0x00, // fgColourRed/Green/Blue/Alpha
+	// FontTableBox
+	0x00, 0x00, 0x00, 0x0D, // box size
+	'f', 't', 'a', 'b',     // box atom name
+	0x00, 0x01,             // entry count
+	// FontRecord
+	0x00, 0x01, // font ID
+	0x00,       // font name length
+};
+
+/* clang-format off */
+static const char CHAPTER_PKT_FOOTER[12] = {
+	0x00, 0x00, 0x00, 0x0C,
+	'e',  'n',  'c',  'd',
+	0x00, 0x00, 0x01, 0x00
+};
+/* clang-format on */

+ 2843 - 0
plugins/obs-outputs/mp4-mux.c

@@ -0,0 +1,2843 @@
+/******************************************************************************
+    Copyright (C) 2024 by Dennis Sädtler <[email protected]>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#include "mp4-mux-internal.h"
+
+#include "rtmp-hevc.h"
+#include "rtmp-av1.h"
+
+#include <obs-avc.h>
+#include <obs-hevc.h>
+#include <obs-module.h>
+#include <util/dstr.h>
+#include <util/platform.h>
+#include <util/array-serializer.h>
+
+#include <time.h>
+
+/*
+ * (Mostly) compliant MP4 muxer for fun and profit.
+ * Based on ISO/IEC 14496-12 and FFmpeg's libavformat/movenc.c ([L]GPL)
+ *
+ * Specification section numbers are noted where applicable.
+ * Standard identifier is included if not referring to ISO/IEC 14496-12.
+ */
+
+#define do_log(level, format, ...)               \
+	blog(level, "[mp4 muxer: '%s'] " format, \
+	     obs_output_get_name(mux->output), ##__VA_ARGS__)
+
+#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
+#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
+
+/* Helper to overwrite placeholder size and return total size. */
+static inline size_t write_box_size(struct serializer *s, int64_t start)
+{
+	int64_t end = serializer_get_pos(s);
+	size_t size = end - start;
+
+	serializer_seek(s, start, SERIALIZE_SEEK_START);
+	s_wb32(s, (uint32_t)size);
+	serializer_seek(s, end, SERIALIZE_SEEK_START);
+
+	return size;
+}
+
+/// 4.2 Box header with size and char[4] name
+static inline void write_box(struct serializer *s, const size_t size,
+			     const char name[4])
+{
+	if (size <= UINT32_MAX) {
+		s_wb32(s, (uint32_t)size); // size
+		s_write(s, name, 4);       // boxtype
+	} else {
+		s_wb32(s, 1);        // size
+		s_write(s, name, 4); // boxtype
+		s_wb64(s, size);     // largesize
+	}
+}
+
+/// 4.2 FullBox extended header with u8 version and u24 flags
+static inline void write_fullbox(struct serializer *s, const size_t size,
+				 const char name[4], uint8_t version,
+				 uint32_t flags)
+{
+	write_box(s, size, name);
+	s_w8(s, version);
+	s_wb24(s, flags);
+}
+
+/// 4.3 File Type Box
+static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "ftyp");
+
+	const char *major_brand = "isom";
+	/* Following FFmpeg's example, when using negative CTS the major brand
+	 * needs to be either iso4 or iso6 depending on whether the file is
+	 * currently fragmented. */
+	if (mux->flags & MP4_USE_NEGATIVE_CTS)
+		major_brand = fragmented ? "iso6" : "iso4";
+
+	s_write(s, major_brand, 4); // major brand
+	s_wb32(s, 512);             // minor version
+
+	// minor brands (first one matches major brand)
+	s_write(s, major_brand, 4);
+
+	/* Write isom base brand if it's not the major brand */
+	if (strcmp(major_brand, "isom") != 0)
+		s_write(s, "isom", 4);
+
+	/* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
+	 * as a placeholder to maintain ftyp box size. */
+	if (fragmented && strcmp(major_brand, "iso6") != 0)
+		s_write(s, "iso6", 4);
+	else
+		s_write(s, "obs1", 4);
+
+	s_write(s, "iso2", 4);
+
+	/* Include H.264 brand if used */
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		if (track->type == TRACK_VIDEO) {
+			if (track->codec == CODEC_H264)
+				s_write(s, "avc1", 4);
+			break;
+		}
+	}
+
+	/* General MP4 brannd */
+	s_write(s, "mp41", 4);
+
+	return write_box_size(s, start);
+}
+
+/// 8.1.2 Free Space Box
+static size_t mp4_write_free(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	/* Write a 16-byte free box, so it can be replaced with a 64-bit size
+	 * box header (u32 + char[4] + u64) */
+	s_wb32(s, 16);
+	s_write(s, "free", 4);
+	s_wb64(s, 0);
+
+	return 16;
+}
+
+/// 8.2.2 Movie Header Box
+static size_t mp4_write_mvhd(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	size_t start = serializer_get_pos(s);
+
+	/* Use primary video track as the baseline for duration */
+	uint64_t duration = 0;
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		if (track->type == TRACK_VIDEO) {
+			duration = util_mul_div64(track->duration, 1000,
+						  track->timebase_den);
+			break;
+		}
+	}
+
+	write_fullbox(s, 0, "mvhd", 0, 0);
+
+	if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
+		s_wb64(s, mux->creation_time); // creation time
+		s_wb64(s, mux->creation_time); // modification time
+		s_wb32(s, 1000);               // timescale
+		s_wb64(s, duration);           // duration (0 for fragmented)
+	} else {
+		s_wb32(s, (uint32_t)mux->creation_time); // creation time
+		s_wb32(s, (uint32_t)mux->creation_time); // modification time
+		s_wb32(s, 1000);                         // timescale
+		s_wb32(s, (uint32_t)duration); // duration (0 for fragmented)
+	}
+
+	s_wb32(s, 0x00010000); // rate, 16.16 fixed float (1 << 16)
+	s_wb16(s, 0x0100);     // volume
+
+	s_wb16(s, 0); // reserved
+	s_wb32(s, 0); // reserved
+	s_wb32(s, 0); // reserved
+
+	// Matrix
+	for (int i = 0; i < 9; i++)
+		s_wb32(s, UNITY_MATRIX[i]);
+
+	// pre_defined
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+
+	s_wb32(s, mux->track_ctr + 1); // next_track_ID
+
+	return write_box_size(s, start);
+}
+
+/// 8.3.2 Track Header Box
+static size_t mp4_write_tkhd(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	size_t start = serializer_get_pos(s);
+
+	uint64_t duration =
+		util_mul_div64(track->duration, 1000, track->timebase_den);
+
+	/* Flags are 0x1 (enabled) | 0x2 (in movie) */
+	static const uint32_t flags = 0x1 | 0x2;
+	write_fullbox(s, 0, "tkhd", 0, flags);
+
+	if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
+		s_wb64(s, mux->creation_time); // creation time
+		s_wb64(s, mux->creation_time); // modification time
+		s_wb32(s, track->track_id);    // track_id
+		s_wb32(s, 0);                  // reserved
+		s_wb64(s, duration);           // duration in movie timescale
+	} else {
+		s_wb32(s, (uint32_t)mux->creation_time); // creation time
+		s_wb32(s, (uint32_t)mux->creation_time); // modification time
+		s_wb32(s, track->track_id);              // track_id
+		s_wb32(s, 0);                            // reserved
+		s_wb32(s, (uint32_t)duration); // duration in movie timescale
+	}
+
+	s_wb32(s, 0);                                      // reserved
+	s_wb32(s, 0);                                      // reserved
+	s_wb16(s, 0);                                      // layer
+	s_wb16(s, track->type == TRACK_AUDIO ? 1 : 0);     // alternate group
+	s_wb16(s, track->type == TRACK_AUDIO ? 0x100 : 0); // volume
+	s_wb16(s, 0);                                      // reserved
+
+	// Matrix (predefined)
+	for (int i = 0; i < 9; i++)
+		s_wb32(s, UNITY_MATRIX[i]);
+
+	if (track->type == TRACK_AUDIO) {
+		s_wb32(s, 0); // width
+		s_wb32(s, 0); // height
+	} else {
+		/* width/height are fixed point 16.16, so we just shift the
+		 * integer to the upper 16 bits */
+		uint32_t width = obs_encoder_get_width(track->encoder);
+		s_wb32(s, width << 16);
+		uint32_t height = obs_encoder_get_height(track->encoder);
+		s_wb32(s, height << 16);
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.4.2 Media Header Box
+static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+
+	size_t size = 32;
+	uint8_t version = 0;
+	uint64_t duration = track->duration;
+	uint32_t timescale = track->timescale;
+
+	if (track->type == TRACK_VIDEO) {
+		/* Update to track timescale */
+		duration = util_mul_div64(duration, track->timescale,
+					  track->timebase_den);
+	}
+
+	/* use 64-bit duration if necessary */
+	if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
+		size = 44;
+		version = 1;
+	}
+
+	write_fullbox(s, size, "mdhd", version, 0);
+
+	if (version == 1) {
+		s_wb64(s, mux->creation_time); // creation time
+		s_wb64(s, mux->creation_time); // modification time
+		s_wb32(s, timescale);          // timescale
+		s_wb64(s, (uint32_t)duration); // duration
+	} else {
+		s_wb32(s, (uint32_t)mux->creation_time); // creation time
+		s_wb32(s, (uint32_t)mux->creation_time); // modification time
+		s_wb32(s, timescale);                    // timescale
+		s_wb32(s, (uint32_t)duration);           // duration
+	}
+
+	s_wb16(s, 21956); // language (undefined)
+	s_wb16(s, 0);     // pre_defined
+
+	return size;
+}
+
+/// 8.4.3 Handler Reference Box
+static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "hdlr", 0, 0);
+
+	s_wb32(s, 0); // pre_defined
+
+	// handler_type
+	if (track->type == TRACK_VIDEO)
+		s_write(s, "vide", 4);
+	else if (track->type == TRACK_CHAPTERS)
+		s_write(s, "text", 4);
+	else
+		s_write(s, "soun", 4);
+
+	s_wb32(s, 0); // reserved
+	s_wb32(s, 0); // reserved
+	s_wb32(s, 0); // reserved
+
+	// name (utf-8 string, null terminated)
+	if (track->type == TRACK_VIDEO)
+		s_write(s, "OBS Video Handler", 18);
+	else if (track->type == TRACK_CHAPTERS)
+		s_write(s, "OBS Chapter Handler", 20);
+	else
+		s_write(s, "OBS Audio Handler", 18);
+
+	return write_box_size(s, start);
+}
+
+/// 12.1.2 Video media header
+static size_t mp4_write_vmhd(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	/* Flags is always 1 */
+	write_fullbox(s, 20, "vmhd", 0, 1);
+
+	s_wb16(s, 0); // graphicsmode
+	s_wb16(s, 0); // opcolor r
+	s_wb16(s, 0); // opcolor g
+	s_wb16(s, 0); // opcolor b
+
+	return 16;
+}
+
+/// 12.2.2 Sound media header
+static size_t mp4_write_smhd(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 16, "smhd", 0, 0);
+
+	s_wb16(s, 0); // balance
+	s_wb16(s, 0); // reserved
+
+	return 16;
+}
+
+/// (QTFF/Apple) Text media information atom
+static size_t mp4_write_qt_text(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "text");
+
+	/* Identity matrix, note that it's not fixed point 16.16 */
+	s_wb16(s, 0x01);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x01);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x00);
+	s_wb32(s, 0x00004000);
+	/* Seemingly undocumented */
+	s_wb16(s, 0x0000);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Base media info atom
+static size_t mp4_write_gmin(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "gmin", 0, 0);
+
+	s_wb16(s, 0x40);   // graphics mode
+	s_wb16(s, 0x8000); // opColor r
+	s_wb16(s, 0x8000); // opColor g
+	s_wb16(s, 0x8000); // opColor b
+	s_wb16(s, 0);      // balance
+	s_wb16(s, 0);      // reserved
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Base media information header atom
+static size_t mp4_write_gmhd(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "gmhd");
+
+	// gmin
+	mp4_write_gmin(mux);
+	// text (QuickTime)
+	mp4_write_qt_text(mux);
+
+	return write_box_size(s, start);
+}
+
+/// ISO/IEC 14496-15 5.4.2.1 AVCConfigurationBox
+static size_t mp4_write_avcC(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	/* For AVC this is the parsed extra data. */
+	uint8_t *header;
+	size_t size;
+
+	struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
+					.timebase_den = 1,
+					.keyframe = true};
+
+	if (!obs_encoder_get_extra_data(enc, &header, &size))
+		return 0;
+
+	packet.size = obs_parse_avc_header(&packet.data, header, size);
+
+	size_t box_size = packet.size + 8;
+	write_box(s, box_size, "avcC");
+	s_write(s, packet.data, packet.size);
+
+	bfree(packet.data);
+	return box_size;
+}
+
+/// ISO/IEC 14496-15 8.4.1.1 HEVCConfigurationBox
+static size_t mp4_write_hvcC(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	/* For HEVC this is the parsed extra data. */
+	uint8_t *header;
+	size_t size;
+
+	struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
+					.timebase_den = 1,
+					.keyframe = true};
+
+	if (!obs_encoder_get_extra_data(enc, &header, &size))
+		return 0;
+
+	packet.size = obs_parse_hevc_header(&packet.data, header, size);
+
+	size_t box_size = packet.size + 8;
+	write_box(s, box_size, "hvcC");
+	s_write(s, packet.data, packet.size);
+
+	bfree(packet.data);
+	return box_size;
+}
+
+/// AV1 ISOBMFF 2.3. AV1 Codec Configuration Box
+static size_t mp4_write_av1C(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	/* For AV1 this is just the parsed extra data. */
+	uint8_t *header;
+	size_t size;
+
+	struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
+					.timebase_den = 1,
+					.keyframe = true};
+
+	if (!obs_encoder_get_extra_data(enc, &header, &size))
+		return 0;
+
+	packet.size = obs_parse_av1_header(&packet.data, header, size);
+
+	size_t box_size = packet.size + 8;
+	write_box(s, box_size, "av1C");
+	s_write(s, packet.data, packet.size);
+
+	bfree(packet.data);
+	return box_size;
+}
+
+/// 12.1.5 Colour information
+static size_t mp4_write_colr(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	UNUSED_PARAMETER(enc);
+	struct serializer *s = mux->serializer;
+
+	write_box(s, 19, "colr");
+
+	uint8_t full_range = 0;
+	uint16_t pri, trc, spc;
+	pri = trc = spc = 0;
+	get_colour_information(enc, &pri, &trc, &spc, &full_range);
+
+	s_write(s, "nclx", 4);    // colour_type
+	s_wb16(s, pri);           // colour_primaries
+	s_wb16(s, trc);           // transfer_characteristics
+	s_wb16(s, spc);           // matrix_coefficiencts
+	s_w8(s, full_range << 7); // full range flag + 7 reserved bits (0)
+
+	return 19;
+}
+
+/// 12.1.4 Pixel Aspect Ratio
+static size_t mp4_write_pasp(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	write_box(s, 16, "pasp");
+
+	s_wb32(s, 1); // hSpacing
+	s_wb32(s, 1); // vSpacing
+
+	return 16;
+}
+
+/// 12.1.3 Visual Sample Entry
+static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux,
+						 obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	// SampleEntry Box
+	s_w8(s, 0); // reserved
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+
+	s_wb16(s, 1); // data_reference_index
+
+	// VisualSampleEntry Box
+	s_wb16(s, 0); // pre_defined
+	s_wb16(s, 0); // reserved
+	s_wb32(s, 0); // pre_defined
+	s_wb32(s, 0); // pre_defined
+	s_wb32(s, 0); // pre_defined
+
+	s_wb16(s, (uint16_t)obs_encoder_get_width(enc));  // width
+	s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
+
+	s_wb32(s, 0x00480000); // horizresolution (predefined)
+	s_wb32(s, 0x00480000); // vertresolution (predefined)
+
+	s_wb32(s, 0); // reserved
+	s_wb16(s, 1); // frame_count
+
+	/* Name is fixed 32-bytes and needs to be padded to that length.
+	 * First byte is the length, rest is a string sans NULL terminator. */
+	char compressor_name[32] = {0};
+	const char *enc_id = obs_encoder_get_id(enc);
+	if (enc_id) {
+		size_t len = strlen(enc_id);
+		if (len > 31)
+			len = 31;
+
+		compressor_name[0] = (char)len;
+		memcpy(compressor_name + 1, enc_id, len);
+	}
+	s_write(s, compressor_name, sizeof(compressor_name)); // compressorname
+
+	s_wb16(s, 0x0018); // depth
+	s_wb16(s, -1);     // pre_defined
+}
+
+/// 12.1.6 Content light level
+static size_t mp4_write_clli(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	video_t *video = obs_encoder_video(enc);
+	const struct video_output_info *info = video_output_get_info(video);
+
+	/* Only write box for HDR video */
+	if (info->colorspace != VIDEO_CS_2100_PQ &&
+	    info->colorspace != VIDEO_CS_2100_HLG)
+		return 0;
+
+	write_box(s, 12, "clli");
+
+	float nominal_peak = obs_get_video_hdr_nominal_peak_level();
+
+	s_wb16(s, (uint16_t)nominal_peak); // max_content_light_level
+	s_wb16(s, (uint16_t)nominal_peak); // max_pic_average_light_level
+
+	return 12;
+}
+
+/// 12.1.7 Mastering display colour volume
+static size_t mp4_write_mdcv(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+
+	video_t *video = obs_encoder_video(enc);
+	const struct video_output_info *info = video_output_get_info(video);
+
+	// Only write atom for HDR video
+	if (info->colorspace != VIDEO_CS_2100_PQ &&
+	    info->colorspace != VIDEO_CS_2100_HLG)
+		return 0;
+
+	write_box(s, 32, "mdcv");
+
+	float nominal_peak = obs_get_video_hdr_nominal_peak_level();
+	uint32_t max_lum = (uint32_t)nominal_peak * 10000;
+
+	/* Note that these values are hardcoded everywhere in OBS, so these are
+	 * just the same as used in our other muxers/encoders. */
+
+	// 3 x display_primaries (x, y) pairs
+	s_wb16(s, 13250);
+	s_wb16(s, 34500);
+	s_wb16(s, 7500);
+	s_wb16(s, 3000);
+	s_wb16(s, 34000);
+	s_wb16(s, 16000);
+
+	s_wb16(s, 15635);   // white_point_x
+	s_wb16(s, 16450);   // white_point_y
+	s_wb32(s, max_lum); // max_display_mastering_luminance
+	s_wb32(s, 0);       // min_display_mastering_luminance
+
+	return 32;
+}
+
+/// ISO/IEC 14496-15 5.4.2.1 AVCSampleEntry
+static size_t mp4_write_avc1(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "avc1");
+
+	mp4_write_visual_sample_entry(mux, enc);
+
+	// avcC
+	mp4_write_avcC(mux, enc);
+
+	// colr
+	mp4_write_colr(mux, enc);
+
+	// pasp
+	mp4_write_pasp(mux);
+
+	return write_box_size(s, start);
+}
+
+/// ISO/IEC 14496-15 8.4.1.1 HEVCSampleEntry
+static size_t mp4_write_hvc1(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "hvc1");
+
+	mp4_write_visual_sample_entry(mux, enc);
+
+	// avcC
+	mp4_write_hvcC(mux, enc);
+
+	// colr
+	mp4_write_colr(mux, enc);
+
+	// clli
+	mp4_write_clli(mux, enc);
+
+	// mdcv
+	mp4_write_mdcv(mux, enc);
+
+	// pasp
+	mp4_write_pasp(mux);
+
+	return write_box_size(s, start);
+}
+
+/// AV1 ISOBMFF 2.2. AV1 Sample Entry
+static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "av01");
+
+	mp4_write_visual_sample_entry(mux, enc);
+
+	// avcC
+	mp4_write_av1C(mux, enc);
+
+	// colr
+	mp4_write_colr(mux, enc);
+
+	// clli
+	mp4_write_clli(mux, enc);
+
+	// mdcv
+	mp4_write_mdcv(mux, enc);
+
+	// pasp
+	mp4_write_pasp(mux);
+
+	return write_box_size(s, start);
+}
+
+static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
+{
+	int i = 3;
+	s_w8(s, tag);
+	for (; i > 0; i--)
+		s_w8(s, (uint8_t)((size >> (7 * i)) | 0x80));
+	s_w8(s, size & 0x7F);
+}
+
+/// ISO/IEC 14496-14 5.6 ESDBox
+static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "esds", 0, 0);
+
+	/* Encoder extradata will be used as DecoderSpecificInfo  */
+	uint8_t *extradata;
+	size_t extradata_size;
+	if (!obs_encoder_get_extra_data(track->encoder, &extradata,
+					&extradata_size)) {
+		extradata_size = 0;
+	}
+
+	/// ISO/IEC 14496-1
+
+	// ES_Descriptor
+	size_t decoder_specific_info_len = extradata_size ? extradata_size + 5
+							  : 0;
+
+	put_descr(s, 0x03, 3 + 5 + 13 + decoder_specific_info_len + 5 + 1);
+	s_wb16(s, track->track_id);
+	s_w8(s, 0x00); // flags
+
+	// DecoderConfigDescriptor
+	put_descr(s, 0x04, 13 + decoder_specific_info_len);
+	s_w8(s, 0x40); // codec tag, 0x40 = AAC
+	s_w8(s, 0x15); // stream type field (0x15 = audio stream)
+
+	/* When writing the final MOOV this could theoretically be calculated
+	 * based on chunks, but it's not really all that important. */
+	uint32_t bitrate = 0;
+	obs_data_t *settings = obs_encoder_get_settings(track->encoder);
+	if (settings) {
+		int64_t enc_bitrate = obs_data_get_int(settings, "bitrate");
+		if (enc_bitrate)
+			bitrate = (uint32_t)(enc_bitrate * 1000);
+
+		obs_data_release(settings);
+	}
+
+	s_wb24(s, 0);       // bufferSizeDB (in bytes)
+	s_wb32(s, bitrate); // maxbitrate
+	s_wb32(s, bitrate); // avgBitrate
+
+	// DecoderSpecificInfo
+	if (extradata_size) {
+		put_descr(s, 0x05, extradata_size);
+		s_write(s, extradata, extradata_size);
+	}
+
+	// SLConfigDescriptor descriptor
+	put_descr(s, 0x06, 1);
+	s_w8(s, 0x02); // 0x2 = reserved for MP4, descriptor is empty
+
+	return write_box_size(s, start);
+}
+
+/// 12.2.3 Audio Sample Entry
+static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux,
+						struct mp4_track *track,
+						uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+
+	// SampleEntry Box
+	s_w8(s, 0); // reserved
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+	s_w8(s, 0);
+
+	s_wb16(s, 1); // data_reference_index
+
+	// AudioSampleEntry Box
+	if (version == 1) {
+		s_wb16(s, 1); // entry_version
+		s_wb16(s, 0); // reserved
+		s_wb16(s, 0); // reserved
+		s_wb16(s, 0); // reserved
+	} else {
+		s_wb32(s, 0); // reserved
+		s_wb32(s, 0); // reserved
+	}
+
+	audio_t *audio = obs_encoder_audio(track->encoder);
+	size_t channels = audio_output_get_channels(audio);
+	uint32_t sample_rate = track->timescale;
+	bool alac = track->codec == CODEC_ALAC;
+
+	s_wb16(s, (uint32_t)channels); // channelcount
+
+	/* OBS FLAC is currently always 16 bit, ALAC always 24, this may change
+	 * in the futrure and should be handled differently then.
+	 * That being said thoes codecs are self-describing so in most cases it
+	 * shouldn't matter either way. */
+	s_wb16(s, alac ? 24 : 16); // samplesize
+
+	s_wb16(s, 0); // pre_defined
+	s_wb16(s, 0); // reserved
+
+	s_wb32(s, sample_rate << 16); // samplerate
+}
+
+/// 12.2.4 Channel layout
+static size_t mp4_write_chnl(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "chnl", 0, 0);
+
+	audio_t *audio = obs_encoder_audio(track->encoder);
+	const struct audio_output_info *info = audio_output_get_info(audio);
+
+	s_w8(s, 1); // stream_structure (1 = channels)
+
+	/* 5.1 and 4.1 do not have a corresponding ISO layout, so we have to
+	 * write a manually created channel map for those. */
+	uint8_t map[8] = {0};
+	uint8_t items = 0;
+	uint8_t defined_layout = 0;
+
+	get_speaker_positions(info->speakers, map, &items, &defined_layout);
+
+	if (!defined_layout) {
+		warn("No ISO layout available for speaker layout %d, "
+		     "this may not be supported by all applications!",
+		     info->speakers);
+		s_w8(s, 0);             // definedLayout
+		s_write(s, map, items); // uint8_t speaker_position[count]
+	} else {
+		s_w8(s, defined_layout); // definedLayout
+		s_wb64(s, 0);            // ommitedChannelMap
+	}
+
+	return write_box_size(s, start);
+}
+
+/// ISO/IEC 14496-14 5.6 MP4AudioSampleEntry
+static size_t mp4_write_mp4a(struct mp4_mux *mux, struct mp4_track *track,
+			     uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "mp4a");
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	// esds
+	mp4_write_esds(mux, track);
+
+	/* Write channel layout for version 1 sample entires */
+	if (version == 1)
+		mp4_write_chnl(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// Encapsulation of FLAC in ISO Base Media File Format 3.3.2 FLAC Specific Box
+static size_t mp4_write_dfLa(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	uint8_t *extradata;
+	size_t extradata_size;
+
+	if (!obs_encoder_get_extra_data(track->encoder, &extradata,
+					&extradata_size))
+		return 0;
+
+	write_fullbox(s, 0, "dfLa", 0, 0);
+
+	/// FLACMetadataBlock
+
+	// LastMetadataBlockFlag (1) | BlockType (0)
+	s_w8(s, 1 << 7 | 0);
+	// Length
+	s_wb24(s, (uint32_t)extradata_size);
+	// BlockData[Length]
+	s_write(s, extradata, extradata_size);
+
+	return write_box_size(s, start);
+}
+
+/// Encapsulation of FLAC in ISO Base Media File Format 3.3.1 FLACSampleEntry
+static size_t mp4_write_fLaC(struct mp4_mux *mux, struct mp4_track *track,
+			     uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "fLaC");
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	// dfLa
+	mp4_write_dfLa(mux, track);
+
+	if (version == 1)
+		mp4_write_chnl(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// Apple Lossless Format "Magic Cookie" Description - MP4/M4A File
+static size_t mp4_write_alac(struct mp4_mux *mux, struct mp4_track *track,
+			     uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	uint8_t *extradata;
+	size_t extradata_size;
+
+	if (!obs_encoder_get_extra_data(track->encoder, &extradata,
+					&extradata_size))
+		return 0;
+
+	write_box(s, 0, "alac");
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	/* Apple Lossless Magic Cookie */
+	s_write(s, extradata, extradata_size);
+
+	if (version == 1)
+		mp4_write_chnl(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// ISO/IEC 23003-5 5.1 PCM configuration
+static size_t mp4_write_pcmc(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "pcmC", 0, 0);
+
+	s_w8(s, 1); // endianness, 1 = little endian
+
+	// bits per sample
+	if (track->codec == CODEC_PCM_I16)
+		s_w8(s, 16);
+	else if (track->codec == CODEC_PCM_I24)
+		s_w8(s, 24);
+	else if (track->codec == CODEC_PCM_F32)
+		s_w8(s, 32);
+
+	return write_box_size(s, start);
+}
+
+/// ISO/IEC 23003-5 5.1 PCM configuration
+static size_t mp4_write_xpcm(struct mp4_mux *mux, struct mp4_track *track,
+			     uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	/* Different box types for floating point and integer PCM*/
+	write_box(s, 0, track->codec == CODEC_PCM_F32 ? "fpcm" : "ipcm");
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	/* ChannelLayout (chnl) is required for PCM */
+	mp4_write_chnl(mux, track);
+
+	// pcmc
+	mp4_write_pcmc(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Text sample description
+static size_t mp4_write_text(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "text", 0, 0);
+
+	s_wb32(s, 1); // number of entries
+
+	/* Preset sample description as used by FFmpeg. */
+	s_write(s, &TEXT_STUB_HEADER, sizeof(TEXT_STUB_HEADER));
+
+	return write_box_size(s, start);
+}
+
+static inline uint32_t rl32(const uint8_t *ptr)
+{
+	return (ptr[3] << 24) + (ptr[2] << 16) + (ptr[1] << 8) + ptr[0];
+}
+
+static inline uint16_t rl16(const uint8_t *ptr)
+{
+	return (ptr[1] << 8) + ptr[0];
+}
+
+/// Encapsulation of Opus in ISO Base Media File Format 4.3.2 Opus Specific Box
+static size_t mp4_write_dOps(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	uint8_t *extradata;
+	size_t extradata_size;
+
+	if (!obs_encoder_get_extra_data(track->encoder, &extradata,
+					&extradata_size))
+		return 0;
+
+	write_box(s, 0, "dOps");
+	s_w8(s, 0); // version
+
+	uint8_t channels = *(extradata + 9);
+	uint8_t channel_map = *(extradata + 18);
+
+	s_w8(s, channels); // channel count
+	// OpusHead is little-endian, but MP4 is big-endian, so we have to swap them here
+	s_wb16(s, rl16(extradata + 10)); // pre-skip
+	s_wb32(s, rl32(extradata + 12)); // input sample rate
+	s_wb16(s, rl16(extradata + 16)); // output gain
+	s_w8(s, channel_map);            // channel mapping family
+
+	if (channel_map)
+		s_write(s, extradata + 19, 2 + channels);
+
+	return write_box_size(s, start);
+}
+
+/// Encapsulation of Opus in ISO Base Media File Format 4.3.1 Sample entry format
+static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track,
+			     uint8_t version)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "Opus");
+
+	mp4_write_audio_sample_entry(mux, track, version);
+
+	// dOps
+	mp4_write_dOps(mux, track);
+
+	if (version == 1)
+		mp4_write_chnl(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// 8.5.2 Sample Description Box
+static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	/* Anything but mono or stereo technically requires v1,
+	 * but in practice that doesn't appear to matter. */
+	uint8_t version = 0;
+
+	if (track->type == TRACK_AUDIO) {
+		audio_t *audio = obs_encoder_audio(track->encoder);
+		version = audio_output_get_channels(audio) > 2 ? 1 : 0;
+	}
+
+	write_fullbox(s, 0, "stsd", version, 0);
+
+	s_wb32(s, 1); // entry_count
+
+	// codec specific boxes
+	if (track->type == TRACK_VIDEO) {
+		if (track->codec == CODEC_H264)
+			mp4_write_avc1(mux, track->encoder);
+		else if (track->codec == CODEC_HEVC)
+			mp4_write_hvc1(mux, track->encoder);
+		else if (track->codec == CODEC_AV1)
+			mp4_write_av01(mux, track->encoder);
+	} else if (track->type == TRACK_AUDIO) {
+		if (track->codec == CODEC_AAC)
+			mp4_write_mp4a(mux, track, version);
+		else if (track->codec == CODEC_OPUS)
+			mp4_write_Opus(mux, track, version);
+		else if (track->codec == CODEC_FLAC)
+			mp4_write_fLaC(mux, track, version);
+		else if (track->codec == CODEC_ALAC)
+			mp4_write_alac(mux, track, version);
+		else if (track->codec == CODEC_PCM_I16 ||
+			 track->codec == CODEC_PCM_I24 ||
+			 track->codec == CODEC_PCM_F32)
+			mp4_write_xpcm(mux, track, version);
+	} else if (track->type == TRACK_CHAPTERS) {
+		mp4_write_text(mux);
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.6.1.2 Decoding Time to Sample Box
+static size_t mp4_write_stts(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+
+	if (fragmented) {
+		write_fullbox(s, 16, "stts", 0, 0);
+		s_wb32(s, 0); // entry_count
+		return 16;
+	}
+
+	int64_t start = serializer_get_pos(s);
+	struct sample_delta *arr = track->deltas.array;
+	size_t num = track->deltas.num;
+
+	write_fullbox(s, 0, "stts", 0, 0);
+
+	s_wb32(s, (uint32_t)num); // entry_count
+
+	for (size_t idx = 0; idx < num; idx++) {
+		struct sample_delta *smp = &arr[idx];
+
+		uint64_t delta = util_mul_div64(smp->delta, track->timescale,
+						track->timebase_den);
+
+		s_wb32(s, smp->count);      // sample_count
+		s_wb32(s, (uint32_t)delta); // sample_delta
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.6.2 Sync Sample Box
+static size_t mp4_write_stss(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	uint32_t num = (uint32_t)track->sync_samples.num;
+
+	if (!num)
+		return 0;
+
+	/* 16 byte FullBox header + 4-bytes (u32) per sync sample */
+	uint32_t size = 16 + 4 * num;
+
+	write_fullbox(s, size, "stss", 0, 0);
+	s_wb32(s, num); // entry_count
+
+	for (size_t idx = 0; idx < num; idx++)
+		s_wb32(s, track->sync_samples.array[idx]); // sample_number
+
+	return size;
+}
+
+/// 8.6.1.3 Composition Time to Sample Box
+static size_t mp4_write_ctts(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	uint32_t num = (uint32_t)track->offsets.num;
+
+	uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
+
+	/* 16 byte FullBox header + 8-bytes (u32+u32/i32) per offset entry */
+	uint32_t size = 16 + 8 * num;
+	write_fullbox(s, size, "ctts", version, 0);
+
+	s_wb32(s, num); // entry_count
+
+	for (size_t idx = 0; idx < num; idx++) {
+		int64_t offset = (int64_t)track->offsets.array[idx].offset *
+				 (int64_t)track->timescale /
+				 (int64_t)track->timebase_den;
+
+		s_wb32(s, track->offsets.array[idx].count); // sample_count
+		s_wb32(s, (uint32_t)offset);                // sample_offset
+	}
+
+	return size;
+}
+
+/// 8.7.4 Sample To Chunk Box
+static size_t mp4_write_stsc(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+
+	if (fragmented) {
+		write_fullbox(s, 16, "stsc", 0, 0);
+		s_wb32(s, 0); // entry_count
+		return 16;
+	}
+
+	struct chunk *arr = track->chunks.array;
+	size_t arr_num = track->chunks.num;
+
+	/* Compress into array with counter for repeating chunk sizes */
+	DARRAY(struct chunk_run {
+		uint32_t first;
+		uint32_t samples;
+	}) chunk_runs;
+
+	da_init(chunk_runs);
+
+	for (size_t idx = 0; idx < arr_num; idx++) {
+		struct chunk *chk = &arr[idx];
+
+		if (!chunk_runs.num ||
+		    chunk_runs.array[chunk_runs.num - 1].samples !=
+			    chk->samples) {
+			struct chunk_run *cr = da_push_back_new(chunk_runs);
+			cr->samples = chk->samples;
+			cr->first = (uint32_t)idx + 1; // ISO-BMFF is 1-indexed
+		}
+	}
+
+	uint32_t num = (uint32_t)chunk_runs.num;
+
+	/* 16 byte FullBox header + 12-bytes (u32+u32+u32) per chunk run */
+	uint32_t size = 16 + 12 * num;
+	write_fullbox(s, size, "stsc", 0, 0);
+
+	s_wb32(s, num); // entry_count
+
+	for (size_t idx = 0; idx < num; idx++) {
+		struct chunk_run *cr = &chunk_runs.array[idx];
+		s_wb32(s, cr->first);   // first_chunk
+		s_wb32(s, cr->samples); // samples_per_chunk
+		s_wb32(s, 1);           // sample_description_index
+	}
+
+	da_free(chunk_runs);
+
+	return size;
+}
+
+/// 8.7.3 Sample Size Boxes
+static size_t mp4_write_stsz(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+
+	if (fragmented) {
+		write_fullbox(s, 20, "stsz", 0, 0);
+		s_wb32(s, 0); // sample_size
+		s_wb32(s, 0); // sample_count
+
+		return 20;
+	}
+
+	int64_t start = serializer_get_pos(s);
+
+	/* This should only ever happen when recording > 24 hours of
+	 * 48 kHz PCM audio or 828 days of 60 FPS video. */
+	if (track->samples > UINT32_MAX) {
+		warn("Track %u has too many samples, its duration may not be "
+		     "read correctly. Remuxing the file to another format such "
+		     "as MKV may be required.",
+		     track->track_id);
+	}
+
+	write_fullbox(s, 0, "stsz", 0, 0);
+
+	if (track->sample_size) {
+		/* Fixed size samples mean we don't need an array */
+		s_wb32(s, track->sample_size);       // sample_size
+		s_wb32(s, (uint32_t)track->samples); // sample_count
+	} else {
+		s_wb32(s, 0);                                 // sample_size
+		s_wb32(s, (uint32_t)track->sample_sizes.num); // sample_count
+
+		for (size_t idx = 0; idx < track->sample_sizes.num; idx++) {
+			s_wb32(s, track->sample_sizes.array[idx]); // entry_size
+		}
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.7.5 Chunk Offset Box
+static size_t mp4_write_stco(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+
+	if (fragmented) {
+		write_fullbox(s, 16, "stco", 0, 0);
+		s_wb32(s, 0); // entry_count
+		return 16;
+	}
+
+	struct chunk *arr = track->chunks.array;
+	uint32_t num = (uint32_t)track->chunks.num;
+
+	uint64_t last_off = arr[num - 1].offset;
+	uint32_t size;
+	bool co64 = last_off > UINT32_MAX;
+
+	/* When using 64-bit offsets we write 8-bytes (u64) per chunk,
+	 * otherwise 4-bytes (u32). */
+	if (co64) {
+		size = 16 + 8 * num;
+		write_fullbox(s, size, "co64", 0, 0);
+	} else {
+		size = 16 + 4 * num;
+		write_fullbox(s, size, "stco", 0, 0);
+	}
+
+	s_wb32(s, num); // entry_count
+
+	for (size_t idx = 0; idx < num; idx++) {
+		if (co64)
+			s_wb64(s, arr[idx].offset); // chunk_offset
+		else
+			s_wb32(s, (uint32_t)arr[idx].offset); // chunk_offset
+	}
+
+	return size;
+}
+
+/// 8.9.3 Sample Group Description Box
+static size_t mp4_write_sgpd_aac(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	int64_t start = serializer_get_pos(s);
+	write_fullbox(s, 0, "sgpd", 1, 0);
+
+	s_write(s, "roll", 4); // grouping_tpye
+	s_wb32(s, 2);          // default_length (i16)
+
+	s_wb32(s, 1); // entry_count
+
+	// AudioRollRecoveryEntry
+	s_wb16(s, -1); // roll_distance
+
+	return write_box_size(s, start);
+}
+
+/// 8.9.2 Sample to Group Box
+static size_t mp4_write_sbgp_aac(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+
+	int64_t start = serializer_get_pos(s);
+	write_fullbox(s, 0, "sbgp", 0, 0);
+
+	/// 10.1 AudioRollRecoveryEntry
+	s_write(s, "roll", 4); // grouping_tpye
+
+	s_wb32(s, 1); // entry_count
+
+	s_wb32(s, (uint32_t)track->samples); // sample_count
+	s_wb32(s, 1);                        // group_description_index
+
+	return write_box_size(s, start);
+}
+
+static size_t mp4_write_sbgp_sbgp_opus(struct mp4_mux *mux,
+				       struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	/// 8.9.3 Sample Group Description Box
+	write_fullbox(s, 0, "sgpd", 1, 0);
+
+	s_write(s, "roll", 4); // grouping_tpye
+	s_wb32(s, 2);          // default_length (i16)
+
+	/* Opus requires 80 ms of preroll, which at 48 kHz is 3840 PCM samples */
+	const int64_t opus_preroll = 3840;
+
+	/* Compute the preroll samples (should be 4, each being 20 ms) */
+	uint16_t preroll_count = 0;
+	int64_t preroll_remaining = opus_preroll;
+
+	for (size_t i = 0; i < track->deltas.num && preroll_remaining > 0;
+	     i++) {
+		for (uint32_t j = 0;
+		     j < track->deltas.array[i].count && preroll_remaining > 0;
+		     j++) {
+			preroll_remaining -= track->deltas.array[i].delta;
+			preroll_count++;
+		}
+	}
+
+	s_wb32(s, 1); // entry_count
+	/// 10.1 AudioRollRecoveryEntry
+	s_wb16(s, -preroll_count); // roll_distance
+
+	size_t size_sgpd = write_box_size(s, start);
+
+	/* --------------- */
+
+	/// 8.9.2 Sample to Group Box
+	start = serializer_get_pos(s);
+	write_fullbox(s, 0, "sbgp", 0, 0);
+
+	s_write(s, "roll", 4); // grouping_tpye
+	s_wb32(s, 2);          // entry_count
+
+	// entry 0
+	s_wb32(s, preroll_count); // sample_count
+	s_wb32(s, 0);             // group_description_index
+	// entry 1
+	s_wb32(s, (uint32_t)track->samples - preroll_count); // sample_count
+	s_wb32(s, 1); // group_description_index
+
+	return size_sgpd + write_box_size(s, start);
+}
+
+/// 8.5.1 Sample Table Box
+static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "stbl");
+
+	// stsd
+	mp4_write_stsd(mux, track);
+
+	// stts
+	mp4_write_stts(mux, track, fragmented);
+
+	// stss (non-fragmented only)
+	if (track->type == TRACK_VIDEO && !fragmented)
+		mp4_write_stss(mux, track);
+
+	// ctts (non-fragmented only)
+	if (track->needs_ctts && !fragmented)
+		mp4_write_ctts(mux, track);
+
+	// stsc
+	mp4_write_stsc(mux, track, fragmented);
+
+	// stsz
+	mp4_write_stsz(mux, track, fragmented);
+
+	// stco
+	mp4_write_stco(mux, track, fragmented);
+
+	if (!fragmented) {
+		/* AAC and Opus require a pre-roll to get correct decoder
+		 * output, sgpd and sbgp are used to create a "roll" group. */
+		if (track->codec == CODEC_AAC) {
+			// sgpd
+			mp4_write_sgpd_aac(mux);
+			// sbgp
+			mp4_write_sbgp_aac(mux, track);
+		} else if (track->codec == CODEC_OPUS) {
+			// sgpd + sbgp
+			mp4_write_sbgp_sbgp_opus(mux, track);
+		}
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.7.2.2 DataEntryUrlBox
+static size_t mp4_write_url(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "url ", 0, 1);
+
+	/* empty, flag 1 means data is in this file */
+
+	return write_box_size(s, start);
+}
+
+/// 8.7.2 Data Reference Box
+static size_t mp4_write_dref(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "dref ", 0, 0);
+
+	s_wb32(s, 1); // entry_count
+
+	mp4_write_url(mux);
+
+	return write_box_size(s, start);
+}
+
+/// 8.7.1 Data Information Box
+static size_t mp4_write_dinf(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "dinf");
+
+	mp4_write_dref(mux);
+
+	return write_box_size(s, start);
+}
+
+/// 8.4.4 Media Information Box
+static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "minf");
+
+	// vmhd/smhd/gmhd
+	if (track->type == TRACK_VIDEO)
+		mp4_write_vmhd(mux);
+	else if (track->type == TRACK_CHAPTERS)
+		mp4_write_gmhd(mux);
+	else
+		mp4_write_smhd(mux);
+
+	// dinf, unnecessary but mandatory
+	mp4_write_dinf(mux);
+
+	// stbl
+	mp4_write_stbl(mux, track, fragmented);
+
+	return write_box_size(s, start);
+}
+
+/// 8.4.1 Media Box
+static size_t mp4_write_mdia(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "mdia");
+
+	// mdhd
+	mp4_write_mdhd(mux, track);
+
+	// hdlr
+	mp4_write_hdlr(mux, track);
+
+	// minf
+	mp4_write_minf(mux, track, fragmented);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) User data atom
+static size_t mp4_write_udta_atom(struct mp4_mux *mux, const char tag[4],
+				  const char *val)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, tag);
+	s_write(s, val, strlen(val));
+
+	return write_box_size(s, start);
+}
+
+/// 8.10.1 User Data Box
+static size_t mp4_write_track_udta(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "udta");
+
+	/* Our udta box contains QuickTime format user data atoms, which are
+	 * simple key-value pairs. Some are prefixed with 0xa9. */
+
+	const char *name = obs_encoder_get_name(track->encoder);
+	if (name)
+		mp4_write_udta_atom(mux, "name", name);
+
+	if (mux->flags & MP4_WRITE_ENCODER_INFO) {
+		const char *id = obs_encoder_get_id(track->encoder);
+		if (name)
+			mp4_write_udta_atom(mux, "\251enc", id);
+
+		obs_data_t *settings = obs_encoder_get_settings(track->encoder);
+		if (settings) {
+			const char *json =
+				obs_data_get_json_with_defaults(settings);
+			mp4_write_udta_atom(mux, "json", json);
+			obs_data_release(settings);
+		}
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.6.6 Edit List Box
+static size_t mp4_write_elst(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "elst", 0, 0);
+
+	s_wb32(s, 1); // entry count
+
+	uint64_t duration =
+		util_mul_div64(track->duration, 1000, track->timebase_den);
+	uint64_t delay = 0;
+
+	if (track->type == TRACK_VIDEO &&
+	    !(mux->flags & MP4_USE_NEGATIVE_CTS)) {
+		/* Compensate for frame-reordering delay (for example, when
+		 * using b-frames). */
+		int64_t dts_offset = 0;
+
+		if (track->offsets.num) {
+			struct sample_offset sample = track->offsets.array[0];
+			dts_offset = sample.offset;
+		} else if (track->packets.size) {
+			/* If no offset data exists yet (i.e. when writing the
+			 * incomplete moov in a fragmented file) use the raw
+			 * data from the current queued packets instead. */
+			struct encoder_packet pkt;
+			deque_peek_front(&track->packets, &pkt, sizeof(pkt));
+			dts_offset = pkt.pts - pkt.dts;
+		}
+
+		delay = util_mul_div64(dts_offset, track->timescale,
+				       track->timebase_den);
+	} else if (track->type == TRACK_AUDIO && track->first_pts < 0) {
+		delay = util_mul_div64(llabs(track->first_pts),
+				       track->timescale, track->timebase_den);
+		/* Subtract priming delay from total duration */
+		duration -= util_mul_div64(delay, 1000, track->timescale);
+	}
+
+	s_wb32(s, (uint32_t)duration); // segment_duration (movie timescale)
+	s_wb32(s, (uint32_t)delay);    // media_time (track timescale)
+	s_wb32(s, 1 << 16);            // media_rate
+
+	return write_box_size(s, start);
+}
+
+/// 8.6.5 Edit Box
+static size_t mp4_write_edts(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "edts");
+
+	mp4_write_elst(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// 8.3.3.2 TrackReferenceTypeBox
+static size_t mp4_write_chap(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	/// QTFF/Apple chapter track reference
+	write_box(s, 0, "chap");
+
+	s_wb32(s, mux->chapter_track->track_id);
+
+	return write_box_size(s, start);
+}
+
+/// 8.3.3 Track Reference Box
+static size_t mp4_write_tref(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "tref");
+
+	mp4_write_chap(mux);
+
+	return write_box_size(s, start);
+}
+
+/// 8.3.1 Track Box
+static size_t mp4_write_trak(struct mp4_mux *mux, struct mp4_track *track,
+			     bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "trak");
+
+	// tkhd
+	mp4_write_tkhd(mux, track);
+
+	// edts
+	mp4_write_edts(mux, track);
+
+	// tref
+	if (mux->chapter_track && track->type != TRACK_CHAPTERS)
+		mp4_write_tref(mux);
+
+	// mdia
+	mp4_write_mdia(mux, track, fragmented);
+
+	// udta (audio track name mainly)
+	mp4_write_track_udta(mux, track);
+
+	return write_box_size(s, start);
+}
+
+/// 8.8.3 Track Extends Box
+static size_t mp4_write_trex(struct mp4_mux *mux, uint32_t track_id)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 32, "trex", 0, 0);
+
+	s_wb32(s, track_id); // track_ID
+	s_wb32(s, 1);        // default_sample_description_index
+	s_wb32(s, 0);        // default_sample_duration
+	s_wb32(s, 0);        // default_sample_size
+	s_wb32(s, 0);        // default_sample_flags
+
+	return 32;
+}
+
+/// 8.8.1 Movie Extends Box
+static size_t mp4_write_mvex(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "mvex");
+
+	for (size_t track_id = 0; track_id < mux->tracks.num; track_id++)
+		mp4_write_trex(mux, (uint32_t)(track_id + 1));
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Undocumented QuickTime/iTunes metadata handler
+static size_t mp4_write_itunes_hdlr(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 33, "hdlr", 0, 0);
+
+	s_wb32(s, 0);          // pre_defined
+	s_write(s, "mdir", 4); // handler_type
+
+	// reserved
+	s_write(s, "appl", 4);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+
+	s_w8(s, 0); // name (NULL)
+
+	return 33;
+}
+
+/// (QTFF/Apple) Data atom
+static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
+{
+	struct serializer *s = mux->serializer;
+
+	size_t len = strlen(data);
+	uint32_t size = 16 + (uint32_t)len;
+
+	write_box(s, size, "data");
+
+	s_wb32(s, 1); // type, 1 = utf-8 string
+	s_wb32(s, 0); // locale, 0 = default
+	s_write(s, data, len);
+
+	return size;
+}
+
+/// (QTFF/Apple) Metadata item atom
+static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4],
+				       const char *value)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, name);
+
+	mp4_write_data_atom(mux, value);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Metadata item list atom
+static size_t mp4_write_ilst(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	struct dstr value = {0};
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "ilst");
+
+	/* Encoder name */
+	dstr_cat(&value, "OBS Studio (");
+	dstr_cat(&value, obs_get_version_string());
+	dstr_cat(&value, ")");
+	/* Some QuickTime keys are prefixed with 0xa9 */
+	mp4_write_ilst_item_atom(mux, "\251too", value.array);
+
+	dstr_free(&value);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Key value metadata handler
+static size_t mp4_write_mdta_hdlr(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 33, "hdlr", 0, 0);
+
+	s_wb32(s, 0);          // pre_defined
+	s_write(s, "mdta", 4); // handler_type
+
+	// reserved
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+	s_wb32(s, 0);
+
+	s_w8(s, 0); // name (NULL)
+	return 33;
+}
+
+/// (QTFF/Apple) Metadata item keys atom
+static size_t mp4_write_mdta_keys(struct mp4_mux *mux, obs_data_t *meta)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "keys", 0, 0);
+
+	uint32_t count = 0;
+	int64_t count_pos = serializer_get_pos(s);
+	s_wb32(s, count); // count
+
+	obs_data_item_t *item = obs_data_first(meta);
+
+	for (; item != NULL; obs_data_item_next(&item)) {
+		const char *name = obs_data_item_get_name(item);
+		size_t len = strlen(name);
+
+		/* name is key type, can be udta or mdta */
+		write_box(s, len + 8, "mdta");
+		s_write(s, name, len); // key name
+
+		count++;
+	}
+
+	int64_t end = serializer_get_pos(s);
+
+	/* Overwrite count with correct value */
+	serializer_seek(s, count_pos, SERIALIZE_SEEK_START);
+	s_wb32(s, count);
+	serializer_seek(s, end, SERIALIZE_SEEK_START);
+
+	return write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Metadata item atom, but name is an index instead
+static inline void write_key_entry(struct mp4_mux *mux, obs_data_item_t *item,
+				   uint32_t idx)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	s_wb32(s, 0);   // size
+	s_wb32(s, idx); // index
+
+	mp4_write_data_atom(mux, obs_data_item_get_string(item));
+
+	write_box_size(s, start);
+}
+
+/// (QTFF/Apple) Metadata item list atom
+static size_t mp4_write_mdta_ilst(struct mp4_mux *mux, obs_data_t *meta)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "ilst");
+
+	/* indices start with 1 */
+	uint32_t key_idx = 1;
+
+	obs_data_item_t *item = obs_data_first(meta);
+
+	for (; item != NULL; obs_data_item_next(&item)) {
+		write_key_entry(mux, item, key_idx);
+		key_idx++;
+	}
+
+	return write_box_size(s, start);
+}
+
+static void mp4_write_mdta_kv(struct mp4_mux *mux)
+{
+	struct dstr value = {0};
+
+	obs_data_t *meta = obs_data_create();
+
+	dstr_cat(&value, "OBS Studio (");
+	dstr_cat(&value, obs_get_version_string());
+	dstr_cat(&value, ")");
+
+	// ToDo figure out what else we could put in here for fun and profit :)
+	obs_data_set_string(meta, "tool", value.array);
+
+	/* Write keys */
+	mp4_write_mdta_keys(mux, meta);
+	/* Write values */
+	mp4_write_mdta_ilst(mux, meta);
+
+	obs_data_release(meta);
+	dstr_free(&value);
+}
+
+/// 8.11.1 The Meta box
+static size_t mp4_write_meta(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_fullbox(s, 0, "meta", 0, 0);
+
+	if (mux->flags & MP4_USE_MDTA_KEY_VALUE) {
+		mp4_write_mdta_hdlr(mux);
+		mp4_write_mdta_kv(mux);
+	} else {
+		mp4_write_itunes_hdlr(mux);
+		mp4_write_ilst(mux);
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.10.1 User Data Box
+static size_t mp4_write_udta(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "udta");
+
+	/* Normally metadata would be directly in the moov, but since this is
+	 * Apple/QTFF format metadata it is inside udta. */
+
+	// meta
+	mp4_write_meta(mux);
+
+	return write_box_size(s, start);
+}
+
+/// Movie Box (8.2.1)
+static size_t mp4_write_moov(struct mp4_mux *mux, bool fragmented)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "moov");
+
+	mp4_write_mvhd(mux);
+
+	// trak(s)
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		mp4_write_trak(mux, track, fragmented);
+	}
+
+	if (!fragmented && mux->chapter_track)
+		mp4_write_trak(mux, mux->chapter_track, false);
+
+	// mvex
+	if (fragmented)
+		mp4_write_mvex(mux);
+
+	// udta (metadata)
+	mp4_write_udta(mux);
+
+	return write_box_size(s, start);
+}
+
+/* ========================================================================== */
+/* moof (fragment header) stuff                                               */
+
+/// 8.8.5 Movie Fragment Header Box
+static size_t mp4_write_mfhd(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 16, "mfhd", 0, 0);
+
+	s_wb32(s, mux->fragments_written); // sequence_number
+
+	return 16;
+}
+
+/// 8.8.7 Track Fragment Header Box
+static size_t mp4_write_tfhd(struct mp4_mux *mux, struct mp4_track *track,
+			     size_t moof_start)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	uint32_t flags = BASE_DATA_OFFSET_PRESENT |
+			 DEFAULT_SAMPLE_FLAGS_PRESENT;
+
+	/* Add default size/duration if all samples match. */
+	bool durations_match = true;
+	bool sizes_match = true;
+	uint32_t duration;
+	uint32_t sample_size;
+
+	if (track->sample_size) {
+		duration = 1;
+		sample_size = track->sample_size;
+	} else {
+		duration = track->fragment_samples.array[0].duration;
+		sample_size = track->fragment_samples.array[0].size;
+
+		for (size_t idx = 1; idx < track->fragment_samples.num; idx++) {
+			uint32_t frag_duration =
+				track->fragment_samples.array[idx].duration;
+			uint32_t frag_size =
+				track->fragment_samples.array[idx].size;
+
+			durations_match = frag_duration == duration;
+			sizes_match = frag_size == sample_size;
+		}
+	}
+
+	if (durations_match)
+		flags |= DEFAULT_SAMPLE_DURATION_PRESENT;
+	if (sizes_match)
+		flags |= DEFAULT_SAMPLE_SIZE_PRESENT;
+
+	write_fullbox(s, 0, "tfhd", 0, flags);
+
+	s_wb32(s, track->track_id); // track_ID
+	s_wb64(s, moof_start);      // base_data_offset
+
+	// default_sample_duration
+	if (durations_match) {
+		if (track->type == TRACK_VIDEO) {
+			/* Convert duration to track timescale */
+			duration = (uint32_t)util_mul_div64(
+				duration, track->timescale,
+				track->timebase_den);
+		}
+
+		s_wb32(s, duration);
+	}
+	// default_sample_size
+	if (sizes_match)
+		s_wb32(s, sample_size);
+	// default_sample_flags
+	if (track->type == TRACK_VIDEO) {
+		s_wb32(s, SAMPLE_FLAG_DEPENDS_YES | SAMPLE_FLAG_IS_NON_SYNC);
+	} else {
+		s_wb32(s, SAMPLE_FLAG_DEPENDS_NO);
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.8.12 Track fragment decode time
+static size_t mp4_write_tfdt(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+
+	write_fullbox(s, 20, "tfdt", 1, 0);
+
+	/* Subtract samples that are not written yet */
+	uint64_t duration_written = track->duration;
+	for (size_t i = 0; i < track->fragment_samples.num; i++)
+		duration_written -= track->fragment_samples.array[i].duration;
+
+	if (track->type == TRACK_VIDEO) {
+		/* Convert to track timescale */
+		duration_written = util_mul_div64(duration_written,
+						  track->timescale,
+						  track->timebase_den);
+	}
+
+	s_wb64(s, duration_written); // baseMediaDecodeTime
+
+	return 20;
+}
+
+/// 8.8.8 Track Fragment Run Box
+static size_t mp4_write_trun(struct mp4_mux *mux, struct mp4_track *track,
+			     uint32_t moof_size, uint64_t *samples_mdat_offset)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	uint32_t flags = DATA_OFFSET_PRESENT;
+
+	if (!track->sample_size)
+		flags |= SAMPLE_SIZE_PRESENT;
+
+	if (track->type == TRACK_VIDEO) {
+		flags |= FIRST_SAMPLE_FLAGS_PRESENT;
+		flags |= SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT;
+	}
+
+	uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
+
+	write_fullbox(s, 0, "trun", version, flags);
+
+	/* moof_size + 8 bytes for mdat header + offset into mdat box data */
+	size_t data_offset = moof_size + 8 + *samples_mdat_offset;
+	size_t sample_count = track->fragment_samples.num;
+
+	if (track->sample_size) {
+		/* Update count based on fixed size */
+		size_t total_size = 0;
+		for (size_t i = 0; i < sample_count; i++)
+			total_size += track->fragment_samples.array[i].size;
+
+		*samples_mdat_offset += total_size;
+		sample_count = total_size / track->sample_size;
+	}
+
+	s_wb32(s, (uint32_t)sample_count); // sample_count
+	s_wb32(s, (uint32_t)data_offset);  // data_offset
+
+	/* If we have a fixed sample size (PCM audio) we only need to write
+	 * the sample count and offset. */
+	if (track->sample_size)
+		return write_box_size(s, start);
+
+	if (track->type == TRACK_VIDEO)
+		s_wb32(s, SAMPLE_FLAG_DEPENDS_NO); // first_sample_flags
+
+	for (size_t idx = 0; idx < sample_count; idx++) {
+		struct fragment_sample *smp =
+			&track->fragment_samples.array[idx];
+
+		s_wb32(s, smp->size); // sample_size
+
+		if (track->type == TRACK_VIDEO) {
+			// sample_composition_time_offset
+			int64_t offset = (int64_t)smp->offset *
+					 (int64_t)track->timescale /
+					 (int64_t)track->timebase_den;
+			s_wb32(s, (uint32_t)offset);
+		}
+
+		*samples_mdat_offset += smp->size;
+	}
+
+	return write_box_size(s, start);
+}
+
+/// 8.8.6 Track Fragment Box
+static size_t mp4_write_traf(struct mp4_mux *mux, struct mp4_track *track,
+			     int64_t moof_start, uint32_t moof_size,
+			     uint64_t *samples_mdat_offset)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "traf");
+
+	// tfhd
+	mp4_write_tfhd(mux, track, moof_start);
+
+	// tfdt
+	mp4_write_tfdt(mux, track);
+
+	// trun
+	mp4_write_trun(mux, track, moof_size, samples_mdat_offset);
+
+	return write_box_size(s, start);
+}
+
+/// 8.8.4 Movie Fragment Box
+static size_t mp4_write_moof(struct mp4_mux *mux, uint32_t moof_size,
+			     int64_t moof_start)
+{
+	struct serializer *s = mux->serializer;
+	int64_t start = serializer_get_pos(s);
+
+	write_box(s, 0, "moof");
+
+	mp4_write_mfhd(mux);
+
+	/* Track current mdat offset across tracks */
+	uint64_t samples_mdat_offset = 0;
+
+	// traf boxes
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		/* Skip tracks that do not have any samples */
+		if (!track->fragment_samples.num)
+			continue;
+
+		mp4_write_traf(mux, track, moof_start, moof_size,
+			       &samples_mdat_offset);
+	}
+
+	return write_box_size(s, start);
+}
+
+/* ========================================================================== */
+/* Chapter packets                                                            */
+
+static void mp4_create_chapter_pkt(struct encoder_packet *pkt, int64_t dts_usec,
+				   const char *name)
+{
+	int64_t dts = dts_usec / 1000; // chapter track uses a ms timebase
+
+	pkt->pts = dts;
+	pkt->dts = dts;
+	pkt->dts_usec = dts_usec;
+	pkt->timebase_num = 1;
+	pkt->timebase_den = 1000;
+
+	/* Serialize with data with ref count */
+	struct serializer s;
+	struct array_output_data ao;
+	array_output_serializer_init(&s, &ao);
+
+	size_t len = min(strlen(name), UINT16_MAX);
+	long refs = 1;
+
+	/* encoder_packet refs */
+	s_write(&s, &refs, sizeof(refs));
+	/* actual packet data */
+	s_wb16(&s, (uint16_t)len);
+	s_write(&s, name, len);
+	s_write(&s, &CHAPTER_PKT_FOOTER, sizeof(CHAPTER_PKT_FOOTER));
+
+	pkt->data = (void *)(ao.bytes.array + sizeof(long));
+	pkt->size = ao.bytes.num - sizeof(long);
+}
+
+/* ========================================================================== */
+/* Encoder packet processing and fragment writer                              */
+
+static inline int64_t packet_pts_usec(struct encoder_packet *packet)
+{
+	return packet->pts * 1000000 / packet->timebase_den;
+}
+
+static inline struct encoder_packet *get_pkt_at(struct deque *dq, size_t idx)
+{
+	return deque_data(dq, idx * sizeof(struct encoder_packet));
+}
+
+static inline uint64_t get_longest_track_duration(struct mp4_mux *mux)
+{
+	uint64_t dur = 0;
+
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		uint64_t track_dur = util_mul_div64(track->duration, 1000,
+						    track->timebase_den);
+
+		if (track_dur > dur)
+			dur = track_dur;
+	}
+
+	return dur;
+}
+
+static void process_packets(struct mp4_mux *mux, struct mp4_track *track,
+			    uint64_t *mdat_size)
+{
+	size_t count = track->packets.size / sizeof(struct encoder_packet);
+
+	if (!count)
+		return;
+
+	/* Only iterate upt to penultimate packet so we can determine duration
+	 * for all processed packets. */
+	for (size_t i = 0; i < count - 1; i++) {
+		struct encoder_packet *pkt = get_pkt_at(&track->packets, i);
+
+		if (mux->next_frag_pts &&
+		    packet_pts_usec(pkt) >= mux->next_frag_pts)
+			break;
+
+		struct encoder_packet *next =
+			get_pkt_at(&track->packets, i + 1);
+
+		/* Duration is just distance between current and next DTS. */
+		uint32_t duration = (uint32_t)(next->dts - pkt->dts);
+		uint32_t sample_count = 1;
+		uint32_t size = (uint32_t)pkt->size;
+		int32_t offset = (int32_t)(pkt->pts - pkt->dts);
+
+		/* When using negative CTS, subtract DTS-PTS offset. */
+		if (track->type == TRACK_VIDEO &&
+		    mux->flags & MP4_USE_NEGATIVE_CTS) {
+			if (!track->offsets.num)
+				track->dts_offset = offset;
+
+			offset -= track->dts_offset;
+		}
+
+		/* Create temporary sample information for moof */
+		struct fragment_sample *smp =
+			da_push_back_new(track->fragment_samples);
+		smp->size = size;
+		smp->offset = offset;
+		smp->duration = duration;
+
+		*mdat_size += size;
+
+		/* Update global sample information for full moov */
+		track->duration += duration;
+
+		if (track->sample_size) {
+			/* Adjust duration/count for fixed sample size */
+			sample_count = size / track->sample_size;
+			duration = 1;
+		}
+
+		if (!track->samples)
+			track->first_pts = pkt->pts;
+
+		track->samples += sample_count;
+
+		/* If delta (duration) matche sprevious, increment counter,
+		 * otherwise create a new entry. */
+		if (track->deltas.num == 0 ||
+		    track->deltas.array[track->deltas.num - 1].delta !=
+			    duration) {
+			struct sample_delta *new =
+				da_push_back_new(track->deltas);
+			new->delta = duration;
+			new->count = sample_count;
+		} else {
+			track->deltas.array[track->deltas.num - 1].count +=
+				sample_count;
+		}
+
+		if (!track->sample_size)
+			da_push_back(track->sample_sizes, &size);
+
+		if (track->type != TRACK_VIDEO)
+			continue;
+
+		if (pkt->keyframe)
+			da_push_back(track->sync_samples, &track->samples);
+
+		/* Only require ctts box if offet is non-zero */
+		if (offset && !track->needs_ctts)
+			track->needs_ctts = true;
+
+		/* If dts-pts offset matche sprevious, increment counter,
+		 * otherwise create a new entry. */
+		if (track->offsets.num == 0 ||
+		    track->offsets.array[track->offsets.num - 1].offset !=
+			    offset) {
+			struct sample_offset *new =
+				da_push_back_new(track->offsets);
+			new->offset = offset;
+			new->count = 1;
+		} else {
+			track->offsets.array[track->offsets.num - 1].count += 1;
+		}
+	}
+}
+
+/* Write track data to file */
+static void write_packets(struct mp4_mux *mux, struct mp4_track *track)
+{
+	struct serializer *s = mux->serializer;
+
+	size_t count = track->packets.size / sizeof(struct encoder_packet);
+	if (!count)
+		return;
+
+	struct chunk *chk = da_push_back_new(track->chunks);
+	chk->offset = serializer_get_pos(s);
+	chk->samples = (uint32_t)track->fragment_samples.num;
+
+	for (size_t i = 0; i < track->fragment_samples.num; i++) {
+		struct encoder_packet pkt;
+		deque_pop_front(&track->packets, &pkt,
+				sizeof(struct encoder_packet));
+		s_write(s, pkt.data, pkt.size);
+		obs_encoder_packet_release(&pkt);
+	}
+
+	chk->size = (uint32_t)(serializer_get_pos(s) - chk->offset);
+
+	/* Fixup sample count for fixed-size codecs */
+	if (track->sample_size)
+		chk->samples = chk->size / track->sample_size;
+
+	da_clear(track->fragment_samples);
+}
+
+static void mp4_flush_fragment(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	// Write file header if not already done
+	if (!mux->fragments_written) {
+		mp4_write_ftyp(mux, true);
+		/* Placeholder to write mdat header during soft-remux */
+		mux->placeholder_offset = serializer_get_pos(s);
+		mp4_write_free(mux);
+	}
+
+	// Array output as temporary buffer to avoid sending seeks to disk
+	struct serializer as;
+	struct array_output_data aod;
+	array_output_serializer_init(&as, &aod);
+	mux->serializer = &as;
+
+	// Write initial incomplete moov (because fragmentation)
+	if (!mux->fragments_written) {
+		mp4_write_moov(mux, true);
+		s_write(s, aod.bytes.array, aod.bytes.num);
+		array_output_serializer_reset(&aod);
+	}
+
+	mux->fragments_written++;
+
+	/* --------------------------------------------------------- */
+	/* Analyse packets and create fragment moof.                 */
+
+	uint64_t mdat_size = 8;
+
+	for (size_t idx = 0; idx < mux->tracks.num; idx++) {
+		struct mp4_track *track = &mux->tracks.array[idx];
+		process_packets(mux, track, &mdat_size);
+	}
+
+	if (!mux->next_frag_pts && mux->chapter_track) {
+		// Create dummy chapter marker at the end so duration is correct
+		uint64_t duration = get_longest_track_duration(mux);
+		struct encoder_packet pkt;
+		mp4_create_chapter_pkt(&pkt, (int64_t)duration * 1000, "Dummy");
+		deque_push_back(&mux->chapter_track->packets, &pkt,
+				sizeof(struct encoder_packet));
+
+		process_packets(mux, mux->chapter_track, &mdat_size);
+	}
+
+	// write moof once to get size
+	int64_t moof_start = serializer_get_pos(s);
+	size_t moof_size = mp4_write_moof(mux, 0, moof_start);
+	array_output_serializer_reset(&aod);
+
+	// write moof again with known size
+	mp4_write_moof(mux, (uint32_t)moof_size, moof_start);
+
+	// Write to output and restore real serializer
+	s_write(s, aod.bytes.array, aod.bytes.num);
+	mux->serializer = s;
+	array_output_serializer_free(&aod);
+
+	/* --------------------------------------------------------- */
+	/* Write audio and video samples (in chunks). Also update    */
+	/* global chunk and sample information for final moov.       */
+
+	if (mdat_size > UINT32_MAX) {
+		s_wb32(s, 1);
+		s_write(s, "mdat", 4);
+		s_wb64(s, mdat_size + 8);
+	} else {
+		s_wb32(s, (uint32_t)mdat_size);
+		s_write(s, "mdat", 4);
+	}
+
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *track = &mux->tracks.array[i];
+		write_packets(mux, track);
+	}
+
+	/* Only write chapter packets on final flush. */
+	if (!mux->next_frag_pts && mux->chapter_track)
+		write_packets(mux, mux->chapter_track);
+
+	mux->next_frag_pts = 0;
+}
+
+/* ========================================================================== */
+/* Track object functions                                                     */
+
+static inline void track_insert_packet(struct mp4_track *track,
+				       struct encoder_packet *pkt)
+{
+	int64_t pts_usec = packet_pts_usec(pkt);
+	if (pts_usec > track->last_pts_usec)
+		track->last_pts_usec = pts_usec;
+
+	deque_push_back(&track->packets, pkt, sizeof(struct encoder_packet));
+}
+
+static inline uint32_t get_sample_size(struct mp4_track *track)
+{
+	audio_t *audio = obs_encoder_audio(track->encoder);
+	if (!audio)
+		return 0;
+
+	const struct audio_output_info *info = audio_output_get_info(audio);
+	uint32_t channels = get_audio_channels(info->speakers);
+
+	switch (track->codec) {
+	case CODEC_PCM_F32:
+		return channels * 4; // 4 bytes per sample (32-bit)
+	case CODEC_PCM_I24:
+		return channels * 3; // 3 bytes per sample (24-bit)
+	case CODEC_PCM_I16:
+		return channels * 2; // 2 bytes per sample (16-bit)
+	default:
+		return 0;
+	}
+}
+
+static inline enum mp4_codec get_codec(obs_encoder_t *enc)
+{
+	const char *codec = obs_encoder_get_codec(enc);
+
+	if (strcmp(codec, "h264") == 0)
+		return CODEC_H264;
+	if (strcmp(codec, "hevc") == 0)
+		return CODEC_HEVC;
+	if (strcmp(codec, "av1") == 0)
+		return CODEC_AV1;
+	if (strcmp(codec, "aac") == 0)
+		return CODEC_AAC;
+	if (strcmp(codec, "opus") == 0)
+		return CODEC_OPUS;
+	if (strcmp(codec, "flac") == 0)
+		return CODEC_FLAC;
+	if (strcmp(codec, "alac") == 0)
+		return CODEC_ALAC;
+	if (strcmp(codec, "pcm_s16le") == 0)
+		return CODEC_PCM_I16;
+	if (strcmp(codec, "pcm_s24le") == 0)
+		return CODEC_PCM_I24;
+	if (strcmp(codec, "pcm_f32le") == 0)
+		return CODEC_PCM_F32;
+
+	return CODEC_UNKNOWN;
+}
+
+static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc)
+{
+	struct mp4_track *track = da_push_back_new(mux->tracks);
+
+	track->type = obs_encoder_get_type(enc) == OBS_ENCODER_VIDEO
+			      ? TRACK_VIDEO
+			      : TRACK_AUDIO;
+	track->encoder = obs_encoder_get_ref(enc);
+	track->codec = get_codec(enc);
+	track->track_id = ++mux->track_ctr;
+
+	/* Set timebase/timescale */
+	if (track->type == TRACK_VIDEO) {
+		video_t *video = obs_encoder_video(enc);
+		const struct video_output_info *info =
+			video_output_get_info(video);
+		track->timebase_num = info->fps_den;
+		track->timebase_den = info->fps_num;
+
+		track->timescale = track->timebase_den;
+		/* FFmpeg does this to compensate for non-monotonic timestamps,
+		 * we probably don't need it, but let's stick to what they do
+		 * for maximum compatibility. */
+		while (track->timescale < 10000)
+			track->timescale *= 2;
+	} else {
+		uint32_t sample_rate = obs_encoder_get_sample_rate(enc);
+		/* Opus is always 48 kHz */
+		if (track->codec == CODEC_OPUS)
+			sample_rate = 48000;
+		track->timebase_num = 1;
+		track->timebase_den = sample_rate;
+		track->timescale = sample_rate;
+	}
+
+	/* Set sample size (if fixed) */
+	if (track->type == TRACK_AUDIO)
+		track->sample_size = get_sample_size(track);
+}
+
+static inline void add_chapter_track(struct mp4_mux *mux)
+{
+	mux->chapter_track = bzalloc(sizeof(struct mp4_track));
+	mux->chapter_track->type = TRACK_CHAPTERS;
+	mux->chapter_track->codec = CODEC_TEXT;
+	mux->chapter_track->timescale = 1000;
+	mux->chapter_track->timebase_num = 1;
+	mux->chapter_track->timebase_den = 1000;
+	mux->chapter_track->track_id = ++mux->track_ctr;
+}
+
+static inline void free_packets(struct deque *dq)
+{
+	size_t num = dq->size / sizeof(struct encoder_packet);
+
+	for (size_t i = 0; i < num; i++) {
+		struct encoder_packet pkt;
+		deque_pop_front(dq, &pkt, sizeof(struct encoder_packet));
+		obs_encoder_packet_release(&pkt);
+	}
+}
+
+static inline void free_track(struct mp4_track *track)
+{
+	if (!track)
+		return;
+
+	obs_encoder_release(track->encoder);
+
+	free_packets(&track->packets);
+	deque_free(&track->packets);
+
+	da_free(track->sample_sizes);
+	da_free(track->chunks);
+	da_free(track->deltas);
+	da_free(track->offsets);
+	da_free(track->sync_samples);
+	da_free(track->fragment_samples);
+}
+
+/* ===========================================================================*/
+/* API */
+
+struct mp4_mux *mp4_mux_create(obs_output_t *output,
+			       struct serializer *serializer,
+			       enum mp4_mux_flags flags)
+{
+	struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));
+
+	mux->output = output;
+	mux->serializer = serializer;
+	mux->flags = flags;
+	/* Timestamp is based on 1904 rather than 1970. */
+	mux->creation_time = time(NULL) + 0x7C25B080;
+
+	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
+		obs_encoder_t *enc = obs_output_get_video_encoder2(output, i);
+		if (!enc)
+			continue;
+		add_track(mux, enc);
+	}
+
+	for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
+		obs_encoder_t *enc = obs_output_get_audio_encoder(output, i);
+		if (!enc)
+			continue;
+		add_track(mux, enc);
+	}
+
+	return mux;
+}
+
+void mp4_mux_destroy(struct mp4_mux *mux)
+{
+	for (size_t i = 0; i < mux->tracks.num; i++)
+		free_track(&mux->tracks.array[i]);
+
+	free_track(mux->chapter_track);
+	bfree(mux->chapter_track);
+	da_free(mux->tracks);
+	bfree(mux);
+}
+
+bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
+{
+	struct mp4_track *track = NULL;
+	struct encoder_packet parsed_packet;
+	enum obs_encoder_type type = pkt->type;
+	bool fragment_ready = mux->next_frag_pts > 0;
+
+	for (size_t i = 0; i < mux->tracks.num; i++) {
+		struct mp4_track *tmp = &mux->tracks.array[i];
+
+		fragment_ready = fragment_ready &&
+				 tmp->last_pts_usec >= mux->next_frag_pts;
+
+		if (tmp->encoder == pkt->encoder)
+			track = tmp;
+	}
+
+	if (!track) {
+		warn("Could not find track for packet of type %s with "
+		     "track id %zu!",
+		     type == OBS_ENCODER_VIDEO ? "video" : "audio",
+		     pkt->track_idx);
+		return false;
+	}
+
+	/* If all tracks have caught up to the keyframe we want to fragment on,
+	 * flush the current fragment to disk. */
+	if (fragment_ready)
+		mp4_flush_fragment(mux);
+
+	if (type == OBS_ENCODER_AUDIO) {
+		obs_encoder_packet_ref(&parsed_packet, pkt);
+	} else {
+		if (track->codec == CODEC_H264)
+			obs_parse_avc_packet(&parsed_packet, pkt);
+		else if (track->codec == CODEC_HEVC)
+			obs_parse_hevc_packet(&parsed_packet, pkt);
+		else if (track->codec == CODEC_AV1)
+			obs_parse_av1_packet(&parsed_packet, pkt);
+
+		/* Set fragmentation PTS if packet is keyframe and PTS > 0 */
+		if (parsed_packet.keyframe && parsed_packet.pts > 0) {
+			mux->next_frag_pts = packet_pts_usec(&parsed_packet);
+		}
+	}
+
+	track_insert_packet(track, &parsed_packet);
+
+	return true;
+}
+
+bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec,
+			 const char *name)
+{
+	if (dts_usec < 0)
+		return false;
+	if (!mux->chapter_track)
+		add_chapter_track(mux);
+
+	/* To work correctly there needs to be a chapter at PTS 0,
+	 * create that here if necessary. */
+	if (dts_usec > 0 && mux->chapter_track->packets.size == 0) {
+		mp4_mux_add_chapter(mux, 0,
+				    obs_module_text("MP4Output.StartChapter"));
+	}
+
+	/* Create packets that will be muxed on final flush */
+	struct encoder_packet pkt;
+	mp4_create_chapter_pkt(&pkt, dts_usec, name);
+	track_insert_packet(mux->chapter_track, &pkt);
+
+	return true;
+}
+
+bool mp4_mux_finalise(struct mp4_mux *mux)
+{
+	struct serializer *s = mux->serializer;
+
+	/* Flush remaining audio/video samples as final fragment. */
+	info("Flushing final fragment...");
+
+	/* Set target PTS to zero to indicate that we want to flush all
+	 * the remaining packets */
+	mux->next_frag_pts = 0;
+	mp4_flush_fragment(mux);
+
+	info("Number of fragments: %u", mux->fragments_written);
+
+	if (mux->flags & MP4_SKIP_FINALISATION) {
+		warn("Skipping MP4 finalization!");
+		return true;
+	}
+
+	int64_t data_end = serializer_get_pos(s);
+
+	/* ---------------------------------------- */
+	/* Write full moov box                      */
+
+	/* Use array serializer for moov data as this will do a lot
+	 * of seeks to write size values of variable-size boxes. */
+	struct serializer fs;
+	struct array_output_data ao;
+	array_output_serializer_init(&fs, &ao);
+
+	mux->serializer = &fs;
+
+	mp4_write_moov(mux, false);
+	s_write(s, ao.bytes.array, ao.bytes.num);
+	info("Full moov size: %zu KiB", ao.bytes.num / 1024);
+
+	mux->serializer = s; // restore real serializer
+	array_output_serializer_free(&ao);
+
+	/* ---------------------------------------- */
+	/* Overwrite file header (ftyp + free/moov) */
+
+	serializer_seek(s, 0, SERIALIZE_SEEK_START);
+	mp4_write_ftyp(mux, false);
+
+	size_t data_size = data_end - mux->placeholder_offset;
+	serializer_seek(s, (int64_t)mux->placeholder_offset,
+			SERIALIZE_SEEK_START);
+
+	/* If data is more than 4 GiB the mdat header becomes 16 bytes, hence
+	 * why we create a 16-byte placeholder "free" box at the start. */
+	if (data_size > UINT32_MAX) {
+		s_wb32(s, 1); // 1 = use "largesize" field instead
+		s_write(s, "mdat", 4);
+		s_wb64(s, data_size); // largesize (64-bit)
+	} else {
+		s_wb32(s, (uint32_t)data_size);
+		s_write(s, "mdat", 4);
+	}
+
+	info("Final mdat size: %zu KiB", data_size / 1024);
+	return true;
+}

+ 43 - 0
plugins/obs-outputs/mp4-mux.h

@@ -0,0 +1,43 @@
+/******************************************************************************
+    Copyright (C) 2024 by Dennis Sädtler <[email protected]>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#pragma once
+
+#include <obs.h>
+#include <util/serializer.h>
+
+struct mp4_mux;
+
+enum mp4_mux_flags {
+	/* Uses mdta key/value list for metadata instead of QuickTime keys */
+	MP4_USE_MDTA_KEY_VALUE = 1 << 0,
+	/* Write encoder configuration to trak udat */
+	MP4_WRITE_ENCODER_INFO = 1 << 1,
+	/* Skip "soft-remux" and leave file in fragmented state */
+	MP4_SKIP_FINALISATION = 1 << 2,
+	/* Use negative CTS instead of edit lists */
+	MP4_USE_NEGATIVE_CTS = 1 << 3,
+};
+
+struct mp4_mux *mp4_mux_create(obs_output_t *output,
+			       struct serializer *serializer,
+			       enum mp4_mux_flags flags);
+void mp4_mux_destroy(struct mp4_mux *mux);
+bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt);
+bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec,
+			 const char *name);
+bool mp4_mux_finalise(struct mp4_mux *mux);

+ 613 - 0
plugins/obs-outputs/mp4-output.c

@@ -0,0 +1,613 @@
+/******************************************************************************
+    Copyright (C) 2024 by Dennis Sädtler <[email protected]>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#include "mp4-mux.h"
+
+#include <inttypes.h>
+
+#include <obs-module.h>
+#include <util/platform.h>
+#include <util/dstr.h>
+#include <util/threading.h>
+#include <util/buffered-file-serializer.h>
+
+#include <opts-parser.h>
+
+#define do_log(level, format, ...)                \
+	blog(level, "[mp4 output: '%s'] " format, \
+	     obs_output_get_name(out->output), ##__VA_ARGS__)
+
+#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
+#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
+
+struct chapter {
+	int64_t dts_usec;
+	char *name;
+};
+
+struct mp4_output {
+	obs_output_t *output;
+	struct dstr path;
+
+	struct serializer serializer;
+
+	volatile bool active;
+	volatile bool stopping;
+	uint64_t stop_ts;
+
+	bool allow_overwrite;
+	uint64_t total_bytes;
+
+	pthread_mutex_t mutex;
+
+	struct mp4_mux *muxer;
+	int flags;
+
+	int64_t last_dts_usec;
+	DARRAY(struct chapter) chapters;
+
+	/* File splitting stuff */
+	bool split_file_enabled;
+	bool split_file_ready;
+	volatile bool manual_split;
+
+	size_t cur_size;
+	size_t max_size;
+
+	int64_t start_time;
+	int64_t max_time;
+
+	bool found_video[MAX_OUTPUT_VIDEO_ENCODERS];
+	bool found_audio[MAX_OUTPUT_AUDIO_ENCODERS];
+	int64_t video_pts_offsets[MAX_OUTPUT_VIDEO_ENCODERS];
+	int64_t audio_dts_offsets[MAX_OUTPUT_AUDIO_ENCODERS];
+
+	/* Buffer for packets while we reinitialise the muxer after splitting */
+	DARRAY(struct encoder_packet) split_buffer;
+};
+
+static inline bool stopping(struct mp4_output *out)
+{
+	return os_atomic_load_bool(&out->stopping);
+}
+
+static inline bool active(struct mp4_output *out)
+{
+	return os_atomic_load_bool(&out->active);
+}
+
+static inline int64_t packet_pts_usec(struct encoder_packet *packet)
+{
+	return packet->pts * 1000000 / packet->timebase_den;
+}
+
+static inline void ts_offset_clear(struct mp4_output *out)
+{
+	for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
+		out->found_video[i] = false;
+		out->video_pts_offsets[i] = 0;
+	}
+
+	for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
+		out->found_audio[i] = false;
+		out->audio_dts_offsets[i] = 0;
+	}
+}
+
+static inline void ts_offset_update(struct mp4_output *out,
+				    struct encoder_packet *packet)
+{
+	int64_t *offset;
+	bool *found;
+
+	if (packet->type == OBS_ENCODER_VIDEO) {
+		offset = &out->video_pts_offsets[packet->track_idx];
+		found = &out->found_video[packet->track_idx];
+	} else {
+		offset = &out->audio_dts_offsets[packet->track_idx];
+		found = &out->found_audio[packet->track_idx];
+	}
+
+	if (*found)
+		return;
+
+	*offset = packet->dts;
+	*found = true;
+}
+
+static const char *mp4_output_name(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return obs_module_text("MP4Output");
+}
+
+static void mp4_output_destory(void *data)
+{
+	struct mp4_output *out = data;
+
+	for (size_t i = 0; i < out->chapters.num; i++)
+		bfree(out->chapters.array[i].name);
+	da_free(out->chapters);
+
+	pthread_mutex_destroy(&out->mutex);
+	dstr_free(&out->path);
+	bfree(out);
+}
+
+static void mp4_add_chapter_proc(void *data, calldata_t *cd)
+{
+	struct mp4_output *out = data;
+	struct dstr name = {0};
+
+	dstr_copy(&name, calldata_string(cd, "chapter_name"));
+
+	if (name.len == 0) {
+		/* Generate name if none provided. */
+		dstr_catf(&name, "%s %zu",
+			  obs_module_text("MP4Output.UnnamedChapter"),
+			  out->chapters.num + 1);
+	}
+
+	int64_t totalRecordSeconds = out->last_dts_usec / 1000 / 1000;
+	int seconds = (int)totalRecordSeconds % 60;
+	int totalMinutes = (int)totalRecordSeconds / 60;
+	int minutes = totalMinutes % 60;
+	int hours = totalMinutes / 60;
+
+	info("Adding chapter \"%s\" at %02d:%02d:%02d", name.array, hours,
+	     minutes, seconds);
+
+	pthread_mutex_lock(&out->mutex);
+	struct chapter *chap = da_push_back_new(out->chapters);
+	chap->dts_usec = out->last_dts_usec;
+	chap->name = name.array;
+	pthread_mutex_unlock(&out->mutex);
+}
+
+static void split_file_proc(void *data, calldata_t *cd)
+{
+	struct mp4_output *out = data;
+
+	calldata_set_bool(cd, "split_file_enabled", out->split_file_enabled);
+	if (!out->split_file_enabled)
+		return;
+
+	os_atomic_set_bool(&out->manual_split, true);
+}
+
+static void *mp4_output_create(obs_data_t *settings, obs_output_t *output)
+{
+	struct mp4_output *out = bzalloc(sizeof(struct mp4_output));
+	out->output = output;
+	pthread_mutex_init(&out->mutex, NULL);
+
+	signal_handler_t *sh = obs_output_get_signal_handler(output);
+	signal_handler_add(sh, "void file_changed(string next_file)");
+
+	proc_handler_t *ph = obs_output_get_proc_handler(output);
+	proc_handler_add(ph, "void split_file(out bool split_file_enabled)",
+			 split_file_proc, out);
+	proc_handler_add(ph, "void add_chapter(string chapter_name)",
+			 mp4_add_chapter_proc, out);
+
+	UNUSED_PARAMETER(settings);
+	return out;
+}
+
+static inline void apply_flag(int *flags, const char *value, int flag_value)
+{
+	if (atoi(value))
+		*flags |= flag_value;
+	else
+		*flags &= ~flag_value;
+}
+
+static int parse_custom_options(const char *opts_str)
+{
+	int flags = MP4_USE_NEGATIVE_CTS;
+
+	struct obs_options opts = obs_parse_options(opts_str);
+
+	for (size_t i = 0; i < opts.count; i++) {
+		struct obs_option opt = opts.options[i];
+
+		if (strcmp(opt.name, "skip_soft_remux") == 0) {
+			apply_flag(&flags, opt.value, MP4_SKIP_FINALISATION);
+		} else if (strcmp(opt.name, "write_encoder_info") == 0) {
+			apply_flag(&flags, opt.value, MP4_WRITE_ENCODER_INFO);
+		} else if (strcmp(opt.name, "use_metadata_tags") == 0) {
+			apply_flag(&flags, opt.value, MP4_USE_MDTA_KEY_VALUE);
+		} else if (strcmp(opt.name, "use_negative_cts") == 0) {
+			apply_flag(&flags, opt.value, MP4_USE_NEGATIVE_CTS);
+		} else {
+			blog(LOG_WARNING, "Unknown muxer option: %s = %s",
+			     opt.name, opt.value);
+		}
+	}
+
+	obs_free_options(opts);
+
+	return flags;
+}
+
+static bool mp4_output_start(void *data)
+{
+	struct mp4_output *out = data;
+
+	if (!obs_output_can_begin_data_capture(out->output, 0))
+		return false;
+	if (!obs_output_initialize_encoders(out->output, 0))
+		return false;
+
+	os_atomic_set_bool(&out->stopping, false);
+
+	/* get path */
+	obs_data_t *settings = obs_output_get_settings(out->output);
+	const char *path = obs_data_get_string(settings, "path");
+	dstr_copy(&out->path, path);
+
+	out->max_time = obs_data_get_int(settings, "max_time_sec") * 1000000LL;
+	out->max_size = obs_data_get_int(settings, "max_size_mb") * 1024 * 1024;
+	out->split_file_enabled = obs_data_get_bool(settings, "split_file");
+	out->allow_overwrite = obs_data_get_bool(settings, "allow_overwrite");
+	out->cur_size = 0;
+
+	/* Allow skipping the remux step for debugging purposes. */
+	const char *muxer_settings =
+		obs_data_get_string(settings, "muxer_settings");
+	out->flags = parse_custom_options(muxer_settings);
+
+	obs_data_release(settings);
+
+	if (!buffered_file_serializer_init_defaults(&out->serializer,
+						    out->path.array)) {
+		warn("Unable to open MP4 file '%s'", out->path.array);
+		return false;
+	}
+
+	/* Initialise muxer and start capture */
+	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags);
+	os_atomic_set_bool(&out->active, true);
+	obs_output_begin_data_capture(out->output, 0);
+
+	info("Writing Hybrid MP4 file '%s'...", out->path.array);
+	return true;
+}
+
+static inline bool should_split(struct mp4_output *out,
+				struct encoder_packet *packet)
+{
+	/* split at video frame on primary track */
+	if (packet->type != OBS_ENCODER_VIDEO || packet->track_idx > 0)
+		return false;
+
+	/* don't split group of pictures */
+	if (!packet->keyframe)
+		return false;
+
+	if (os_atomic_load_bool(&out->manual_split))
+		return true;
+
+	/* reached maximum file size */
+	if (out->max_size > 0 &&
+	    out->cur_size + (int64_t)packet->size >= out->max_size)
+		return true;
+
+	/* reached maximum duration */
+	if (out->max_time > 0 &&
+	    packet->dts_usec - out->start_time >= out->max_time)
+		return true;
+
+	return false;
+}
+
+static void find_best_filename(struct dstr *path, bool space)
+{
+	int num = 2;
+
+	if (!os_file_exists(path->array))
+		return;
+
+	const char *ext = strrchr(path->array, '.');
+	if (!ext)
+		return;
+
+	size_t extstart = ext - path->array;
+	struct dstr testpath;
+	dstr_init_copy_dstr(&testpath, path);
+	for (;;) {
+		dstr_resize(&testpath, extstart);
+		dstr_catf(&testpath, space ? " (%d)" : "_%d", num++);
+		dstr_cat(&testpath, ext);
+
+		if (!os_file_exists(testpath.array)) {
+			dstr_free(path);
+			dstr_init_move(path, &testpath);
+			break;
+		}
+	}
+}
+
+static void generate_filename(struct mp4_output *out, struct dstr *dst,
+			      bool overwrite)
+{
+	obs_data_t *settings = obs_output_get_settings(out->output);
+	const char *dir = obs_data_get_string(settings, "directory");
+	const char *fmt = obs_data_get_string(settings, "format");
+	const char *ext = obs_data_get_string(settings, "extension");
+	bool space = obs_data_get_bool(settings, "allow_spaces");
+
+	char *filename = os_generate_formatted_filename(ext, space, fmt);
+
+	dstr_copy(dst, dir);
+	dstr_replace(dst, "\\", "/");
+	if (dstr_end(dst) != '/')
+		dstr_cat_ch(dst, '/');
+	dstr_cat(dst, filename);
+
+	char *slash = strrchr(dst->array, '/');
+	if (slash) {
+		*slash = 0;
+		os_mkdirs(dst->array);
+		*slash = '/';
+	}
+
+	if (!overwrite)
+		find_best_filename(dst, space);
+
+	bfree(filename);
+	obs_data_release(settings);
+}
+
+static bool change_file(struct mp4_output *out, struct encoder_packet *pkt)
+{
+	uint64_t start_time = os_gettime_ns();
+
+	/* finalise file */
+	for (size_t i = 0; i < out->chapters.num; i++) {
+		struct chapter *chap = &out->chapters.array[i];
+		mp4_mux_add_chapter(out->muxer, chap->dts_usec, chap->name);
+	}
+
+	mp4_mux_finalise(out->muxer);
+
+	info("Waiting for file writer to finish...");
+
+	/* flush/close file and destroy old muxer */
+	buffered_file_serializer_free(&out->serializer);
+	mp4_mux_destroy(out->muxer);
+
+	for (size_t i = 0; i < out->chapters.num; i++)
+		bfree(out->chapters.array[i].name);
+
+	da_clear(out->chapters);
+
+	info("MP4 file split complete. Finalization took %" PRIu64 " ms.",
+	     (os_gettime_ns() - start_time) / 1000000);
+
+	/* open new file */
+	generate_filename(out, &out->path, out->allow_overwrite);
+	info("Changing output file to '%s'", out->path.array);
+
+	if (!buffered_file_serializer_init_defaults(&out->serializer,
+						    out->path.array)) {
+		warn("Unable to open MP4 file '%s'", out->path.array);
+		return false;
+	}
+
+	out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags);
+
+	calldata_t cd = {0};
+	signal_handler_t *sh = obs_output_get_signal_handler(out->output);
+	calldata_set_string(&cd, "next_file", out->path.array);
+	signal_handler_signal(sh, "file_changed", &cd);
+	calldata_free(&cd);
+
+	out->cur_size = 0;
+	out->start_time = pkt->dts_usec;
+	ts_offset_clear(out);
+
+	return true;
+}
+
+static void mp4_output_stop(void *data, uint64_t ts)
+{
+	struct mp4_output *out = data;
+	out->stop_ts = ts / 1000;
+	os_atomic_set_bool(&out->stopping, true);
+}
+
+static void mp4_output_actual_stop(struct mp4_output *out, int code)
+{
+	os_atomic_set_bool(&out->active, false);
+
+	uint64_t start_time = os_gettime_ns();
+
+	for (size_t i = 0; i < out->chapters.num; i++) {
+		struct chapter *chap = &out->chapters.array[i];
+		mp4_mux_add_chapter(out->muxer, chap->dts_usec, chap->name);
+	}
+
+	mp4_mux_finalise(out->muxer);
+
+	if (code) {
+		obs_output_signal_stop(out->output, code);
+	} else {
+		obs_output_end_data_capture(out->output);
+	}
+
+	info("Waiting for file writer to finish...");
+
+	/* Flush/close output file and destroy muxer */
+	buffered_file_serializer_free(&out->serializer);
+	mp4_mux_destroy(out->muxer);
+	out->muxer = NULL;
+
+	/* Clear chapter data */
+	for (size_t i = 0; i < out->chapters.num; i++)
+		bfree(out->chapters.array[i].name);
+
+	da_clear(out->chapters);
+
+	info("MP4 file output complete. Finalization took %" PRIu64 " ms.",
+	     (os_gettime_ns() - start_time) / 1000000);
+}
+
+static void push_back_packet(struct mp4_output *out,
+			     struct encoder_packet *packet)
+{
+	struct encoder_packet pkt;
+	obs_encoder_packet_ref(&pkt, packet);
+	da_push_back(out->split_buffer, &pkt);
+}
+
+static inline bool submit_packet(struct mp4_output *out,
+				 struct encoder_packet *pkt)
+{
+	out->total_bytes += pkt->size;
+
+	if (!out->split_file_enabled)
+		return mp4_mux_submit_packet(out->muxer, pkt);
+
+	out->cur_size += pkt->size;
+
+	/* Apply DTS/PTS offset local packet copy */
+	struct encoder_packet modified = *pkt;
+
+	if (modified.type == OBS_ENCODER_VIDEO) {
+		modified.dts -= out->video_pts_offsets[modified.track_idx];
+		modified.pts -= out->video_pts_offsets[modified.track_idx];
+	} else {
+		modified.dts -= out->audio_dts_offsets[modified.track_idx];
+		modified.pts -= out->audio_dts_offsets[modified.track_idx];
+	}
+
+	return mp4_mux_submit_packet(out->muxer, &modified);
+}
+
+static void mp4_output_packet(void *data, struct encoder_packet *packet)
+{
+	struct mp4_output *out = data;
+
+	pthread_mutex_lock(&out->mutex);
+
+	if (!active(out))
+		goto unlock;
+
+	if (!packet) {
+		mp4_output_actual_stop(out, OBS_OUTPUT_ENCODE_ERROR);
+		goto unlock;
+	}
+
+	if (stopping(out)) {
+		if (packet->sys_dts_usec >= (int64_t)out->stop_ts) {
+			mp4_output_actual_stop(out, 0);
+			goto unlock;
+		}
+	}
+
+	if (out->split_file_enabled) {
+		if (out->split_buffer.num) {
+			int64_t pts_usec = packet_pts_usec(packet);
+			struct encoder_packet *first_pkt =
+				out->split_buffer.array;
+			int64_t first_pts_usec = packet_pts_usec(first_pkt);
+
+			if (pts_usec >= first_pts_usec) {
+				if (packet->type != OBS_ENCODER_AUDIO) {
+					push_back_packet(out, packet);
+					goto unlock;
+				}
+
+				if (!change_file(out, first_pkt)) {
+					mp4_output_actual_stop(
+						out, OBS_OUTPUT_ERROR);
+					goto unlock;
+				}
+				out->split_file_ready = true;
+			}
+		} else if (should_split(out, packet)) {
+			push_back_packet(out, packet);
+			goto unlock;
+		}
+	}
+
+	if (out->split_file_ready) {
+		for (size_t i = 0; i < out->split_buffer.num; i++) {
+			struct encoder_packet *pkt =
+				&out->split_buffer.array[i];
+			ts_offset_update(out, pkt);
+			submit_packet(out, pkt);
+			obs_encoder_packet_release(pkt);
+		}
+
+		da_free(out->split_buffer);
+		out->split_file_ready = false;
+		os_atomic_set_bool(&out->manual_split, false);
+	}
+
+	if (out->split_file_enabled)
+		ts_offset_update(out, packet);
+
+	/* Update PTS for chapter markers */
+	if (packet->type == OBS_ENCODER_VIDEO && packet->track_idx == 0)
+		out->last_dts_usec = packet->dts_usec - out->start_time;
+
+	submit_packet(out, packet);
+
+	if (serializer_get_pos(&out->serializer) == -1)
+		mp4_output_actual_stop(out, OBS_OUTPUT_ERROR);
+
+unlock:
+	pthread_mutex_unlock(&out->mutex);
+}
+
+static obs_properties_t *mp4_output_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+
+	obs_properties_t *props = obs_properties_create();
+
+	obs_properties_add_text(props, "path",
+				obs_module_text("MP4Output.FilePath"),
+				OBS_TEXT_DEFAULT);
+	obs_properties_add_text(props, "muxer_settings", "muxer_settings",
+				OBS_TEXT_DEFAULT);
+	return props;
+}
+
+uint64_t mp4_output_total_bytes(void *data)
+{
+	struct mp4_output *out = data;
+	return out->total_bytes;
+}
+
+struct obs_output_info mp4_output_info = {
+	.id = "mp4_output",
+	.flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED |
+		 OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE,
+	.encoded_video_codecs = "h264;hevc;av1",
+	.encoded_audio_codecs = "aac",
+	.get_name = mp4_output_name,
+	.create = mp4_output_create,
+	.destroy = mp4_output_destory,
+	.start = mp4_output_start,
+	.stop = mp4_output_stop,
+	.encoded_packet = mp4_output_packet,
+	.get_properties = mp4_output_properties,
+	.get_total_bytes = mp4_output_total_bytes,
+};

+ 2 - 0
plugins/obs-outputs/obs-outputs.c

@@ -15,6 +15,7 @@ MODULE_EXPORT const char *obs_module_description(void)
 extern struct obs_output_info rtmp_output_info;
 extern struct obs_output_info null_output_info;
 extern struct obs_output_info flv_output_info;
+extern struct obs_output_info mp4_output_info;
 #if defined(FTL_FOUND)
 extern struct obs_output_info ftl_output_info;
 #endif
@@ -65,6 +66,7 @@ bool obs_module_load(void)
 	obs_register_output(&rtmp_output_info);
 	obs_register_output(&null_output_info);
 	obs_register_output(&flv_output_info);
+	obs_register_output(&mp4_output_info);
 #if defined(FTL_FOUND)
 	obs_register_output(&ftl_output_info);
 #endif