mp4-mux.c 81 KB


  1. /******************************************************************************
  2. Copyright (C) 2024 by Dennis Sädtler <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #include "mp4-mux-internal.h"
  15. #include "rtmp-hevc.h"
  16. #include "rtmp-av1.h"
  17. #include <obs-avc.h>
  18. #include <obs-hevc.h>
  19. #include <obs-module.h>
  20. #include <util/dstr.h>
  21. #include <util/platform.h>
  22. #include <util/array-serializer.h>
  23. #include <time.h>
  24. /*
  25. * (Mostly) compliant MP4 muxer for fun and profit.
  26. * Based on ISO/IEC 14496-12 and FFmpeg's libavformat/movenc.c ([L]GPL)
  27. *
  28. * Specification section numbers are noted where applicable.
  29. * Standard identifier is included if not referring to ISO/IEC 14496-12.
  30. */
  /* Logging helpers. Each message is prefixed with the muxer flavor ("mov" or
   * "mp4") and the name of the output. The macros expand to expressions that
   * reference a variable named `mux`, so they can only be used where such a
   * variable (with `flavor` and `output` members) is in scope. */
  #define do_log(level, format, ...) \
      blog(level, "[%s muxer: '%s'] " format, mux->flavor == FLAVOR_MOV ? "mov" : "mp4", \
      obs_output_get_name(mux->output), ##__VA_ARGS__)
  /* Convenience wrappers for the two log levels used by the muxer. */
  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  36. /* Helper to overwrite placeholder size and return total size. */
  37. static inline size_t write_box_size(struct serializer *s, int64_t start)
  38. {
  39. int64_t end = serializer_get_pos(s);
  40. size_t size = end - start;
  41. serializer_seek(s, start, SERIALIZE_SEEK_START);
  42. s_wb32(s, (uint32_t)size);
  43. serializer_seek(s, end, SERIALIZE_SEEK_START);
  44. return size;
  45. }
  /// 4.2 Box header with size and char[4] name
  /* Sizes that fit into 32 bits use the compact header (size + type). Larger
   * sizes are signalled by a size field of 1, followed by the type and a
   * 64-bit "largesize". */
  static inline void write_box(struct serializer *s, const size_t size, const char name[4])
  {
      const bool needs_largesize = size > UINT32_MAX;

      s_wb32(s, needs_largesize ? 1 : (uint32_t)size); // size (1 = largesize follows)
      s_write(s, name, 4);                             // boxtype

      if (needs_largesize)
          s_wb64(s, size); // largesize
  }
  /// 4.2 FullBox extended header with u8 version and u24 flags
  /* Emits the plain box header first, then the one-byte version and the
   * 24-bit flags field that make it a FullBox. */
  static inline void write_fullbox(struct serializer *s, const size_t size, const char name[4], uint8_t version,
                                   uint32_t flags)
  {
      /* Regular box header (size + type, plus largesize if required) */
      write_box(s, size, name);

      /* FullBox extension */
      s_w8(s, version); // version
      s_wb24(s, flags); // flags
  }
  66. /// 4.3 File Type Box
  67. static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
  68. {
  69. struct serializer *s = mux->serializer;
  70. int64_t start = serializer_get_pos(s);
  71. write_box(s, 0, "ftyp");
  72. if (mux->flavor == FLAVOR_MOV) {
  73. /* For MOV, the brand is just "qt" followed by two spaces. */
  74. s_write(s, "qt ", 4); // major brand
  75. s_wb32(s, 0x20140200); // minor version (BCD YYYYMM00 per QTFF spec)
  76. s_write(s, "qt ", 4); // minor brand
  77. } else {
  78. const char *major_brand = "isom";
  79. /* Following FFmpeg's example, when using negative CTS the major brand
  80. * needs to be either iso4 or iso6 depending on whether the file is
  81. * currently fragmented. */
  82. if (mux->flags & MP4_USE_NEGATIVE_CTS)
  83. major_brand = fragmented ? "iso6" : "iso4";
  84. s_write(s, major_brand, 4); // major brand
  85. s_wb32(s, 0); // minor version
  86. s_write(s, major_brand, 4); // minor brands (first one matches major brand)
  87. /* Write isom base brand if it's not the major brand */
  88. if (strcmp(major_brand, "isom") != 0)
  89. s_write(s, "isom", 4);
  90. /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
  91. * as a placeholder to maintain ftyp box size. */
  92. if (fragmented && strcmp(major_brand, "iso6") != 0)
  93. s_write(s, "iso6", 4);
  94. else
  95. s_write(s, "obs1", 4);
  96. s_write(s, "iso2", 4);
  97. /* Include H.264 brand if used */
  98. for (size_t i = 0; i < mux->tracks.num; i++) {
  99. struct mp4_track *track = &mux->tracks.array[i];
  100. if (track->type == TRACK_VIDEO) {
  101. if (track->codec == CODEC_H264)
  102. s_write(s, "avc1", 4);
  103. break;
  104. }
  105. }
  106. /* General MP4 brannd */
  107. s_write(s, "mp41", 4);
  108. }
  109. return write_box_size(s, start);
  110. }
  111. /// 8.1.2 Free Space Box
  112. static size_t mp4_write_free(struct mp4_mux *mux)
  113. {
  114. struct serializer *s = mux->serializer;
  115. /* Write a 16-byte free box, so it can be replaced with a 64-bit size
  116. * box header (u32 + char[4] + u64) */
  117. s_wb32(s, 16);
  118. s_write(s, mux->flavor == FLAVOR_MOV ? "wide" : "free", 4);
  119. s_wb64(s, 0);
  120. return 16;
  121. }
  122. /// 8.2.2 Movie Header Box
  123. static size_t mp4_write_mvhd(struct mp4_mux *mux)
  124. {
  125. struct serializer *s = mux->serializer;
  126. size_t start = serializer_get_pos(s);
  127. /* Use primary video track as the baseline for duration */
  128. uint64_t duration = 0;
  129. for (size_t i = 0; i < mux->tracks.num; i++) {
  130. struct mp4_track *track = &mux->tracks.array[i];
  131. if (track->type == TRACK_VIDEO) {
  132. duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  133. break;
  134. }
  135. }
  136. bool extended_ts = duration > UINT32_MAX || mux->creation_time > UINT32_MAX;
  137. uint8_t version = extended_ts ? 1 : 0;
  138. write_fullbox(s, 0, "mvhd", version, 0);
  139. if (extended_ts) {
  140. s_wb64(s, mux->creation_time); // creation time
  141. s_wb64(s, mux->creation_time); // modification time
  142. s_wb32(s, 1000); // timescale
  143. s_wb64(s, duration); // duration (0 for fragmented)
  144. } else {
  145. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  146. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  147. s_wb32(s, 1000); // timescale
  148. s_wb32(s, (uint32_t)duration); // duration (0 for fragmented)
  149. }
  150. s_wb32(s, 0x00010000); // rate, 16.16 fixed float (1 << 16)
  151. s_wb16(s, 0x0100); // volume
  152. s_wb16(s, 0); // reserved
  153. s_wb32(s, 0); // reserved
  154. s_wb32(s, 0); // reserved
  155. // Matrix
  156. for (int i = 0; i < 9; i++)
  157. s_wb32(s, UNITY_MATRIX[i]);
  158. // pre_defined
  159. s_wb32(s, 0);
  160. s_wb32(s, 0);
  161. s_wb32(s, 0);
  162. s_wb32(s, 0);
  163. s_wb32(s, 0);
  164. s_wb32(s, 0);
  165. s_wb32(s, mux->track_ctr + 1); // next_track_ID
  166. return write_box_size(s, start);
  167. }
  168. /// 8.3.2 Track Header Box
  169. static size_t mp4_write_tkhd(struct mp4_mux *mux, struct mp4_track *track)
  170. {
  171. struct serializer *s = mux->serializer;
  172. size_t start = serializer_get_pos(s);
  173. uint64_t duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  174. bool extended_ts = duration > UINT32_MAX || mux->creation_time > UINT32_MAX;
  175. uint8_t version = extended_ts ? 1 : 0;
  176. /* Flags are 0x1 (enabled) | 0x2 (in movie) */
  177. static const uint32_t flags = 0x1 | 0x2;
  178. write_fullbox(s, 0, "tkhd", version, flags);
  179. if (extended_ts) {
  180. s_wb64(s, mux->creation_time); // creation time
  181. s_wb64(s, mux->creation_time); // modification time
  182. s_wb32(s, track->track_id); // track_id
  183. s_wb32(s, 0); // reserved
  184. s_wb64(s, duration); // duration in movie timescale
  185. } else {
  186. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  187. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  188. s_wb32(s, track->track_id); // track_id
  189. s_wb32(s, 0); // reserved
  190. s_wb32(s, (uint32_t)duration); // duration in movie timescale
  191. }
  192. s_wb32(s, 0); // reserved
  193. s_wb32(s, 0); // reserved
  194. s_wb16(s, 0); // layer
  195. s_wb16(s, track->type == TRACK_AUDIO ? 1 : 0); // alternate group
  196. s_wb16(s, track->type == TRACK_AUDIO ? 0x100 : 0); // volume
  197. s_wb16(s, 0); // reserved
  198. // Matrix (predefined)
  199. for (int i = 0; i < 9; i++)
  200. s_wb32(s, UNITY_MATRIX[i]);
  201. if (track->type == TRACK_AUDIO) {
  202. s_wb32(s, 0); // width
  203. s_wb32(s, 0); // height
  204. } else {
  205. /* width/height are fixed point 16.16, so we just shift the
  206. * integer to the upper 16 bits */
  207. uint32_t width = obs_encoder_get_width(track->encoder);
  208. s_wb32(s, width << 16);
  209. uint32_t height = obs_encoder_get_height(track->encoder);
  210. s_wb32(s, height << 16);
  211. }
  212. return write_box_size(s, start);
  213. }
  214. /// 8.4.2 Media Header Box
  215. static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
  216. {
  217. struct serializer *s = mux->serializer;
  218. size_t size = 32;
  219. uint8_t version = 0;
  220. uint64_t duration = track->duration;
  221. uint32_t timescale = track->timescale;
  222. if (track->type == TRACK_VIDEO) {
  223. /* Update to track timescale */
  224. duration = util_mul_div64(duration, track->timescale, track->timebase_den);
  225. }
  226. /* use 64-bit duration if necessary */
  227. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  228. if (mux->flavor == FLAVOR_MOV) {
  229. /* QTFF does not specify how to handle 32-bit overflow for duration/timestamps. */
  230. warn("Duration too large for MOV, this file may be unplayable in QuickTime!");
  231. }
  232. size = 44;
  233. version = 1;
  234. }
  235. write_fullbox(s, size, "mdhd", version, 0);
  236. if (version == 1) {
  237. s_wb64(s, mux->creation_time); // creation time
  238. s_wb64(s, mux->creation_time); // modification time
  239. s_wb32(s, timescale); // timescale
  240. s_wb64(s, (uint32_t)duration); // duration
  241. } else {
  242. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  243. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  244. s_wb32(s, timescale); // timescale
  245. s_wb32(s, (uint32_t)duration); // duration
  246. }
  247. s_wb16(s, mux->flavor == FLAVOR_MOV ? 32767 : 21956); // language (undefined)
  248. s_wb16(s, 0); // pre_defined
  249. return size;
  250. }
  251. /// 8.4.3 Handler Reference Box
  252. static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
  253. {
  254. struct serializer *s = mux->serializer;
  255. int64_t start = serializer_get_pos(s);
  256. write_fullbox(s, 0, "hdlr", 0, 0);
  257. if (mux->flavor == FLAVOR_MOV)
  258. s_write(s, track ? "mhlr" : "dhlr", 4);
  259. else
  260. s_wb32(s, 0); // pre_defined
  261. // handler_type
  262. if (!track)
  263. s_write(s, "url ", 4);
  264. else if (track->type == TRACK_VIDEO)
  265. s_write(s, "vide", 4);
  266. else if (track->type == TRACK_CHAPTERS)
  267. s_write(s, "text", 4);
  268. else
  269. s_write(s, "soun", 4);
  270. s_wb32(s, 0); // reserved
  271. s_wb32(s, 0); // reserved
  272. s_wb32(s, 0); // reserved
  273. const char *handler_name;
  274. if (!track)
  275. handler_name = "OBS Data Handler";
  276. else if (track->type == TRACK_VIDEO)
  277. handler_name = "OBS Video Handler";
  278. else if (track->type == TRACK_CHAPTERS)
  279. handler_name = "OBS Chapter Handler";
  280. else
  281. handler_name = "OBS Audio Handler";
  282. // name (null-terminated for MP4, pascal string for MOV)
  283. size_t handler_len = strlen(handler_name);
  284. if (mux->flavor == FLAVOR_MOV) {
  285. s_w8(s, (uint8_t)handler_len);
  286. s_write(s, handler_name, handler_len);
  287. } else {
  288. s_write(s, handler_name, handler_len);
  289. s_w8(s, 0); // NULL terminator
  290. }
  291. return write_box_size(s, start);
  292. }
  293. /// 12.1.2 Video media header
  294. static size_t mp4_write_vmhd(struct mp4_mux *mux)
  295. {
  296. struct serializer *s = mux->serializer;
  297. /* Flags is always 1 */
  298. write_fullbox(s, 20, "vmhd", 0, 1);
  299. s_wb16(s, 0); // graphicsmode
  300. s_wb16(s, 0); // opcolor r
  301. s_wb16(s, 0); // opcolor g
  302. s_wb16(s, 0); // opcolor b
  303. return 16;
  304. }
  305. /// 12.2.2 Sound media header
  306. static size_t mp4_write_smhd(struct mp4_mux *mux)
  307. {
  308. struct serializer *s = mux->serializer;
  309. write_fullbox(s, 16, "smhd", 0, 0);
  310. s_wb16(s, 0); // balance
  311. s_wb16(s, 0); // reserved
  312. return 16;
  313. }
  314. /// (QTFF/Apple) Text media information atom
  315. static size_t mp4_write_qt_text(struct mp4_mux *mux)
  316. {
  317. struct serializer *s = mux->serializer;
  318. int64_t start = serializer_get_pos(s);
  319. write_box(s, 0, "text");
  320. /* Identity matrix, note that it's not fixed point 16.16 */
  321. s_wb16(s, 0x01);
  322. s_wb32(s, 0x00);
  323. s_wb32(s, 0x00);
  324. s_wb32(s, 0x00);
  325. s_wb32(s, 0x01);
  326. s_wb32(s, 0x00);
  327. s_wb32(s, 0x00);
  328. s_wb32(s, 0x00);
  329. s_wb32(s, 0x00004000);
  330. /* Seemingly undocumented */
  331. s_wb16(s, 0x0000);
  332. return write_box_size(s, start);
  333. }
  334. /// (QTFF/Apple) Base media info atom
  335. static size_t mp4_write_gmin(struct mp4_mux *mux)
  336. {
  337. struct serializer *s = mux->serializer;
  338. int64_t start = serializer_get_pos(s);
  339. write_fullbox(s, 0, "gmin", 0, 0);
  340. s_wb16(s, 0x40); // graphics mode
  341. s_wb16(s, 0x8000); // opColor r
  342. s_wb16(s, 0x8000); // opColor g
  343. s_wb16(s, 0x8000); // opColor b
  344. s_wb16(s, 0); // balance
  345. s_wb16(s, 0); // reserved
  346. return write_box_size(s, start);
  347. }
  348. /// (QTFF/Apple) Base media information header atom
  349. static size_t mp4_write_gmhd(struct mp4_mux *mux)
  350. {
  351. struct serializer *s = mux->serializer;
  352. int64_t start = serializer_get_pos(s);
  353. write_box(s, 0, "gmhd");
  354. // gmin
  355. mp4_write_gmin(mux);
  356. // text (QuickTime)
  357. mp4_write_qt_text(mux);
  358. return write_box_size(s, start);
  359. }
  360. /// ISO/IEC 14496-15 5.4.2.1 AVCConfigurationBox
  361. static size_t mp4_write_avcC(struct mp4_mux *mux, obs_encoder_t *enc)
  362. {
  363. struct serializer *s = mux->serializer;
  364. /* For AVC this is the parsed extra data. */
  365. uint8_t *header;
  366. size_t size;
  367. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  368. if (!obs_encoder_get_extra_data(enc, &header, &size))
  369. return 0;
  370. packet.size = obs_parse_avc_header(&packet.data, header, size);
  371. size_t box_size = packet.size + 8;
  372. write_box(s, box_size, "avcC");
  373. s_write(s, packet.data, packet.size);
  374. bfree(packet.data);
  375. return box_size;
  376. }
  377. /// ISO/IEC 14496-15 8.4.1.1 HEVCConfigurationBox
  378. static size_t mp4_write_hvcC(struct mp4_mux *mux, obs_encoder_t *enc)
  379. {
  380. struct serializer *s = mux->serializer;
  381. /* For HEVC this is the parsed extra data. */
  382. uint8_t *header;
  383. size_t size;
  384. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  385. if (!obs_encoder_get_extra_data(enc, &header, &size))
  386. return 0;
  387. packet.size = obs_parse_hevc_header(&packet.data, header, size);
  388. size_t box_size = packet.size + 8;
  389. write_box(s, box_size, "hvcC");
  390. s_write(s, packet.data, packet.size);
  391. bfree(packet.data);
  392. return box_size;
  393. }
  394. /// AV1 ISOBMFF 2.3. AV1 Codec Configuration Box
  395. static size_t mp4_write_av1C(struct mp4_mux *mux, obs_encoder_t *enc)
  396. {
  397. struct serializer *s = mux->serializer;
  398. /* For AV1 this is just the parsed extra data. */
  399. uint8_t *header;
  400. size_t size;
  401. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  402. if (!obs_encoder_get_extra_data(enc, &header, &size))
  403. return 0;
  404. packet.size = obs_parse_av1_header(&packet.data, header, size);
  405. size_t box_size = packet.size + 8;
  406. write_box(s, box_size, "av1C");
  407. s_write(s, packet.data, packet.size);
  408. bfree(packet.data);
  409. return box_size;
  410. }
  411. /// 12.1.5 Colour information
  412. static size_t mp4_write_colr(struct mp4_mux *mux, obs_encoder_t *enc)
  413. {
  414. UNUSED_PARAMETER(enc);
  415. struct serializer *s = mux->serializer;
  416. write_box(s, 19, "colr");
  417. uint8_t full_range = 0;
  418. uint16_t pri, trc, spc;
  419. pri = trc = spc = 0;
  420. get_colour_information(enc, &pri, &trc, &spc, &full_range);
  421. s_write(s, "nclx", 4); // colour_type
  422. s_wb16(s, pri); // colour_primaries
  423. s_wb16(s, trc); // transfer_characteristics
  424. s_wb16(s, spc); // matrix_coefficiencts
  425. s_w8(s, full_range << 7); // full range flag + 7 reserved bits (0)
  426. return 19;
  427. }
  428. /// 12.1.4 Pixel Aspect Ratio
  429. static size_t mp4_write_pasp(struct mp4_mux *mux)
  430. {
  431. struct serializer *s = mux->serializer;
  432. write_box(s, 16, "pasp");
  433. s_wb32(s, 1); // hSpacing
  434. s_wb32(s, 1); // vSpacing
  435. return 16;
  436. }
  437. /// 12.1.3 Visual Sample Entry
  438. static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux, obs_encoder_t *enc)
  439. {
  440. struct serializer *s = mux->serializer;
  441. // SampleEntry Box
  442. s_w8(s, 0); // reserved
  443. s_w8(s, 0);
  444. s_w8(s, 0);
  445. s_w8(s, 0);
  446. s_w8(s, 0);
  447. s_w8(s, 0);
  448. s_wb16(s, 1); // data_reference_index
  449. // VisualSampleEntry Box
  450. s_wb16(s, 0); // pre_defined
  451. s_wb16(s, 0); // reserved
  452. if (mux->flavor == FLAVOR_MOV) {
  453. s_write(s, "OBSS", 4); // vendor
  454. s_wb32(s, 0x200); // temporal quality (codecNormalQuality = 512)
  455. s_wb32(s, 0x200); // spatial quality (codecNormalQuality)
  456. } else {
  457. s_wb32(s, 0); // pre_defined
  458. s_wb32(s, 0); // pre_defined
  459. s_wb32(s, 0); // pre_defined
  460. }
  461. s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width
  462. s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
  463. s_wb32(s, 0x00480000); // horizresolution (predefined)
  464. s_wb32(s, 0x00480000); // vertresolution (predefined)
  465. s_wb32(s, 0); // reserved
  466. s_wb16(s, 1); // frame_count
  467. /* Name is fixed 32-bytes and needs to be padded to that length.
  468. * First byte is the length, rest is a string sans NULL terminator. */
  469. char compressor_name[32] = {0};
  470. const char *enc_id = obs_encoder_get_id(enc);
  471. if (enc_id) {
  472. size_t len = strlen(enc_id);
  473. if (len > 31)
  474. len = 31;
  475. compressor_name[0] = (char)len;
  476. memcpy(compressor_name + 1, enc_id, len);
  477. }
  478. s_write(s, compressor_name, sizeof(compressor_name)); // compressorname
  479. s_wb16(s, 0x0018); // depth
  480. s_wb16(s, -1); // pre_defined
  481. }
  482. /// 12.1.6 Content light level
  483. static size_t mp4_write_clli(struct mp4_mux *mux, obs_encoder_t *enc)
  484. {
  485. struct serializer *s = mux->serializer;
  486. video_t *video = obs_encoder_video(enc);
  487. const struct video_output_info *info = video_output_get_info(video);
  488. /* Only write box for HDR video */
  489. if (info->colorspace != VIDEO_CS_2100_PQ && info->colorspace != VIDEO_CS_2100_HLG)
  490. return 0;
  491. write_box(s, 12, "clli");
  492. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  493. s_wb16(s, (uint16_t)nominal_peak); // max_content_light_level
  494. s_wb16(s, (uint16_t)nominal_peak); // max_pic_average_light_level
  495. return 12;
  496. }
  497. /// 12.1.7 Mastering display colour volume
  498. static size_t mp4_write_mdcv(struct mp4_mux *mux, obs_encoder_t *enc)
  499. {
  500. struct serializer *s = mux->serializer;
  501. video_t *video = obs_encoder_video(enc);
  502. const struct video_output_info *info = video_output_get_info(video);
  503. // Only write atom for HDR video
  504. if (info->colorspace != VIDEO_CS_2100_PQ && info->colorspace != VIDEO_CS_2100_HLG)
  505. return 0;
  506. write_box(s, 32, "mdcv");
  507. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  508. uint32_t max_lum = (uint32_t)nominal_peak * 10000;
  509. /* Note that these values are hardcoded everywhere in OBS, so these are
  510. * just the same as used in our other muxers/encoders. */
  511. // 3 x display_primaries (x, y) pairs
  512. s_wb16(s, 13250);
  513. s_wb16(s, 34500);
  514. s_wb16(s, 7500);
  515. s_wb16(s, 3000);
  516. s_wb16(s, 34000);
  517. s_wb16(s, 16000);
  518. s_wb16(s, 15635); // white_point_x
  519. s_wb16(s, 16450); // white_point_y
  520. s_wb32(s, max_lum); // max_display_mastering_luminance
  521. s_wb32(s, 0); // min_display_mastering_luminance
  522. return 32;
  523. }
  524. /// ISO/IEC 14496-15 5.4.2.1 AVCSampleEntry
  525. static size_t mp4_write_avc1(struct mp4_mux *mux, obs_encoder_t *enc)
  526. {
  527. struct serializer *s = mux->serializer;
  528. int64_t start = serializer_get_pos(s);
  529. write_box(s, 0, "avc1");
  530. mp4_write_visual_sample_entry(mux, enc);
  531. // avcC
  532. mp4_write_avcC(mux, enc);
  533. // colr
  534. mp4_write_colr(mux, enc);
  535. // pasp
  536. mp4_write_pasp(mux);
  537. return write_box_size(s, start);
  538. }
  539. /// ISO/IEC 14496-15 8.4.1.1 HEVCSampleEntry
  540. static size_t mp4_write_hvc1(struct mp4_mux *mux, obs_encoder_t *enc)
  541. {
  542. struct serializer *s = mux->serializer;
  543. int64_t start = serializer_get_pos(s);
  544. write_box(s, 0, "hvc1");
  545. mp4_write_visual_sample_entry(mux, enc);
  546. // avcC
  547. mp4_write_hvcC(mux, enc);
  548. // colr
  549. mp4_write_colr(mux, enc);
  550. // clli
  551. mp4_write_clli(mux, enc);
  552. // mdcv
  553. mp4_write_mdcv(mux, enc);
  554. // pasp
  555. mp4_write_pasp(mux);
  556. return write_box_size(s, start);
  557. }
  558. /// AV1 ISOBMFF 2.2. AV1 Sample Entry
  559. static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
  560. {
  561. struct serializer *s = mux->serializer;
  562. int64_t start = serializer_get_pos(s);
  563. write_box(s, 0, "av01");
  564. mp4_write_visual_sample_entry(mux, enc);
  565. // avcC
  566. mp4_write_av1C(mux, enc);
  567. // colr
  568. mp4_write_colr(mux, enc);
  569. // clli
  570. mp4_write_clli(mux, enc);
  571. // mdcv
  572. mp4_write_mdcv(mux, enc);
  573. // pasp
  574. mp4_write_pasp(mux);
  575. return write_box_size(s, start);
  576. }
  577. /// (QTFF/Apple) Video Sample Description
  578. static size_t mp4_write_prores(struct mp4_mux *mux, obs_encoder_t *enc)
  579. {
  580. struct serializer *s = mux->serializer;
  581. int64_t start = serializer_get_pos(s);
  582. /* We get the tag as an int, but need it as a char[4] */
  583. union tag {
  584. char c[4];
  585. uint32_t i;
  586. } codec_tag;
  587. /* Codec tag varies for ProRes depending on configuration, so we need to get it from the encoder. */
  588. obs_data_t *settings = obs_encoder_get_settings(enc);
  589. codec_tag.i = (uint32_t)obs_data_get_int(settings, "codec_type");
  590. obs_data_release(settings);
  591. #if __BYTE_ORDER == __LITTLE_ENDIAN
  592. codec_tag.i = ((codec_tag.i >> 24) & 0x000000FF) | ((codec_tag.i << 8) & 0x00FF0000) |
  593. ((codec_tag.i >> 8) & 0x0000FF00) | ((codec_tag.i << 24) & 0xFF000000);
  594. #endif
  595. write_box(s, 0, codec_tag.c);
  596. mp4_write_visual_sample_entry(mux, enc);
  597. // colr
  598. mp4_write_colr(mux, enc);
  599. // clli
  600. mp4_write_clli(mux, enc);
  601. // mdcv
  602. mp4_write_mdcv(mux, enc);
  603. // pasp
  604. mp4_write_pasp(mux);
  605. return write_box_size(s, start);
  606. }
  /* Writes a descriptor header: the tag byte followed by `size` encoded in
   * exactly four bytes of 7 bits each, most significant group first. The
   * first three bytes have their top bit set; the last one does not. */
  static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
  {
      s_w8(s, tag);

      /* Bits 27..21, 20..14 and 13..7, each flagged with 0x80 */
      for (int shift = 21; shift > 0; shift -= 7)
          s_w8(s, (uint8_t)((size >> shift) | 0x80));

      /* Bits 6..0, last byte of the length */
      s_w8(s, (uint8_t)(size & 0x7F));
  }
  615. /// ISO/IEC 14496-14 5.6 ESDBox
  616. static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
  617. {
  618. struct serializer *s = mux->serializer;
  619. int64_t start = serializer_get_pos(s);
  620. write_fullbox(s, 0, "esds", 0, 0);
  621. /* Encoder extradata will be used as DecoderSpecificInfo */
  622. uint8_t *extradata;
  623. size_t extradata_size;
  624. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size)) {
  625. extradata_size = 0;
  626. }
  627. /// ISO/IEC 14496-1
  628. // ES_Descriptor
  629. size_t decoder_specific_info_len = extradata_size ? extradata_size + 5 : 0;
  630. put_descr(s, 0x03, 3 + 5 + 13 + decoder_specific_info_len + 5 + 1);
  631. s_wb16(s, track->track_id);
  632. s_w8(s, 0x00); // flags
  633. // DecoderConfigDescriptor
  634. put_descr(s, 0x04, 13 + decoder_specific_info_len);
  635. s_w8(s, 0x40); // codec tag, 0x40 = AAC
  636. s_w8(s, 0x15); // stream type field (0x15 = audio stream)
  637. /* When writing the final MOOV this could theoretically be calculated
  638. * based on chunks, but it's not really all that important. */
  639. uint32_t bitrate = 0;
  640. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  641. if (settings) {
  642. int64_t enc_bitrate = obs_data_get_int(settings, "bitrate");
  643. if (enc_bitrate)
  644. bitrate = (uint32_t)(enc_bitrate * 1000);
  645. obs_data_release(settings);
  646. }
  647. s_wb24(s, 0); // bufferSizeDB (in bytes)
  648. s_wb32(s, bitrate); // maxbitrate
  649. s_wb32(s, bitrate); // avgBitrate
  650. // DecoderSpecificInfo
  651. if (extradata_size) {
  652. put_descr(s, 0x05, extradata_size);
  653. s_write(s, extradata, extradata_size);
  654. }
  655. // SLConfigDescriptor descriptor
  656. put_descr(s, 0x06, 1);
  657. s_w8(s, 0x02); // 0x2 = reserved for MP4, descriptor is empty
  658. return write_box_size(s, start);
  659. }
  660. /// 12.2.3 Audio Sample Entry
  661. static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  662. {
  663. struct serializer *s = mux->serializer;
  664. bool is_mov = mux->flavor == FLAVOR_MOV;
  665. bool is_pcm = track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || track->codec == CODEC_PCM_F32;
  666. // SampleEntry Box
  667. s_w8(s, 0); // reserved
  668. s_w8(s, 0);
  669. s_w8(s, 0);
  670. s_w8(s, 0);
  671. s_w8(s, 0);
  672. s_w8(s, 0);
  673. s_wb16(s, 1); // data_reference_index
  674. // AudioSampleEntry Box
  675. s_wb16(s, version); // entry_version
  676. s_wb16(s, 0); // reserved
  677. s_wb16(s, 0); // reserved
  678. s_wb16(s, 0); // reserved
  679. audio_t *audio = obs_encoder_audio(track->encoder);
  680. uint32_t channels = (uint32_t)audio_output_get_channels(audio);
  681. uint32_t sample_rate = track->timescale;
  682. bool alac = track->codec == CODEC_ALAC;
  683. /* MOV specific version: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2 */
  684. if (version == 2) {
  685. // We need to get the raw float bytes, union seems to be the easiest way to do that.
  686. union rate {
  687. uint64_t u;
  688. double f;
  689. } rate;
  690. rate.f = (double)sample_rate;
  691. s_wb16(s, 3); // always3
  692. s_wb16(s, 16); // always16
  693. s_wb16(s, 0xfffe); // alwaysMinus2
  694. s_wb16(s, 0); // always0
  695. s_wb32(s, 0x00010000); // always65536
  696. s_wb32(s, 72); // sizeOfStructOnly (start of containing box to constLPCMFramesPerAudioPacket)
  697. s_wb64(s, rate.u); // audioSampleRate
  698. s_wb32(s, channels); // numAudioChannels
  699. s_wb32(s, 0x7F000000); // always7F000000
  700. s_wb32(s, is_pcm ? track->sample_size / channels * 8 : 0); // constBitsPerChannel
  701. s_wb32(s, get_lpcm_flags(track->codec)); // formatSpecificFlags
  702. s_wb32(s, is_pcm ? track->sample_size : 0); // constBytesPerAudioPacket
  703. s_wb32(s, is_pcm ? 1 : 0); // constLPCMFramesPerAudioPacket
  704. } else {
  705. s_wb16(s, channels); // channelcount
  706. /* OBS FLAC is currently always 16-bit, ALAC always 24, this may change in the future and should be
  707. * handled differently then.
  708. * That being said those codecs are self-describing, so in most cases it shouldn't actually matter. */
  709. s_wb16(s, !is_mov && alac ? 24 : 16); // samplesize
  710. s_wb16(s, is_mov && !is_pcm ? -2 : 0); // pre_defined (compression ID in MOV)
  711. s_wb16(s, 0); // reserved
  712. /* The sample rate field is limited to 16-bits. Technically version 1 supports a "srat" box which
  713. * provides 32-bits, but this is not supported by most software (including FFmpeg and Chromium).
  714. * For encoded codecs (AAC etc.), the sample rate can be read from the encoded data itself.
  715. * For PCM FFmpeg will try to use the timescale as sample rate. */
  716. if (sample_rate > UINT16_MAX) {
  717. warn("Sample rate too high for MP4, file may not play back correctly.");
  718. sample_rate = 0;
  719. }
  720. s_wb32(s, sample_rate << 16); // samplerate
  721. /* MOV-only data: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_1 */
  722. if (is_mov && version == 1) {
  723. size_t frame_size = obs_encoder_get_frame_size(track->encoder);
  724. s_wb32(s, is_pcm ? 1 : (uint32_t)frame_size); // frame size
  725. s_wb32(s, is_pcm ? track->sample_size / channels : 0); // bytes per packet
  726. s_wb32(s, is_pcm ? track->sample_size : 0); // bytes per frame
  727. s_wb32(s, 2); // bytes per sample, 2 for anything but 8-bit
  728. }
  729. }
  730. }
  731. /// 12.2.4 Channel layout
  732. static size_t mp4_write_chnl(struct mp4_mux *mux, struct mp4_track *track)
  733. {
  734. struct serializer *s = mux->serializer;
  735. int64_t start = serializer_get_pos(s);
  736. write_fullbox(s, 0, "chnl", 0, 0);
  737. audio_t *audio = obs_encoder_audio(track->encoder);
  738. const struct audio_output_info *info = audio_output_get_info(audio);
  739. s_w8(s, 1); // stream_structure (1 = channels)
  740. /* 5.1 and 4.1 do not have a corresponding ISO layout, so we have to
  741. * write a manually created channel map for those. */
  742. uint8_t map[8] = {0};
  743. uint8_t items = 0;
  744. uint8_t defined_layout = 0;
  745. get_speaker_positions(info->speakers, map, &items, &defined_layout);
  746. if (!defined_layout) {
  747. warn("No ISO layout available for speaker layout %d, "
  748. "this may not be supported by all applications!",
  749. info->speakers);
  750. s_w8(s, 0); // definedLayout
  751. s_write(s, map, items); // uint8_t speaker_position[count]
  752. } else {
  753. s_w8(s, defined_layout); // definedLayout
  754. s_wb64(s, 0); // ommitedChannelMap
  755. }
  756. return write_box_size(s, start);
  757. }
  758. /// ISO/IEC 14496-14 5.6 MP4AudioSampleEntry
  759. static size_t mp4_write_mp4a(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  760. {
  761. struct serializer *s = mux->serializer;
  762. int64_t start = serializer_get_pos(s);
  763. write_box(s, 0, "mp4a");
  764. mp4_write_audio_sample_entry(mux, track, version);
  765. // esds
  766. mp4_write_esds(mux, track);
  767. /* Write channel layout for version 1 sample entires */
  768. if (version == 1)
  769. mp4_write_chnl(mux, track);
  770. return write_box_size(s, start);
  771. }
  772. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.2 FLAC Specific Box
  773. static size_t mp4_write_dfLa(struct mp4_mux *mux, struct mp4_track *track)
  774. {
  775. struct serializer *s = mux->serializer;
  776. int64_t start = serializer_get_pos(s);
  777. uint8_t *extradata;
  778. size_t extradata_size;
  779. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  780. return 0;
  781. write_fullbox(s, 0, "dfLa", 0, 0);
  782. /// FLACMetadataBlock
  783. // LastMetadataBlockFlag (1) | BlockType (0)
  784. s_w8(s, 1 << 7 | 0);
  785. // Length
  786. s_wb24(s, (uint32_t)extradata_size);
  787. // BlockData[Length]
  788. s_write(s, extradata, extradata_size);
  789. return write_box_size(s, start);
  790. }
  791. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.1 FLACSampleEntry
  792. static size_t mp4_write_fLaC(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  793. {
  794. struct serializer *s = mux->serializer;
  795. int64_t start = serializer_get_pos(s);
  796. write_box(s, 0, "fLaC");
  797. mp4_write_audio_sample_entry(mux, track, version);
  798. // dfLa
  799. mp4_write_dfLa(mux, track);
  800. if (version == 1)
  801. mp4_write_chnl(mux, track);
  802. return write_box_size(s, start);
  803. }
  804. /// Apple Lossless Format "Magic Cookie" Description - MP4/M4A File
  805. static size_t mp4_write_alac(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  806. {
  807. struct serializer *s = mux->serializer;
  808. int64_t start = serializer_get_pos(s);
  809. uint8_t *extradata;
  810. size_t extradata_size;
  811. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  812. return 0;
  813. write_box(s, 0, "alac");
  814. mp4_write_audio_sample_entry(mux, track, version);
  815. /* Apple Lossless Magic Cookie */
  816. s_write(s, extradata, extradata_size);
  817. if (version == 1)
  818. mp4_write_chnl(mux, track);
  819. return write_box_size(s, start);
  820. }
  821. /// ISO/IEC 23003-5 5.1 PCM configuration
  822. static size_t mp4_write_pcmc(struct mp4_mux *mux, struct mp4_track *track)
  823. {
  824. struct serializer *s = mux->serializer;
  825. int64_t start = serializer_get_pos(s);
  826. write_fullbox(s, 0, "pcmC", 0, 0);
  827. s_w8(s, 1); // endianness, 1 = little endian
  828. // bits per sample
  829. if (track->codec == CODEC_PCM_I16)
  830. s_w8(s, 16);
  831. else if (track->codec == CODEC_PCM_I24)
  832. s_w8(s, 24);
  833. else if (track->codec == CODEC_PCM_F32)
  834. s_w8(s, 32);
  835. return write_box_size(s, start);
  836. }
  837. /// ISO/IEC 23003-5 5.1 PCM configuration
  838. static size_t mp4_write_xpcm(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  839. {
  840. struct serializer *s = mux->serializer;
  841. int64_t start = serializer_get_pos(s);
  842. /* Different box types for floating point and integer PCM*/
  843. write_box(s, 0, track->codec == CODEC_PCM_F32 ? "fpcm" : "ipcm");
  844. mp4_write_audio_sample_entry(mux, track, version);
  845. /* ChannelLayout (chnl) is required for PCM */
  846. mp4_write_chnl(mux, track);
  847. // pcmc
  848. mp4_write_pcmc(mux, track);
  849. return write_box_size(s, start);
  850. }
  851. /// (QTFF/Apple) Text sample description
  852. static size_t mp4_write_text(struct mp4_mux *mux)
  853. {
  854. struct serializer *s = mux->serializer;
  855. int64_t start = serializer_get_pos(s);
  856. write_fullbox(s, 0, "text", 0, 0);
  857. s_wb32(s, 1); // number of entries
  858. /* Preset sample description as used by FFmpeg. */
  859. s_write(s, &TEXT_STUB_HEADER, sizeof(TEXT_STUB_HEADER));
  860. return write_box_size(s, start);
  861. }
  862. static inline uint32_t rl32(const uint8_t *ptr)
  863. {
  864. return (ptr[3] << 24) + (ptr[2] << 16) + (ptr[1] << 8) + ptr[0];
  865. }
  866. static inline uint16_t rl16(const uint8_t *ptr)
  867. {
  868. return (ptr[1] << 8) + ptr[0];
  869. }
  870. /// Encapsulation of Opus in ISO Base Media File Format 4.3.2 Opus Specific Box
  871. static size_t mp4_write_dOps(struct mp4_mux *mux, struct mp4_track *track)
  872. {
  873. struct serializer *s = mux->serializer;
  874. int64_t start = serializer_get_pos(s);
  875. uint8_t *extradata;
  876. size_t extradata_size;
  877. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  878. return 0;
  879. write_box(s, 0, "dOps");
  880. s_w8(s, 0); // version
  881. uint8_t channels = *(extradata + 9);
  882. uint8_t channel_map = *(extradata + 18);
  883. s_w8(s, channels); // channel count
  884. // OpusHead is little-endian, but MP4 is big-endian, so we have to swap them here
  885. s_wb16(s, rl16(extradata + 10)); // pre-skip
  886. s_wb32(s, rl32(extradata + 12)); // input sample rate
  887. s_wb16(s, rl16(extradata + 16)); // output gain
  888. s_w8(s, channel_map); // channel mapping family
  889. if (channel_map)
  890. s_write(s, extradata + 19, 2 + channels);
  891. return write_box_size(s, start);
  892. }
  893. /// Encapsulation of Opus in ISO Base Media File Format 4.3.1 Sample entry format
  894. static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  895. {
  896. struct serializer *s = mux->serializer;
  897. int64_t start = serializer_get_pos(s);
  898. write_box(s, 0, "Opus");
  899. mp4_write_audio_sample_entry(mux, track, version);
  900. // dOps
  901. mp4_write_dOps(mux, track);
  902. if (version == 1)
  903. mp4_write_chnl(mux, track);
  904. return write_box_size(s, start);
  905. }
  906. /// (QTFF/Apple) siDecompressionParam Atom ('wave')
  907. static size_t mp4_write_wave(struct mp4_mux *mux, struct mp4_track *track, const char tag[4])
  908. {
  909. struct serializer *s = mux->serializer;
  910. int64_t start = serializer_get_pos(s);
  911. write_box(s, 0, "wave");
  912. /* frma atom containing codec tag (again) */
  913. s_wb32(s, 12);
  914. s_write(s, "frma", 4);
  915. s_write(s, tag, 4);
  916. if (track->codec == CODEC_AAC) {
  917. mp4_write_esds(mux, track);
  918. } else if (track->codec == CODEC_ALAC) {
  919. uint8_t *extradata;
  920. size_t extradata_size;
  921. if (obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size)) {
  922. /* Apple Lossless Magic Cookie */
  923. s_write(s, extradata, extradata_size);
  924. }
  925. }
  926. /* Terminator atom */
  927. s_wb32(s, 8); // size
  928. s_wb32(s, 0); // NULL name
  929. return write_box_size(s, start);
  930. }
  931. /// (QTFF/Apple) Audio Channel Layout Atom (‘chan’)
  932. static size_t mp4_write_chan(struct mp4_mux *mux, struct mp4_track *track)
  933. {
  934. struct serializer *s = mux->serializer;
  935. int64_t start = serializer_get_pos(s);
  936. audio_t *audio = obs_encoder_audio(track->encoder);
  937. const struct audio_output_info *info = audio_output_get_info(audio);
  938. uint32_t layout = get_mov_channel_layout(track->codec, info->speakers);
  939. uint32_t bitmap = layout == kAudioChannelLayoutTag_UseChannelBitmap ? get_mov_channel_bitmap(info->speakers)
  940. : 0;
  941. if (layout == kAudioChannelLayoutTag_UseChannelBitmap && !bitmap) {
  942. warn("No valid speaker layout found, not writing chan box. File may not play back correctly!");
  943. return 0;
  944. }
  945. write_fullbox(s, 0, "chan", 0, 0);
  946. /* AudioChannelLayout from CoreAudioTypes.h */
  947. s_wb32(s, layout); // mChannelLayoutTag
  948. s_wb32(s, bitmap); // mChannelBitmap
  949. s_wb32(s, 0); // mNumberChannelDescriptions
  950. return write_box_size(s, start);
  951. }
  952. /// (QTFF/Apple) Sound Sample Description (v1 and v2)
  953. static size_t mp4_write_mov_audio_tag(struct mp4_mux *mux, struct mp4_track *track)
  954. {
  955. struct serializer *s = mux->serializer;
  956. int64_t start = serializer_get_pos(s);
  957. const char *tag = NULL;
  958. audio_t *audio = obs_encoder_audio(track->encoder);
  959. uint32_t sample_rate = audio_output_get_sample_rate(audio);
  960. size_t channels = audio_output_get_channels(audio);
  961. /* More than 2 channels or samples rates above 65535 Hz requires v2 */
  962. uint8_t version = (channels > 2 || sample_rate > UINT16_MAX) ? 2 : 1;
  963. if (track->codec == CODEC_PCM_F32 || track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24) {
  964. tag = "lpcm";
  965. version = 2; /* lpcm also requires v2 */
  966. } else if (track->codec == CODEC_AAC) {
  967. tag = "mp4a";
  968. } else if (track->codec == CODEC_ALAC) {
  969. tag = "alac";
  970. }
  971. /* Unsupported/Unknown codec */
  972. if (!tag)
  973. return 0;
  974. write_box(s, 0, tag);
  975. mp4_write_audio_sample_entry(mux, track, version);
  976. // wave
  977. if (version == 1)
  978. mp4_write_wave(mux, track, tag);
  979. // chan
  980. mp4_write_chan(mux, track);
  981. return write_box_size(s, start);
  982. }
  983. /// 8.5.2 Sample Description Box
  984. static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
  985. {
  986. struct serializer *s = mux->serializer;
  987. int64_t start = serializer_get_pos(s);
  988. /* Anything but mono or stereo technically requires v1,
  989. * but in practice that doesn't appear to matter. */
  990. uint8_t version = 0;
  991. if (track->type == TRACK_AUDIO && mux->flavor != FLAVOR_MOV) {
  992. audio_t *audio = obs_encoder_audio(track->encoder);
  993. version = audio_output_get_channels(audio) > 2 ? 1 : 0;
  994. }
  995. write_fullbox(s, 0, "stsd", version, 0);
  996. s_wb32(s, 1); // entry_count
  997. // codec specific boxes
  998. if (track->type == TRACK_VIDEO) {
  999. if (track->codec == CODEC_H264)
  1000. mp4_write_avc1(mux, track->encoder);
  1001. else if (track->codec == CODEC_HEVC)
  1002. mp4_write_hvc1(mux, track->encoder);
  1003. else if (track->codec == CODEC_AV1)
  1004. mp4_write_av01(mux, track->encoder);
  1005. else if (track->codec == CODEC_PRORES)
  1006. mp4_write_prores(mux, track->encoder);
  1007. } else if (track->type == TRACK_AUDIO) {
  1008. if (mux->flavor == FLAVOR_MOV) {
  1009. mp4_write_mov_audio_tag(mux, track);
  1010. } else {
  1011. if (track->codec == CODEC_AAC)
  1012. mp4_write_mp4a(mux, track, version);
  1013. else if (track->codec == CODEC_OPUS)
  1014. mp4_write_Opus(mux, track, version);
  1015. else if (track->codec == CODEC_FLAC)
  1016. mp4_write_fLaC(mux, track, version);
  1017. else if (track->codec == CODEC_ALAC)
  1018. mp4_write_alac(mux, track, version);
  1019. else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 ||
  1020. track->codec == CODEC_PCM_F32)
  1021. mp4_write_xpcm(mux, track, version);
  1022. }
  1023. } else if (track->type == TRACK_CHAPTERS) {
  1024. mp4_write_text(mux);
  1025. }
  1026. return write_box_size(s, start);
  1027. }
  1028. /// 8.6.1.2 Decoding Time to Sample Box
  1029. static size_t mp4_write_stts(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1030. {
  1031. struct serializer *s = mux->serializer;
  1032. if (fragmented) {
  1033. write_fullbox(s, 16, "stts", 0, 0);
  1034. s_wb32(s, 0); // entry_count
  1035. return 16;
  1036. }
  1037. int64_t start = serializer_get_pos(s);
  1038. struct sample_delta *arr = track->deltas.array;
  1039. size_t num = track->deltas.num;
  1040. write_fullbox(s, 0, "stts", 0, 0);
  1041. s_wb32(s, (uint32_t)num); // entry_count
  1042. for (size_t idx = 0; idx < num; idx++) {
  1043. struct sample_delta *smp = &arr[idx];
  1044. uint64_t delta = util_mul_div64(smp->delta, track->timescale, track->timebase_den);
  1045. s_wb32(s, smp->count); // sample_count
  1046. s_wb32(s, (uint32_t)delta); // sample_delta
  1047. }
  1048. return write_box_size(s, start);
  1049. }
  1050. /// 8.6.2 Sync Sample Box
  1051. static size_t mp4_write_stss(struct mp4_mux *mux, struct mp4_track *track)
  1052. {
  1053. struct serializer *s = mux->serializer;
  1054. uint32_t num = (uint32_t)track->sync_samples.num;
  1055. if (!num)
  1056. return 0;
  1057. /* 16 byte FullBox header + 4-bytes (u32) per sync sample */
  1058. uint32_t size = 16 + 4 * num;
  1059. write_fullbox(s, size, "stss", 0, 0);
  1060. s_wb32(s, num); // entry_count
  1061. for (size_t idx = 0; idx < num; idx++)
  1062. s_wb32(s, track->sync_samples.array[idx]); // sample_number
  1063. return size;
  1064. }
  1065. /// 8.6.1.3 Composition Time to Sample Box
  1066. static size_t mp4_write_ctts(struct mp4_mux *mux, struct mp4_track *track)
  1067. {
  1068. struct serializer *s = mux->serializer;
  1069. uint32_t num = (uint32_t)track->offsets.num;
  1070. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  1071. /* 16 byte FullBox header + 8-bytes (u32+u32/i32) per offset entry */
  1072. uint32_t size = 16 + 8 * num;
  1073. write_fullbox(s, size, "ctts", version, 0);
  1074. s_wb32(s, num); // entry_count
  1075. for (size_t idx = 0; idx < num; idx++) {
  1076. int64_t offset = (int64_t)track->offsets.array[idx].offset * (int64_t)track->timescale /
  1077. (int64_t)track->timebase_den;
  1078. s_wb32(s, track->offsets.array[idx].count); // sample_count
  1079. s_wb32(s, (uint32_t)offset); // sample_offset
  1080. }
  1081. return size;
  1082. }
  1083. /// 8.7.4 Sample To Chunk Box
  1084. static size_t mp4_write_stsc(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1085. {
  1086. struct serializer *s = mux->serializer;
  1087. if (fragmented) {
  1088. write_fullbox(s, 16, "stsc", 0, 0);
  1089. s_wb32(s, 0); // entry_count
  1090. return 16;
  1091. }
  1092. struct chunk *arr = track->chunks.array;
  1093. size_t arr_num = track->chunks.num;
  1094. /* Compress into array with counter for repeating chunk sizes */
  1095. DARRAY(struct chunk_run {
  1096. uint32_t first;
  1097. uint32_t samples;
  1098. }) chunk_runs;
  1099. da_init(chunk_runs);
  1100. for (size_t idx = 0; idx < arr_num; idx++) {
  1101. struct chunk *chk = &arr[idx];
  1102. if (!chunk_runs.num || chunk_runs.array[chunk_runs.num - 1].samples != chk->samples) {
  1103. struct chunk_run *cr = da_push_back_new(chunk_runs);
  1104. cr->samples = chk->samples;
  1105. cr->first = (uint32_t)idx + 1; // ISO-BMFF is 1-indexed
  1106. }
  1107. }
  1108. uint32_t num = (uint32_t)chunk_runs.num;
  1109. /* 16 byte FullBox header + 12-bytes (u32+u32+u32) per chunk run */
  1110. uint32_t size = 16 + 12 * num;
  1111. write_fullbox(s, size, "stsc", 0, 0);
  1112. s_wb32(s, num); // entry_count
  1113. for (size_t idx = 0; idx < num; idx++) {
  1114. struct chunk_run *cr = &chunk_runs.array[idx];
  1115. s_wb32(s, cr->first); // first_chunk
  1116. s_wb32(s, cr->samples); // samples_per_chunk
  1117. s_wb32(s, 1); // sample_description_index
  1118. }
  1119. da_free(chunk_runs);
  1120. return size;
  1121. }
  1122. /// 8.7.3 Sample Size Boxes
  1123. static size_t mp4_write_stsz(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1124. {
  1125. struct serializer *s = mux->serializer;
  1126. if (fragmented) {
  1127. write_fullbox(s, 20, "stsz", 0, 0);
  1128. s_wb32(s, 0); // sample_size
  1129. s_wb32(s, 0); // sample_count
  1130. return 20;
  1131. }
  1132. int64_t start = serializer_get_pos(s);
  1133. /* This should only ever happen when recording > 24 hours of
  1134. * 48 kHz PCM audio or 828 days of 60 FPS video. */
  1135. if (track->samples > UINT32_MAX) {
  1136. warn("Track %u has too many samples, its duration may not be "
  1137. "read correctly. Remuxing the file to another format such "
  1138. "as MKV may be required.",
  1139. track->track_id);
  1140. }
  1141. write_fullbox(s, 0, "stsz", 0, 0);
  1142. if (track->sample_size) {
  1143. /* Fixed size samples mean we don't need an array */
  1144. s_wb32(s, track->sample_size); // sample_size
  1145. s_wb32(s, (uint32_t)track->samples); // sample_count
  1146. } else {
  1147. s_wb32(s, 0); // sample_size
  1148. s_wb32(s, (uint32_t)track->sample_sizes.num); // sample_count
  1149. for (size_t idx = 0; idx < track->sample_sizes.num; idx++) {
  1150. s_wb32(s, track->sample_sizes.array[idx]); // entry_size
  1151. }
  1152. }
  1153. return write_box_size(s, start);
  1154. }
  1155. /// 8.7.5 Chunk Offset Box
  1156. static size_t mp4_write_stco(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1157. {
  1158. struct serializer *s = mux->serializer;
  1159. if (fragmented) {
  1160. write_fullbox(s, 16, "stco", 0, 0);
  1161. s_wb32(s, 0); // entry_count
  1162. return 16;
  1163. }
  1164. struct chunk *arr = track->chunks.array;
  1165. uint32_t num = (uint32_t)track->chunks.num;
  1166. uint64_t last_off = arr[num - 1].offset;
  1167. uint32_t size;
  1168. bool co64 = last_off > UINT32_MAX;
  1169. /* When using 64-bit offsets we write 8-bytes (u64) per chunk,
  1170. * otherwise 4-bytes (u32). */
  1171. if (co64) {
  1172. size = 16 + 8 * num;
  1173. write_fullbox(s, size, "co64", 0, 0);
  1174. } else {
  1175. size = 16 + 4 * num;
  1176. write_fullbox(s, size, "stco", 0, 0);
  1177. }
  1178. s_wb32(s, num); // entry_count
  1179. for (size_t idx = 0; idx < num; idx++) {
  1180. if (co64)
  1181. s_wb64(s, arr[idx].offset); // chunk_offset
  1182. else
  1183. s_wb32(s, (uint32_t)arr[idx].offset); // chunk_offset
  1184. }
  1185. return size;
  1186. }
  1187. /// 8.9.3 Sample Group Description Box
  1188. static size_t mp4_write_sgpd_aac(struct mp4_mux *mux)
  1189. {
  1190. struct serializer *s = mux->serializer;
  1191. int64_t start = serializer_get_pos(s);
  1192. write_fullbox(s, 0, "sgpd", 1, 0);
  1193. s_write(s, "roll", 4); // grouping_tpye
  1194. s_wb32(s, 2); // default_length (i16)
  1195. s_wb32(s, 1); // entry_count
  1196. // AudioRollRecoveryEntry
  1197. s_wb16(s, -1); // roll_distance
  1198. return write_box_size(s, start);
  1199. }
  1200. /// 8.9.2 Sample to Group Box
  1201. static size_t mp4_write_sbgp_aac(struct mp4_mux *mux, struct mp4_track *track)
  1202. {
  1203. struct serializer *s = mux->serializer;
  1204. int64_t start = serializer_get_pos(s);
  1205. write_fullbox(s, 0, "sbgp", 0, 0);
  1206. /// 10.1 AudioRollRecoveryEntry
  1207. s_write(s, "roll", 4); // grouping_tpye
  1208. s_wb32(s, 1); // entry_count
  1209. s_wb32(s, (uint32_t)track->samples); // sample_count
  1210. s_wb32(s, 1); // group_description_index
  1211. return write_box_size(s, start);
  1212. }
  1213. static size_t mp4_write_sbgp_sbgp_opus(struct mp4_mux *mux, struct mp4_track *track)
  1214. {
  1215. struct serializer *s = mux->serializer;
  1216. int64_t start = serializer_get_pos(s);
  1217. /// 8.9.3 Sample Group Description Box
  1218. write_fullbox(s, 0, "sgpd", 1, 0);
  1219. s_write(s, "roll", 4); // grouping_tpye
  1220. s_wb32(s, 2); // default_length (i16)
  1221. /* Opus requires 80 ms of preroll, which at 48 kHz is 3840 PCM samples */
  1222. const int64_t opus_preroll = 3840;
  1223. /* Compute the preroll samples (should be 4, each being 20 ms) */
  1224. uint16_t preroll_count = 0;
  1225. int64_t preroll_remaining = opus_preroll;
  1226. for (size_t i = 0; i < track->deltas.num && preroll_remaining > 0; i++) {
  1227. for (uint32_t j = 0; j < track->deltas.array[i].count && preroll_remaining > 0; j++) {
  1228. preroll_remaining -= track->deltas.array[i].delta;
  1229. preroll_count++;
  1230. }
  1231. }
  1232. s_wb32(s, 1); // entry_count
  1233. /// 10.1 AudioRollRecoveryEntry
  1234. s_wb16(s, -preroll_count); // roll_distance
  1235. size_t size_sgpd = write_box_size(s, start);
  1236. /* --------------- */
  1237. /// 8.9.2 Sample to Group Box
  1238. start = serializer_get_pos(s);
  1239. write_fullbox(s, 0, "sbgp", 0, 0);
  1240. s_write(s, "roll", 4); // grouping_tpye
  1241. s_wb32(s, 2); // entry_count
  1242. // entry 0
  1243. s_wb32(s, preroll_count); // sample_count
  1244. s_wb32(s, 0); // group_description_index
  1245. // entry 1
  1246. s_wb32(s, (uint32_t)track->samples - preroll_count); // sample_count
  1247. s_wb32(s, 1); // group_description_index
  1248. return size_sgpd + write_box_size(s, start);
  1249. }
  1250. /// 8.5.1 Sample Table Box
  1251. static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1252. {
  1253. struct serializer *s = mux->serializer;
  1254. int64_t start = serializer_get_pos(s);
  1255. write_box(s, 0, "stbl");
  1256. // stsd
  1257. mp4_write_stsd(mux, track);
  1258. // stts
  1259. mp4_write_stts(mux, track, fragmented);
  1260. // stss (non-fragmented/non-prores only)
  1261. if (track->type == TRACK_VIDEO && !fragmented && track->codec != CODEC_PRORES)
  1262. mp4_write_stss(mux, track);
  1263. // ctts (non-fragmented only)
  1264. if (track->needs_ctts && !fragmented)
  1265. mp4_write_ctts(mux, track);
  1266. // stsc
  1267. mp4_write_stsc(mux, track, fragmented);
  1268. // stsz
  1269. mp4_write_stsz(mux, track, fragmented);
  1270. // stco
  1271. mp4_write_stco(mux, track, fragmented);
  1272. if (!fragmented) {
  1273. /* AAC and Opus require a pre-roll to get correct decoder
  1274. * output, sgpd and sbgp are used to create a "roll" group. */
  1275. if (track->codec == CODEC_AAC) {
  1276. // sgpd
  1277. mp4_write_sgpd_aac(mux);
  1278. // sbgp
  1279. mp4_write_sbgp_aac(mux, track);
  1280. } else if (track->codec == CODEC_OPUS) {
  1281. // sgpd + sbgp
  1282. mp4_write_sbgp_sbgp_opus(mux, track);
  1283. }
  1284. }
  1285. return write_box_size(s, start);
  1286. }
  1287. /// 8.7.2.2 DataEntryUrlBox
  1288. static size_t mp4_write_url(struct mp4_mux *mux)
  1289. {
  1290. struct serializer *s = mux->serializer;
  1291. int64_t start = serializer_get_pos(s);
  1292. write_fullbox(s, 0, "url ", 0, 1);
  1293. /* empty, flag 1 means data is in this file */
  1294. return write_box_size(s, start);
  1295. }
  1296. /// 8.7.2 Data Reference Box
  1297. static size_t mp4_write_dref(struct mp4_mux *mux)
  1298. {
  1299. struct serializer *s = mux->serializer;
  1300. int64_t start = serializer_get_pos(s);
  1301. write_fullbox(s, 0, "dref ", 0, 0);
  1302. s_wb32(s, 1); // entry_count
  1303. mp4_write_url(mux);
  1304. return write_box_size(s, start);
  1305. }
  1306. /// 8.7.1 Data Information Box
  1307. static size_t mp4_write_dinf(struct mp4_mux *mux)
  1308. {
  1309. struct serializer *s = mux->serializer;
  1310. int64_t start = serializer_get_pos(s);
  1311. write_box(s, 0, "dinf");
  1312. mp4_write_dref(mux);
  1313. return write_box_size(s, start);
  1314. }
  1315. /// 8.4.4 Media Information Box
  1316. static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1317. {
  1318. struct serializer *s = mux->serializer;
  1319. int64_t start = serializer_get_pos(s);
  1320. write_box(s, 0, "minf");
  1321. // vmhd/smhd/gmhd
  1322. if (track->type == TRACK_VIDEO)
  1323. mp4_write_vmhd(mux);
  1324. else if (track->type == TRACK_CHAPTERS)
  1325. mp4_write_gmhd(mux);
  1326. else
  1327. mp4_write_smhd(mux);
  1328. // hdlr for dinf, required in MOV only
  1329. if (mux->flavor == FLAVOR_MOV)
  1330. mp4_write_hdlr(mux, NULL);
  1331. // dinf, unnecessary but mandatory
  1332. mp4_write_dinf(mux);
  1333. // stbl
  1334. mp4_write_stbl(mux, track, fragmented);
  1335. return write_box_size(s, start);
  1336. }
  1337. /// 8.4.1 Media Box
  1338. static size_t mp4_write_mdia(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1339. {
  1340. struct serializer *s = mux->serializer;
  1341. int64_t start = serializer_get_pos(s);
  1342. write_box(s, 0, "mdia");
  1343. // mdhd
  1344. mp4_write_mdhd(mux, track);
  1345. // hdlr
  1346. mp4_write_hdlr(mux, track);
  1347. // minf
  1348. mp4_write_minf(mux, track, fragmented);
  1349. return write_box_size(s, start);
  1350. }
  1351. /// (QTFF/Apple) User data atom
  1352. static size_t mp4_write_udta_atom(struct mp4_mux *mux, const char tag[4], const char *val)
  1353. {
  1354. struct serializer *s = mux->serializer;
  1355. int64_t start = serializer_get_pos(s);
  1356. write_box(s, 0, tag);
  1357. s_write(s, val, strlen(val));
  1358. return write_box_size(s, start);
  1359. }
  1360. /// 8.10.1 User Data Box
  1361. static size_t mp4_write_track_udta(struct mp4_mux *mux, struct mp4_track *track)
  1362. {
  1363. struct serializer *s = mux->serializer;
  1364. int64_t start = serializer_get_pos(s);
  1365. write_box(s, 0, "udta");
  1366. /* Our udta box contains QuickTime format user data atoms, which are
  1367. * simple key-value pairs. Some are prefixed with 0xa9. */
  1368. const char *name = obs_encoder_get_name(track->encoder);
  1369. if (name)
  1370. mp4_write_udta_atom(mux, "name", name);
  1371. if (mux->flags & MP4_WRITE_ENCODER_INFO) {
  1372. const char *id = obs_encoder_get_id(track->encoder);
  1373. if (name)
  1374. mp4_write_udta_atom(mux, "\251enc", id);
  1375. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  1376. if (settings) {
  1377. const char *json = obs_data_get_json_with_defaults(settings);
  1378. mp4_write_udta_atom(mux, "json", json);
  1379. obs_data_release(settings);
  1380. }
  1381. }
  1382. return write_box_size(s, start);
  1383. }
  1384. /// 8.6.6 Edit List Box
  1385. static size_t mp4_write_elst(struct mp4_mux *mux, struct mp4_track *track)
  1386. {
  1387. struct serializer *s = mux->serializer;
  1388. int64_t start = serializer_get_pos(s);
  1389. write_fullbox(s, 0, "elst", 0, 0);
  1390. s_wb32(s, 1); // entry count
  1391. uint64_t duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  1392. uint64_t delay = 0;
  1393. if (track->type == TRACK_VIDEO && !(mux->flags & MP4_USE_NEGATIVE_CTS)) {
  1394. /* Compensate for frame-reordering delay (for example, when
  1395. * using b-frames). */
  1396. int64_t dts_offset = 0;
  1397. if (track->offsets.num) {
  1398. struct sample_offset sample = track->offsets.array[0];
  1399. dts_offset = sample.offset;
  1400. } else if (track->packets.size) {
  1401. /* If no offset data exists yet (i.e. when writing the
  1402. * incomplete moov in a fragmented file) use the raw
  1403. * data from the current queued packets instead. */
  1404. struct encoder_packet pkt;
  1405. deque_peek_front(&track->packets, &pkt, sizeof(pkt));
  1406. dts_offset = pkt.pts - pkt.dts;
  1407. }
  1408. delay = util_mul_div64(dts_offset, track->timescale, track->timebase_den);
  1409. } else if (track->type == TRACK_AUDIO && track->first_pts < 0) {
  1410. delay = util_mul_div64(llabs(track->first_pts), track->timescale, track->timebase_den);
  1411. /* Subtract priming delay from total duration */
  1412. duration -= util_mul_div64(delay, 1000, track->timescale);
  1413. }
  1414. s_wb32(s, (uint32_t)duration); // segment_duration (movie timescale)
  1415. s_wb32(s, (uint32_t)delay); // media_time (track timescale)
  1416. s_wb32(s, 1 << 16); // media_rate
  1417. return write_box_size(s, start);
  1418. }
  1419. /// 8.6.5 Edit Box
  1420. static size_t mp4_write_edts(struct mp4_mux *mux, struct mp4_track *track)
  1421. {
  1422. struct serializer *s = mux->serializer;
  1423. int64_t start = serializer_get_pos(s);
  1424. write_box(s, 0, "edts");
  1425. mp4_write_elst(mux, track);
  1426. return write_box_size(s, start);
  1427. }
  1428. /// 8.3.3.2 TrackReferenceTypeBox
  1429. static size_t mp4_write_chap(struct mp4_mux *mux)
  1430. {
  1431. struct serializer *s = mux->serializer;
  1432. int64_t start = serializer_get_pos(s);
  1433. /// QTFF/Apple chapter track reference
  1434. write_box(s, 0, "chap");
  1435. s_wb32(s, mux->chapter_track->track_id);
  1436. return write_box_size(s, start);
  1437. }
  1438. /// 8.3.3 Track Reference Box
  1439. static size_t mp4_write_tref(struct mp4_mux *mux)
  1440. {
  1441. struct serializer *s = mux->serializer;
  1442. int64_t start = serializer_get_pos(s);
  1443. write_box(s, 0, "tref");
  1444. mp4_write_chap(mux);
  1445. return write_box_size(s, start);
  1446. }
  1447. /// 8.3.1 Track Box
  1448. static size_t mp4_write_trak(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1449. {
  1450. struct serializer *s = mux->serializer;
  1451. int64_t start = serializer_get_pos(s);
  1452. /* If track has no data, omit it from full moov. */
  1453. if (!fragmented && !track->chunks.num)
  1454. return 0;
  1455. write_box(s, 0, "trak");
  1456. // tkhd
  1457. mp4_write_tkhd(mux, track);
  1458. // edts
  1459. mp4_write_edts(mux, track);
  1460. // tref
  1461. if (mux->chapter_track && track->type != TRACK_CHAPTERS)
  1462. mp4_write_tref(mux);
  1463. // mdia
  1464. mp4_write_mdia(mux, track, fragmented);
  1465. // udta (audio track name mainly)
  1466. mp4_write_track_udta(mux, track);
  1467. return write_box_size(s, start);
  1468. }
  1469. /// 8.8.3 Track Extends Box
  1470. static size_t mp4_write_trex(struct mp4_mux *mux, uint32_t track_id)
  1471. {
  1472. struct serializer *s = mux->serializer;
  1473. write_fullbox(s, 32, "trex", 0, 0);
  1474. s_wb32(s, track_id); // track_ID
  1475. s_wb32(s, 1); // default_sample_description_index
  1476. s_wb32(s, 0); // default_sample_duration
  1477. s_wb32(s, 0); // default_sample_size
  1478. s_wb32(s, 0); // default_sample_flags
  1479. return 32;
  1480. }
  1481. /// 8.8.1 Movie Extends Box
  1482. static size_t mp4_write_mvex(struct mp4_mux *mux)
  1483. {
  1484. struct serializer *s = mux->serializer;
  1485. int64_t start = serializer_get_pos(s);
  1486. write_box(s, 0, "mvex");
  1487. for (size_t track_id = 0; track_id < mux->tracks.num; track_id++)
  1488. mp4_write_trex(mux, (uint32_t)(track_id + 1));
  1489. return write_box_size(s, start);
  1490. }
  1491. /// (QTFF/Apple) Undocumented QuickTime/iTunes metadata handler
  1492. static size_t mp4_write_itunes_hdlr(struct mp4_mux *mux)
  1493. {
  1494. struct serializer *s = mux->serializer;
  1495. write_fullbox(s, 33, "hdlr", 0, 0);
  1496. s_wb32(s, 0); // pre_defined
  1497. s_write(s, "mdir", 4); // handler_type
  1498. // reserved
  1499. s_write(s, "appl", 4);
  1500. s_wb32(s, 0);
  1501. s_wb32(s, 0);
  1502. s_w8(s, 0); // name (NULL)
  1503. return 33;
  1504. }
  1505. /// (QTFF/Apple) Data atom
  1506. static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
  1507. {
  1508. struct serializer *s = mux->serializer;
  1509. size_t len = strlen(data);
  1510. uint32_t size = 16 + (uint32_t)len;
  1511. write_box(s, size, "data");
  1512. s_wb32(s, 1); // type, 1 = utf-8 string
  1513. s_wb32(s, 0); // locale, 0 = default
  1514. s_write(s, data, len);
  1515. return size;
  1516. }
  1517. /// (QTFF/Apple) String atom
  1518. static size_t mp4_write_string_data_atom(struct mp4_mux *mux, const char name[4], const char *data)
  1519. {
  1520. struct serializer *s = mux->serializer;
  1521. int64_t start = serializer_get_pos(s);
  1522. uint16_t len = (uint16_t)strlen(data);
  1523. write_box(s, 0, name);
  1524. s_wb16(s, len); // String length
  1525. s_write(s, "\x55\xC4", 2); // language code, just using undefined
  1526. s_write(s, data, len); // Note: No NULL terminator
  1527. return write_box_size(s, start);
  1528. }
  1529. /// (QTFF/Apple) Metadata item atom
  1530. static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4], const char *value)
  1531. {
  1532. struct serializer *s = mux->serializer;
  1533. int64_t start = serializer_get_pos(s);
  1534. write_box(s, 0, name);
  1535. mp4_write_data_atom(mux, value);
  1536. return write_box_size(s, start);
  1537. }
  1538. /// (QTFF/Apple) Metadata item list atom
  1539. static size_t mp4_write_ilst(struct mp4_mux *mux)
  1540. {
  1541. struct serializer *s = mux->serializer;
  1542. struct dstr value = {0};
  1543. int64_t start = serializer_get_pos(s);
  1544. write_box(s, 0, "ilst");
  1545. /* Encoder name */
  1546. dstr_cat(&value, "OBS Studio (");
  1547. dstr_cat(&value, obs_get_version_string());
  1548. dstr_cat(&value, ")");
  1549. /* Some QuickTime keys are prefixed with 0xa9 */
  1550. mp4_write_ilst_item_atom(mux, "\251too", value.array);
  1551. dstr_free(&value);
  1552. return write_box_size(s, start);
  1553. }
  1554. /// (QTFF/Apple) Key value metadata handler
  1555. static size_t mp4_write_mdta_hdlr(struct mp4_mux *mux)
  1556. {
  1557. struct serializer *s = mux->serializer;
  1558. write_fullbox(s, 33, "hdlr", 0, 0);
  1559. s_wb32(s, 0); // pre_defined
  1560. s_write(s, "mdta", 4); // handler_type
  1561. // reserved
  1562. s_wb32(s, 0);
  1563. s_wb32(s, 0);
  1564. s_wb32(s, 0);
  1565. s_w8(s, 0); // name (NULL)
  1566. return 33;
  1567. }
  1568. /// (QTFF/Apple) Metadata item keys atom
  1569. static size_t mp4_write_mdta_keys(struct mp4_mux *mux, obs_data_t *meta)
  1570. {
  1571. struct serializer *s = mux->serializer;
  1572. int64_t start = serializer_get_pos(s);
  1573. write_fullbox(s, 0, "keys", 0, 0);
  1574. uint32_t count = 0;
  1575. int64_t count_pos = serializer_get_pos(s);
  1576. s_wb32(s, count); // count
  1577. obs_data_item_t *item = obs_data_first(meta);
  1578. for (; item != NULL; obs_data_item_next(&item)) {
  1579. const char *name = obs_data_item_get_name(item);
  1580. size_t len = strlen(name);
  1581. /* name is key type, can be udta or mdta */
  1582. write_box(s, len + 8, "mdta");
  1583. s_write(s, name, len); // key name
  1584. count++;
  1585. }
  1586. int64_t end = serializer_get_pos(s);
  1587. /* Overwrite count with correct value */
  1588. serializer_seek(s, count_pos, SERIALIZE_SEEK_START);
  1589. s_wb32(s, count);
  1590. serializer_seek(s, end, SERIALIZE_SEEK_START);
  1591. return write_box_size(s, start);
  1592. }
  1593. /// (QTFF/Apple) Metadata item atom, but name is an index instead
  1594. static inline void write_key_entry(struct mp4_mux *mux, obs_data_item_t *item, uint32_t idx)
  1595. {
  1596. struct serializer *s = mux->serializer;
  1597. int64_t start = serializer_get_pos(s);
  1598. s_wb32(s, 0); // size
  1599. s_wb32(s, idx); // index
  1600. mp4_write_data_atom(mux, obs_data_item_get_string(item));
  1601. write_box_size(s, start);
  1602. }
  1603. /// (QTFF/Apple) Metadata item list atom
  1604. static size_t mp4_write_mdta_ilst(struct mp4_mux *mux, obs_data_t *meta)
  1605. {
  1606. struct serializer *s = mux->serializer;
  1607. int64_t start = serializer_get_pos(s);
  1608. write_box(s, 0, "ilst");
  1609. /* indices start with 1 */
  1610. uint32_t key_idx = 1;
  1611. obs_data_item_t *item = obs_data_first(meta);
  1612. for (; item != NULL; obs_data_item_next(&item)) {
  1613. write_key_entry(mux, item, key_idx);
  1614. key_idx++;
  1615. }
  1616. return write_box_size(s, start);
  1617. }
  1618. static void mp4_write_mdta_kv(struct mp4_mux *mux)
  1619. {
  1620. struct dstr value = {0};
  1621. obs_data_t *meta = obs_data_create();
  1622. dstr_cat(&value, "OBS Studio (");
  1623. dstr_cat(&value, obs_get_version_string());
  1624. dstr_cat(&value, ")");
  1625. // ToDo figure out what else we could put in here for fun and profit :)
  1626. obs_data_set_string(meta, "tool", value.array);
  1627. /* Write keys */
  1628. mp4_write_mdta_keys(mux, meta);
  1629. /* Write values */
  1630. mp4_write_mdta_ilst(mux, meta);
  1631. obs_data_release(meta);
  1632. dstr_free(&value);
  1633. }
  1634. /// 8.11.1 The Meta box
  1635. static size_t mp4_write_meta(struct mp4_mux *mux)
  1636. {
  1637. struct serializer *s = mux->serializer;
  1638. int64_t start = serializer_get_pos(s);
  1639. write_fullbox(s, 0, "meta", 0, 0);
  1640. if (mux->flags & MP4_USE_MDTA_KEY_VALUE) {
  1641. mp4_write_mdta_hdlr(mux);
  1642. mp4_write_mdta_kv(mux);
  1643. } else {
  1644. mp4_write_itunes_hdlr(mux);
  1645. mp4_write_ilst(mux);
  1646. }
  1647. return write_box_size(s, start);
  1648. }
  1649. /// 8.10.1 User Data Box
  1650. static size_t mp4_write_udta(struct mp4_mux *mux)
  1651. {
  1652. struct serializer *s = mux->serializer;
  1653. int64_t start = serializer_get_pos(s);
  1654. write_box(s, 0, "udta");
  1655. /* Normally metadata would be directly in the moov, but since this is
  1656. * Apple/QTFF format metadata it is inside udta. */
  1657. if (mux->flavor == FLAVOR_MOV && !(mux->flags & MP4_USE_MDTA_KEY_VALUE)) {
  1658. // keys directly in udta atom
  1659. struct dstr value = {0};
  1660. /* Encoder name */
  1661. dstr_cat(&value, "OBS Studio (");
  1662. dstr_cat(&value, obs_get_version_string());
  1663. dstr_cat(&value, ")");
  1664. mp4_write_string_data_atom(mux, "\251swr", value.array);
  1665. dstr_free(&value);
  1666. } else {
  1667. // meta
  1668. mp4_write_meta(mux);
  1669. }
  1670. return write_box_size(s, start);
  1671. }
  1672. /// Movie Box (8.2.1)
  1673. static size_t mp4_write_moov(struct mp4_mux *mux, bool fragmented)
  1674. {
  1675. struct serializer *s = mux->serializer;
  1676. int64_t start = serializer_get_pos(s);
  1677. write_box(s, 0, "moov");
  1678. mp4_write_mvhd(mux);
  1679. // trak(s)
  1680. for (size_t i = 0; i < mux->tracks.num; i++) {
  1681. struct mp4_track *track = &mux->tracks.array[i];
  1682. mp4_write_trak(mux, track, fragmented);
  1683. }
  1684. if (!fragmented && mux->chapter_track)
  1685. mp4_write_trak(mux, mux->chapter_track, false);
  1686. // mvex
  1687. if (fragmented)
  1688. mp4_write_mvex(mux);
  1689. // udta (metadata)
  1690. mp4_write_udta(mux);
  1691. return write_box_size(s, start);
  1692. }
  1693. /* ========================================================================== */
  1694. /* moof (fragment header) stuff */
  1695. /// 8.8.5 Movie Fragment Header Box
  1696. static size_t mp4_write_mfhd(struct mp4_mux *mux)
  1697. {
  1698. struct serializer *s = mux->serializer;
  1699. write_fullbox(s, 16, "mfhd", 0, 0);
  1700. s_wb32(s, mux->fragments_written); // sequence_number
  1701. return 16;
  1702. }
  1703. /// 8.8.7 Track Fragment Header Box
  1704. static size_t mp4_write_tfhd(struct mp4_mux *mux, struct mp4_track *track, size_t moof_start)
  1705. {
  1706. struct serializer *s = mux->serializer;
  1707. int64_t start = serializer_get_pos(s);
  1708. uint32_t flags = BASE_DATA_OFFSET_PRESENT | DEFAULT_SAMPLE_FLAGS_PRESENT;
  1709. /* Add default size/duration if all samples match. */
  1710. bool durations_match = true;
  1711. bool sizes_match = true;
  1712. uint32_t duration;
  1713. uint32_t sample_size;
  1714. if (track->sample_size) {
  1715. duration = 1;
  1716. sample_size = track->sample_size;
  1717. } else {
  1718. duration = track->fragment_samples.array[0].duration;
  1719. sample_size = track->fragment_samples.array[0].size;
  1720. for (size_t idx = 1; idx < track->fragment_samples.num; idx++) {
  1721. uint32_t frag_duration = track->fragment_samples.array[idx].duration;
  1722. uint32_t frag_size = track->fragment_samples.array[idx].size;
  1723. durations_match = frag_duration == duration;
  1724. sizes_match = frag_size == sample_size;
  1725. }
  1726. }
  1727. if (durations_match)
  1728. flags |= DEFAULT_SAMPLE_DURATION_PRESENT;
  1729. if (sizes_match)
  1730. flags |= DEFAULT_SAMPLE_SIZE_PRESENT;
  1731. write_fullbox(s, 0, "tfhd", 0, flags);
  1732. s_wb32(s, track->track_id); // track_ID
  1733. s_wb64(s, moof_start); // base_data_offset
  1734. // default_sample_duration
  1735. if (durations_match) {
  1736. if (track->type == TRACK_VIDEO) {
  1737. /* Convert duration to track timescale */
  1738. duration = (uint32_t)util_mul_div64(duration, track->timescale, track->timebase_den);
  1739. }
  1740. s_wb32(s, duration);
  1741. }
  1742. // default_sample_size
  1743. if (sizes_match)
  1744. s_wb32(s, sample_size);
  1745. // default_sample_flags
  1746. if (track->type == TRACK_VIDEO) {
  1747. s_wb32(s, SAMPLE_FLAG_DEPENDS_YES | SAMPLE_FLAG_IS_NON_SYNC);
  1748. } else {
  1749. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO);
  1750. }
  1751. return write_box_size(s, start);
  1752. }
  1753. /// 8.8.12 Track fragment decode time
  1754. static size_t mp4_write_tfdt(struct mp4_mux *mux, struct mp4_track *track)
  1755. {
  1756. struct serializer *s = mux->serializer;
  1757. write_fullbox(s, 20, "tfdt", 1, 0);
  1758. /* Subtract samples that are not written yet */
  1759. uint64_t duration_written = track->duration;
  1760. for (size_t i = 0; i < track->fragment_samples.num; i++)
  1761. duration_written -= track->fragment_samples.array[i].duration;
  1762. if (track->type == TRACK_VIDEO) {
  1763. /* Convert to track timescale */
  1764. duration_written = util_mul_div64(duration_written, track->timescale, track->timebase_den);
  1765. }
  1766. s_wb64(s, duration_written); // baseMediaDecodeTime
  1767. return 20;
  1768. }
  1769. /// 8.8.8 Track Fragment Run Box
  1770. static size_t mp4_write_trun(struct mp4_mux *mux, struct mp4_track *track, uint32_t moof_size,
  1771. uint64_t *samples_mdat_offset)
  1772. {
  1773. struct serializer *s = mux->serializer;
  1774. int64_t start = serializer_get_pos(s);
  1775. uint32_t flags = DATA_OFFSET_PRESENT;
  1776. if (!track->sample_size)
  1777. flags |= SAMPLE_SIZE_PRESENT;
  1778. if (track->type == TRACK_VIDEO) {
  1779. flags |= FIRST_SAMPLE_FLAGS_PRESENT;
  1780. flags |= SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT;
  1781. }
  1782. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  1783. write_fullbox(s, 0, "trun", version, flags);
  1784. /* moof_size + 8 bytes for mdat header + offset into mdat box data */
  1785. size_t data_offset = moof_size + 8 + *samples_mdat_offset;
  1786. size_t sample_count = track->fragment_samples.num;
  1787. if (track->sample_size) {
  1788. /* Update count based on fixed size */
  1789. size_t total_size = 0;
  1790. for (size_t i = 0; i < sample_count; i++)
  1791. total_size += track->fragment_samples.array[i].size;
  1792. *samples_mdat_offset += total_size;
  1793. sample_count = total_size / track->sample_size;
  1794. }
  1795. s_wb32(s, (uint32_t)sample_count); // sample_count
  1796. s_wb32(s, (uint32_t)data_offset); // data_offset
  1797. /* If we have a fixed sample size (PCM audio) we only need to write
  1798. * the sample count and offset. */
  1799. if (track->sample_size)
  1800. return write_box_size(s, start);
  1801. if (track->type == TRACK_VIDEO)
  1802. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO); // first_sample_flags
  1803. for (size_t idx = 0; idx < sample_count; idx++) {
  1804. struct fragment_sample *smp = &track->fragment_samples.array[idx];
  1805. s_wb32(s, smp->size); // sample_size
  1806. if (track->type == TRACK_VIDEO) {
  1807. // sample_composition_time_offset
  1808. int64_t offset =
  1809. (int64_t)smp->offset * (int64_t)track->timescale / (int64_t)track->timebase_den;
  1810. s_wb32(s, (uint32_t)offset);
  1811. }
  1812. *samples_mdat_offset += smp->size;
  1813. }
  1814. return write_box_size(s, start);
  1815. }
  1816. /// 8.8.6 Track Fragment Box
  1817. static size_t mp4_write_traf(struct mp4_mux *mux, struct mp4_track *track, int64_t moof_start, uint32_t moof_size,
  1818. uint64_t *samples_mdat_offset)
  1819. {
  1820. struct serializer *s = mux->serializer;
  1821. int64_t start = serializer_get_pos(s);
  1822. write_box(s, 0, "traf");
  1823. // tfhd
  1824. mp4_write_tfhd(mux, track, moof_start);
  1825. // tfdt
  1826. mp4_write_tfdt(mux, track);
  1827. // trun
  1828. mp4_write_trun(mux, track, moof_size, samples_mdat_offset);
  1829. return write_box_size(s, start);
  1830. }
  1831. /// 8.8.4 Movie Fragment Box
  1832. static size_t mp4_write_moof(struct mp4_mux *mux, uint32_t moof_size, int64_t moof_start)
  1833. {
  1834. struct serializer *s = mux->serializer;
  1835. int64_t start = serializer_get_pos(s);
  1836. write_box(s, 0, "moof");
  1837. mp4_write_mfhd(mux);
  1838. /* Track current mdat offset across tracks */
  1839. uint64_t samples_mdat_offset = 0;
  1840. // traf boxes
  1841. for (size_t i = 0; i < mux->tracks.num; i++) {
  1842. struct mp4_track *track = &mux->tracks.array[i];
  1843. /* Skip tracks that do not have any samples */
  1844. if (!track->fragment_samples.num)
  1845. continue;
  1846. mp4_write_traf(mux, track, moof_start, moof_size, &samples_mdat_offset);
  1847. }
  1848. return write_box_size(s, start);
  1849. }
  1850. /* ========================================================================== */
  1851. /* Chapter packets */
  1852. static void mp4_create_chapter_pkt(struct encoder_packet *pkt, int64_t dts_usec, const char *name)
  1853. {
  1854. int64_t dts = dts_usec / 1000; // chapter track uses a ms timebase
  1855. pkt->pts = dts;
  1856. pkt->dts = dts;
  1857. pkt->dts_usec = dts_usec;
  1858. pkt->timebase_num = 1;
  1859. pkt->timebase_den = 1000;
  1860. /* Serialize with data with ref count */
  1861. struct serializer s;
  1862. struct array_output_data ao;
  1863. array_output_serializer_init(&s, &ao);
  1864. size_t len = min(strlen(name), UINT16_MAX);
  1865. long refs = 1;
  1866. /* encoder_packet refs */
  1867. s_write(&s, &refs, sizeof(refs));
  1868. /* actual packet data */
  1869. s_wb16(&s, (uint16_t)len);
  1870. s_write(&s, name, len);
  1871. s_write(&s, &CHAPTER_PKT_FOOTER, sizeof(CHAPTER_PKT_FOOTER));
  1872. pkt->data = (void *)(ao.bytes.array + sizeof(long));
  1873. pkt->size = ao.bytes.num - sizeof(long);
  1874. }
  1875. /* ========================================================================== */
  1876. /* Encoder packet processing and fragment writer */
  1877. static inline int64_t packet_pts_usec(struct encoder_packet *packet)
  1878. {
  1879. return packet->pts * 1000000 / packet->timebase_den;
  1880. }
  1881. static inline struct encoder_packet *get_pkt_at(struct deque *dq, size_t idx)
  1882. {
  1883. return deque_data(dq, idx * sizeof(struct encoder_packet));
  1884. }
  1885. static inline uint64_t get_longest_track_duration(struct mp4_mux *mux)
  1886. {
  1887. uint64_t dur = 0;
  1888. for (size_t i = 0; i < mux->tracks.num; i++) {
  1889. struct mp4_track *track = &mux->tracks.array[i];
  1890. uint64_t track_dur = util_mul_div64(track->duration, 1000, track->timebase_den);
  1891. if (track_dur > dur)
  1892. dur = track_dur;
  1893. }
  1894. return dur;
  1895. }
  1896. static void process_packets(struct mp4_mux *mux, struct mp4_track *track, uint64_t *mdat_size)
  1897. {
  1898. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1899. if (!count)
  1900. return;
  1901. /* Only iterate upt to penultimate packet so we can determine duration
  1902. * for all processed packets. */
  1903. for (size_t i = 0; i < count - 1; i++) {
  1904. struct encoder_packet *pkt = get_pkt_at(&track->packets, i);
  1905. if (mux->next_frag_pts && packet_pts_usec(pkt) >= mux->next_frag_pts)
  1906. break;
  1907. struct encoder_packet *next = get_pkt_at(&track->packets, i + 1);
  1908. /* Duration is just distance between current and next DTS. */
  1909. uint32_t duration = (uint32_t)(next->dts - pkt->dts);
  1910. uint32_t sample_count = 1;
  1911. uint32_t size = (uint32_t)pkt->size;
  1912. int32_t offset = (int32_t)(pkt->pts - pkt->dts);
  1913. /* When using negative CTS, subtract DTS-PTS offset. */
  1914. if (track->type == TRACK_VIDEO && mux->flags & MP4_USE_NEGATIVE_CTS) {
  1915. if (!track->offsets.num)
  1916. track->dts_offset = offset;
  1917. offset -= track->dts_offset;
  1918. }
  1919. /* Create temporary sample information for moof */
  1920. struct fragment_sample *smp = da_push_back_new(track->fragment_samples);
  1921. smp->size = size;
  1922. smp->offset = offset;
  1923. smp->duration = duration;
  1924. *mdat_size += size;
  1925. /* Update global sample information for full moov */
  1926. track->duration += duration;
  1927. if (track->sample_size) {
  1928. /* Adjust duration/count for fixed sample size */
  1929. sample_count = size / track->sample_size;
  1930. duration = 1;
  1931. }
  1932. if (!track->samples)
  1933. track->first_pts = pkt->pts;
  1934. track->samples += sample_count;
  1935. /* If delta (duration) matche sprevious, increment counter,
  1936. * otherwise create a new entry. */
  1937. if (track->deltas.num == 0 || track->deltas.array[track->deltas.num - 1].delta != duration) {
  1938. struct sample_delta *new = da_push_back_new(track->deltas);
  1939. new->delta = duration;
  1940. new->count = sample_count;
  1941. } else {
  1942. track->deltas.array[track->deltas.num - 1].count += sample_count;
  1943. }
  1944. if (!track->sample_size)
  1945. da_push_back(track->sample_sizes, &size);
  1946. if (track->type != TRACK_VIDEO)
  1947. continue;
  1948. if (pkt->keyframe)
  1949. da_push_back(track->sync_samples, &track->samples);
  1950. /* Only require ctts box if offet is non-zero */
  1951. if (offset && !track->needs_ctts)
  1952. track->needs_ctts = true;
  1953. /* If dts-pts offset matche sprevious, increment counter,
  1954. * otherwise create a new entry. */
  1955. if (track->offsets.num == 0 || track->offsets.array[track->offsets.num - 1].offset != offset) {
  1956. struct sample_offset *new = da_push_back_new(track->offsets);
  1957. new->offset = offset;
  1958. new->count = 1;
  1959. } else {
  1960. track->offsets.array[track->offsets.num - 1].count += 1;
  1961. }
  1962. }
  1963. }
  1964. /* Write track data to file */
  1965. static void write_packets(struct mp4_mux *mux, struct mp4_track *track)
  1966. {
  1967. struct serializer *s = mux->serializer;
  1968. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1969. if (!count || !track->fragment_samples.num)
  1970. return;
  1971. struct chunk *chk = da_push_back_new(track->chunks);
  1972. chk->offset = serializer_get_pos(s);
  1973. chk->samples = (uint32_t)track->fragment_samples.num;
  1974. for (size_t i = 0; i < track->fragment_samples.num; i++) {
  1975. struct encoder_packet pkt;
  1976. deque_pop_front(&track->packets, &pkt, sizeof(struct encoder_packet));
  1977. s_write(s, pkt.data, pkt.size);
  1978. obs_encoder_packet_release(&pkt);
  1979. }
  1980. chk->size = (uint32_t)(serializer_get_pos(s) - chk->offset);
  1981. /* Fixup sample count for fixed-size codecs */
  1982. if (track->sample_size)
  1983. chk->samples = chk->size / track->sample_size;
  1984. da_clear(track->fragment_samples);
  1985. }
  1986. static void mp4_flush_fragment(struct mp4_mux *mux)
  1987. {
  1988. struct serializer *s = mux->serializer;
  1989. // Write file header if not already done
  1990. if (!mux->fragments_written) {
  1991. mp4_write_ftyp(mux, true);
  1992. /* Placeholder to write mdat header during soft-remux */
  1993. mux->placeholder_offset = serializer_get_pos(s);
  1994. mp4_write_free(mux);
  1995. }
  1996. // Array output as temporary buffer to avoid sending seeks to disk
  1997. struct serializer as;
  1998. struct array_output_data aod;
  1999. array_output_serializer_init(&as, &aod);
  2000. mux->serializer = &as;
  2001. // Write initial incomplete moov (because fragmentation)
  2002. if (!mux->fragments_written) {
  2003. mp4_write_moov(mux, true);
  2004. s_write(s, aod.bytes.array, aod.bytes.num);
  2005. array_output_serializer_reset(&aod);
  2006. }
  2007. mux->fragments_written++;
  2008. /* --------------------------------------------------------- */
  2009. /* Analyse packets and create fragment moof. */
  2010. uint64_t mdat_size = 8;
  2011. for (size_t idx = 0; idx < mux->tracks.num; idx++) {
  2012. struct mp4_track *track = &mux->tracks.array[idx];
  2013. process_packets(mux, track, &mdat_size);
  2014. }
  2015. if (!mux->next_frag_pts && mux->chapter_track) {
  2016. // Create dummy chapter marker at the end so duration is correct
  2017. uint64_t duration = get_longest_track_duration(mux);
  2018. struct encoder_packet pkt;
  2019. mp4_create_chapter_pkt(&pkt, (int64_t)duration * 1000, "Dummy");
  2020. deque_push_back(&mux->chapter_track->packets, &pkt, sizeof(struct encoder_packet));
  2021. process_packets(mux, mux->chapter_track, &mdat_size);
  2022. }
  2023. // write moof once to get size
  2024. int64_t moof_start = serializer_get_pos(s);
  2025. size_t moof_size = mp4_write_moof(mux, 0, moof_start);
  2026. array_output_serializer_reset(&aod);
  2027. // write moof again with known size
  2028. mp4_write_moof(mux, (uint32_t)moof_size, moof_start);
  2029. // Write to output and restore real serializer
  2030. s_write(s, aod.bytes.array, aod.bytes.num);
  2031. mux->serializer = s;
  2032. array_output_serializer_free(&aod);
  2033. /* --------------------------------------------------------- */
  2034. /* Write audio and video samples (in chunks). Also update */
  2035. /* global chunk and sample information for final moov. */
  2036. if (mdat_size > UINT32_MAX) {
  2037. s_wb32(s, 1);
  2038. s_write(s, "mdat", 4);
  2039. s_wb64(s, mdat_size + 8);
  2040. } else {
  2041. s_wb32(s, (uint32_t)mdat_size);
  2042. s_write(s, "mdat", 4);
  2043. }
  2044. for (size_t i = 0; i < mux->tracks.num; i++) {
  2045. struct mp4_track *track = &mux->tracks.array[i];
  2046. write_packets(mux, track);
  2047. }
  2048. /* Only write chapter packets on final flush. */
  2049. if (!mux->next_frag_pts && mux->chapter_track)
  2050. write_packets(mux, mux->chapter_track);
  2051. mux->next_frag_pts = 0;
  2052. }
  2053. /* ========================================================================== */
  2054. /* Track object functions */
  2055. static inline void track_insert_packet(struct mp4_track *track, struct encoder_packet *pkt)
  2056. {
  2057. int64_t pts_usec = packet_pts_usec(pkt);
  2058. if (pts_usec > track->last_pts_usec)
  2059. track->last_pts_usec = pts_usec;
  2060. deque_push_back(&track->packets, pkt, sizeof(struct encoder_packet));
  2061. }
  2062. static inline uint32_t get_sample_size(struct mp4_track *track)
  2063. {
  2064. audio_t *audio = obs_encoder_audio(track->encoder);
  2065. if (!audio)
  2066. return 0;
  2067. const struct audio_output_info *info = audio_output_get_info(audio);
  2068. uint32_t channels = get_audio_channels(info->speakers);
  2069. switch (track->codec) {
  2070. case CODEC_PCM_F32:
  2071. return channels * 4; // 4 bytes per sample (32-bit)
  2072. case CODEC_PCM_I24:
  2073. return channels * 3; // 3 bytes per sample (24-bit)
  2074. case CODEC_PCM_I16:
  2075. return channels * 2; // 2 bytes per sample (16-bit)
  2076. default:
  2077. return 0;
  2078. }
  2079. }
  2080. static inline enum mp4_codec get_codec(obs_encoder_t *enc)
  2081. {
  2082. const char *codec = obs_encoder_get_codec(enc);
  2083. if (strcmp(codec, "h264") == 0)
  2084. return CODEC_H264;
  2085. if (strcmp(codec, "hevc") == 0)
  2086. return CODEC_HEVC;
  2087. if (strcmp(codec, "av1") == 0)
  2088. return CODEC_AV1;
  2089. if (strcmp(codec, "prores") == 0)
  2090. return CODEC_PRORES;
  2091. if (strcmp(codec, "aac") == 0)
  2092. return CODEC_AAC;
  2093. if (strcmp(codec, "opus") == 0)
  2094. return CODEC_OPUS;
  2095. if (strcmp(codec, "flac") == 0)
  2096. return CODEC_FLAC;
  2097. if (strcmp(codec, "alac") == 0)
  2098. return CODEC_ALAC;
  2099. if (strcmp(codec, "pcm_s16le") == 0)
  2100. return CODEC_PCM_I16;
  2101. if (strcmp(codec, "pcm_s24le") == 0)
  2102. return CODEC_PCM_I24;
  2103. if (strcmp(codec, "pcm_f32le") == 0)
  2104. return CODEC_PCM_F32;
  2105. return CODEC_UNKNOWN;
  2106. }
  2107. static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc)
  2108. {
  2109. struct mp4_track *track = da_push_back_new(mux->tracks);
  2110. track->type = obs_encoder_get_type(enc) == OBS_ENCODER_VIDEO ? TRACK_VIDEO : TRACK_AUDIO;
  2111. track->encoder = obs_encoder_get_ref(enc);
  2112. track->codec = get_codec(enc);
  2113. track->track_id = ++mux->track_ctr;
  2114. /* Set timebase/timescale */
  2115. if (track->type == TRACK_VIDEO) {
  2116. video_t *video = obs_encoder_video(enc);
  2117. const struct video_output_info *info = video_output_get_info(video);
  2118. track->timebase_num = info->fps_den;
  2119. track->timebase_den = info->fps_num;
  2120. track->timescale = track->timebase_den;
  2121. } else {
  2122. uint32_t sample_rate = obs_encoder_get_sample_rate(enc);
  2123. /* Opus is always 48 kHz */
  2124. if (track->codec == CODEC_OPUS)
  2125. sample_rate = 48000;
  2126. track->timebase_num = 1;
  2127. track->timebase_den = sample_rate;
  2128. track->timescale = sample_rate;
  2129. }
  2130. /* Set sample size (if fixed) */
  2131. if (track->type == TRACK_AUDIO)
  2132. track->sample_size = get_sample_size(track);
  2133. }
  2134. static inline void add_chapter_track(struct mp4_mux *mux)
  2135. {
  2136. mux->chapter_track = bzalloc(sizeof(struct mp4_track));
  2137. mux->chapter_track->type = TRACK_CHAPTERS;
  2138. mux->chapter_track->codec = CODEC_TEXT;
  2139. mux->chapter_track->timescale = 1000;
  2140. mux->chapter_track->timebase_num = 1;
  2141. mux->chapter_track->timebase_den = 1000;
  2142. mux->chapter_track->track_id = ++mux->track_ctr;
  2143. }
  2144. static inline void free_packets(struct deque *dq)
  2145. {
  2146. size_t num = dq->size / sizeof(struct encoder_packet);
  2147. for (size_t i = 0; i < num; i++) {
  2148. struct encoder_packet pkt;
  2149. deque_pop_front(dq, &pkt, sizeof(struct encoder_packet));
  2150. obs_encoder_packet_release(&pkt);
  2151. }
  2152. }
  2153. static inline void free_track(struct mp4_track *track)
  2154. {
  2155. if (!track)
  2156. return;
  2157. obs_encoder_release(track->encoder);
  2158. free_packets(&track->packets);
  2159. deque_free(&track->packets);
  2160. da_free(track->sample_sizes);
  2161. da_free(track->chunks);
  2162. da_free(track->deltas);
  2163. da_free(track->offsets);
  2164. da_free(track->sync_samples);
  2165. da_free(track->fragment_samples);
  2166. }
  2167. /* ===========================================================================*/
  2168. /* API */
  2169. struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags,
  2170. enum mp4_flavor flavor)
  2171. {
  2172. struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));
  2173. mux->output = output;
  2174. mux->serializer = serializer;
  2175. mux->flags = flags;
  2176. mux->flavor = flavor;
  2177. /* Timestamp is based on 1904 rather than 1970. */
  2178. mux->creation_time = time(NULL) + 0x7C25B080;
  2179. if (flavor == FLAVOR_MOV && mux->creation_time > UINT32_MAX) {
  2180. /* This will only happen in 2040 but better safe than sorry! */
  2181. warn("Creation time too large for MOV, setting to 0 (unset).");
  2182. mux->creation_time = 0;
  2183. }
  2184. for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
  2185. obs_encoder_t *enc = obs_output_get_video_encoder2(output, i);
  2186. if (!enc)
  2187. continue;
  2188. add_track(mux, enc);
  2189. }
  2190. for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
  2191. obs_encoder_t *enc = obs_output_get_audio_encoder(output, i);
  2192. if (!enc)
  2193. continue;
  2194. add_track(mux, enc);
  2195. }
  2196. return mux;
  2197. }
  2198. void mp4_mux_destroy(struct mp4_mux *mux)
  2199. {
  2200. for (size_t i = 0; i < mux->tracks.num; i++)
  2201. free_track(&mux->tracks.array[i]);
  2202. free_track(mux->chapter_track);
  2203. bfree(mux->chapter_track);
  2204. da_free(mux->tracks);
  2205. bfree(mux);
  2206. }
  2207. bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
  2208. {
  2209. struct mp4_track *track = NULL;
  2210. struct encoder_packet parsed_packet;
  2211. enum obs_encoder_type type = pkt->type;
  2212. bool fragment_ready = mux->next_frag_pts > 0;
  2213. for (size_t i = 0; i < mux->tracks.num; i++) {
  2214. struct mp4_track *tmp = &mux->tracks.array[i];
  2215. fragment_ready = fragment_ready && tmp->last_pts_usec >= mux->next_frag_pts;
  2216. if (tmp->encoder == pkt->encoder)
  2217. track = tmp;
  2218. }
  2219. if (!track) {
  2220. warn("Could not find track for packet of type %s with "
  2221. "track id %zu!",
  2222. type == OBS_ENCODER_VIDEO ? "video" : "audio", pkt->track_idx);
  2223. return false;
  2224. }
  2225. /* If all tracks have caught up to the keyframe we want to fragment on,
  2226. * flush the current fragment to disk. */
  2227. if (fragment_ready)
  2228. mp4_flush_fragment(mux);
  2229. if (type == OBS_ENCODER_AUDIO) {
  2230. obs_encoder_packet_ref(&parsed_packet, pkt);
  2231. } else {
  2232. if (track->codec == CODEC_H264)
  2233. obs_parse_avc_packet(&parsed_packet, pkt);
  2234. else if (track->codec == CODEC_HEVC)
  2235. obs_parse_hevc_packet(&parsed_packet, pkt);
  2236. else if (track->codec == CODEC_AV1)
  2237. obs_parse_av1_packet(&parsed_packet, pkt);
  2238. else if (track->codec == CODEC_PRORES)
  2239. obs_encoder_packet_ref(&parsed_packet, pkt);
  2240. /* Set fragmentation PTS if packet is keyframe and PTS > 0 */
  2241. if (parsed_packet.keyframe && parsed_packet.pts > 0) {
  2242. mux->next_frag_pts = packet_pts_usec(&parsed_packet);
  2243. }
  2244. }
  2245. track_insert_packet(track, &parsed_packet);
  2246. return true;
  2247. }
  2248. bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec, const char *name)
  2249. {
  2250. if (dts_usec < 0)
  2251. return false;
  2252. if (!mux->chapter_track)
  2253. add_chapter_track(mux);
  2254. /* To work correctly there needs to be a chapter at PTS 0,
  2255. * create that here if necessary. */
  2256. if (dts_usec > 0 && mux->chapter_track->packets.size == 0) {
  2257. mp4_mux_add_chapter(mux, 0, obs_module_text("MP4Output.StartChapter"));
  2258. }
  2259. /* Create packets that will be muxed on final flush */
  2260. struct encoder_packet pkt;
  2261. mp4_create_chapter_pkt(&pkt, dts_usec, name);
  2262. track_insert_packet(mux->chapter_track, &pkt);
  2263. return true;
  2264. }
  2265. bool mp4_mux_finalise(struct mp4_mux *mux)
  2266. {
  2267. struct serializer *s = mux->serializer;
  2268. /* Flush remaining audio/video samples as final fragment. */
  2269. info("Flushing final fragment...");
  2270. /* Set target PTS to zero to indicate that we want to flush all
  2271. * the remaining packets */
  2272. mux->next_frag_pts = 0;
  2273. mp4_flush_fragment(mux);
  2274. info("Number of fragments: %u", mux->fragments_written);
  2275. if (mux->flags & MP4_SKIP_FINALISATION) {
  2276. warn("Skipping finalization!");
  2277. return true;
  2278. }
  2279. int64_t data_end = serializer_get_pos(s);
  2280. /* ---------------------------------------- */
  2281. /* Write full moov box */
  2282. /* Use array serializer for moov data as this will do a lot
  2283. * of seeks to write size values of variable-size boxes. */
  2284. struct serializer fs;
  2285. struct array_output_data ao;
  2286. array_output_serializer_init(&fs, &ao);
  2287. mux->serializer = &fs;
  2288. mp4_write_moov(mux, false);
  2289. s_write(s, ao.bytes.array, ao.bytes.num);
  2290. info("Full moov size: %zu KiB", ao.bytes.num / 1024);
  2291. mux->serializer = s; // restore real serializer
  2292. array_output_serializer_free(&ao);
  2293. /* ---------------------------------------- */
  2294. /* Overwrite file header (ftyp + free/moov) */
  2295. serializer_seek(s, 0, SERIALIZE_SEEK_START);
  2296. mp4_write_ftyp(mux, false);
  2297. size_t data_size = data_end - mux->placeholder_offset;
  2298. serializer_seek(s, (int64_t)mux->placeholder_offset, SERIALIZE_SEEK_START);
  2299. /* If data is more than 4 GiB the mdat header becomes 16 bytes, hence
  2300. * why we create a 16-byte placeholder "free" box at the start. */
  2301. if (data_size > UINT32_MAX) {
  2302. s_wb32(s, 1); // 1 = use "largesize" field instead
  2303. s_write(s, "mdat", 4);
  2304. s_wb64(s, data_size); // largesize (64-bit)
  2305. } else {
  2306. s_wb32(s, (uint32_t)data_size);
  2307. s_write(s, "mdat", 4);
  2308. }
  2309. info("Final mdat size: %zu KiB", data_size / 1024);
  2310. return true;
  2311. }