mp4-mux-internal.h 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /******************************************************************************
  2. Copyright (C) 2024 by Dennis Sädtler <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #pragma once
  15. #include "mp4-mux.h"
  16. #include <util/darray.h>
  17. #include <util/deque.h>
  18. #include <util/serializer.h>
  /* Kind of media a track carries. */
  enum mp4_track_type {
      TRACK_UNKNOWN = 0,
      TRACK_VIDEO,
      TRACK_AUDIO,
      TRACK_CHAPTERS,
  };
  /* Codec of the samples stored in a track, grouped by track kind. */
  enum mp4_codec {
      CODEC_UNKNOWN = 0,

      /* Video codecs */
      CODEC_H264,
      CODEC_HEVC,
      CODEC_AV1,
      CODEC_PRORES,

      /* Audio codecs */
      CODEC_AAC,
      CODEC_OPUS,
      CODEC_FLAC,
      CODEC_ALAC,
      CODEC_PCM_I16,
      CODEC_PCM_I24,
      CODEC_PCM_F32,

      /* Text/chapter tracks */
      CODEC_TEXT,
  };
  /*
   * One data chunk of a track (see mp4_track.chunks: "data chunks in file
   * containing samples for this track").
   */
  struct chunk {
      /* Position of the chunk (presumably a byte offset into the file) */
      uint64_t offset;
      /* Size of the chunk */
      uint32_t size;
      /* Number of samples in the chunk */
      uint32_t samples;
  };
  /*
   * Entry of mp4_track.deltas ("time delta between samples"): a delta value
   * together with a count (presumably the number of consecutive samples
   * sharing that delta).
   */
  struct sample_delta {
      uint32_t count;
      uint32_t delta;
  };
  /*
   * Entry of mp4_track.offsets ("sample CT-DT offset"): a signed offset
   * together with a count (presumably the number of consecutive samples
   * sharing that offset).
   */
  struct sample_offset {
      uint32_t count;
      int32_t offset;
  };
  /*
   * Per-sample record kept in mp4_track.fragment_samples while collecting
   * the samples to be included in the next fragment.
   */
  struct fragment_sample {
      /* Size of the sample */
      uint32_t size;
      /* Signed offset of the sample (presumably the CT-DT offset) */
      int32_t offset;
      /* Duration of the sample */
      uint32_t duration;
  };
  61. struct mp4_track {
  62. enum mp4_track_type type;
  63. enum mp4_codec codec;
  64. /* Track ID in container */
  65. uint8_t track_id;
  66. /* Number of samples for this track */
  67. uint64_t samples;
  68. /* Duration for this track */
  69. uint64_t duration;
  70. /* Encoder associated with this track */
  71. obs_encoder_t *encoder;
  72. /* Time Base (1/FPS for video, 1/sample rate for audio) */
  73. uint32_t timebase_num;
  74. uint32_t timebase_den;
  75. /* Output timescale calculated from time base */
  76. uint32_t timescale;
  77. /* First PTS this track has seen (in track timescale) */
  78. int64_t first_pts;
  79. /* Highest PTS this track has seen (in usec) */
  80. int64_t last_pts_usec;
  81. /* deque of encoder_packet belonging to this track */
  82. struct deque packets;
  83. /* Sample sizes (fixed for PCM) */
  84. uint32_t sample_size;
  85. DARRAY(uint32_t) sample_sizes;
  86. /* Data chunks in file containing samples for this track */
  87. DARRAY(struct chunk) chunks;
  88. /* Time delta between samples */
  89. DARRAY(struct sample_delta) deltas;
  90. /* Sample CT-DT offset, i.e. DTS-PTS offset (Video only) */
  91. bool needs_ctts;
  92. int32_t dts_offset;
  93. DARRAY(struct sample_offset) offsets;
  94. /* Sync samples, i.e. keyframes (Video only) */
  95. DARRAY(uint32_t) sync_samples;
  96. /* Temporary array with information about the samples to be included
  97. * in the next fragment. */
  98. DARRAY(struct fragment_sample) fragment_samples;
  99. };
  100. struct mp4_mux {
  101. obs_output_t *output;
  102. struct serializer *serializer;
  103. /* Target format compatibility */
  104. enum mp4_flavor flavor;
  105. /* Flags */
  106. enum mp4_mux_flags flags;
  107. uint32_t fragments_written;
  108. /* PTS where next fragmentation should take place */
  109. int64_t next_frag_pts;
  110. /* Creation time (seconds since Jan 1 1904) */
  111. uint64_t creation_time;
  112. /* Offset of placeholder atom/box to contain final mdat header */
  113. size_t placeholder_offset;
  114. uint8_t track_ctr;
  115. /* Audio/Video tracks */
  116. DARRAY(struct mp4_track) tracks;
  117. /* Special tracks */
  118. struct mp4_track *chapter_track;
  119. };
  /* clang-format off */
  /*
   * Unity transformation matrix, defined in ISO/IEC 14496-12:2015
   * Section 8.2.2.1.
   *
   * Declared `static` so that each translation unit including this header
   * gets its own private copy; a plain `const` object at file scope has
   * external linkage in C and would be defined once per includer.
   */
  static const int32_t UNITY_MATRIX[9] = {
      0x00010000, 0,          0,
      0,          0x00010000, 0,
      0,          0,          0x40000000
  };
  /* clang-format on */
  /* Flag bits for the tfhd box, one bit per constant. */
  enum tfhd_flags {
      BASE_DATA_OFFSET_PRESENT         = 1 << 0,
      SAMPLE_DESCRIPTION_INDEX_PRESENT = 1 << 1,
      DEFAULT_SAMPLE_DURATION_PRESENT  = 1 << 3,
      DEFAULT_SAMPLE_SIZE_PRESENT      = 1 << 4,
      DEFAULT_SAMPLE_FLAGS_PRESENT     = 1 << 5,
      DURATION_IS_EMPTY                = 1 << 16,
      DEFAULT_BASE_IS_MOOF             = 1 << 17,
  };
  /* Flag bits for the trun box, one bit per constant. */
  enum trun_flags {
      DATA_OFFSET_PRESENT                     = 1 << 0,
      FIRST_SAMPLE_FLAGS_PRESENT              = 1 << 2,
      SAMPLE_DURATION_PRESENT                 = 1 << 8,
      SAMPLE_SIZE_PRESENT                     = 1 << 9,
      SAMPLE_FLAGS_PRESENT                    = 1 << 10,
      SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT = 1 << 11,
  };
  /*
   * ISO Standard structure (big endian so we can't easily use it):
   *
   * struct sample_flags {
   *     uint32_t reserved : 4;
   *     uint32_t is_leading : 2;
   *     uint32_t sample_depends_on : 2;
   *     uint32_t sample_is_depended_on : 2;
   *     uint32_t sample_has_redundancy : 2;
   *     uint32_t sample_padding_value : 3;
   *     uint32_t sample_is_non_sync_sample : 1;
   *     uint32_t sample_degradation_priority : 16;
   * };
   *
   * The constants below are the pre-shifted values of the fields we set:
   * sample_is_non_sync_sample sits at bit 16 and sample_depends_on occupies
   * bits 24-25.
   */
  enum sample_flags {
      SAMPLE_FLAG_IS_NON_SYNC = 1 << 16,
      SAMPLE_FLAG_DEPENDS_YES = 1 << 24, /* sample_depends_on = 1 */
      SAMPLE_FLAG_DEPENDS_NO  = 2 << 24, /* sample_depends_on = 2 */
  };
  #ifndef _WIN32
  /* Returns the smaller of two sizes; only defined when _WIN32 is not. */
  static inline size_t min(size_t a, size_t b)
  {
      if (a < b)
          return a;
      return b;
  }
  #endif
  170. static inline void get_speaker_positions(enum speaker_layout layout, uint8_t *arr, uint8_t *size, uint8_t *iso_layout)
  171. {
  172. switch (layout) {
  173. case SPEAKERS_MONO:
  174. arr[0] = 2; // FC
  175. *size = 1;
  176. *iso_layout = 1;
  177. break;
  178. case SPEAKERS_UNKNOWN:
  179. case SPEAKERS_STEREO:
  180. arr[0] = 0; // FL
  181. arr[1] = 1; // FR
  182. *size = 2;
  183. *iso_layout = 2;
  184. break;
  185. case SPEAKERS_2POINT1:
  186. arr[0] = 0; // FL
  187. arr[1] = 1; // FR
  188. arr[2] = 3; // LFE
  189. *size = 3;
  190. break;
  191. case SPEAKERS_4POINT0:
  192. arr[0] = 0; // FL
  193. arr[1] = 1; // FR
  194. arr[2] = 2; // FC
  195. arr[3] = 10; // RC
  196. *size = 4;
  197. *iso_layout = 4;
  198. break;
  199. case SPEAKERS_4POINT1:
  200. arr[0] = 0; // FL
  201. arr[1] = 1; // FR
  202. arr[2] = 2; // FC
  203. arr[3] = 3; // LFE
  204. arr[4] = 10; // RC
  205. *size = 5;
  206. break;
  207. case SPEAKERS_5POINT1:
  208. arr[0] = 0; // FL
  209. arr[1] = 1; // FR
  210. arr[2] = 2; // FC
  211. arr[3] = 3; // LFE
  212. arr[4] = 8; // RL
  213. arr[5] = 9; // RR
  214. *size = 6;
  215. break;
  216. case SPEAKERS_7POINT1:
  217. arr[0] = 0; // FL
  218. arr[1] = 1; // FR
  219. arr[2] = 2; // FC
  220. arr[3] = 3; // LFE
  221. arr[4] = 8; // RL
  222. arr[5] = 9; // RR
  223. arr[6] = 13; // SL
  224. arr[7] = 14; // SR
  225. *size = 8;
  226. *iso_layout = 12;
  227. break;
  228. }
  229. }
  230. static inline void get_colour_information(obs_encoder_t *enc, uint16_t *pri, uint16_t *trc, uint16_t *spc,
  231. uint8_t *full_range)
  232. {
  233. video_t *video = obs_encoder_video(enc);
  234. const struct video_output_info *info = video_output_get_info(video);
  235. *full_range = info->range == VIDEO_RANGE_FULL ? 1 : 0;
  236. switch (info->colorspace) {
  237. case VIDEO_CS_601:
  238. *pri = 6; // OBSCOL_PRI_SMPTE170M
  239. *trc = 6;
  240. *spc = 6;
  241. break;
  242. case VIDEO_CS_DEFAULT:
  243. case VIDEO_CS_709:
  244. *pri = 1; // OBSCOL_PRI_BT709
  245. *trc = 1;
  246. *spc = 1;
  247. break;
  248. case VIDEO_CS_SRGB:
  249. *pri = 1; // OBSCOL_PRI_BT709
  250. *trc = 13; // OBSCOL_TRC_IEC61966_2_1
  251. *spc = 1; // OBSCOL_PRI_BT709
  252. break;
  253. case VIDEO_CS_2100_PQ:
  254. *pri = 9; // OBSCOL_PRI_BT2020
  255. *trc = 16; // OBSCOL_TRC_SMPTE2084
  256. *spc = 9; // OBSCOL_SPC_BT2020_NCL
  257. break;
  258. case VIDEO_CS_2100_HLG:
  259. *pri = 9; // OBSCOL_PRI_BT2020
  260. *trc = 18; // OBSCOL_TRC_ARIB_STD_B67
  261. *spc = 9; // OBSCOL_SPC_BT2020_NCL
  262. }
  263. }
  /* Chapter stubs (from libavformat/movenc.c) */
  static const uint8_t TEXT_STUB_HEADER[] = {
      /* ---- TextSampleEntry ---- */
      /* displayFlags */
      0x00, 0x00, 0x00, 0x01,
      /* horizontal + vertical justification */
      0x00, 0x00,
      /* bgColourRed/Green/Blue/Alpha */
      0x00, 0x00, 0x00, 0x00,

      /* ---- BoxRecord ---- */
      /* defTextBoxTop/Left */
      0x00, 0x00, 0x00, 0x00,
      /* defTextBoxBottom/Right */
      0x00, 0x00, 0x00, 0x00,

      /* ---- StyleRecord ---- */
      /* startChar + endChar */
      0x00, 0x00, 0x00, 0x00,
      /* fontID */
      0x00, 0x01,
      /* fontStyleFlags + fontSize */
      0x00, 0x00,
      /* fgColourRed/Green/Blue/Alpha */
      0x00, 0x00, 0x00, 0x00,

      /* ---- FontTableBox ---- */
      /* box size (13 bytes, counted from here to the end of the array) */
      0x00, 0x00, 0x00, 0x0D,
      /* box atom name */
      'f', 't', 'a', 'b',
      /* entry count */
      0x00, 0x01,

      /* ---- FontRecord ---- */
      /* font ID */
      0x00, 0x01,
      /* font name length */
      0x00,
  };
  /* clang-format off */
  /*
   * 12-byte trailer for chapter packets: a box with size 0x0000000C, type
   * 'encd' and the payload bytes 00 00 01 00.
   */
  static const char CHAPTER_PKT_FOOTER[12] = {
      0x00, 0x00, 0x00, 0x0C, /* box size */
      'e',  'n',  'c',  'd',  /* box type */
      0x00, 0x00, 0x01, 0x00  /* payload */
  };
  /* clang-format on */
  /** QTFF/MOV specifics **/

  /*
   * LPCM format flag bits, see
   * https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2#LPCM-flag-values
   *
   * The kLinearPCMFormatFlag* names are aliases of the kAudioFormatFlag*
   * values.
   */
  enum lpcm_flags {
      kAudioFormatFlagIsFloat = 0x1,
      kAudioFormatFlagIsSignedInteger = 0x4,
      kAudioFormatFlagIsPacked = 0x8,

      kLinearPCMFormatFlagIsFloat = kAudioFormatFlagIsFloat,
      kLinearPCMFormatFlagIsSignedInteger = kAudioFormatFlagIsSignedInteger,
      kLinearPCMFormatFlagIsPacked = kAudioFormatFlagIsPacked,
  };
  303. static inline uint32_t get_lpcm_flags(enum mp4_codec codec)
  304. {
  305. if (codec == CODEC_PCM_F32)
  306. return kLinearPCMFormatFlagIsFloat | kLinearPCMFormatFlagIsPacked;
  307. if (codec == CODEC_PCM_I16 || codec == CODEC_PCM_I24)
  308. return kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  309. return 0;
  310. }
  /* Bits of the channel bitmap built by get_mov_channel_bitmap(). */
  enum channel_map_bits {
      FL  = 0x001, /* bit 0 */
      FR  = 0x002, /* bit 1 */
      FC  = 0x004, /* bit 2 */
      LFE = 0x008, /* bit 3 */
      RL  = 0x010, /* bit 4 */
      RR  = 0x020, /* bit 5 */
      RC  = 0x100, /* bit 8 */
      SL  = 0x200, /* bit 9 */
      SR  = 0x400, /* bit 10 */
  };
  322. static uint32_t get_mov_channel_bitmap(enum speaker_layout layout)
  323. {
  324. switch (layout) {
  325. case SPEAKERS_MONO:
  326. return FC;
  327. case SPEAKERS_STEREO:
  328. return FL | FR;
  329. case SPEAKERS_2POINT1:
  330. return FL | FR | LFE;
  331. case SPEAKERS_4POINT0:
  332. return FL | FR | FC | RC;
  333. case SPEAKERS_4POINT1:
  334. return FL | FR | FC | LFE | RC;
  335. case SPEAKERS_5POINT1:
  336. return FL | FR | FC | LFE | RL | RR;
  337. case SPEAKERS_7POINT1:
  338. return FL | FR | FC | LFE | RL | RR | SL | SR;
  339. case SPEAKERS_UNKNOWN:
  340. break;
  341. }
  342. return 0;
  343. }
  /*
   * Channel layout tags. Each value is (tag number << 16) | low 16 bits, as
   * spelled out next to each constant.
   */
  enum coreaudio_layout {
      kAudioChannelLayoutTag_UseChannelBitmap = 0x00010000, /* (1 << 16) | 0 */
      kAudioChannelLayoutTag_Mono = 0x00640001,             /* (100 << 16) | 1 */
      kAudioChannelLayoutTag_Stereo = 0x00650002,           /* (101 << 16) | 2 */
      kAudioChannelLayoutTag_DVD_4 = 0x00850003,            /* (133 << 16) | 3, 2.1 (AAC Only) */
  };
  350. static enum coreaudio_layout get_mov_channel_layout(enum mp4_codec codec, enum speaker_layout layout)
  351. {
  352. switch (layout) {
  353. case SPEAKERS_MONO:
  354. return kAudioChannelLayoutTag_Mono;
  355. case SPEAKERS_STEREO:
  356. return kAudioChannelLayoutTag_Stereo;
  357. case SPEAKERS_2POINT1:
  358. /* Only supported for AAC. */
  359. return codec == CODEC_AAC ? kAudioChannelLayoutTag_DVD_4 : kAudioChannelLayoutTag_UseChannelBitmap;
  360. default:
  361. return kAudioChannelLayoutTag_UseChannelBitmap;
  362. }
  363. }