mp4-mux-internal.h 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. /******************************************************************************
  2. Copyright (C) 2024 by Dennis Sädtler <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #pragma once
  15. #include "mp4-mux.h"
  16. #include <util/darray.h>
  17. #include <util/deque.h>
  18. #include <util/serializer.h>
  /* Container flavour the muxer targets for compatibility. */
  enum mp4_flavour {
      MP4 = 0,  /* ISO/IEC 14496-12 */
      MOV = 1,  /* Apple QuickTime */
      CMAF = 2, /* ISO/IEC 23000-19 */
  };
  /* Kind of media carried by a track; zero means the type has not been set. */
  enum mp4_track_type {
      TRACK_UNKNOWN = 0,
      TRACK_VIDEO = 1,
      TRACK_AUDIO = 2,
      TRACK_CHAPTERS = 3,
  };
  /* Codec identifiers, grouped by the kind of track they belong to. */
  enum mp4_codec {
      CODEC_UNKNOWN = 0,

      /* Video codecs */
      CODEC_H264 = 1,
      CODEC_HEVC = 2,
      CODEC_AV1 = 3,

      /* Audio codecs */
      CODEC_AAC = 4,
      CODEC_OPUS = 5,
      CODEC_FLAC = 6,
      CODEC_ALAC = 7,
      CODEC_PCM_I16 = 8,
      CODEC_PCM_I24 = 9,
      CODEC_PCM_F32 = 10,

      /* Text/chapter tracks */
      CODEC_TEXT = 11,
  };
  /*
   * One chunk of sample data belonging to a track (struct mp4_track keeps an
   * array of these as "chunks").
   * NOTE(review): the per-field meanings below are inferred from the names
   * only; confirm against the code that fills them in.
   */
  struct chunk {
      uint64_t offset;  /* presumably the chunk's position in the output file */
      uint32_t size;    /* presumably the chunk's size in bytes */
      uint32_t samples; /* presumably the number of samples in the chunk */
  };
  /*
   * Entry of a track's "deltas" array (time delta between samples).
   * NOTE(review): looks like a run-length pair, i.e. `count` consecutive
   * samples sharing the same `delta` -- confirm against the writer code.
   */
  struct sample_delta {
      uint32_t count;
      uint32_t delta;
  };
  /*
   * Entry of a track's "offsets" array (sample CT-DT offset, video only).
   * The offset is signed.
   * NOTE(review): looks like a run-length pair, i.e. `count` consecutive
   * samples sharing the same `offset` -- confirm against the writer code.
   */
  struct sample_offset {
      uint32_t count;
      int32_t offset;
  };
  /*
   * Per-sample record kept in a track's temporary "fragment_samples" array,
   * describing a sample that will go into the next fragment.
   * NOTE(review): units of the fields are not visible here; `offset` is
   * presumably the same CT-DT offset stored in struct sample_offset.
   */
  struct fragment_sample {
      uint32_t size;
      int32_t offset;
      uint32_t duration;
  };
  /*
   * State kept by the muxer for a single track: identity, timing information,
   * queued packets and the per-sample bookkeeping arrays.
   */
  struct mp4_track {
      enum mp4_track_type type;
      enum mp4_codec codec;

      /* Track ID in container */
      uint8_t track_id;

      /* Number of samples for this track */
      uint64_t samples;
      /* Duration for this track (NOTE(review): unit not stated here) */
      uint64_t duration;

      /* Encoder associated with this track */
      obs_encoder_t *encoder;

      /* Time Base (1/FPS for video, 1/sample rate for audio) */
      uint32_t timebase_num;
      uint32_t timebase_den;
      /* Output timescale calculated from time base (Video only) */
      uint32_t timescale;

      /* First PTS this track has seen (in track timescale) */
      int64_t first_pts;
      /* Highest PTS this track has seen (in usec) */
      int64_t last_pts_usec;

      /* deque of encoder_packet belonging to this track */
      struct deque packets;

      /* Sample sizes (fixed for PCM) */
      uint32_t sample_size;
      DARRAY(uint32_t) sample_sizes;

      /* Data chunks in file containing samples for this track */
      DARRAY(struct chunk) chunks;

      /* Time delta between samples */
      DARRAY(struct sample_delta) deltas;

      /* Sample CT-DT offset, i.e. DTS-PTS offset (Video only) */
      bool needs_ctts;
      int32_t dts_offset;
      DARRAY(struct sample_offset) offsets;

      /* Sync samples, i.e. keyframes (Video only) */
      DARRAY(uint32_t) sync_samples;

      /* Temporary array with information about the samples to be included
       * in the next fragment. */
      DARRAY(struct fragment_sample) fragment_samples;
  };
  /*
   * Top-level muxer state: the output and serializer it is attached to,
   * format options, fragmentation bookkeeping and the tracks being muxed.
   */
  struct mp4_mux {
      obs_output_t *output;
      struct serializer *serializer;

      /* Target format compatibility */
      enum mp4_flavour mode;
      /* Flags */
      enum mp4_mux_flags flags;

      /* NOTE(review): presumably the number of fragments written so far */
      uint32_t fragments_written;
      /* PTS where next fragmentation should take place */
      int64_t next_frag_pts;

      /* Creation time (seconds since Jan 1 1904) */
      uint64_t creation_time;

      /* Offset of placeholder atom/box to contain final mdat header */
      size_t placeholder_offset;

      /* NOTE(review): presumably a counter used to hand out track IDs */
      uint8_t track_ctr;
      /* Audio/Video tracks */
      DARRAY(struct mp4_track) tracks;
      /* Special tracks */
      struct mp4_track *chapter_track;
  };
  /* clang-format off */
  /*
   * Identity transformation matrix as defined in ISO/IEC 14496-12:2015
   * Section 8.2.2.1. The first two columns are 16.16 fixed point and the last
   * column is 2.30 fixed point, so 0x00010000 and 0x40000000 both encode 1.0.
   *
   * Declared `static` because this header defines the object: without internal
   * linkage, every translation unit including the header would emit its own
   * external definition of the same symbol.
   */
  static const int32_t UNITY_MATRIX[9] = {
      0x00010000, 0,          0,
      0,          0x00010000, 0,
      0,          0,          0x40000000
  };
  /* clang-format on */
  /* Flag bits for the track fragment header ('tfhd'), one bit per constant. */
  enum tfhd_flags {
      BASE_DATA_OFFSET_PRESENT = 1 << 0,
      SAMPLE_DESCRIPTION_INDEX_PRESENT = 1 << 1,
      /* bit 2 is not used */
      DEFAULT_SAMPLE_DURATION_PRESENT = 1 << 3,
      DEFAULT_SAMPLE_SIZE_PRESENT = 1 << 4,
      DEFAULT_SAMPLE_FLAGS_PRESENT = 1 << 5,
      DURATION_IS_EMPTY = 1 << 16,
      DEFAULT_BASE_IS_MOOF = 1 << 17,
  };
  /* Flag bits for the track fragment run ('trun'), one bit per constant. */
  enum trun_flags {
      DATA_OFFSET_PRESENT = 1 << 0,
      FIRST_SAMPLE_FLAGS_PRESENT = 1 << 2,
      SAMPLE_DURATION_PRESENT = 1 << 8,
      SAMPLE_SIZE_PRESENT = 1 << 9,
      SAMPLE_FLAGS_PRESENT = 1 << 10,
      SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT = 1 << 11,
  };
  /*
   * ISO Standard structure (big endian so we can't easily use it):
   *
   * struct sample_flags {
   *     uint32_t reserved : 4;
   *     uint32_t is_leading : 2;
   *     uint32_t sample_depends_on : 2;
   *     uint32_t sample_is_depended_on : 2;
   *     uint32_t sample_has_redundancy : 2;
   *     uint32_t sample_padding_value : 3;
   *     uint32_t sample_is_non_sync_sample : 1;
   *     uint32_t sample_degradation_priority : 16;
   * };
   *
   * The constants below are the corresponding field values shifted to their
   * position inside the 32-bit word.
   */
  enum sample_flags {
      SAMPLE_FLAG_IS_NON_SYNC = 1 << 16, /* sample_is_non_sync_sample = 1 */
      SAMPLE_FLAG_DEPENDS_YES = 1 << 24, /* sample_depends_on = 1 */
      SAMPLE_FLAG_DEPENDS_NO = 2 << 24,  /* sample_depends_on = 2 */
  };
  #ifndef _WIN32
  /*
   * Returns the smaller of two sizes.
   * Only compiled when _WIN32 is not defined (NOTE(review): presumably because
   * the Windows headers already provide a `min`).
   */
  static inline size_t min(size_t a, size_t b)
  {
      if (b <= a)
          return b;
      return a;
  }
  #endif
  175. static inline void get_speaker_positions(enum speaker_layout layout,
  176. uint8_t *arr, uint8_t *size,
  177. uint8_t *iso_layout)
  178. {
  179. switch (layout) {
  180. case SPEAKERS_MONO:
  181. arr[0] = 2; // FC
  182. *size = 1;
  183. *iso_layout = 1;
  184. break;
  185. case SPEAKERS_UNKNOWN:
  186. case SPEAKERS_STEREO:
  187. arr[0] = 0; // FL
  188. arr[1] = 1; // FR
  189. *size = 2;
  190. *iso_layout = 2;
  191. break;
  192. case SPEAKERS_2POINT1:
  193. arr[0] = 0; // FL
  194. arr[1] = 1; // FR
  195. arr[2] = 3; // LFE
  196. *size = 3;
  197. break;
  198. case SPEAKERS_4POINT0:
  199. arr[0] = 0; // FL
  200. arr[1] = 1; // FR
  201. arr[2] = 2; // FC
  202. arr[3] = 10; // RC
  203. *size = 4;
  204. *iso_layout = 4;
  205. break;
  206. case SPEAKERS_4POINT1:
  207. arr[0] = 0; // FL
  208. arr[1] = 1; // FR
  209. arr[2] = 2; // FC
  210. arr[3] = 3; // LFE
  211. arr[4] = 10; // RC
  212. *size = 5;
  213. break;
  214. case SPEAKERS_5POINT1:
  215. arr[0] = 0; // FL
  216. arr[1] = 1; // FR
  217. arr[2] = 2; // FC
  218. arr[3] = 3; // LFE
  219. arr[4] = 8; // RL
  220. arr[5] = 9; // RR
  221. *size = 6;
  222. break;
  223. case SPEAKERS_7POINT1:
  224. arr[0] = 0; // FL
  225. arr[1] = 1; // FR
  226. arr[2] = 2; // FC
  227. arr[3] = 3; // LFE
  228. arr[4] = 8; // RL
  229. arr[5] = 9; // RR
  230. arr[6] = 13; // SL
  231. arr[7] = 14; // SR
  232. *size = 8;
  233. *iso_layout = 12;
  234. break;
  235. }
  236. }
  237. static inline void get_colour_information(obs_encoder_t *enc, uint16_t *pri,
  238. uint16_t *trc, uint16_t *spc,
  239. uint8_t *full_range)
  240. {
  241. video_t *video = obs_encoder_video(enc);
  242. const struct video_output_info *info = video_output_get_info(video);
  243. *full_range = info->range == VIDEO_RANGE_FULL ? 1 : 0;
  244. switch (info->colorspace) {
  245. case VIDEO_CS_601:
  246. *pri = 6; // OBSCOL_PRI_SMPTE170M
  247. *trc = 6;
  248. *spc = 6;
  249. break;
  250. case VIDEO_CS_DEFAULT:
  251. case VIDEO_CS_709:
  252. *pri = 1; // OBSCOL_PRI_BT709
  253. *trc = 1;
  254. *spc = 1;
  255. break;
  256. case VIDEO_CS_SRGB:
  257. *pri = 1; // OBSCOL_PRI_BT709
  258. *trc = 13; // OBSCOL_TRC_IEC61966_2_1
  259. *spc = 1; // OBSCOL_PRI_BT709
  260. break;
  261. case VIDEO_CS_2100_PQ:
  262. *pri = 9; // OBSCOL_PRI_BT2020
  263. *trc = 16; // OBSCOL_TRC_SMPTE2084
  264. *spc = 9; // OBSCOL_SPC_BT2020_NCL
  265. break;
  266. case VIDEO_CS_2100_HLG:
  267. *pri = 9; // OBSCOL_PRI_BT2020
  268. *trc = 18; // OBSCOL_TRC_ARIB_STD_B67
  269. *spc = 9; // OBSCOL_SPC_BT2020_NCL
  270. }
  271. }
  /* Chapter stubs (from libavformat/movenc.c) */

  /*
   * Stub bytes laid out as a TextSampleEntry, followed by a BoxRecord, a
   * StyleRecord and a FontTableBox holding a single FontRecord with an empty
   * font name.
   */
  static const uint8_t TEXT_STUB_HEADER[] = {
      /* TextSampleEntry */
      0x00, 0x00, 0x00, 0x01, /* displayFlags */
      0x00,                   /* horizontal justification */
      0x00,                   /* vertical justification */
      0x00, 0x00, 0x00, 0x00, /* bgColourRed/Green/Blue/Alpha */

      /* BoxRecord */
      0x00, 0x00, /* defTextBoxTop */
      0x00, 0x00, /* defTextBoxLeft */
      0x00, 0x00, /* defTextBoxBottom */
      0x00, 0x00, /* defTextBoxRight */

      /* StyleRecord */
      0x00, 0x00,             /* startChar */
      0x00, 0x00,             /* endChar */
      0x00, 0x01,             /* fontID */
      0x00,                   /* fontStyleFlags */
      0x00,                   /* fontSize */
      0x00, 0x00, 0x00, 0x00, /* fgColourRed/Green/Blue/Alpha */

      /* FontTableBox */
      0x00, 0x00, 0x00, 0x0D, /* box size */
      'f', 't', 'a', 'b',     /* box atom name */
      0x00, 0x01,             /* entry count */

      /* FontRecord */
      0x00, 0x01, /* font ID */
      0x00,       /* font name length */
  };
  /* clang-format off */
  /*
   * 12-byte 'encd' box: a big-endian size of 12, the four-character type and
   * a four-byte payload of 0x00000100.
   * NOTE(review): judging by the name it is appended to chapter packets;
   * confirm against the chapter-writing code.
   */
  static const char CHAPTER_PKT_FOOTER[12] = {
      0x00, 0x00, 0x00, 0x0C, /* box size */
      'e',  'n',  'c',  'd',  /* box type */
      0x00, 0x00, 0x01, 0x00, /* box payload */
  };
  /* clang-format on */