mp4-mux.c 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755
  1. /******************************************************************************
  2. Copyright (C) 2024 by Dennis Sädtler <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #include "mp4-mux-internal.h"
  15. #include "rtmp-hevc.h"
  16. #include "rtmp-av1.h"
  17. #include <obs-avc.h>
  18. #include <obs-hevc.h>
  19. #include <obs-module.h>
  20. #include <util/dstr.h>
  21. #include <util/platform.h>
  22. #include <util/array-serializer.h>
  23. #include <time.h>
  24. /*
  25. * (Mostly) compliant MP4 muxer for fun and profit.
  26. * Based on ISO/IEC 14496-12 and FFmpeg's libavformat/movenc.c ([L]GPL)
  27. *
  28. * Specification section numbers are noted where applicable.
  29. * Standard identifier is included if not referring to ISO/IEC 14496-12.
  30. */
  /* Logging helpers. They forward to blog() with the output's name as a prefix
   * and reference `mux->output`, so a `mux` pointer must be in scope wherever
   * they are expanded. */
  #define do_log(level, format, ...) \
      blog(level, "[mp4 muxer: '%s'] " format, obs_output_get_name(mux->output), ##__VA_ARGS__)

  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  35. /* Helper to overwrite placeholder size and return total size. */
  36. static inline size_t write_box_size(struct serializer *s, int64_t start)
  37. {
  38. int64_t end = serializer_get_pos(s);
  39. size_t size = end - start;
  40. serializer_seek(s, start, SERIALIZE_SEEK_START);
  41. s_wb32(s, (uint32_t)size);
  42. serializer_seek(s, end, SERIALIZE_SEEK_START);
  43. return size;
  44. }
  /// 4.2 Box header with size and char[4] name
  /* Sizes that fit in 32 bits use the compact header. Anything larger is written
   * with size == 1 and a 64-bit largesize placed after the box type. */
  static inline void write_box(struct serializer *s, const size_t size, const char name[4])
  {
      const bool needs_largesize = size > UINT32_MAX;

      s_wb32(s, needs_largesize ? 1 : (uint32_t)size); // size (1 = largesize follows)
      s_write(s, name, 4);                             // boxtype
      if (needs_largesize)
          s_wb64(s, size); // largesize
  }
  /// 4.2 FullBox extended header with u8 version and u24 flags
  /* Emits the plain box header via write_box() and appends the version byte and
   * the 24-bit flags field. */
  static inline void write_fullbox(struct serializer *s, const size_t size, const char name[4], uint8_t version,
                                   uint32_t flags)
  {
      write_box(s, size, name);

      s_w8(s, version); // version
      s_wb24(s, flags); // flags
  }
  65. /// 4.3 File Type Box
  66. static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
  67. {
  68. struct serializer *s = mux->serializer;
  69. int64_t start = serializer_get_pos(s);
  70. write_box(s, 0, "ftyp");
  71. const char *major_brand = "isom";
  72. /* Following FFmpeg's example, when using negative CTS the major brand
  73. * needs to be either iso4 or iso6 depending on whether the file is
  74. * currently fragmented. */
  75. if (mux->flags & MP4_USE_NEGATIVE_CTS)
  76. major_brand = fragmented ? "iso6" : "iso4";
  77. s_write(s, major_brand, 4); // major brand
  78. s_wb32(s, 512); // minor version
  79. // minor brands (first one matches major brand)
  80. s_write(s, major_brand, 4);
  81. /* Write isom base brand if it's not the major brand */
  82. if (strcmp(major_brand, "isom") != 0)
  83. s_write(s, "isom", 4);
  84. /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
  85. * as a placeholder to maintain ftyp box size. */
  86. if (fragmented && strcmp(major_brand, "iso6") != 0)
  87. s_write(s, "iso6", 4);
  88. else
  89. s_write(s, "obs1", 4);
  90. s_write(s, "iso2", 4);
  91. /* Include H.264 brand if used */
  92. for (size_t i = 0; i < mux->tracks.num; i++) {
  93. struct mp4_track *track = &mux->tracks.array[i];
  94. if (track->type == TRACK_VIDEO) {
  95. if (track->codec == CODEC_H264)
  96. s_write(s, "avc1", 4);
  97. break;
  98. }
  99. }
  100. /* General MP4 brannd */
  101. s_write(s, "mp41", 4);
  102. return write_box_size(s, start);
  103. }
  104. /// 8.1.2 Free Space Box
  105. static size_t mp4_write_free(struct mp4_mux *mux)
  106. {
  107. struct serializer *s = mux->serializer;
  108. /* Write a 16-byte free box, so it can be replaced with a 64-bit size
  109. * box header (u32 + char[4] + u64) */
  110. s_wb32(s, 16);
  111. s_write(s, "free", 4);
  112. s_wb64(s, 0);
  113. return 16;
  114. }
  115. /// 8.2.2 Movie Header Box
  116. static size_t mp4_write_mvhd(struct mp4_mux *mux)
  117. {
  118. struct serializer *s = mux->serializer;
  119. size_t start = serializer_get_pos(s);
  120. /* Use primary video track as the baseline for duration */
  121. uint64_t duration = 0;
  122. for (size_t i = 0; i < mux->tracks.num; i++) {
  123. struct mp4_track *track = &mux->tracks.array[i];
  124. if (track->type == TRACK_VIDEO) {
  125. duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  126. break;
  127. }
  128. }
  129. write_fullbox(s, 0, "mvhd", 0, 0);
  130. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  131. s_wb64(s, mux->creation_time); // creation time
  132. s_wb64(s, mux->creation_time); // modification time
  133. s_wb32(s, 1000); // timescale
  134. s_wb64(s, duration); // duration (0 for fragmented)
  135. } else {
  136. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  137. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  138. s_wb32(s, 1000); // timescale
  139. s_wb32(s, (uint32_t)duration); // duration (0 for fragmented)
  140. }
  141. s_wb32(s, 0x00010000); // rate, 16.16 fixed float (1 << 16)
  142. s_wb16(s, 0x0100); // volume
  143. s_wb16(s, 0); // reserved
  144. s_wb32(s, 0); // reserved
  145. s_wb32(s, 0); // reserved
  146. // Matrix
  147. for (int i = 0; i < 9; i++)
  148. s_wb32(s, UNITY_MATRIX[i]);
  149. // pre_defined
  150. s_wb32(s, 0);
  151. s_wb32(s, 0);
  152. s_wb32(s, 0);
  153. s_wb32(s, 0);
  154. s_wb32(s, 0);
  155. s_wb32(s, 0);
  156. s_wb32(s, mux->track_ctr + 1); // next_track_ID
  157. return write_box_size(s, start);
  158. }
  159. /// 8.3.2 Track Header Box
  160. static size_t mp4_write_tkhd(struct mp4_mux *mux, struct mp4_track *track)
  161. {
  162. struct serializer *s = mux->serializer;
  163. size_t start = serializer_get_pos(s);
  164. uint64_t duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  165. /* Flags are 0x1 (enabled) | 0x2 (in movie) */
  166. static const uint32_t flags = 0x1 | 0x2;
  167. write_fullbox(s, 0, "tkhd", 0, flags);
  168. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  169. s_wb64(s, mux->creation_time); // creation time
  170. s_wb64(s, mux->creation_time); // modification time
  171. s_wb32(s, track->track_id); // track_id
  172. s_wb32(s, 0); // reserved
  173. s_wb64(s, duration); // duration in movie timescale
  174. } else {
  175. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  176. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  177. s_wb32(s, track->track_id); // track_id
  178. s_wb32(s, 0); // reserved
  179. s_wb32(s, (uint32_t)duration); // duration in movie timescale
  180. }
  181. s_wb32(s, 0); // reserved
  182. s_wb32(s, 0); // reserved
  183. s_wb16(s, 0); // layer
  184. s_wb16(s, track->type == TRACK_AUDIO ? 1 : 0); // alternate group
  185. s_wb16(s, track->type == TRACK_AUDIO ? 0x100 : 0); // volume
  186. s_wb16(s, 0); // reserved
  187. // Matrix (predefined)
  188. for (int i = 0; i < 9; i++)
  189. s_wb32(s, UNITY_MATRIX[i]);
  190. if (track->type == TRACK_AUDIO) {
  191. s_wb32(s, 0); // width
  192. s_wb32(s, 0); // height
  193. } else {
  194. /* width/height are fixed point 16.16, so we just shift the
  195. * integer to the upper 16 bits */
  196. uint32_t width = obs_encoder_get_width(track->encoder);
  197. s_wb32(s, width << 16);
  198. uint32_t height = obs_encoder_get_height(track->encoder);
  199. s_wb32(s, height << 16);
  200. }
  201. return write_box_size(s, start);
  202. }
  203. /// 8.4.2 Media Header Box
  204. static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
  205. {
  206. struct serializer *s = mux->serializer;
  207. size_t size = 32;
  208. uint8_t version = 0;
  209. uint64_t duration = track->duration;
  210. uint32_t timescale = track->timescale;
  211. if (track->type == TRACK_VIDEO) {
  212. /* Update to track timescale */
  213. duration = util_mul_div64(duration, track->timescale, track->timebase_den);
  214. }
  215. /* use 64-bit duration if necessary */
  216. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  217. size = 44;
  218. version = 1;
  219. }
  220. write_fullbox(s, size, "mdhd", version, 0);
  221. if (version == 1) {
  222. s_wb64(s, mux->creation_time); // creation time
  223. s_wb64(s, mux->creation_time); // modification time
  224. s_wb32(s, timescale); // timescale
  225. s_wb64(s, (uint32_t)duration); // duration
  226. } else {
  227. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  228. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  229. s_wb32(s, timescale); // timescale
  230. s_wb32(s, (uint32_t)duration); // duration
  231. }
  232. s_wb16(s, 21956); // language (undefined)
  233. s_wb16(s, 0); // pre_defined
  234. return size;
  235. }
  236. /// 8.4.3 Handler Reference Box
  237. static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
  238. {
  239. struct serializer *s = mux->serializer;
  240. int64_t start = serializer_get_pos(s);
  241. write_fullbox(s, 0, "hdlr", 0, 0);
  242. s_wb32(s, 0); // pre_defined
  243. // handler_type
  244. if (track->type == TRACK_VIDEO)
  245. s_write(s, "vide", 4);
  246. else if (track->type == TRACK_CHAPTERS)
  247. s_write(s, "text", 4);
  248. else
  249. s_write(s, "soun", 4);
  250. s_wb32(s, 0); // reserved
  251. s_wb32(s, 0); // reserved
  252. s_wb32(s, 0); // reserved
  253. // name (utf-8 string, null terminated)
  254. if (track->type == TRACK_VIDEO)
  255. s_write(s, "OBS Video Handler", 18);
  256. else if (track->type == TRACK_CHAPTERS)
  257. s_write(s, "OBS Chapter Handler", 20);
  258. else
  259. s_write(s, "OBS Audio Handler", 18);
  260. return write_box_size(s, start);
  261. }
  262. /// 12.1.2 Video media header
  263. static size_t mp4_write_vmhd(struct mp4_mux *mux)
  264. {
  265. struct serializer *s = mux->serializer;
  266. /* Flags is always 1 */
  267. write_fullbox(s, 20, "vmhd", 0, 1);
  268. s_wb16(s, 0); // graphicsmode
  269. s_wb16(s, 0); // opcolor r
  270. s_wb16(s, 0); // opcolor g
  271. s_wb16(s, 0); // opcolor b
  272. return 16;
  273. }
  274. /// 12.2.2 Sound media header
  275. static size_t mp4_write_smhd(struct mp4_mux *mux)
  276. {
  277. struct serializer *s = mux->serializer;
  278. write_fullbox(s, 16, "smhd", 0, 0);
  279. s_wb16(s, 0); // balance
  280. s_wb16(s, 0); // reserved
  281. return 16;
  282. }
  283. /// (QTFF/Apple) Text media information atom
  284. static size_t mp4_write_qt_text(struct mp4_mux *mux)
  285. {
  286. struct serializer *s = mux->serializer;
  287. int64_t start = serializer_get_pos(s);
  288. write_box(s, 0, "text");
  289. /* Identity matrix, note that it's not fixed point 16.16 */
  290. s_wb16(s, 0x01);
  291. s_wb32(s, 0x00);
  292. s_wb32(s, 0x00);
  293. s_wb32(s, 0x00);
  294. s_wb32(s, 0x01);
  295. s_wb32(s, 0x00);
  296. s_wb32(s, 0x00);
  297. s_wb32(s, 0x00);
  298. s_wb32(s, 0x00004000);
  299. /* Seemingly undocumented */
  300. s_wb16(s, 0x0000);
  301. return write_box_size(s, start);
  302. }
  303. /// (QTFF/Apple) Base media info atom
  304. static size_t mp4_write_gmin(struct mp4_mux *mux)
  305. {
  306. struct serializer *s = mux->serializer;
  307. int64_t start = serializer_get_pos(s);
  308. write_fullbox(s, 0, "gmin", 0, 0);
  309. s_wb16(s, 0x40); // graphics mode
  310. s_wb16(s, 0x8000); // opColor r
  311. s_wb16(s, 0x8000); // opColor g
  312. s_wb16(s, 0x8000); // opColor b
  313. s_wb16(s, 0); // balance
  314. s_wb16(s, 0); // reserved
  315. return write_box_size(s, start);
  316. }
  317. /// (QTFF/Apple) Base media information header atom
  318. static size_t mp4_write_gmhd(struct mp4_mux *mux)
  319. {
  320. struct serializer *s = mux->serializer;
  321. int64_t start = serializer_get_pos(s);
  322. write_box(s, 0, "gmhd");
  323. // gmin
  324. mp4_write_gmin(mux);
  325. // text (QuickTime)
  326. mp4_write_qt_text(mux);
  327. return write_box_size(s, start);
  328. }
  329. /// ISO/IEC 14496-15 5.4.2.1 AVCConfigurationBox
  330. static size_t mp4_write_avcC(struct mp4_mux *mux, obs_encoder_t *enc)
  331. {
  332. struct serializer *s = mux->serializer;
  333. /* For AVC this is the parsed extra data. */
  334. uint8_t *header;
  335. size_t size;
  336. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  337. if (!obs_encoder_get_extra_data(enc, &header, &size))
  338. return 0;
  339. packet.size = obs_parse_avc_header(&packet.data, header, size);
  340. size_t box_size = packet.size + 8;
  341. write_box(s, box_size, "avcC");
  342. s_write(s, packet.data, packet.size);
  343. bfree(packet.data);
  344. return box_size;
  345. }
  346. /// ISO/IEC 14496-15 8.4.1.1 HEVCConfigurationBox
  347. static size_t mp4_write_hvcC(struct mp4_mux *mux, obs_encoder_t *enc)
  348. {
  349. struct serializer *s = mux->serializer;
  350. /* For HEVC this is the parsed extra data. */
  351. uint8_t *header;
  352. size_t size;
  353. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  354. if (!obs_encoder_get_extra_data(enc, &header, &size))
  355. return 0;
  356. packet.size = obs_parse_hevc_header(&packet.data, header, size);
  357. size_t box_size = packet.size + 8;
  358. write_box(s, box_size, "hvcC");
  359. s_write(s, packet.data, packet.size);
  360. bfree(packet.data);
  361. return box_size;
  362. }
  363. /// AV1 ISOBMFF 2.3. AV1 Codec Configuration Box
  364. static size_t mp4_write_av1C(struct mp4_mux *mux, obs_encoder_t *enc)
  365. {
  366. struct serializer *s = mux->serializer;
  367. /* For AV1 this is just the parsed extra data. */
  368. uint8_t *header;
  369. size_t size;
  370. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO, .timebase_den = 1, .keyframe = true};
  371. if (!obs_encoder_get_extra_data(enc, &header, &size))
  372. return 0;
  373. packet.size = obs_parse_av1_header(&packet.data, header, size);
  374. size_t box_size = packet.size + 8;
  375. write_box(s, box_size, "av1C");
  376. s_write(s, packet.data, packet.size);
  377. bfree(packet.data);
  378. return box_size;
  379. }
  380. /// 12.1.5 Colour information
  381. static size_t mp4_write_colr(struct mp4_mux *mux, obs_encoder_t *enc)
  382. {
  383. UNUSED_PARAMETER(enc);
  384. struct serializer *s = mux->serializer;
  385. write_box(s, 19, "colr");
  386. uint8_t full_range = 0;
  387. uint16_t pri, trc, spc;
  388. pri = trc = spc = 0;
  389. get_colour_information(enc, &pri, &trc, &spc, &full_range);
  390. s_write(s, "nclx", 4); // colour_type
  391. s_wb16(s, pri); // colour_primaries
  392. s_wb16(s, trc); // transfer_characteristics
  393. s_wb16(s, spc); // matrix_coefficiencts
  394. s_w8(s, full_range << 7); // full range flag + 7 reserved bits (0)
  395. return 19;
  396. }
  397. /// 12.1.4 Pixel Aspect Ratio
  398. static size_t mp4_write_pasp(struct mp4_mux *mux)
  399. {
  400. struct serializer *s = mux->serializer;
  401. write_box(s, 16, "pasp");
  402. s_wb32(s, 1); // hSpacing
  403. s_wb32(s, 1); // vSpacing
  404. return 16;
  405. }
  406. /// 12.1.3 Visual Sample Entry
  407. static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux, obs_encoder_t *enc)
  408. {
  409. struct serializer *s = mux->serializer;
  410. // SampleEntry Box
  411. s_w8(s, 0); // reserved
  412. s_w8(s, 0);
  413. s_w8(s, 0);
  414. s_w8(s, 0);
  415. s_w8(s, 0);
  416. s_w8(s, 0);
  417. s_wb16(s, 1); // data_reference_index
  418. // VisualSampleEntry Box
  419. s_wb16(s, 0); // pre_defined
  420. s_wb16(s, 0); // reserved
  421. s_wb32(s, 0); // pre_defined
  422. s_wb32(s, 0); // pre_defined
  423. s_wb32(s, 0); // pre_defined
  424. s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width
  425. s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
  426. s_wb32(s, 0x00480000); // horizresolution (predefined)
  427. s_wb32(s, 0x00480000); // vertresolution (predefined)
  428. s_wb32(s, 0); // reserved
  429. s_wb16(s, 1); // frame_count
  430. /* Name is fixed 32-bytes and needs to be padded to that length.
  431. * First byte is the length, rest is a string sans NULL terminator. */
  432. char compressor_name[32] = {0};
  433. const char *enc_id = obs_encoder_get_id(enc);
  434. if (enc_id) {
  435. size_t len = strlen(enc_id);
  436. if (len > 31)
  437. len = 31;
  438. compressor_name[0] = (char)len;
  439. memcpy(compressor_name + 1, enc_id, len);
  440. }
  441. s_write(s, compressor_name, sizeof(compressor_name)); // compressorname
  442. s_wb16(s, 0x0018); // depth
  443. s_wb16(s, -1); // pre_defined
  444. }
  445. /// 12.1.6 Content light level
  446. static size_t mp4_write_clli(struct mp4_mux *mux, obs_encoder_t *enc)
  447. {
  448. struct serializer *s = mux->serializer;
  449. video_t *video = obs_encoder_video(enc);
  450. const struct video_output_info *info = video_output_get_info(video);
  451. /* Only write box for HDR video */
  452. if (info->colorspace != VIDEO_CS_2100_PQ && info->colorspace != VIDEO_CS_2100_HLG)
  453. return 0;
  454. write_box(s, 12, "clli");
  455. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  456. s_wb16(s, (uint16_t)nominal_peak); // max_content_light_level
  457. s_wb16(s, (uint16_t)nominal_peak); // max_pic_average_light_level
  458. return 12;
  459. }
  460. /// 12.1.7 Mastering display colour volume
  461. static size_t mp4_write_mdcv(struct mp4_mux *mux, obs_encoder_t *enc)
  462. {
  463. struct serializer *s = mux->serializer;
  464. video_t *video = obs_encoder_video(enc);
  465. const struct video_output_info *info = video_output_get_info(video);
  466. // Only write atom for HDR video
  467. if (info->colorspace != VIDEO_CS_2100_PQ && info->colorspace != VIDEO_CS_2100_HLG)
  468. return 0;
  469. write_box(s, 32, "mdcv");
  470. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  471. uint32_t max_lum = (uint32_t)nominal_peak * 10000;
  472. /* Note that these values are hardcoded everywhere in OBS, so these are
  473. * just the same as used in our other muxers/encoders. */
  474. // 3 x display_primaries (x, y) pairs
  475. s_wb16(s, 13250);
  476. s_wb16(s, 34500);
  477. s_wb16(s, 7500);
  478. s_wb16(s, 3000);
  479. s_wb16(s, 34000);
  480. s_wb16(s, 16000);
  481. s_wb16(s, 15635); // white_point_x
  482. s_wb16(s, 16450); // white_point_y
  483. s_wb32(s, max_lum); // max_display_mastering_luminance
  484. s_wb32(s, 0); // min_display_mastering_luminance
  485. return 32;
  486. }
  487. /// ISO/IEC 14496-15 5.4.2.1 AVCSampleEntry
  488. static size_t mp4_write_avc1(struct mp4_mux *mux, obs_encoder_t *enc)
  489. {
  490. struct serializer *s = mux->serializer;
  491. int64_t start = serializer_get_pos(s);
  492. write_box(s, 0, "avc1");
  493. mp4_write_visual_sample_entry(mux, enc);
  494. // avcC
  495. mp4_write_avcC(mux, enc);
  496. // colr
  497. mp4_write_colr(mux, enc);
  498. // pasp
  499. mp4_write_pasp(mux);
  500. return write_box_size(s, start);
  501. }
  502. /// ISO/IEC 14496-15 8.4.1.1 HEVCSampleEntry
  503. static size_t mp4_write_hvc1(struct mp4_mux *mux, obs_encoder_t *enc)
  504. {
  505. struct serializer *s = mux->serializer;
  506. int64_t start = serializer_get_pos(s);
  507. write_box(s, 0, "hvc1");
  508. mp4_write_visual_sample_entry(mux, enc);
  509. // avcC
  510. mp4_write_hvcC(mux, enc);
  511. // colr
  512. mp4_write_colr(mux, enc);
  513. // clli
  514. mp4_write_clli(mux, enc);
  515. // mdcv
  516. mp4_write_mdcv(mux, enc);
  517. // pasp
  518. mp4_write_pasp(mux);
  519. return write_box_size(s, start);
  520. }
  521. /// AV1 ISOBMFF 2.2. AV1 Sample Entry
  522. static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
  523. {
  524. struct serializer *s = mux->serializer;
  525. int64_t start = serializer_get_pos(s);
  526. write_box(s, 0, "av01");
  527. mp4_write_visual_sample_entry(mux, enc);
  528. // avcC
  529. mp4_write_av1C(mux, enc);
  530. // colr
  531. mp4_write_colr(mux, enc);
  532. // clli
  533. mp4_write_clli(mux, enc);
  534. // mdcv
  535. mp4_write_mdcv(mux, enc);
  536. // pasp
  537. mp4_write_pasp(mux);
  538. return write_box_size(s, start);
  539. }
  /* Writes a descriptor header: the tag byte followed by `size` encoded in four
   * bytes of 7 bits each, most significant group first. The first three bytes
   * have the high bit set; the last one does not. */
  static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
  {
      s_w8(s, tag);

      /* Upper three 7-bit groups (bits 27..7), each flagged with 0x80. */
      for (int shift = 21; shift > 0; shift -= 7)
          s_w8(s, (uint8_t)((size >> shift) | 0x80));

      /* Lowest 7 bits, high bit clear. */
      s_w8(s, size & 0x7F);
  }
  548. /// ISO/IEC 14496-14 5.6 ESDBox
  549. static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
  550. {
  551. struct serializer *s = mux->serializer;
  552. int64_t start = serializer_get_pos(s);
  553. write_fullbox(s, 0, "esds", 0, 0);
  554. /* Encoder extradata will be used as DecoderSpecificInfo */
  555. uint8_t *extradata;
  556. size_t extradata_size;
  557. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size)) {
  558. extradata_size = 0;
  559. }
  560. /// ISO/IEC 14496-1
  561. // ES_Descriptor
  562. size_t decoder_specific_info_len = extradata_size ? extradata_size + 5 : 0;
  563. put_descr(s, 0x03, 3 + 5 + 13 + decoder_specific_info_len + 5 + 1);
  564. s_wb16(s, track->track_id);
  565. s_w8(s, 0x00); // flags
  566. // DecoderConfigDescriptor
  567. put_descr(s, 0x04, 13 + decoder_specific_info_len);
  568. s_w8(s, 0x40); // codec tag, 0x40 = AAC
  569. s_w8(s, 0x15); // stream type field (0x15 = audio stream)
  570. /* When writing the final MOOV this could theoretically be calculated
  571. * based on chunks, but it's not really all that important. */
  572. uint32_t bitrate = 0;
  573. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  574. if (settings) {
  575. int64_t enc_bitrate = obs_data_get_int(settings, "bitrate");
  576. if (enc_bitrate)
  577. bitrate = (uint32_t)(enc_bitrate * 1000);
  578. obs_data_release(settings);
  579. }
  580. s_wb24(s, 0); // bufferSizeDB (in bytes)
  581. s_wb32(s, bitrate); // maxbitrate
  582. s_wb32(s, bitrate); // avgBitrate
  583. // DecoderSpecificInfo
  584. if (extradata_size) {
  585. put_descr(s, 0x05, extradata_size);
  586. s_write(s, extradata, extradata_size);
  587. }
  588. // SLConfigDescriptor descriptor
  589. put_descr(s, 0x06, 1);
  590. s_w8(s, 0x02); // 0x2 = reserved for MP4, descriptor is empty
  591. return write_box_size(s, start);
  592. }
  593. /// 12.2.3 Audio Sample Entry
  594. static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  595. {
  596. struct serializer *s = mux->serializer;
  597. // SampleEntry Box
  598. s_w8(s, 0); // reserved
  599. s_w8(s, 0);
  600. s_w8(s, 0);
  601. s_w8(s, 0);
  602. s_w8(s, 0);
  603. s_w8(s, 0);
  604. s_wb16(s, 1); // data_reference_index
  605. // AudioSampleEntry Box
  606. if (version == 1) {
  607. s_wb16(s, 1); // entry_version
  608. s_wb16(s, 0); // reserved
  609. s_wb16(s, 0); // reserved
  610. s_wb16(s, 0); // reserved
  611. } else {
  612. s_wb32(s, 0); // reserved
  613. s_wb32(s, 0); // reserved
  614. }
  615. audio_t *audio = obs_encoder_audio(track->encoder);
  616. size_t channels = audio_output_get_channels(audio);
  617. uint32_t sample_rate = track->timescale;
  618. bool alac = track->codec == CODEC_ALAC;
  619. s_wb16(s, (uint32_t)channels); // channelcount
  620. /* OBS FLAC is currently always 16 bit, ALAC always 24, this may change
  621. * in the futrure and should be handled differently then.
  622. * That being said thoes codecs are self-describing so in most cases it
  623. * shouldn't matter either way. */
  624. s_wb16(s, alac ? 24 : 16); // samplesize
  625. s_wb16(s, 0); // pre_defined
  626. s_wb16(s, 0); // reserved
  627. s_wb32(s, sample_rate << 16); // samplerate
  628. }
  629. /// 12.2.4 Channel layout
  630. static size_t mp4_write_chnl(struct mp4_mux *mux, struct mp4_track *track)
  631. {
  632. struct serializer *s = mux->serializer;
  633. int64_t start = serializer_get_pos(s);
  634. write_fullbox(s, 0, "chnl", 0, 0);
  635. audio_t *audio = obs_encoder_audio(track->encoder);
  636. const struct audio_output_info *info = audio_output_get_info(audio);
  637. s_w8(s, 1); // stream_structure (1 = channels)
  638. /* 5.1 and 4.1 do not have a corresponding ISO layout, so we have to
  639. * write a manually created channel map for those. */
  640. uint8_t map[8] = {0};
  641. uint8_t items = 0;
  642. uint8_t defined_layout = 0;
  643. get_speaker_positions(info->speakers, map, &items, &defined_layout);
  644. if (!defined_layout) {
  645. warn("No ISO layout available for speaker layout %d, "
  646. "this may not be supported by all applications!",
  647. info->speakers);
  648. s_w8(s, 0); // definedLayout
  649. s_write(s, map, items); // uint8_t speaker_position[count]
  650. } else {
  651. s_w8(s, defined_layout); // definedLayout
  652. s_wb64(s, 0); // ommitedChannelMap
  653. }
  654. return write_box_size(s, start);
  655. }
  656. /// ISO/IEC 14496-14 5.6 MP4AudioSampleEntry
  657. static size_t mp4_write_mp4a(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  658. {
  659. struct serializer *s = mux->serializer;
  660. int64_t start = serializer_get_pos(s);
  661. write_box(s, 0, "mp4a");
  662. mp4_write_audio_sample_entry(mux, track, version);
  663. // esds
  664. mp4_write_esds(mux, track);
  665. /* Write channel layout for version 1 sample entires */
  666. if (version == 1)
  667. mp4_write_chnl(mux, track);
  668. return write_box_size(s, start);
  669. }
  670. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.2 FLAC Specific Box
  671. static size_t mp4_write_dfLa(struct mp4_mux *mux, struct mp4_track *track)
  672. {
  673. struct serializer *s = mux->serializer;
  674. int64_t start = serializer_get_pos(s);
  675. uint8_t *extradata;
  676. size_t extradata_size;
  677. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  678. return 0;
  679. write_fullbox(s, 0, "dfLa", 0, 0);
  680. /// FLACMetadataBlock
  681. // LastMetadataBlockFlag (1) | BlockType (0)
  682. s_w8(s, 1 << 7 | 0);
  683. // Length
  684. s_wb24(s, (uint32_t)extradata_size);
  685. // BlockData[Length]
  686. s_write(s, extradata, extradata_size);
  687. return write_box_size(s, start);
  688. }
  689. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.1 FLACSampleEntry
  690. static size_t mp4_write_fLaC(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  691. {
  692. struct serializer *s = mux->serializer;
  693. int64_t start = serializer_get_pos(s);
  694. write_box(s, 0, "fLaC");
  695. mp4_write_audio_sample_entry(mux, track, version);
  696. // dfLa
  697. mp4_write_dfLa(mux, track);
  698. if (version == 1)
  699. mp4_write_chnl(mux, track);
  700. return write_box_size(s, start);
  701. }
  702. /// Apple Lossless Format "Magic Cookie" Description - MP4/M4A File
  703. static size_t mp4_write_alac(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  704. {
  705. struct serializer *s = mux->serializer;
  706. int64_t start = serializer_get_pos(s);
  707. uint8_t *extradata;
  708. size_t extradata_size;
  709. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  710. return 0;
  711. write_box(s, 0, "alac");
  712. mp4_write_audio_sample_entry(mux, track, version);
  713. /* Apple Lossless Magic Cookie */
  714. s_write(s, extradata, extradata_size);
  715. if (version == 1)
  716. mp4_write_chnl(mux, track);
  717. return write_box_size(s, start);
  718. }
  719. /// ISO/IEC 23003-5 5.1 PCM configuration
  720. static size_t mp4_write_pcmc(struct mp4_mux *mux, struct mp4_track *track)
  721. {
  722. struct serializer *s = mux->serializer;
  723. int64_t start = serializer_get_pos(s);
  724. write_fullbox(s, 0, "pcmC", 0, 0);
  725. s_w8(s, 1); // endianness, 1 = little endian
  726. // bits per sample
  727. if (track->codec == CODEC_PCM_I16)
  728. s_w8(s, 16);
  729. else if (track->codec == CODEC_PCM_I24)
  730. s_w8(s, 24);
  731. else if (track->codec == CODEC_PCM_F32)
  732. s_w8(s, 32);
  733. return write_box_size(s, start);
  734. }
  735. /// ISO/IEC 23003-5 5.1 PCM configuration
  736. static size_t mp4_write_xpcm(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  737. {
  738. struct serializer *s = mux->serializer;
  739. int64_t start = serializer_get_pos(s);
  740. /* Different box types for floating point and integer PCM*/
  741. write_box(s, 0, track->codec == CODEC_PCM_F32 ? "fpcm" : "ipcm");
  742. mp4_write_audio_sample_entry(mux, track, version);
  743. /* ChannelLayout (chnl) is required for PCM */
  744. mp4_write_chnl(mux, track);
  745. // pcmc
  746. mp4_write_pcmc(mux, track);
  747. return write_box_size(s, start);
  748. }
  749. /// (QTFF/Apple) Text sample description
  750. static size_t mp4_write_text(struct mp4_mux *mux)
  751. {
  752. struct serializer *s = mux->serializer;
  753. int64_t start = serializer_get_pos(s);
  754. write_fullbox(s, 0, "text", 0, 0);
  755. s_wb32(s, 1); // number of entries
  756. /* Preset sample description as used by FFmpeg. */
  757. s_write(s, &TEXT_STUB_HEADER, sizeof(TEXT_STUB_HEADER));
  758. return write_box_size(s, start);
  759. }
  /* Reads a 32-bit little-endian unsigned integer from the four bytes at
   * `ptr` (ptr[0] is the least significant byte).
   *
   * Each byte is widened to uint32_t before shifting: a plain uint8_t is
   * promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
   * overflow that int, which is undefined behaviour. */
  static inline uint32_t rl32(const uint8_t *ptr)
  {
      return ((uint32_t)ptr[3] << 24) | ((uint32_t)ptr[2] << 16) | ((uint32_t)ptr[1] << 8) | (uint32_t)ptr[0];
  }
  /* Reads a 16-bit little-endian unsigned integer from the two bytes at
   * `ptr` (ptr[0] is the least significant byte). */
  static inline uint16_t rl16(const uint8_t *ptr)
  {
      const unsigned low = ptr[0];
      const unsigned high = ptr[1];

      return (uint16_t)((high << 8) | low);
  }
  768. /// Encapsulation of Opus in ISO Base Media File Format 4.3.2 Opus Specific Box
  769. static size_t mp4_write_dOps(struct mp4_mux *mux, struct mp4_track *track)
  770. {
  771. struct serializer *s = mux->serializer;
  772. int64_t start = serializer_get_pos(s);
  773. uint8_t *extradata;
  774. size_t extradata_size;
  775. if (!obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size))
  776. return 0;
  777. write_box(s, 0, "dOps");
  778. s_w8(s, 0); // version
  779. uint8_t channels = *(extradata + 9);
  780. uint8_t channel_map = *(extradata + 18);
  781. s_w8(s, channels); // channel count
  782. // OpusHead is little-endian, but MP4 is big-endian, so we have to swap them here
  783. s_wb16(s, rl16(extradata + 10)); // pre-skip
  784. s_wb32(s, rl32(extradata + 12)); // input sample rate
  785. s_wb16(s, rl16(extradata + 16)); // output gain
  786. s_w8(s, channel_map); // channel mapping family
  787. if (channel_map)
  788. s_write(s, extradata + 19, 2 + channels);
  789. return write_box_size(s, start);
  790. }
  791. /// Encapsulation of Opus in ISO Base Media File Format 4.3.1 Sample entry format
  792. static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track, uint8_t version)
  793. {
  794. struct serializer *s = mux->serializer;
  795. int64_t start = serializer_get_pos(s);
  796. write_box(s, 0, "Opus");
  797. mp4_write_audio_sample_entry(mux, track, version);
  798. // dOps
  799. mp4_write_dOps(mux, track);
  800. if (version == 1)
  801. mp4_write_chnl(mux, track);
  802. return write_box_size(s, start);
  803. }
  804. /// 8.5.2 Sample Description Box
  805. static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
  806. {
  807. struct serializer *s = mux->serializer;
  808. int64_t start = serializer_get_pos(s);
  809. /* Anything but mono or stereo technically requires v1,
  810. * but in practice that doesn't appear to matter. */
  811. uint8_t version = 0;
  812. if (track->type == TRACK_AUDIO) {
  813. audio_t *audio = obs_encoder_audio(track->encoder);
  814. version = audio_output_get_channels(audio) > 2 ? 1 : 0;
  815. }
  816. write_fullbox(s, 0, "stsd", version, 0);
  817. s_wb32(s, 1); // entry_count
  818. // codec specific boxes
  819. if (track->type == TRACK_VIDEO) {
  820. if (track->codec == CODEC_H264)
  821. mp4_write_avc1(mux, track->encoder);
  822. else if (track->codec == CODEC_HEVC)
  823. mp4_write_hvc1(mux, track->encoder);
  824. else if (track->codec == CODEC_AV1)
  825. mp4_write_av01(mux, track->encoder);
  826. } else if (track->type == TRACK_AUDIO) {
  827. if (track->codec == CODEC_AAC)
  828. mp4_write_mp4a(mux, track, version);
  829. else if (track->codec == CODEC_OPUS)
  830. mp4_write_Opus(mux, track, version);
  831. else if (track->codec == CODEC_FLAC)
  832. mp4_write_fLaC(mux, track, version);
  833. else if (track->codec == CODEC_ALAC)
  834. mp4_write_alac(mux, track, version);
  835. else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 ||
  836. track->codec == CODEC_PCM_F32)
  837. mp4_write_xpcm(mux, track, version);
  838. } else if (track->type == TRACK_CHAPTERS) {
  839. mp4_write_text(mux);
  840. }
  841. return write_box_size(s, start);
  842. }
  843. /// 8.6.1.2 Decoding Time to Sample Box
  844. static size_t mp4_write_stts(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  845. {
  846. struct serializer *s = mux->serializer;
  847. if (fragmented) {
  848. write_fullbox(s, 16, "stts", 0, 0);
  849. s_wb32(s, 0); // entry_count
  850. return 16;
  851. }
  852. int64_t start = serializer_get_pos(s);
  853. struct sample_delta *arr = track->deltas.array;
  854. size_t num = track->deltas.num;
  855. write_fullbox(s, 0, "stts", 0, 0);
  856. s_wb32(s, (uint32_t)num); // entry_count
  857. for (size_t idx = 0; idx < num; idx++) {
  858. struct sample_delta *smp = &arr[idx];
  859. uint64_t delta = util_mul_div64(smp->delta, track->timescale, track->timebase_den);
  860. s_wb32(s, smp->count); // sample_count
  861. s_wb32(s, (uint32_t)delta); // sample_delta
  862. }
  863. return write_box_size(s, start);
  864. }
  865. /// 8.6.2 Sync Sample Box
  866. static size_t mp4_write_stss(struct mp4_mux *mux, struct mp4_track *track)
  867. {
  868. struct serializer *s = mux->serializer;
  869. uint32_t num = (uint32_t)track->sync_samples.num;
  870. if (!num)
  871. return 0;
  872. /* 16 byte FullBox header + 4-bytes (u32) per sync sample */
  873. uint32_t size = 16 + 4 * num;
  874. write_fullbox(s, size, "stss", 0, 0);
  875. s_wb32(s, num); // entry_count
  876. for (size_t idx = 0; idx < num; idx++)
  877. s_wb32(s, track->sync_samples.array[idx]); // sample_number
  878. return size;
  879. }
  880. /// 8.6.1.3 Composition Time to Sample Box
  881. static size_t mp4_write_ctts(struct mp4_mux *mux, struct mp4_track *track)
  882. {
  883. struct serializer *s = mux->serializer;
  884. uint32_t num = (uint32_t)track->offsets.num;
  885. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  886. /* 16 byte FullBox header + 8-bytes (u32+u32/i32) per offset entry */
  887. uint32_t size = 16 + 8 * num;
  888. write_fullbox(s, size, "ctts", version, 0);
  889. s_wb32(s, num); // entry_count
  890. for (size_t idx = 0; idx < num; idx++) {
  891. int64_t offset = (int64_t)track->offsets.array[idx].offset * (int64_t)track->timescale /
  892. (int64_t)track->timebase_den;
  893. s_wb32(s, track->offsets.array[idx].count); // sample_count
  894. s_wb32(s, (uint32_t)offset); // sample_offset
  895. }
  896. return size;
  897. }
  898. /// 8.7.4 Sample To Chunk Box
  899. static size_t mp4_write_stsc(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  900. {
  901. struct serializer *s = mux->serializer;
  902. if (fragmented) {
  903. write_fullbox(s, 16, "stsc", 0, 0);
  904. s_wb32(s, 0); // entry_count
  905. return 16;
  906. }
  907. struct chunk *arr = track->chunks.array;
  908. size_t arr_num = track->chunks.num;
  909. /* Compress into array with counter for repeating chunk sizes */
  910. DARRAY(struct chunk_run {
  911. uint32_t first;
  912. uint32_t samples;
  913. }) chunk_runs;
  914. da_init(chunk_runs);
  915. for (size_t idx = 0; idx < arr_num; idx++) {
  916. struct chunk *chk = &arr[idx];
  917. if (!chunk_runs.num || chunk_runs.array[chunk_runs.num - 1].samples != chk->samples) {
  918. struct chunk_run *cr = da_push_back_new(chunk_runs);
  919. cr->samples = chk->samples;
  920. cr->first = (uint32_t)idx + 1; // ISO-BMFF is 1-indexed
  921. }
  922. }
  923. uint32_t num = (uint32_t)chunk_runs.num;
  924. /* 16 byte FullBox header + 12-bytes (u32+u32+u32) per chunk run */
  925. uint32_t size = 16 + 12 * num;
  926. write_fullbox(s, size, "stsc", 0, 0);
  927. s_wb32(s, num); // entry_count
  928. for (size_t idx = 0; idx < num; idx++) {
  929. struct chunk_run *cr = &chunk_runs.array[idx];
  930. s_wb32(s, cr->first); // first_chunk
  931. s_wb32(s, cr->samples); // samples_per_chunk
  932. s_wb32(s, 1); // sample_description_index
  933. }
  934. da_free(chunk_runs);
  935. return size;
  936. }
  937. /// 8.7.3 Sample Size Boxes
  938. static size_t mp4_write_stsz(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  939. {
  940. struct serializer *s = mux->serializer;
  941. if (fragmented) {
  942. write_fullbox(s, 20, "stsz", 0, 0);
  943. s_wb32(s, 0); // sample_size
  944. s_wb32(s, 0); // sample_count
  945. return 20;
  946. }
  947. int64_t start = serializer_get_pos(s);
  948. /* This should only ever happen when recording > 24 hours of
  949. * 48 kHz PCM audio or 828 days of 60 FPS video. */
  950. if (track->samples > UINT32_MAX) {
  951. warn("Track %u has too many samples, its duration may not be "
  952. "read correctly. Remuxing the file to another format such "
  953. "as MKV may be required.",
  954. track->track_id);
  955. }
  956. write_fullbox(s, 0, "stsz", 0, 0);
  957. if (track->sample_size) {
  958. /* Fixed size samples mean we don't need an array */
  959. s_wb32(s, track->sample_size); // sample_size
  960. s_wb32(s, (uint32_t)track->samples); // sample_count
  961. } else {
  962. s_wb32(s, 0); // sample_size
  963. s_wb32(s, (uint32_t)track->sample_sizes.num); // sample_count
  964. for (size_t idx = 0; idx < track->sample_sizes.num; idx++) {
  965. s_wb32(s, track->sample_sizes.array[idx]); // entry_size
  966. }
  967. }
  968. return write_box_size(s, start);
  969. }
  970. /// 8.7.5 Chunk Offset Box
  971. static size_t mp4_write_stco(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  972. {
  973. struct serializer *s = mux->serializer;
  974. if (fragmented) {
  975. write_fullbox(s, 16, "stco", 0, 0);
  976. s_wb32(s, 0); // entry_count
  977. return 16;
  978. }
  979. struct chunk *arr = track->chunks.array;
  980. uint32_t num = (uint32_t)track->chunks.num;
  981. uint64_t last_off = arr[num - 1].offset;
  982. uint32_t size;
  983. bool co64 = last_off > UINT32_MAX;
  984. /* When using 64-bit offsets we write 8-bytes (u64) per chunk,
  985. * otherwise 4-bytes (u32). */
  986. if (co64) {
  987. size = 16 + 8 * num;
  988. write_fullbox(s, size, "co64", 0, 0);
  989. } else {
  990. size = 16 + 4 * num;
  991. write_fullbox(s, size, "stco", 0, 0);
  992. }
  993. s_wb32(s, num); // entry_count
  994. for (size_t idx = 0; idx < num; idx++) {
  995. if (co64)
  996. s_wb64(s, arr[idx].offset); // chunk_offset
  997. else
  998. s_wb32(s, (uint32_t)arr[idx].offset); // chunk_offset
  999. }
  1000. return size;
  1001. }
  1002. /// 8.9.3 Sample Group Description Box
  1003. static size_t mp4_write_sgpd_aac(struct mp4_mux *mux)
  1004. {
  1005. struct serializer *s = mux->serializer;
  1006. int64_t start = serializer_get_pos(s);
  1007. write_fullbox(s, 0, "sgpd", 1, 0);
  1008. s_write(s, "roll", 4); // grouping_tpye
  1009. s_wb32(s, 2); // default_length (i16)
  1010. s_wb32(s, 1); // entry_count
  1011. // AudioRollRecoveryEntry
  1012. s_wb16(s, -1); // roll_distance
  1013. return write_box_size(s, start);
  1014. }
  1015. /// 8.9.2 Sample to Group Box
  1016. static size_t mp4_write_sbgp_aac(struct mp4_mux *mux, struct mp4_track *track)
  1017. {
  1018. struct serializer *s = mux->serializer;
  1019. int64_t start = serializer_get_pos(s);
  1020. write_fullbox(s, 0, "sbgp", 0, 0);
  1021. /// 10.1 AudioRollRecoveryEntry
  1022. s_write(s, "roll", 4); // grouping_tpye
  1023. s_wb32(s, 1); // entry_count
  1024. s_wb32(s, (uint32_t)track->samples); // sample_count
  1025. s_wb32(s, 1); // group_description_index
  1026. return write_box_size(s, start);
  1027. }
  1028. static size_t mp4_write_sbgp_sbgp_opus(struct mp4_mux *mux, struct mp4_track *track)
  1029. {
  1030. struct serializer *s = mux->serializer;
  1031. int64_t start = serializer_get_pos(s);
  1032. /// 8.9.3 Sample Group Description Box
  1033. write_fullbox(s, 0, "sgpd", 1, 0);
  1034. s_write(s, "roll", 4); // grouping_tpye
  1035. s_wb32(s, 2); // default_length (i16)
  1036. /* Opus requires 80 ms of preroll, which at 48 kHz is 3840 PCM samples */
  1037. const int64_t opus_preroll = 3840;
  1038. /* Compute the preroll samples (should be 4, each being 20 ms) */
  1039. uint16_t preroll_count = 0;
  1040. int64_t preroll_remaining = opus_preroll;
  1041. for (size_t i = 0; i < track->deltas.num && preroll_remaining > 0; i++) {
  1042. for (uint32_t j = 0; j < track->deltas.array[i].count && preroll_remaining > 0; j++) {
  1043. preroll_remaining -= track->deltas.array[i].delta;
  1044. preroll_count++;
  1045. }
  1046. }
  1047. s_wb32(s, 1); // entry_count
  1048. /// 10.1 AudioRollRecoveryEntry
  1049. s_wb16(s, -preroll_count); // roll_distance
  1050. size_t size_sgpd = write_box_size(s, start);
  1051. /* --------------- */
  1052. /// 8.9.2 Sample to Group Box
  1053. start = serializer_get_pos(s);
  1054. write_fullbox(s, 0, "sbgp", 0, 0);
  1055. s_write(s, "roll", 4); // grouping_tpye
  1056. s_wb32(s, 2); // entry_count
  1057. // entry 0
  1058. s_wb32(s, preroll_count); // sample_count
  1059. s_wb32(s, 0); // group_description_index
  1060. // entry 1
  1061. s_wb32(s, (uint32_t)track->samples - preroll_count); // sample_count
  1062. s_wb32(s, 1); // group_description_index
  1063. return size_sgpd + write_box_size(s, start);
  1064. }
  1065. /// 8.5.1 Sample Table Box
  1066. static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1067. {
  1068. struct serializer *s = mux->serializer;
  1069. int64_t start = serializer_get_pos(s);
  1070. write_box(s, 0, "stbl");
  1071. // stsd
  1072. mp4_write_stsd(mux, track);
  1073. // stts
  1074. mp4_write_stts(mux, track, fragmented);
  1075. // stss (non-fragmented only)
  1076. if (track->type == TRACK_VIDEO && !fragmented)
  1077. mp4_write_stss(mux, track);
  1078. // ctts (non-fragmented only)
  1079. if (track->needs_ctts && !fragmented)
  1080. mp4_write_ctts(mux, track);
  1081. // stsc
  1082. mp4_write_stsc(mux, track, fragmented);
  1083. // stsz
  1084. mp4_write_stsz(mux, track, fragmented);
  1085. // stco
  1086. mp4_write_stco(mux, track, fragmented);
  1087. if (!fragmented) {
  1088. /* AAC and Opus require a pre-roll to get correct decoder
  1089. * output, sgpd and sbgp are used to create a "roll" group. */
  1090. if (track->codec == CODEC_AAC) {
  1091. // sgpd
  1092. mp4_write_sgpd_aac(mux);
  1093. // sbgp
  1094. mp4_write_sbgp_aac(mux, track);
  1095. } else if (track->codec == CODEC_OPUS) {
  1096. // sgpd + sbgp
  1097. mp4_write_sbgp_sbgp_opus(mux, track);
  1098. }
  1099. }
  1100. return write_box_size(s, start);
  1101. }
  1102. /// 8.7.2.2 DataEntryUrlBox
  1103. static size_t mp4_write_url(struct mp4_mux *mux)
  1104. {
  1105. struct serializer *s = mux->serializer;
  1106. int64_t start = serializer_get_pos(s);
  1107. write_fullbox(s, 0, "url ", 0, 1);
  1108. /* empty, flag 1 means data is in this file */
  1109. return write_box_size(s, start);
  1110. }
  1111. /// 8.7.2 Data Reference Box
  1112. static size_t mp4_write_dref(struct mp4_mux *mux)
  1113. {
  1114. struct serializer *s = mux->serializer;
  1115. int64_t start = serializer_get_pos(s);
  1116. write_fullbox(s, 0, "dref ", 0, 0);
  1117. s_wb32(s, 1); // entry_count
  1118. mp4_write_url(mux);
  1119. return write_box_size(s, start);
  1120. }
  1121. /// 8.7.1 Data Information Box
  1122. static size_t mp4_write_dinf(struct mp4_mux *mux)
  1123. {
  1124. struct serializer *s = mux->serializer;
  1125. int64_t start = serializer_get_pos(s);
  1126. write_box(s, 0, "dinf");
  1127. mp4_write_dref(mux);
  1128. return write_box_size(s, start);
  1129. }
  1130. /// 8.4.4 Media Information Box
  1131. static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1132. {
  1133. struct serializer *s = mux->serializer;
  1134. int64_t start = serializer_get_pos(s);
  1135. write_box(s, 0, "minf");
  1136. // vmhd/smhd/gmhd
  1137. if (track->type == TRACK_VIDEO)
  1138. mp4_write_vmhd(mux);
  1139. else if (track->type == TRACK_CHAPTERS)
  1140. mp4_write_gmhd(mux);
  1141. else
  1142. mp4_write_smhd(mux);
  1143. // dinf, unnecessary but mandatory
  1144. mp4_write_dinf(mux);
  1145. // stbl
  1146. mp4_write_stbl(mux, track, fragmented);
  1147. return write_box_size(s, start);
  1148. }
  1149. /// 8.4.1 Media Box
  1150. static size_t mp4_write_mdia(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1151. {
  1152. struct serializer *s = mux->serializer;
  1153. int64_t start = serializer_get_pos(s);
  1154. write_box(s, 0, "mdia");
  1155. // mdhd
  1156. mp4_write_mdhd(mux, track);
  1157. // hdlr
  1158. mp4_write_hdlr(mux, track);
  1159. // minf
  1160. mp4_write_minf(mux, track, fragmented);
  1161. return write_box_size(s, start);
  1162. }
  1163. /// (QTFF/Apple) User data atom
  1164. static size_t mp4_write_udta_atom(struct mp4_mux *mux, const char tag[4], const char *val)
  1165. {
  1166. struct serializer *s = mux->serializer;
  1167. int64_t start = serializer_get_pos(s);
  1168. write_box(s, 0, tag);
  1169. s_write(s, val, strlen(val));
  1170. return write_box_size(s, start);
  1171. }
  1172. /// 8.10.1 User Data Box
  1173. static size_t mp4_write_track_udta(struct mp4_mux *mux, struct mp4_track *track)
  1174. {
  1175. struct serializer *s = mux->serializer;
  1176. int64_t start = serializer_get_pos(s);
  1177. write_box(s, 0, "udta");
  1178. /* Our udta box contains QuickTime format user data atoms, which are
  1179. * simple key-value pairs. Some are prefixed with 0xa9. */
  1180. const char *name = obs_encoder_get_name(track->encoder);
  1181. if (name)
  1182. mp4_write_udta_atom(mux, "name", name);
  1183. if (mux->flags & MP4_WRITE_ENCODER_INFO) {
  1184. const char *id = obs_encoder_get_id(track->encoder);
  1185. if (name)
  1186. mp4_write_udta_atom(mux, "\251enc", id);
  1187. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  1188. if (settings) {
  1189. const char *json = obs_data_get_json_with_defaults(settings);
  1190. mp4_write_udta_atom(mux, "json", json);
  1191. obs_data_release(settings);
  1192. }
  1193. }
  1194. return write_box_size(s, start);
  1195. }
  1196. /// 8.6.6 Edit List Box
  1197. static size_t mp4_write_elst(struct mp4_mux *mux, struct mp4_track *track)
  1198. {
  1199. struct serializer *s = mux->serializer;
  1200. int64_t start = serializer_get_pos(s);
  1201. write_fullbox(s, 0, "elst", 0, 0);
  1202. s_wb32(s, 1); // entry count
  1203. uint64_t duration = util_mul_div64(track->duration, 1000, track->timebase_den);
  1204. uint64_t delay = 0;
  1205. if (track->type == TRACK_VIDEO && !(mux->flags & MP4_USE_NEGATIVE_CTS)) {
  1206. /* Compensate for frame-reordering delay (for example, when
  1207. * using b-frames). */
  1208. int64_t dts_offset = 0;
  1209. if (track->offsets.num) {
  1210. struct sample_offset sample = track->offsets.array[0];
  1211. dts_offset = sample.offset;
  1212. } else if (track->packets.size) {
  1213. /* If no offset data exists yet (i.e. when writing the
  1214. * incomplete moov in a fragmented file) use the raw
  1215. * data from the current queued packets instead. */
  1216. struct encoder_packet pkt;
  1217. deque_peek_front(&track->packets, &pkt, sizeof(pkt));
  1218. dts_offset = pkt.pts - pkt.dts;
  1219. }
  1220. delay = util_mul_div64(dts_offset, track->timescale, track->timebase_den);
  1221. } else if (track->type == TRACK_AUDIO && track->first_pts < 0) {
  1222. delay = util_mul_div64(llabs(track->first_pts), track->timescale, track->timebase_den);
  1223. /* Subtract priming delay from total duration */
  1224. duration -= util_mul_div64(delay, 1000, track->timescale);
  1225. }
  1226. s_wb32(s, (uint32_t)duration); // segment_duration (movie timescale)
  1227. s_wb32(s, (uint32_t)delay); // media_time (track timescale)
  1228. s_wb32(s, 1 << 16); // media_rate
  1229. return write_box_size(s, start);
  1230. }
  1231. /// 8.6.5 Edit Box
  1232. static size_t mp4_write_edts(struct mp4_mux *mux, struct mp4_track *track)
  1233. {
  1234. struct serializer *s = mux->serializer;
  1235. int64_t start = serializer_get_pos(s);
  1236. write_box(s, 0, "edts");
  1237. mp4_write_elst(mux, track);
  1238. return write_box_size(s, start);
  1239. }
  1240. /// 8.3.3.2 TrackReferenceTypeBox
  1241. static size_t mp4_write_chap(struct mp4_mux *mux)
  1242. {
  1243. struct serializer *s = mux->serializer;
  1244. int64_t start = serializer_get_pos(s);
  1245. /// QTFF/Apple chapter track reference
  1246. write_box(s, 0, "chap");
  1247. s_wb32(s, mux->chapter_track->track_id);
  1248. return write_box_size(s, start);
  1249. }
  1250. /// 8.3.3 Track Reference Box
  1251. static size_t mp4_write_tref(struct mp4_mux *mux)
  1252. {
  1253. struct serializer *s = mux->serializer;
  1254. int64_t start = serializer_get_pos(s);
  1255. write_box(s, 0, "tref");
  1256. mp4_write_chap(mux);
  1257. return write_box_size(s, start);
  1258. }
  1259. /// 8.3.1 Track Box
  1260. static size_t mp4_write_trak(struct mp4_mux *mux, struct mp4_track *track, bool fragmented)
  1261. {
  1262. struct serializer *s = mux->serializer;
  1263. int64_t start = serializer_get_pos(s);
  1264. /* If track has no data, omit it from full moov. */
  1265. if (!fragmented && !track->chunks.num)
  1266. return 0;
  1267. write_box(s, 0, "trak");
  1268. // tkhd
  1269. mp4_write_tkhd(mux, track);
  1270. // edts
  1271. mp4_write_edts(mux, track);
  1272. // tref
  1273. if (mux->chapter_track && track->type != TRACK_CHAPTERS)
  1274. mp4_write_tref(mux);
  1275. // mdia
  1276. mp4_write_mdia(mux, track, fragmented);
  1277. // udta (audio track name mainly)
  1278. mp4_write_track_udta(mux, track);
  1279. return write_box_size(s, start);
  1280. }
  1281. /// 8.8.3 Track Extends Box
  1282. static size_t mp4_write_trex(struct mp4_mux *mux, uint32_t track_id)
  1283. {
  1284. struct serializer *s = mux->serializer;
  1285. write_fullbox(s, 32, "trex", 0, 0);
  1286. s_wb32(s, track_id); // track_ID
  1287. s_wb32(s, 1); // default_sample_description_index
  1288. s_wb32(s, 0); // default_sample_duration
  1289. s_wb32(s, 0); // default_sample_size
  1290. s_wb32(s, 0); // default_sample_flags
  1291. return 32;
  1292. }
  1293. /// 8.8.1 Movie Extends Box
  1294. static size_t mp4_write_mvex(struct mp4_mux *mux)
  1295. {
  1296. struct serializer *s = mux->serializer;
  1297. int64_t start = serializer_get_pos(s);
  1298. write_box(s, 0, "mvex");
  1299. for (size_t track_id = 0; track_id < mux->tracks.num; track_id++)
  1300. mp4_write_trex(mux, (uint32_t)(track_id + 1));
  1301. return write_box_size(s, start);
  1302. }
  1303. /// (QTFF/Apple) Undocumented QuickTime/iTunes metadata handler
  1304. static size_t mp4_write_itunes_hdlr(struct mp4_mux *mux)
  1305. {
  1306. struct serializer *s = mux->serializer;
  1307. write_fullbox(s, 33, "hdlr", 0, 0);
  1308. s_wb32(s, 0); // pre_defined
  1309. s_write(s, "mdir", 4); // handler_type
  1310. // reserved
  1311. s_write(s, "appl", 4);
  1312. s_wb32(s, 0);
  1313. s_wb32(s, 0);
  1314. s_w8(s, 0); // name (NULL)
  1315. return 33;
  1316. }
  1317. /// (QTFF/Apple) Data atom
  1318. static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
  1319. {
  1320. struct serializer *s = mux->serializer;
  1321. size_t len = strlen(data);
  1322. uint32_t size = 16 + (uint32_t)len;
  1323. write_box(s, size, "data");
  1324. s_wb32(s, 1); // type, 1 = utf-8 string
  1325. s_wb32(s, 0); // locale, 0 = default
  1326. s_write(s, data, len);
  1327. return size;
  1328. }
  1329. /// (QTFF/Apple) Metadata item atom
  1330. static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4], const char *value)
  1331. {
  1332. struct serializer *s = mux->serializer;
  1333. int64_t start = serializer_get_pos(s);
  1334. write_box(s, 0, name);
  1335. mp4_write_data_atom(mux, value);
  1336. return write_box_size(s, start);
  1337. }
  1338. /// (QTFF/Apple) Metadata item list atom
  1339. static size_t mp4_write_ilst(struct mp4_mux *mux)
  1340. {
  1341. struct serializer *s = mux->serializer;
  1342. struct dstr value = {0};
  1343. int64_t start = serializer_get_pos(s);
  1344. write_box(s, 0, "ilst");
  1345. /* Encoder name */
  1346. dstr_cat(&value, "OBS Studio (");
  1347. dstr_cat(&value, obs_get_version_string());
  1348. dstr_cat(&value, ")");
  1349. /* Some QuickTime keys are prefixed with 0xa9 */
  1350. mp4_write_ilst_item_atom(mux, "\251too", value.array);
  1351. dstr_free(&value);
  1352. return write_box_size(s, start);
  1353. }
  1354. /// (QTFF/Apple) Key value metadata handler
  1355. static size_t mp4_write_mdta_hdlr(struct mp4_mux *mux)
  1356. {
  1357. struct serializer *s = mux->serializer;
  1358. write_fullbox(s, 33, "hdlr", 0, 0);
  1359. s_wb32(s, 0); // pre_defined
  1360. s_write(s, "mdta", 4); // handler_type
  1361. // reserved
  1362. s_wb32(s, 0);
  1363. s_wb32(s, 0);
  1364. s_wb32(s, 0);
  1365. s_w8(s, 0); // name (NULL)
  1366. return 33;
  1367. }
  1368. /// (QTFF/Apple) Metadata item keys atom
  1369. static size_t mp4_write_mdta_keys(struct mp4_mux *mux, obs_data_t *meta)
  1370. {
  1371. struct serializer *s = mux->serializer;
  1372. int64_t start = serializer_get_pos(s);
  1373. write_fullbox(s, 0, "keys", 0, 0);
  1374. uint32_t count = 0;
  1375. int64_t count_pos = serializer_get_pos(s);
  1376. s_wb32(s, count); // count
  1377. obs_data_item_t *item = obs_data_first(meta);
  1378. for (; item != NULL; obs_data_item_next(&item)) {
  1379. const char *name = obs_data_item_get_name(item);
  1380. size_t len = strlen(name);
  1381. /* name is key type, can be udta or mdta */
  1382. write_box(s, len + 8, "mdta");
  1383. s_write(s, name, len); // key name
  1384. count++;
  1385. }
  1386. int64_t end = serializer_get_pos(s);
  1387. /* Overwrite count with correct value */
  1388. serializer_seek(s, count_pos, SERIALIZE_SEEK_START);
  1389. s_wb32(s, count);
  1390. serializer_seek(s, end, SERIALIZE_SEEK_START);
  1391. return write_box_size(s, start);
  1392. }
  1393. /// (QTFF/Apple) Metadata item atom, but name is an index instead
  1394. static inline void write_key_entry(struct mp4_mux *mux, obs_data_item_t *item, uint32_t idx)
  1395. {
  1396. struct serializer *s = mux->serializer;
  1397. int64_t start = serializer_get_pos(s);
  1398. s_wb32(s, 0); // size
  1399. s_wb32(s, idx); // index
  1400. mp4_write_data_atom(mux, obs_data_item_get_string(item));
  1401. write_box_size(s, start);
  1402. }
  1403. /// (QTFF/Apple) Metadata item list atom
  1404. static size_t mp4_write_mdta_ilst(struct mp4_mux *mux, obs_data_t *meta)
  1405. {
  1406. struct serializer *s = mux->serializer;
  1407. int64_t start = serializer_get_pos(s);
  1408. write_box(s, 0, "ilst");
  1409. /* indices start with 1 */
  1410. uint32_t key_idx = 1;
  1411. obs_data_item_t *item = obs_data_first(meta);
  1412. for (; item != NULL; obs_data_item_next(&item)) {
  1413. write_key_entry(mux, item, key_idx);
  1414. key_idx++;
  1415. }
  1416. return write_box_size(s, start);
  1417. }
  1418. static void mp4_write_mdta_kv(struct mp4_mux *mux)
  1419. {
  1420. struct dstr value = {0};
  1421. obs_data_t *meta = obs_data_create();
  1422. dstr_cat(&value, "OBS Studio (");
  1423. dstr_cat(&value, obs_get_version_string());
  1424. dstr_cat(&value, ")");
  1425. // ToDo figure out what else we could put in here for fun and profit :)
  1426. obs_data_set_string(meta, "tool", value.array);
  1427. /* Write keys */
  1428. mp4_write_mdta_keys(mux, meta);
  1429. /* Write values */
  1430. mp4_write_mdta_ilst(mux, meta);
  1431. obs_data_release(meta);
  1432. dstr_free(&value);
  1433. }
  1434. /// 8.11.1 The Meta box
  1435. static size_t mp4_write_meta(struct mp4_mux *mux)
  1436. {
  1437. struct serializer *s = mux->serializer;
  1438. int64_t start = serializer_get_pos(s);
  1439. write_fullbox(s, 0, "meta", 0, 0);
  1440. if (mux->flags & MP4_USE_MDTA_KEY_VALUE) {
  1441. mp4_write_mdta_hdlr(mux);
  1442. mp4_write_mdta_kv(mux);
  1443. } else {
  1444. mp4_write_itunes_hdlr(mux);
  1445. mp4_write_ilst(mux);
  1446. }
  1447. return write_box_size(s, start);
  1448. }
  1449. /// 8.10.1 User Data Box
  1450. static size_t mp4_write_udta(struct mp4_mux *mux)
  1451. {
  1452. struct serializer *s = mux->serializer;
  1453. int64_t start = serializer_get_pos(s);
  1454. write_box(s, 0, "udta");
  1455. /* Normally metadata would be directly in the moov, but since this is
  1456. * Apple/QTFF format metadata it is inside udta. */
  1457. // meta
  1458. mp4_write_meta(mux);
  1459. return write_box_size(s, start);
  1460. }
  1461. /// Movie Box (8.2.1)
  1462. static size_t mp4_write_moov(struct mp4_mux *mux, bool fragmented)
  1463. {
  1464. struct serializer *s = mux->serializer;
  1465. int64_t start = serializer_get_pos(s);
  1466. write_box(s, 0, "moov");
  1467. mp4_write_mvhd(mux);
  1468. // trak(s)
  1469. for (size_t i = 0; i < mux->tracks.num; i++) {
  1470. struct mp4_track *track = &mux->tracks.array[i];
  1471. mp4_write_trak(mux, track, fragmented);
  1472. }
  1473. if (!fragmented && mux->chapter_track)
  1474. mp4_write_trak(mux, mux->chapter_track, false);
  1475. // mvex
  1476. if (fragmented)
  1477. mp4_write_mvex(mux);
  1478. // udta (metadata)
  1479. mp4_write_udta(mux);
  1480. return write_box_size(s, start);
  1481. }
  1482. /* ========================================================================== */
  1483. /* moof (fragment header) stuff */
  1484. /// 8.8.5 Movie Fragment Header Box
  1485. static size_t mp4_write_mfhd(struct mp4_mux *mux)
  1486. {
  1487. struct serializer *s = mux->serializer;
  1488. write_fullbox(s, 16, "mfhd", 0, 0);
  1489. s_wb32(s, mux->fragments_written); // sequence_number
  1490. return 16;
  1491. }
  1492. /// 8.8.7 Track Fragment Header Box
  1493. static size_t mp4_write_tfhd(struct mp4_mux *mux, struct mp4_track *track, size_t moof_start)
  1494. {
  1495. struct serializer *s = mux->serializer;
  1496. int64_t start = serializer_get_pos(s);
  1497. uint32_t flags = BASE_DATA_OFFSET_PRESENT | DEFAULT_SAMPLE_FLAGS_PRESENT;
  1498. /* Add default size/duration if all samples match. */
  1499. bool durations_match = true;
  1500. bool sizes_match = true;
  1501. uint32_t duration;
  1502. uint32_t sample_size;
  1503. if (track->sample_size) {
  1504. duration = 1;
  1505. sample_size = track->sample_size;
  1506. } else {
  1507. duration = track->fragment_samples.array[0].duration;
  1508. sample_size = track->fragment_samples.array[0].size;
  1509. for (size_t idx = 1; idx < track->fragment_samples.num; idx++) {
  1510. uint32_t frag_duration = track->fragment_samples.array[idx].duration;
  1511. uint32_t frag_size = track->fragment_samples.array[idx].size;
  1512. durations_match = frag_duration == duration;
  1513. sizes_match = frag_size == sample_size;
  1514. }
  1515. }
  1516. if (durations_match)
  1517. flags |= DEFAULT_SAMPLE_DURATION_PRESENT;
  1518. if (sizes_match)
  1519. flags |= DEFAULT_SAMPLE_SIZE_PRESENT;
  1520. write_fullbox(s, 0, "tfhd", 0, flags);
  1521. s_wb32(s, track->track_id); // track_ID
  1522. s_wb64(s, moof_start); // base_data_offset
  1523. // default_sample_duration
  1524. if (durations_match) {
  1525. if (track->type == TRACK_VIDEO) {
  1526. /* Convert duration to track timescale */
  1527. duration = (uint32_t)util_mul_div64(duration, track->timescale, track->timebase_den);
  1528. }
  1529. s_wb32(s, duration);
  1530. }
  1531. // default_sample_size
  1532. if (sizes_match)
  1533. s_wb32(s, sample_size);
  1534. // default_sample_flags
  1535. if (track->type == TRACK_VIDEO) {
  1536. s_wb32(s, SAMPLE_FLAG_DEPENDS_YES | SAMPLE_FLAG_IS_NON_SYNC);
  1537. } else {
  1538. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO);
  1539. }
  1540. return write_box_size(s, start);
  1541. }
  1542. /// 8.8.12 Track fragment decode time
  1543. static size_t mp4_write_tfdt(struct mp4_mux *mux, struct mp4_track *track)
  1544. {
  1545. struct serializer *s = mux->serializer;
  1546. write_fullbox(s, 20, "tfdt", 1, 0);
  1547. /* Subtract samples that are not written yet */
  1548. uint64_t duration_written = track->duration;
  1549. for (size_t i = 0; i < track->fragment_samples.num; i++)
  1550. duration_written -= track->fragment_samples.array[i].duration;
  1551. if (track->type == TRACK_VIDEO) {
  1552. /* Convert to track timescale */
  1553. duration_written = util_mul_div64(duration_written, track->timescale, track->timebase_den);
  1554. }
  1555. s_wb64(s, duration_written); // baseMediaDecodeTime
  1556. return 20;
  1557. }
  1558. /// 8.8.8 Track Fragment Run Box
  1559. static size_t mp4_write_trun(struct mp4_mux *mux, struct mp4_track *track, uint32_t moof_size,
  1560. uint64_t *samples_mdat_offset)
  1561. {
  1562. struct serializer *s = mux->serializer;
  1563. int64_t start = serializer_get_pos(s);
  1564. uint32_t flags = DATA_OFFSET_PRESENT;
  1565. if (!track->sample_size)
  1566. flags |= SAMPLE_SIZE_PRESENT;
  1567. if (track->type == TRACK_VIDEO) {
  1568. flags |= FIRST_SAMPLE_FLAGS_PRESENT;
  1569. flags |= SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT;
  1570. }
  1571. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  1572. write_fullbox(s, 0, "trun", version, flags);
  1573. /* moof_size + 8 bytes for mdat header + offset into mdat box data */
  1574. size_t data_offset = moof_size + 8 + *samples_mdat_offset;
  1575. size_t sample_count = track->fragment_samples.num;
  1576. if (track->sample_size) {
  1577. /* Update count based on fixed size */
  1578. size_t total_size = 0;
  1579. for (size_t i = 0; i < sample_count; i++)
  1580. total_size += track->fragment_samples.array[i].size;
  1581. *samples_mdat_offset += total_size;
  1582. sample_count = total_size / track->sample_size;
  1583. }
  1584. s_wb32(s, (uint32_t)sample_count); // sample_count
  1585. s_wb32(s, (uint32_t)data_offset); // data_offset
  1586. /* If we have a fixed sample size (PCM audio) we only need to write
  1587. * the sample count and offset. */
  1588. if (track->sample_size)
  1589. return write_box_size(s, start);
  1590. if (track->type == TRACK_VIDEO)
  1591. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO); // first_sample_flags
  1592. for (size_t idx = 0; idx < sample_count; idx++) {
  1593. struct fragment_sample *smp = &track->fragment_samples.array[idx];
  1594. s_wb32(s, smp->size); // sample_size
  1595. if (track->type == TRACK_VIDEO) {
  1596. // sample_composition_time_offset
  1597. int64_t offset =
  1598. (int64_t)smp->offset * (int64_t)track->timescale / (int64_t)track->timebase_den;
  1599. s_wb32(s, (uint32_t)offset);
  1600. }
  1601. *samples_mdat_offset += smp->size;
  1602. }
  1603. return write_box_size(s, start);
  1604. }
  1605. /// 8.8.6 Track Fragment Box
  1606. static size_t mp4_write_traf(struct mp4_mux *mux, struct mp4_track *track, int64_t moof_start, uint32_t moof_size,
  1607. uint64_t *samples_mdat_offset)
  1608. {
  1609. struct serializer *s = mux->serializer;
  1610. int64_t start = serializer_get_pos(s);
  1611. write_box(s, 0, "traf");
  1612. // tfhd
  1613. mp4_write_tfhd(mux, track, moof_start);
  1614. // tfdt
  1615. mp4_write_tfdt(mux, track);
  1616. // trun
  1617. mp4_write_trun(mux, track, moof_size, samples_mdat_offset);
  1618. return write_box_size(s, start);
  1619. }
  1620. /// 8.8.4 Movie Fragment Box
  1621. static size_t mp4_write_moof(struct mp4_mux *mux, uint32_t moof_size, int64_t moof_start)
  1622. {
  1623. struct serializer *s = mux->serializer;
  1624. int64_t start = serializer_get_pos(s);
  1625. write_box(s, 0, "moof");
  1626. mp4_write_mfhd(mux);
  1627. /* Track current mdat offset across tracks */
  1628. uint64_t samples_mdat_offset = 0;
  1629. // traf boxes
  1630. for (size_t i = 0; i < mux->tracks.num; i++) {
  1631. struct mp4_track *track = &mux->tracks.array[i];
  1632. /* Skip tracks that do not have any samples */
  1633. if (!track->fragment_samples.num)
  1634. continue;
  1635. mp4_write_traf(mux, track, moof_start, moof_size, &samples_mdat_offset);
  1636. }
  1637. return write_box_size(s, start);
  1638. }
  1639. /* ========================================================================== */
  1640. /* Chapter packets */
  1641. static void mp4_create_chapter_pkt(struct encoder_packet *pkt, int64_t dts_usec, const char *name)
  1642. {
  1643. int64_t dts = dts_usec / 1000; // chapter track uses a ms timebase
  1644. pkt->pts = dts;
  1645. pkt->dts = dts;
  1646. pkt->dts_usec = dts_usec;
  1647. pkt->timebase_num = 1;
  1648. pkt->timebase_den = 1000;
  1649. /* Serialize with data with ref count */
  1650. struct serializer s;
  1651. struct array_output_data ao;
  1652. array_output_serializer_init(&s, &ao);
  1653. size_t len = min(strlen(name), UINT16_MAX);
  1654. long refs = 1;
  1655. /* encoder_packet refs */
  1656. s_write(&s, &refs, sizeof(refs));
  1657. /* actual packet data */
  1658. s_wb16(&s, (uint16_t)len);
  1659. s_write(&s, name, len);
  1660. s_write(&s, &CHAPTER_PKT_FOOTER, sizeof(CHAPTER_PKT_FOOTER));
  1661. pkt->data = (void *)(ao.bytes.array + sizeof(long));
  1662. pkt->size = ao.bytes.num - sizeof(long);
  1663. }
  1664. /* ========================================================================== */
  1665. /* Encoder packet processing and fragment writer */
  1666. static inline int64_t packet_pts_usec(struct encoder_packet *packet)
  1667. {
  1668. return packet->pts * 1000000 / packet->timebase_den;
  1669. }
  1670. static inline struct encoder_packet *get_pkt_at(struct deque *dq, size_t idx)
  1671. {
  1672. return deque_data(dq, idx * sizeof(struct encoder_packet));
  1673. }
  1674. static inline uint64_t get_longest_track_duration(struct mp4_mux *mux)
  1675. {
  1676. uint64_t dur = 0;
  1677. for (size_t i = 0; i < mux->tracks.num; i++) {
  1678. struct mp4_track *track = &mux->tracks.array[i];
  1679. uint64_t track_dur = util_mul_div64(track->duration, 1000, track->timebase_den);
  1680. if (track_dur > dur)
  1681. dur = track_dur;
  1682. }
  1683. return dur;
  1684. }
  1685. static void process_packets(struct mp4_mux *mux, struct mp4_track *track, uint64_t *mdat_size)
  1686. {
  1687. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1688. if (!count)
  1689. return;
  1690. /* Only iterate upt to penultimate packet so we can determine duration
  1691. * for all processed packets. */
  1692. for (size_t i = 0; i < count - 1; i++) {
  1693. struct encoder_packet *pkt = get_pkt_at(&track->packets, i);
  1694. if (mux->next_frag_pts && packet_pts_usec(pkt) >= mux->next_frag_pts)
  1695. break;
  1696. struct encoder_packet *next = get_pkt_at(&track->packets, i + 1);
  1697. /* Duration is just distance between current and next DTS. */
  1698. uint32_t duration = (uint32_t)(next->dts - pkt->dts);
  1699. uint32_t sample_count = 1;
  1700. uint32_t size = (uint32_t)pkt->size;
  1701. int32_t offset = (int32_t)(pkt->pts - pkt->dts);
  1702. /* When using negative CTS, subtract DTS-PTS offset. */
  1703. if (track->type == TRACK_VIDEO && mux->flags & MP4_USE_NEGATIVE_CTS) {
  1704. if (!track->offsets.num)
  1705. track->dts_offset = offset;
  1706. offset -= track->dts_offset;
  1707. }
  1708. /* Create temporary sample information for moof */
  1709. struct fragment_sample *smp = da_push_back_new(track->fragment_samples);
  1710. smp->size = size;
  1711. smp->offset = offset;
  1712. smp->duration = duration;
  1713. *mdat_size += size;
  1714. /* Update global sample information for full moov */
  1715. track->duration += duration;
  1716. if (track->sample_size) {
  1717. /* Adjust duration/count for fixed sample size */
  1718. sample_count = size / track->sample_size;
  1719. duration = 1;
  1720. }
  1721. if (!track->samples)
  1722. track->first_pts = pkt->pts;
  1723. track->samples += sample_count;
  1724. /* If delta (duration) matche sprevious, increment counter,
  1725. * otherwise create a new entry. */
  1726. if (track->deltas.num == 0 || track->deltas.array[track->deltas.num - 1].delta != duration) {
  1727. struct sample_delta *new = da_push_back_new(track->deltas);
  1728. new->delta = duration;
  1729. new->count = sample_count;
  1730. } else {
  1731. track->deltas.array[track->deltas.num - 1].count += sample_count;
  1732. }
  1733. if (!track->sample_size)
  1734. da_push_back(track->sample_sizes, &size);
  1735. if (track->type != TRACK_VIDEO)
  1736. continue;
  1737. if (pkt->keyframe)
  1738. da_push_back(track->sync_samples, &track->samples);
  1739. /* Only require ctts box if offet is non-zero */
  1740. if (offset && !track->needs_ctts)
  1741. track->needs_ctts = true;
  1742. /* If dts-pts offset matche sprevious, increment counter,
  1743. * otherwise create a new entry. */
  1744. if (track->offsets.num == 0 || track->offsets.array[track->offsets.num - 1].offset != offset) {
  1745. struct sample_offset *new = da_push_back_new(track->offsets);
  1746. new->offset = offset;
  1747. new->count = 1;
  1748. } else {
  1749. track->offsets.array[track->offsets.num - 1].count += 1;
  1750. }
  1751. }
  1752. }
  1753. /* Write track data to file */
  1754. static void write_packets(struct mp4_mux *mux, struct mp4_track *track)
  1755. {
  1756. struct serializer *s = mux->serializer;
  1757. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1758. if (!count || !track->fragment_samples.num)
  1759. return;
  1760. struct chunk *chk = da_push_back_new(track->chunks);
  1761. chk->offset = serializer_get_pos(s);
  1762. chk->samples = (uint32_t)track->fragment_samples.num;
  1763. for (size_t i = 0; i < track->fragment_samples.num; i++) {
  1764. struct encoder_packet pkt;
  1765. deque_pop_front(&track->packets, &pkt, sizeof(struct encoder_packet));
  1766. s_write(s, pkt.data, pkt.size);
  1767. obs_encoder_packet_release(&pkt);
  1768. }
  1769. chk->size = (uint32_t)(serializer_get_pos(s) - chk->offset);
  1770. /* Fixup sample count for fixed-size codecs */
  1771. if (track->sample_size)
  1772. chk->samples = chk->size / track->sample_size;
  1773. da_clear(track->fragment_samples);
  1774. }
  1775. static void mp4_flush_fragment(struct mp4_mux *mux)
  1776. {
  1777. struct serializer *s = mux->serializer;
  1778. // Write file header if not already done
  1779. if (!mux->fragments_written) {
  1780. mp4_write_ftyp(mux, true);
  1781. /* Placeholder to write mdat header during soft-remux */
  1782. mux->placeholder_offset = serializer_get_pos(s);
  1783. mp4_write_free(mux);
  1784. }
  1785. // Array output as temporary buffer to avoid sending seeks to disk
  1786. struct serializer as;
  1787. struct array_output_data aod;
  1788. array_output_serializer_init(&as, &aod);
  1789. mux->serializer = &as;
  1790. // Write initial incomplete moov (because fragmentation)
  1791. if (!mux->fragments_written) {
  1792. mp4_write_moov(mux, true);
  1793. s_write(s, aod.bytes.array, aod.bytes.num);
  1794. array_output_serializer_reset(&aod);
  1795. }
  1796. mux->fragments_written++;
  1797. /* --------------------------------------------------------- */
  1798. /* Analyse packets and create fragment moof. */
  1799. uint64_t mdat_size = 8;
  1800. for (size_t idx = 0; idx < mux->tracks.num; idx++) {
  1801. struct mp4_track *track = &mux->tracks.array[idx];
  1802. process_packets(mux, track, &mdat_size);
  1803. }
  1804. if (!mux->next_frag_pts && mux->chapter_track) {
  1805. // Create dummy chapter marker at the end so duration is correct
  1806. uint64_t duration = get_longest_track_duration(mux);
  1807. struct encoder_packet pkt;
  1808. mp4_create_chapter_pkt(&pkt, (int64_t)duration * 1000, "Dummy");
  1809. deque_push_back(&mux->chapter_track->packets, &pkt, sizeof(struct encoder_packet));
  1810. process_packets(mux, mux->chapter_track, &mdat_size);
  1811. }
  1812. // write moof once to get size
  1813. int64_t moof_start = serializer_get_pos(s);
  1814. size_t moof_size = mp4_write_moof(mux, 0, moof_start);
  1815. array_output_serializer_reset(&aod);
  1816. // write moof again with known size
  1817. mp4_write_moof(mux, (uint32_t)moof_size, moof_start);
  1818. // Write to output and restore real serializer
  1819. s_write(s, aod.bytes.array, aod.bytes.num);
  1820. mux->serializer = s;
  1821. array_output_serializer_free(&aod);
  1822. /* --------------------------------------------------------- */
  1823. /* Write audio and video samples (in chunks). Also update */
  1824. /* global chunk and sample information for final moov. */
  1825. if (mdat_size > UINT32_MAX) {
  1826. s_wb32(s, 1);
  1827. s_write(s, "mdat", 4);
  1828. s_wb64(s, mdat_size + 8);
  1829. } else {
  1830. s_wb32(s, (uint32_t)mdat_size);
  1831. s_write(s, "mdat", 4);
  1832. }
  1833. for (size_t i = 0; i < mux->tracks.num; i++) {
  1834. struct mp4_track *track = &mux->tracks.array[i];
  1835. write_packets(mux, track);
  1836. }
  1837. /* Only write chapter packets on final flush. */
  1838. if (!mux->next_frag_pts && mux->chapter_track)
  1839. write_packets(mux, mux->chapter_track);
  1840. mux->next_frag_pts = 0;
  1841. }
  1842. /* ========================================================================== */
  1843. /* Track object functions */
  1844. static inline void track_insert_packet(struct mp4_track *track, struct encoder_packet *pkt)
  1845. {
  1846. int64_t pts_usec = packet_pts_usec(pkt);
  1847. if (pts_usec > track->last_pts_usec)
  1848. track->last_pts_usec = pts_usec;
  1849. deque_push_back(&track->packets, pkt, sizeof(struct encoder_packet));
  1850. }
  1851. static inline uint32_t get_sample_size(struct mp4_track *track)
  1852. {
  1853. audio_t *audio = obs_encoder_audio(track->encoder);
  1854. if (!audio)
  1855. return 0;
  1856. const struct audio_output_info *info = audio_output_get_info(audio);
  1857. uint32_t channels = get_audio_channels(info->speakers);
  1858. switch (track->codec) {
  1859. case CODEC_PCM_F32:
  1860. return channels * 4; // 4 bytes per sample (32-bit)
  1861. case CODEC_PCM_I24:
  1862. return channels * 3; // 3 bytes per sample (24-bit)
  1863. case CODEC_PCM_I16:
  1864. return channels * 2; // 2 bytes per sample (16-bit)
  1865. default:
  1866. return 0;
  1867. }
  1868. }
  1869. static inline enum mp4_codec get_codec(obs_encoder_t *enc)
  1870. {
  1871. const char *codec = obs_encoder_get_codec(enc);
  1872. if (strcmp(codec, "h264") == 0)
  1873. return CODEC_H264;
  1874. if (strcmp(codec, "hevc") == 0)
  1875. return CODEC_HEVC;
  1876. if (strcmp(codec, "av1") == 0)
  1877. return CODEC_AV1;
  1878. if (strcmp(codec, "aac") == 0)
  1879. return CODEC_AAC;
  1880. if (strcmp(codec, "opus") == 0)
  1881. return CODEC_OPUS;
  1882. if (strcmp(codec, "flac") == 0)
  1883. return CODEC_FLAC;
  1884. if (strcmp(codec, "alac") == 0)
  1885. return CODEC_ALAC;
  1886. if (strcmp(codec, "pcm_s16le") == 0)
  1887. return CODEC_PCM_I16;
  1888. if (strcmp(codec, "pcm_s24le") == 0)
  1889. return CODEC_PCM_I24;
  1890. if (strcmp(codec, "pcm_f32le") == 0)
  1891. return CODEC_PCM_F32;
  1892. return CODEC_UNKNOWN;
  1893. }
  1894. static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc)
  1895. {
  1896. struct mp4_track *track = da_push_back_new(mux->tracks);
  1897. track->type = obs_encoder_get_type(enc) == OBS_ENCODER_VIDEO ? TRACK_VIDEO : TRACK_AUDIO;
  1898. track->encoder = obs_encoder_get_ref(enc);
  1899. track->codec = get_codec(enc);
  1900. track->track_id = ++mux->track_ctr;
  1901. /* Set timebase/timescale */
  1902. if (track->type == TRACK_VIDEO) {
  1903. video_t *video = obs_encoder_video(enc);
  1904. const struct video_output_info *info = video_output_get_info(video);
  1905. track->timebase_num = info->fps_den;
  1906. track->timebase_den = info->fps_num;
  1907. track->timescale = track->timebase_den;
  1908. /* FFmpeg does this to compensate for non-monotonic timestamps,
  1909. * we probably don't need it, but let's stick to what they do
  1910. * for maximum compatibility. */
  1911. while (track->timescale < 10000)
  1912. track->timescale *= 2;
  1913. } else {
  1914. uint32_t sample_rate = obs_encoder_get_sample_rate(enc);
  1915. /* Opus is always 48 kHz */
  1916. if (track->codec == CODEC_OPUS)
  1917. sample_rate = 48000;
  1918. track->timebase_num = 1;
  1919. track->timebase_den = sample_rate;
  1920. track->timescale = sample_rate;
  1921. }
  1922. /* Set sample size (if fixed) */
  1923. if (track->type == TRACK_AUDIO)
  1924. track->sample_size = get_sample_size(track);
  1925. }
  1926. static inline void add_chapter_track(struct mp4_mux *mux)
  1927. {
  1928. mux->chapter_track = bzalloc(sizeof(struct mp4_track));
  1929. mux->chapter_track->type = TRACK_CHAPTERS;
  1930. mux->chapter_track->codec = CODEC_TEXT;
  1931. mux->chapter_track->timescale = 1000;
  1932. mux->chapter_track->timebase_num = 1;
  1933. mux->chapter_track->timebase_den = 1000;
  1934. mux->chapter_track->track_id = ++mux->track_ctr;
  1935. }
  1936. static inline void free_packets(struct deque *dq)
  1937. {
  1938. size_t num = dq->size / sizeof(struct encoder_packet);
  1939. for (size_t i = 0; i < num; i++) {
  1940. struct encoder_packet pkt;
  1941. deque_pop_front(dq, &pkt, sizeof(struct encoder_packet));
  1942. obs_encoder_packet_release(&pkt);
  1943. }
  1944. }
  1945. static inline void free_track(struct mp4_track *track)
  1946. {
  1947. if (!track)
  1948. return;
  1949. obs_encoder_release(track->encoder);
  1950. free_packets(&track->packets);
  1951. deque_free(&track->packets);
  1952. da_free(track->sample_sizes);
  1953. da_free(track->chunks);
  1954. da_free(track->deltas);
  1955. da_free(track->offsets);
  1956. da_free(track->sync_samples);
  1957. da_free(track->fragment_samples);
  1958. }
  1959. /* ===========================================================================*/
  1960. /* API */
  1961. struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags)
  1962. {
  1963. struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));
  1964. mux->output = output;
  1965. mux->serializer = serializer;
  1966. mux->flags = flags;
  1967. /* Timestamp is based on 1904 rather than 1970. */
  1968. mux->creation_time = time(NULL) + 0x7C25B080;
  1969. for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
  1970. obs_encoder_t *enc = obs_output_get_video_encoder2(output, i);
  1971. if (!enc)
  1972. continue;
  1973. add_track(mux, enc);
  1974. }
  1975. for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
  1976. obs_encoder_t *enc = obs_output_get_audio_encoder(output, i);
  1977. if (!enc)
  1978. continue;
  1979. add_track(mux, enc);
  1980. }
  1981. return mux;
  1982. }
  1983. void mp4_mux_destroy(struct mp4_mux *mux)
  1984. {
  1985. for (size_t i = 0; i < mux->tracks.num; i++)
  1986. free_track(&mux->tracks.array[i]);
  1987. free_track(mux->chapter_track);
  1988. bfree(mux->chapter_track);
  1989. da_free(mux->tracks);
  1990. bfree(mux);
  1991. }
  1992. bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
  1993. {
  1994. struct mp4_track *track = NULL;
  1995. struct encoder_packet parsed_packet;
  1996. enum obs_encoder_type type = pkt->type;
  1997. bool fragment_ready = mux->next_frag_pts > 0;
  1998. for (size_t i = 0; i < mux->tracks.num; i++) {
  1999. struct mp4_track *tmp = &mux->tracks.array[i];
  2000. fragment_ready = fragment_ready && tmp->last_pts_usec >= mux->next_frag_pts;
  2001. if (tmp->encoder == pkt->encoder)
  2002. track = tmp;
  2003. }
  2004. if (!track) {
  2005. warn("Could not find track for packet of type %s with "
  2006. "track id %zu!",
  2007. type == OBS_ENCODER_VIDEO ? "video" : "audio", pkt->track_idx);
  2008. return false;
  2009. }
  2010. /* If all tracks have caught up to the keyframe we want to fragment on,
  2011. * flush the current fragment to disk. */
  2012. if (fragment_ready)
  2013. mp4_flush_fragment(mux);
  2014. if (type == OBS_ENCODER_AUDIO) {
  2015. obs_encoder_packet_ref(&parsed_packet, pkt);
  2016. } else {
  2017. if (track->codec == CODEC_H264)
  2018. obs_parse_avc_packet(&parsed_packet, pkt);
  2019. else if (track->codec == CODEC_HEVC)
  2020. obs_parse_hevc_packet(&parsed_packet, pkt);
  2021. else if (track->codec == CODEC_AV1)
  2022. obs_parse_av1_packet(&parsed_packet, pkt);
  2023. /* Set fragmentation PTS if packet is keyframe and PTS > 0 */
  2024. if (parsed_packet.keyframe && parsed_packet.pts > 0) {
  2025. mux->next_frag_pts = packet_pts_usec(&parsed_packet);
  2026. }
  2027. }
  2028. track_insert_packet(track, &parsed_packet);
  2029. return true;
  2030. }
  2031. bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec, const char *name)
  2032. {
  2033. if (dts_usec < 0)
  2034. return false;
  2035. if (!mux->chapter_track)
  2036. add_chapter_track(mux);
  2037. /* To work correctly there needs to be a chapter at PTS 0,
  2038. * create that here if necessary. */
  2039. if (dts_usec > 0 && mux->chapter_track->packets.size == 0) {
  2040. mp4_mux_add_chapter(mux, 0, obs_module_text("MP4Output.StartChapter"));
  2041. }
  2042. /* Create packets that will be muxed on final flush */
  2043. struct encoder_packet pkt;
  2044. mp4_create_chapter_pkt(&pkt, dts_usec, name);
  2045. track_insert_packet(mux->chapter_track, &pkt);
  2046. return true;
  2047. }
  2048. bool mp4_mux_finalise(struct mp4_mux *mux)
  2049. {
  2050. struct serializer *s = mux->serializer;
  2051. /* Flush remaining audio/video samples as final fragment. */
  2052. info("Flushing final fragment...");
  2053. /* Set target PTS to zero to indicate that we want to flush all
  2054. * the remaining packets */
  2055. mux->next_frag_pts = 0;
  2056. mp4_flush_fragment(mux);
  2057. info("Number of fragments: %u", mux->fragments_written);
  2058. if (mux->flags & MP4_SKIP_FINALISATION) {
  2059. warn("Skipping MP4 finalization!");
  2060. return true;
  2061. }
  2062. int64_t data_end = serializer_get_pos(s);
  2063. /* ---------------------------------------- */
  2064. /* Write full moov box */
  2065. /* Use array serializer for moov data as this will do a lot
  2066. * of seeks to write size values of variable-size boxes. */
  2067. struct serializer fs;
  2068. struct array_output_data ao;
  2069. array_output_serializer_init(&fs, &ao);
  2070. mux->serializer = &fs;
  2071. mp4_write_moov(mux, false);
  2072. s_write(s, ao.bytes.array, ao.bytes.num);
  2073. info("Full moov size: %zu KiB", ao.bytes.num / 1024);
  2074. mux->serializer = s; // restore real serializer
  2075. array_output_serializer_free(&ao);
  2076. /* ---------------------------------------- */
  2077. /* Overwrite file header (ftyp + free/moov) */
  2078. serializer_seek(s, 0, SERIALIZE_SEEK_START);
  2079. mp4_write_ftyp(mux, false);
  2080. size_t data_size = data_end - mux->placeholder_offset;
  2081. serializer_seek(s, (int64_t)mux->placeholder_offset, SERIALIZE_SEEK_START);
  2082. /* If data is more than 4 GiB the mdat header becomes 16 bytes, hence
  2083. * why we create a 16-byte placeholder "free" box at the start. */
  2084. if (data_size > UINT32_MAX) {
  2085. s_wb32(s, 1); // 1 = use "largesize" field instead
  2086. s_write(s, "mdat", 4);
  2087. s_wb64(s, data_size); // largesize (64-bit)
  2088. } else {
  2089. s_wb32(s, (uint32_t)data_size);
  2090. s_write(s, "mdat", 4);
  2091. }
  2092. info("Final mdat size: %zu KiB", data_size / 1024);
  2093. return true;
  2094. }