mp4-mux.c 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843
  1. /******************************************************************************
  2. Copyright (C) 2024 by Dennis Sädtler <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #include "mp4-mux-internal.h"
  15. #include "rtmp-hevc.h"
  16. #include "rtmp-av1.h"
  17. #include <obs-avc.h>
  18. #include <obs-hevc.h>
  19. #include <obs-module.h>
  20. #include <util/dstr.h>
  21. #include <util/platform.h>
  22. #include <util/array-serializer.h>
  23. #include <time.h>
  24. /*
  25. * (Mostly) compliant MP4 muxer for fun and profit.
  26. * Based on ISO/IEC 14496-12 and FFmpeg's libavformat/movenc.c ([L]GPL)
  27. *
  28. * Specification section numbers are noted where applicable.
  29. * Standard identifier is included if not referring to ISO/IEC 14496-12.
  30. */
  /* Logging helpers.
   *
   * Each one expands to a blog() call whose message is prefixed with the name
   * of the output that owns the muxer. The expansion references `mux->output`,
   * so a variable named `mux` must be visible wherever these are used. */
  #define do_log(level, format, ...)                   \
      blog(level, "[mp4 muxer: '%s'] " format,         \
           obs_output_get_name(mux->output), ##__VA_ARGS__)

  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  36. /* Helper to overwrite placeholder size and return total size. */
  37. static inline size_t write_box_size(struct serializer *s, int64_t start)
  38. {
  39. int64_t end = serializer_get_pos(s);
  40. size_t size = end - start;
  41. serializer_seek(s, start, SERIALIZE_SEEK_START);
  42. s_wb32(s, (uint32_t)size);
  43. serializer_seek(s, end, SERIALIZE_SEEK_START);
  44. return size;
  45. }
  /// 4.2 Box header with size and char[4] name
  ///
  /// A size that fits in 32 bits is written directly. Anything larger is
  /// written as size == 1, followed by the box type and a 64-bit largesize.
  static inline void write_box(struct serializer *s, const size_t size,
                               const char name[4])
  {
      const bool needs_largesize = size > UINT32_MAX;

      s_wb32(s, needs_largesize ? 1 : (uint32_t)size); // size (1 = largesize follows)
      s_write(s, name, 4);                             // boxtype

      if (needs_largesize)
          s_wb64(s, size); // largesize
  }
  /// 4.2 FullBox extended header with u8 version and u24 flags
  ///
  /// Emits the regular box header via write_box() and appends the 8-bit
  /// version followed by the 24-bit flags field.
  static inline void write_fullbox(struct serializer *s, const size_t size,
                                   const char name[4], uint8_t version,
                                   uint32_t flags)
  {
      /* Plain box header (size + type) comes first. */
      write_box(s, size, name);

      /* FullBox extension: version, then flags. */
      s_w8(s, version);
      s_wb24(s, flags);
  }
  68. /// 4.3 File Type Box
  69. static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
  70. {
  71. struct serializer *s = mux->serializer;
  72. int64_t start = serializer_get_pos(s);
  73. write_box(s, 0, "ftyp");
  74. const char *major_brand = "isom";
  75. /* Following FFmpeg's example, when using negative CTS the major brand
  76. * needs to be either iso4 or iso6 depending on whether the file is
  77. * currently fragmented. */
  78. if (mux->flags & MP4_USE_NEGATIVE_CTS)
  79. major_brand = fragmented ? "iso6" : "iso4";
  80. s_write(s, major_brand, 4); // major brand
  81. s_wb32(s, 512); // minor version
  82. // minor brands (first one matches major brand)
  83. s_write(s, major_brand, 4);
  84. /* Write isom base brand if it's not the major brand */
  85. if (strcmp(major_brand, "isom") != 0)
  86. s_write(s, "isom", 4);
  87. /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
  88. * as a placeholder to maintain ftyp box size. */
  89. if (fragmented && strcmp(major_brand, "iso6") != 0)
  90. s_write(s, "iso6", 4);
  91. else
  92. s_write(s, "obs1", 4);
  93. s_write(s, "iso2", 4);
  94. /* Include H.264 brand if used */
  95. for (size_t i = 0; i < mux->tracks.num; i++) {
  96. struct mp4_track *track = &mux->tracks.array[i];
  97. if (track->type == TRACK_VIDEO) {
  98. if (track->codec == CODEC_H264)
  99. s_write(s, "avc1", 4);
  100. break;
  101. }
  102. }
  103. /* General MP4 brannd */
  104. s_write(s, "mp41", 4);
  105. return write_box_size(s, start);
  106. }
  107. /// 8.1.2 Free Space Box
  108. static size_t mp4_write_free(struct mp4_mux *mux)
  109. {
  110. struct serializer *s = mux->serializer;
  111. /* Write a 16-byte free box, so it can be replaced with a 64-bit size
  112. * box header (u32 + char[4] + u64) */
  113. s_wb32(s, 16);
  114. s_write(s, "free", 4);
  115. s_wb64(s, 0);
  116. return 16;
  117. }
  118. /// 8.2.2 Movie Header Box
  119. static size_t mp4_write_mvhd(struct mp4_mux *mux)
  120. {
  121. struct serializer *s = mux->serializer;
  122. size_t start = serializer_get_pos(s);
  123. /* Use primary video track as the baseline for duration */
  124. uint64_t duration = 0;
  125. for (size_t i = 0; i < mux->tracks.num; i++) {
  126. struct mp4_track *track = &mux->tracks.array[i];
  127. if (track->type == TRACK_VIDEO) {
  128. duration = util_mul_div64(track->duration, 1000,
  129. track->timebase_den);
  130. break;
  131. }
  132. }
  133. write_fullbox(s, 0, "mvhd", 0, 0);
  134. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  135. s_wb64(s, mux->creation_time); // creation time
  136. s_wb64(s, mux->creation_time); // modification time
  137. s_wb32(s, 1000); // timescale
  138. s_wb64(s, duration); // duration (0 for fragmented)
  139. } else {
  140. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  141. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  142. s_wb32(s, 1000); // timescale
  143. s_wb32(s, (uint32_t)duration); // duration (0 for fragmented)
  144. }
  145. s_wb32(s, 0x00010000); // rate, 16.16 fixed float (1 << 16)
  146. s_wb16(s, 0x0100); // volume
  147. s_wb16(s, 0); // reserved
  148. s_wb32(s, 0); // reserved
  149. s_wb32(s, 0); // reserved
  150. // Matrix
  151. for (int i = 0; i < 9; i++)
  152. s_wb32(s, UNITY_MATRIX[i]);
  153. // pre_defined
  154. s_wb32(s, 0);
  155. s_wb32(s, 0);
  156. s_wb32(s, 0);
  157. s_wb32(s, 0);
  158. s_wb32(s, 0);
  159. s_wb32(s, 0);
  160. s_wb32(s, mux->track_ctr + 1); // next_track_ID
  161. return write_box_size(s, start);
  162. }
  163. /// 8.3.2 Track Header Box
  164. static size_t mp4_write_tkhd(struct mp4_mux *mux, struct mp4_track *track)
  165. {
  166. struct serializer *s = mux->serializer;
  167. size_t start = serializer_get_pos(s);
  168. uint64_t duration =
  169. util_mul_div64(track->duration, 1000, track->timebase_den);
  170. /* Flags are 0x1 (enabled) | 0x2 (in movie) */
  171. static const uint32_t flags = 0x1 | 0x2;
  172. write_fullbox(s, 0, "tkhd", 0, flags);
  173. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  174. s_wb64(s, mux->creation_time); // creation time
  175. s_wb64(s, mux->creation_time); // modification time
  176. s_wb32(s, track->track_id); // track_id
  177. s_wb32(s, 0); // reserved
  178. s_wb64(s, duration); // duration in movie timescale
  179. } else {
  180. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  181. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  182. s_wb32(s, track->track_id); // track_id
  183. s_wb32(s, 0); // reserved
  184. s_wb32(s, (uint32_t)duration); // duration in movie timescale
  185. }
  186. s_wb32(s, 0); // reserved
  187. s_wb32(s, 0); // reserved
  188. s_wb16(s, 0); // layer
  189. s_wb16(s, track->type == TRACK_AUDIO ? 1 : 0); // alternate group
  190. s_wb16(s, track->type == TRACK_AUDIO ? 0x100 : 0); // volume
  191. s_wb16(s, 0); // reserved
  192. // Matrix (predefined)
  193. for (int i = 0; i < 9; i++)
  194. s_wb32(s, UNITY_MATRIX[i]);
  195. if (track->type == TRACK_AUDIO) {
  196. s_wb32(s, 0); // width
  197. s_wb32(s, 0); // height
  198. } else {
  199. /* width/height are fixed point 16.16, so we just shift the
  200. * integer to the upper 16 bits */
  201. uint32_t width = obs_encoder_get_width(track->encoder);
  202. s_wb32(s, width << 16);
  203. uint32_t height = obs_encoder_get_height(track->encoder);
  204. s_wb32(s, height << 16);
  205. }
  206. return write_box_size(s, start);
  207. }
  208. /// 8.4.2 Media Header Box
  209. static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
  210. {
  211. struct serializer *s = mux->serializer;
  212. size_t size = 32;
  213. uint8_t version = 0;
  214. uint64_t duration = track->duration;
  215. uint32_t timescale = track->timescale;
  216. if (track->type == TRACK_VIDEO) {
  217. /* Update to track timescale */
  218. duration = util_mul_div64(duration, track->timescale,
  219. track->timebase_den);
  220. }
  221. /* use 64-bit duration if necessary */
  222. if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
  223. size = 44;
  224. version = 1;
  225. }
  226. write_fullbox(s, size, "mdhd", version, 0);
  227. if (version == 1) {
  228. s_wb64(s, mux->creation_time); // creation time
  229. s_wb64(s, mux->creation_time); // modification time
  230. s_wb32(s, timescale); // timescale
  231. s_wb64(s, (uint32_t)duration); // duration
  232. } else {
  233. s_wb32(s, (uint32_t)mux->creation_time); // creation time
  234. s_wb32(s, (uint32_t)mux->creation_time); // modification time
  235. s_wb32(s, timescale); // timescale
  236. s_wb32(s, (uint32_t)duration); // duration
  237. }
  238. s_wb16(s, 21956); // language (undefined)
  239. s_wb16(s, 0); // pre_defined
  240. return size;
  241. }
  242. /// 8.4.3 Handler Reference Box
  243. static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
  244. {
  245. struct serializer *s = mux->serializer;
  246. int64_t start = serializer_get_pos(s);
  247. write_fullbox(s, 0, "hdlr", 0, 0);
  248. s_wb32(s, 0); // pre_defined
  249. // handler_type
  250. if (track->type == TRACK_VIDEO)
  251. s_write(s, "vide", 4);
  252. else if (track->type == TRACK_CHAPTERS)
  253. s_write(s, "text", 4);
  254. else
  255. s_write(s, "soun", 4);
  256. s_wb32(s, 0); // reserved
  257. s_wb32(s, 0); // reserved
  258. s_wb32(s, 0); // reserved
  259. // name (utf-8 string, null terminated)
  260. if (track->type == TRACK_VIDEO)
  261. s_write(s, "OBS Video Handler", 18);
  262. else if (track->type == TRACK_CHAPTERS)
  263. s_write(s, "OBS Chapter Handler", 20);
  264. else
  265. s_write(s, "OBS Audio Handler", 18);
  266. return write_box_size(s, start);
  267. }
  268. /// 12.1.2 Video media header
  269. static size_t mp4_write_vmhd(struct mp4_mux *mux)
  270. {
  271. struct serializer *s = mux->serializer;
  272. /* Flags is always 1 */
  273. write_fullbox(s, 20, "vmhd", 0, 1);
  274. s_wb16(s, 0); // graphicsmode
  275. s_wb16(s, 0); // opcolor r
  276. s_wb16(s, 0); // opcolor g
  277. s_wb16(s, 0); // opcolor b
  278. return 16;
  279. }
  280. /// 12.2.2 Sound media header
  281. static size_t mp4_write_smhd(struct mp4_mux *mux)
  282. {
  283. struct serializer *s = mux->serializer;
  284. write_fullbox(s, 16, "smhd", 0, 0);
  285. s_wb16(s, 0); // balance
  286. s_wb16(s, 0); // reserved
  287. return 16;
  288. }
  289. /// (QTFF/Apple) Text media information atom
  290. static size_t mp4_write_qt_text(struct mp4_mux *mux)
  291. {
  292. struct serializer *s = mux->serializer;
  293. int64_t start = serializer_get_pos(s);
  294. write_box(s, 0, "text");
  295. /* Identity matrix, note that it's not fixed point 16.16 */
  296. s_wb16(s, 0x01);
  297. s_wb32(s, 0x00);
  298. s_wb32(s, 0x00);
  299. s_wb32(s, 0x00);
  300. s_wb32(s, 0x01);
  301. s_wb32(s, 0x00);
  302. s_wb32(s, 0x00);
  303. s_wb32(s, 0x00);
  304. s_wb32(s, 0x00004000);
  305. /* Seemingly undocumented */
  306. s_wb16(s, 0x0000);
  307. return write_box_size(s, start);
  308. }
  309. /// (QTFF/Apple) Base media info atom
  310. static size_t mp4_write_gmin(struct mp4_mux *mux)
  311. {
  312. struct serializer *s = mux->serializer;
  313. int64_t start = serializer_get_pos(s);
  314. write_fullbox(s, 0, "gmin", 0, 0);
  315. s_wb16(s, 0x40); // graphics mode
  316. s_wb16(s, 0x8000); // opColor r
  317. s_wb16(s, 0x8000); // opColor g
  318. s_wb16(s, 0x8000); // opColor b
  319. s_wb16(s, 0); // balance
  320. s_wb16(s, 0); // reserved
  321. return write_box_size(s, start);
  322. }
  323. /// (QTFF/Apple) Base media information header atom
  324. static size_t mp4_write_gmhd(struct mp4_mux *mux)
  325. {
  326. struct serializer *s = mux->serializer;
  327. int64_t start = serializer_get_pos(s);
  328. write_box(s, 0, "gmhd");
  329. // gmin
  330. mp4_write_gmin(mux);
  331. // text (QuickTime)
  332. mp4_write_qt_text(mux);
  333. return write_box_size(s, start);
  334. }
  335. /// ISO/IEC 14496-15 5.4.2.1 AVCConfigurationBox
  336. static size_t mp4_write_avcC(struct mp4_mux *mux, obs_encoder_t *enc)
  337. {
  338. struct serializer *s = mux->serializer;
  339. /* For AVC this is the parsed extra data. */
  340. uint8_t *header;
  341. size_t size;
  342. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
  343. .timebase_den = 1,
  344. .keyframe = true};
  345. if (!obs_encoder_get_extra_data(enc, &header, &size))
  346. return 0;
  347. packet.size = obs_parse_avc_header(&packet.data, header, size);
  348. size_t box_size = packet.size + 8;
  349. write_box(s, box_size, "avcC");
  350. s_write(s, packet.data, packet.size);
  351. bfree(packet.data);
  352. return box_size;
  353. }
  354. /// ISO/IEC 14496-15 8.4.1.1 HEVCConfigurationBox
  355. static size_t mp4_write_hvcC(struct mp4_mux *mux, obs_encoder_t *enc)
  356. {
  357. struct serializer *s = mux->serializer;
  358. /* For HEVC this is the parsed extra data. */
  359. uint8_t *header;
  360. size_t size;
  361. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
  362. .timebase_den = 1,
  363. .keyframe = true};
  364. if (!obs_encoder_get_extra_data(enc, &header, &size))
  365. return 0;
  366. packet.size = obs_parse_hevc_header(&packet.data, header, size);
  367. size_t box_size = packet.size + 8;
  368. write_box(s, box_size, "hvcC");
  369. s_write(s, packet.data, packet.size);
  370. bfree(packet.data);
  371. return box_size;
  372. }
  373. /// AV1 ISOBMFF 2.3. AV1 Codec Configuration Box
  374. static size_t mp4_write_av1C(struct mp4_mux *mux, obs_encoder_t *enc)
  375. {
  376. struct serializer *s = mux->serializer;
  377. /* For AV1 this is just the parsed extra data. */
  378. uint8_t *header;
  379. size_t size;
  380. struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
  381. .timebase_den = 1,
  382. .keyframe = true};
  383. if (!obs_encoder_get_extra_data(enc, &header, &size))
  384. return 0;
  385. packet.size = obs_parse_av1_header(&packet.data, header, size);
  386. size_t box_size = packet.size + 8;
  387. write_box(s, box_size, "av1C");
  388. s_write(s, packet.data, packet.size);
  389. bfree(packet.data);
  390. return box_size;
  391. }
  392. /// 12.1.5 Colour information
  393. static size_t mp4_write_colr(struct mp4_mux *mux, obs_encoder_t *enc)
  394. {
  395. UNUSED_PARAMETER(enc);
  396. struct serializer *s = mux->serializer;
  397. write_box(s, 19, "colr");
  398. uint8_t full_range = 0;
  399. uint16_t pri, trc, spc;
  400. pri = trc = spc = 0;
  401. get_colour_information(enc, &pri, &trc, &spc, &full_range);
  402. s_write(s, "nclx", 4); // colour_type
  403. s_wb16(s, pri); // colour_primaries
  404. s_wb16(s, trc); // transfer_characteristics
  405. s_wb16(s, spc); // matrix_coefficiencts
  406. s_w8(s, full_range << 7); // full range flag + 7 reserved bits (0)
  407. return 19;
  408. }
  409. /// 12.1.4 Pixel Aspect Ratio
  410. static size_t mp4_write_pasp(struct mp4_mux *mux)
  411. {
  412. struct serializer *s = mux->serializer;
  413. write_box(s, 16, "pasp");
  414. s_wb32(s, 1); // hSpacing
  415. s_wb32(s, 1); // vSpacing
  416. return 16;
  417. }
  418. /// 12.1.3 Visual Sample Entry
  419. static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux,
  420. obs_encoder_t *enc)
  421. {
  422. struct serializer *s = mux->serializer;
  423. // SampleEntry Box
  424. s_w8(s, 0); // reserved
  425. s_w8(s, 0);
  426. s_w8(s, 0);
  427. s_w8(s, 0);
  428. s_w8(s, 0);
  429. s_w8(s, 0);
  430. s_wb16(s, 1); // data_reference_index
  431. // VisualSampleEntry Box
  432. s_wb16(s, 0); // pre_defined
  433. s_wb16(s, 0); // reserved
  434. s_wb32(s, 0); // pre_defined
  435. s_wb32(s, 0); // pre_defined
  436. s_wb32(s, 0); // pre_defined
  437. s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width
  438. s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
  439. s_wb32(s, 0x00480000); // horizresolution (predefined)
  440. s_wb32(s, 0x00480000); // vertresolution (predefined)
  441. s_wb32(s, 0); // reserved
  442. s_wb16(s, 1); // frame_count
  443. /* Name is fixed 32-bytes and needs to be padded to that length.
  444. * First byte is the length, rest is a string sans NULL terminator. */
  445. char compressor_name[32] = {0};
  446. const char *enc_id = obs_encoder_get_id(enc);
  447. if (enc_id) {
  448. size_t len = strlen(enc_id);
  449. if (len > 31)
  450. len = 31;
  451. compressor_name[0] = (char)len;
  452. memcpy(compressor_name + 1, enc_id, len);
  453. }
  454. s_write(s, compressor_name, sizeof(compressor_name)); // compressorname
  455. s_wb16(s, 0x0018); // depth
  456. s_wb16(s, -1); // pre_defined
  457. }
  458. /// 12.1.6 Content light level
  459. static size_t mp4_write_clli(struct mp4_mux *mux, obs_encoder_t *enc)
  460. {
  461. struct serializer *s = mux->serializer;
  462. video_t *video = obs_encoder_video(enc);
  463. const struct video_output_info *info = video_output_get_info(video);
  464. /* Only write box for HDR video */
  465. if (info->colorspace != VIDEO_CS_2100_PQ &&
  466. info->colorspace != VIDEO_CS_2100_HLG)
  467. return 0;
  468. write_box(s, 12, "clli");
  469. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  470. s_wb16(s, (uint16_t)nominal_peak); // max_content_light_level
  471. s_wb16(s, (uint16_t)nominal_peak); // max_pic_average_light_level
  472. return 12;
  473. }
  474. /// 12.1.7 Mastering display colour volume
  475. static size_t mp4_write_mdcv(struct mp4_mux *mux, obs_encoder_t *enc)
  476. {
  477. struct serializer *s = mux->serializer;
  478. video_t *video = obs_encoder_video(enc);
  479. const struct video_output_info *info = video_output_get_info(video);
  480. // Only write atom for HDR video
  481. if (info->colorspace != VIDEO_CS_2100_PQ &&
  482. info->colorspace != VIDEO_CS_2100_HLG)
  483. return 0;
  484. write_box(s, 32, "mdcv");
  485. float nominal_peak = obs_get_video_hdr_nominal_peak_level();
  486. uint32_t max_lum = (uint32_t)nominal_peak * 10000;
  487. /* Note that these values are hardcoded everywhere in OBS, so these are
  488. * just the same as used in our other muxers/encoders. */
  489. // 3 x display_primaries (x, y) pairs
  490. s_wb16(s, 13250);
  491. s_wb16(s, 34500);
  492. s_wb16(s, 7500);
  493. s_wb16(s, 3000);
  494. s_wb16(s, 34000);
  495. s_wb16(s, 16000);
  496. s_wb16(s, 15635); // white_point_x
  497. s_wb16(s, 16450); // white_point_y
  498. s_wb32(s, max_lum); // max_display_mastering_luminance
  499. s_wb32(s, 0); // min_display_mastering_luminance
  500. return 32;
  501. }
  502. /// ISO/IEC 14496-15 5.4.2.1 AVCSampleEntry
  503. static size_t mp4_write_avc1(struct mp4_mux *mux, obs_encoder_t *enc)
  504. {
  505. struct serializer *s = mux->serializer;
  506. int64_t start = serializer_get_pos(s);
  507. write_box(s, 0, "avc1");
  508. mp4_write_visual_sample_entry(mux, enc);
  509. // avcC
  510. mp4_write_avcC(mux, enc);
  511. // colr
  512. mp4_write_colr(mux, enc);
  513. // pasp
  514. mp4_write_pasp(mux);
  515. return write_box_size(s, start);
  516. }
  517. /// ISO/IEC 14496-15 8.4.1.1 HEVCSampleEntry
  518. static size_t mp4_write_hvc1(struct mp4_mux *mux, obs_encoder_t *enc)
  519. {
  520. struct serializer *s = mux->serializer;
  521. int64_t start = serializer_get_pos(s);
  522. write_box(s, 0, "hvc1");
  523. mp4_write_visual_sample_entry(mux, enc);
  524. // avcC
  525. mp4_write_hvcC(mux, enc);
  526. // colr
  527. mp4_write_colr(mux, enc);
  528. // clli
  529. mp4_write_clli(mux, enc);
  530. // mdcv
  531. mp4_write_mdcv(mux, enc);
  532. // pasp
  533. mp4_write_pasp(mux);
  534. return write_box_size(s, start);
  535. }
  536. /// AV1 ISOBMFF 2.2. AV1 Sample Entry
  537. static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
  538. {
  539. struct serializer *s = mux->serializer;
  540. int64_t start = serializer_get_pos(s);
  541. write_box(s, 0, "av01");
  542. mp4_write_visual_sample_entry(mux, enc);
  543. // avcC
  544. mp4_write_av1C(mux, enc);
  545. // colr
  546. mp4_write_colr(mux, enc);
  547. // clli
  548. mp4_write_clli(mux, enc);
  549. // mdcv
  550. mp4_write_mdcv(mux, enc);
  551. // pasp
  552. mp4_write_pasp(mux);
  553. return write_box_size(s, start);
  554. }
  /* Writes a descriptor header: the tag byte followed by `size` encoded in
   * exactly four bytes of 7 bits each, most significant group first. The
   * first three bytes have the high bit (0x80) set; the last one does not. */
  static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
  {
      s_w8(s, tag);

      /* Upper three 7-bit groups (bits 21-27, 14-20, 7-13), each flagged as
       * "more bytes follow". */
      for (int shift = 21; shift > 0; shift -= 7)
          s_w8(s, (uint8_t)((size >> shift) | 0x80));

      /* Lowest 7 bits terminate the length. */
      s_w8(s, size & 0x7F);
  }
  563. /// ISO/IEC 14496-14 5.6 ESDBox
  564. static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
  565. {
  566. struct serializer *s = mux->serializer;
  567. int64_t start = serializer_get_pos(s);
  568. write_fullbox(s, 0, "esds", 0, 0);
  569. /* Encoder extradata will be used as DecoderSpecificInfo */
  570. uint8_t *extradata;
  571. size_t extradata_size;
  572. if (!obs_encoder_get_extra_data(track->encoder, &extradata,
  573. &extradata_size)) {
  574. extradata_size = 0;
  575. }
  576. /// ISO/IEC 14496-1
  577. // ES_Descriptor
  578. size_t decoder_specific_info_len = extradata_size ? extradata_size + 5
  579. : 0;
  580. put_descr(s, 0x03, 3 + 5 + 13 + decoder_specific_info_len + 5 + 1);
  581. s_wb16(s, track->track_id);
  582. s_w8(s, 0x00); // flags
  583. // DecoderConfigDescriptor
  584. put_descr(s, 0x04, 13 + decoder_specific_info_len);
  585. s_w8(s, 0x40); // codec tag, 0x40 = AAC
  586. s_w8(s, 0x15); // stream type field (0x15 = audio stream)
  587. /* When writing the final MOOV this could theoretically be calculated
  588. * based on chunks, but it's not really all that important. */
  589. uint32_t bitrate = 0;
  590. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  591. if (settings) {
  592. int64_t enc_bitrate = obs_data_get_int(settings, "bitrate");
  593. if (enc_bitrate)
  594. bitrate = (uint32_t)(enc_bitrate * 1000);
  595. obs_data_release(settings);
  596. }
  597. s_wb24(s, 0); // bufferSizeDB (in bytes)
  598. s_wb32(s, bitrate); // maxbitrate
  599. s_wb32(s, bitrate); // avgBitrate
  600. // DecoderSpecificInfo
  601. if (extradata_size) {
  602. put_descr(s, 0x05, extradata_size);
  603. s_write(s, extradata, extradata_size);
  604. }
  605. // SLConfigDescriptor descriptor
  606. put_descr(s, 0x06, 1);
  607. s_w8(s, 0x02); // 0x2 = reserved for MP4, descriptor is empty
  608. return write_box_size(s, start);
  609. }
  610. /// 12.2.3 Audio Sample Entry
  611. static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux,
  612. struct mp4_track *track,
  613. uint8_t version)
  614. {
  615. struct serializer *s = mux->serializer;
  616. // SampleEntry Box
  617. s_w8(s, 0); // reserved
  618. s_w8(s, 0);
  619. s_w8(s, 0);
  620. s_w8(s, 0);
  621. s_w8(s, 0);
  622. s_w8(s, 0);
  623. s_wb16(s, 1); // data_reference_index
  624. // AudioSampleEntry Box
  625. if (version == 1) {
  626. s_wb16(s, 1); // entry_version
  627. s_wb16(s, 0); // reserved
  628. s_wb16(s, 0); // reserved
  629. s_wb16(s, 0); // reserved
  630. } else {
  631. s_wb32(s, 0); // reserved
  632. s_wb32(s, 0); // reserved
  633. }
  634. audio_t *audio = obs_encoder_audio(track->encoder);
  635. size_t channels = audio_output_get_channels(audio);
  636. uint32_t sample_rate = track->timescale;
  637. bool alac = track->codec == CODEC_ALAC;
  638. s_wb16(s, (uint32_t)channels); // channelcount
  639. /* OBS FLAC is currently always 16 bit, ALAC always 24, this may change
  640. * in the futrure and should be handled differently then.
  641. * That being said thoes codecs are self-describing so in most cases it
  642. * shouldn't matter either way. */
  643. s_wb16(s, alac ? 24 : 16); // samplesize
  644. s_wb16(s, 0); // pre_defined
  645. s_wb16(s, 0); // reserved
  646. s_wb32(s, sample_rate << 16); // samplerate
  647. }
  648. /// 12.2.4 Channel layout
  649. static size_t mp4_write_chnl(struct mp4_mux *mux, struct mp4_track *track)
  650. {
  651. struct serializer *s = mux->serializer;
  652. int64_t start = serializer_get_pos(s);
  653. write_fullbox(s, 0, "chnl", 0, 0);
  654. audio_t *audio = obs_encoder_audio(track->encoder);
  655. const struct audio_output_info *info = audio_output_get_info(audio);
  656. s_w8(s, 1); // stream_structure (1 = channels)
  657. /* 5.1 and 4.1 do not have a corresponding ISO layout, so we have to
  658. * write a manually created channel map for those. */
  659. uint8_t map[8] = {0};
  660. uint8_t items = 0;
  661. uint8_t defined_layout = 0;
  662. get_speaker_positions(info->speakers, map, &items, &defined_layout);
  663. if (!defined_layout) {
  664. warn("No ISO layout available for speaker layout %d, "
  665. "this may not be supported by all applications!",
  666. info->speakers);
  667. s_w8(s, 0); // definedLayout
  668. s_write(s, map, items); // uint8_t speaker_position[count]
  669. } else {
  670. s_w8(s, defined_layout); // definedLayout
  671. s_wb64(s, 0); // ommitedChannelMap
  672. }
  673. return write_box_size(s, start);
  674. }
  675. /// ISO/IEC 14496-14 5.6 MP4AudioSampleEntry
  676. static size_t mp4_write_mp4a(struct mp4_mux *mux, struct mp4_track *track,
  677. uint8_t version)
  678. {
  679. struct serializer *s = mux->serializer;
  680. int64_t start = serializer_get_pos(s);
  681. write_box(s, 0, "mp4a");
  682. mp4_write_audio_sample_entry(mux, track, version);
  683. // esds
  684. mp4_write_esds(mux, track);
  685. /* Write channel layout for version 1 sample entires */
  686. if (version == 1)
  687. mp4_write_chnl(mux, track);
  688. return write_box_size(s, start);
  689. }
  690. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.2 FLAC Specific Box
  691. static size_t mp4_write_dfLa(struct mp4_mux *mux, struct mp4_track *track)
  692. {
  693. struct serializer *s = mux->serializer;
  694. int64_t start = serializer_get_pos(s);
  695. uint8_t *extradata;
  696. size_t extradata_size;
  697. if (!obs_encoder_get_extra_data(track->encoder, &extradata,
  698. &extradata_size))
  699. return 0;
  700. write_fullbox(s, 0, "dfLa", 0, 0);
  701. /// FLACMetadataBlock
  702. // LastMetadataBlockFlag (1) | BlockType (0)
  703. s_w8(s, 1 << 7 | 0);
  704. // Length
  705. s_wb24(s, (uint32_t)extradata_size);
  706. // BlockData[Length]
  707. s_write(s, extradata, extradata_size);
  708. return write_box_size(s, start);
  709. }
  710. /// Encapsulation of FLAC in ISO Base Media File Format 3.3.1 FLACSampleEntry
  711. static size_t mp4_write_fLaC(struct mp4_mux *mux, struct mp4_track *track,
  712. uint8_t version)
  713. {
  714. struct serializer *s = mux->serializer;
  715. int64_t start = serializer_get_pos(s);
  716. write_box(s, 0, "fLaC");
  717. mp4_write_audio_sample_entry(mux, track, version);
  718. // dfLa
  719. mp4_write_dfLa(mux, track);
  720. if (version == 1)
  721. mp4_write_chnl(mux, track);
  722. return write_box_size(s, start);
  723. }
  724. /// Apple Lossless Format "Magic Cookie" Description - MP4/M4A File
  725. static size_t mp4_write_alac(struct mp4_mux *mux, struct mp4_track *track,
  726. uint8_t version)
  727. {
  728. struct serializer *s = mux->serializer;
  729. int64_t start = serializer_get_pos(s);
  730. uint8_t *extradata;
  731. size_t extradata_size;
  732. if (!obs_encoder_get_extra_data(track->encoder, &extradata,
  733. &extradata_size))
  734. return 0;
  735. write_box(s, 0, "alac");
  736. mp4_write_audio_sample_entry(mux, track, version);
  737. /* Apple Lossless Magic Cookie */
  738. s_write(s, extradata, extradata_size);
  739. if (version == 1)
  740. mp4_write_chnl(mux, track);
  741. return write_box_size(s, start);
  742. }
  743. /// ISO/IEC 23003-5 5.1 PCM configuration
  744. static size_t mp4_write_pcmc(struct mp4_mux *mux, struct mp4_track *track)
  745. {
  746. struct serializer *s = mux->serializer;
  747. int64_t start = serializer_get_pos(s);
  748. write_fullbox(s, 0, "pcmC", 0, 0);
  749. s_w8(s, 1); // endianness, 1 = little endian
  750. // bits per sample
  751. if (track->codec == CODEC_PCM_I16)
  752. s_w8(s, 16);
  753. else if (track->codec == CODEC_PCM_I24)
  754. s_w8(s, 24);
  755. else if (track->codec == CODEC_PCM_F32)
  756. s_w8(s, 32);
  757. return write_box_size(s, start);
  758. }
  759. /// ISO/IEC 23003-5 5.1 PCM configuration
  760. static size_t mp4_write_xpcm(struct mp4_mux *mux, struct mp4_track *track,
  761. uint8_t version)
  762. {
  763. struct serializer *s = mux->serializer;
  764. int64_t start = serializer_get_pos(s);
  765. /* Different box types for floating point and integer PCM*/
  766. write_box(s, 0, track->codec == CODEC_PCM_F32 ? "fpcm" : "ipcm");
  767. mp4_write_audio_sample_entry(mux, track, version);
  768. /* ChannelLayout (chnl) is required for PCM */
  769. mp4_write_chnl(mux, track);
  770. // pcmc
  771. mp4_write_pcmc(mux, track);
  772. return write_box_size(s, start);
  773. }
  774. /// (QTFF/Apple) Text sample description
  775. static size_t mp4_write_text(struct mp4_mux *mux)
  776. {
  777. struct serializer *s = mux->serializer;
  778. int64_t start = serializer_get_pos(s);
  779. write_fullbox(s, 0, "text", 0, 0);
  780. s_wb32(s, 1); // number of entries
  781. /* Preset sample description as used by FFmpeg. */
  782. s_write(s, &TEXT_STUB_HEADER, sizeof(TEXT_STUB_HEADER));
  783. return write_box_size(s, start);
  784. }
  /* Reads a little-endian 32-bit unsigned integer from the four bytes at
   * `ptr`. Each byte is widened to uint32_t before shifting so that a top
   * byte >= 0x80 is not shifted into the sign bit of a promoted int, which
   * would be undefined behaviour. */
  static inline uint32_t rl32(const uint8_t *ptr)
  {
      return ((uint32_t)ptr[3] << 24) | ((uint32_t)ptr[2] << 16) |
             ((uint32_t)ptr[1] << 8) | (uint32_t)ptr[0];
  }
  /* Reads a little-endian 16-bit unsigned integer from the two bytes at
   * `ptr`. */
  static inline uint16_t rl16(const uint8_t *ptr)
  {
      const unsigned low = ptr[0];
      const unsigned high = ptr[1];

      return (uint16_t)((high << 8) | low);
  }
  793. /// Encapsulation of Opus in ISO Base Media File Format 4.3.2 Opus Specific Box
  794. static size_t mp4_write_dOps(struct mp4_mux *mux, struct mp4_track *track)
  795. {
  796. struct serializer *s = mux->serializer;
  797. int64_t start = serializer_get_pos(s);
  798. uint8_t *extradata;
  799. size_t extradata_size;
  800. if (!obs_encoder_get_extra_data(track->encoder, &extradata,
  801. &extradata_size))
  802. return 0;
  803. write_box(s, 0, "dOps");
  804. s_w8(s, 0); // version
  805. uint8_t channels = *(extradata + 9);
  806. uint8_t channel_map = *(extradata + 18);
  807. s_w8(s, channels); // channel count
  808. // OpusHead is little-endian, but MP4 is big-endian, so we have to swap them here
  809. s_wb16(s, rl16(extradata + 10)); // pre-skip
  810. s_wb32(s, rl32(extradata + 12)); // input sample rate
  811. s_wb16(s, rl16(extradata + 16)); // output gain
  812. s_w8(s, channel_map); // channel mapping family
  813. if (channel_map)
  814. s_write(s, extradata + 19, 2 + channels);
  815. return write_box_size(s, start);
  816. }
  817. /// Encapsulation of Opus in ISO Base Media File Format 4.3.1 Sample entry format
  818. static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track,
  819. uint8_t version)
  820. {
  821. struct serializer *s = mux->serializer;
  822. int64_t start = serializer_get_pos(s);
  823. write_box(s, 0, "Opus");
  824. mp4_write_audio_sample_entry(mux, track, version);
  825. // dOps
  826. mp4_write_dOps(mux, track);
  827. if (version == 1)
  828. mp4_write_chnl(mux, track);
  829. return write_box_size(s, start);
  830. }
  831. /// 8.5.2 Sample Description Box
  832. static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
  833. {
  834. struct serializer *s = mux->serializer;
  835. int64_t start = serializer_get_pos(s);
  836. /* Anything but mono or stereo technically requires v1,
  837. * but in practice that doesn't appear to matter. */
  838. uint8_t version = 0;
  839. if (track->type == TRACK_AUDIO) {
  840. audio_t *audio = obs_encoder_audio(track->encoder);
  841. version = audio_output_get_channels(audio) > 2 ? 1 : 0;
  842. }
  843. write_fullbox(s, 0, "stsd", version, 0);
  844. s_wb32(s, 1); // entry_count
  845. // codec specific boxes
  846. if (track->type == TRACK_VIDEO) {
  847. if (track->codec == CODEC_H264)
  848. mp4_write_avc1(mux, track->encoder);
  849. else if (track->codec == CODEC_HEVC)
  850. mp4_write_hvc1(mux, track->encoder);
  851. else if (track->codec == CODEC_AV1)
  852. mp4_write_av01(mux, track->encoder);
  853. } else if (track->type == TRACK_AUDIO) {
  854. if (track->codec == CODEC_AAC)
  855. mp4_write_mp4a(mux, track, version);
  856. else if (track->codec == CODEC_OPUS)
  857. mp4_write_Opus(mux, track, version);
  858. else if (track->codec == CODEC_FLAC)
  859. mp4_write_fLaC(mux, track, version);
  860. else if (track->codec == CODEC_ALAC)
  861. mp4_write_alac(mux, track, version);
  862. else if (track->codec == CODEC_PCM_I16 ||
  863. track->codec == CODEC_PCM_I24 ||
  864. track->codec == CODEC_PCM_F32)
  865. mp4_write_xpcm(mux, track, version);
  866. } else if (track->type == TRACK_CHAPTERS) {
  867. mp4_write_text(mux);
  868. }
  869. return write_box_size(s, start);
  870. }
  871. /// 8.6.1.2 Decoding Time to Sample Box
  872. static size_t mp4_write_stts(struct mp4_mux *mux, struct mp4_track *track,
  873. bool fragmented)
  874. {
  875. struct serializer *s = mux->serializer;
  876. if (fragmented) {
  877. write_fullbox(s, 16, "stts", 0, 0);
  878. s_wb32(s, 0); // entry_count
  879. return 16;
  880. }
  881. int64_t start = serializer_get_pos(s);
  882. struct sample_delta *arr = track->deltas.array;
  883. size_t num = track->deltas.num;
  884. write_fullbox(s, 0, "stts", 0, 0);
  885. s_wb32(s, (uint32_t)num); // entry_count
  886. for (size_t idx = 0; idx < num; idx++) {
  887. struct sample_delta *smp = &arr[idx];
  888. uint64_t delta = util_mul_div64(smp->delta, track->timescale,
  889. track->timebase_den);
  890. s_wb32(s, smp->count); // sample_count
  891. s_wb32(s, (uint32_t)delta); // sample_delta
  892. }
  893. return write_box_size(s, start);
  894. }
  895. /// 8.6.2 Sync Sample Box
  896. static size_t mp4_write_stss(struct mp4_mux *mux, struct mp4_track *track)
  897. {
  898. struct serializer *s = mux->serializer;
  899. uint32_t num = (uint32_t)track->sync_samples.num;
  900. if (!num)
  901. return 0;
  902. /* 16 byte FullBox header + 4-bytes (u32) per sync sample */
  903. uint32_t size = 16 + 4 * num;
  904. write_fullbox(s, size, "stss", 0, 0);
  905. s_wb32(s, num); // entry_count
  906. for (size_t idx = 0; idx < num; idx++)
  907. s_wb32(s, track->sync_samples.array[idx]); // sample_number
  908. return size;
  909. }
  910. /// 8.6.1.3 Composition Time to Sample Box
  911. static size_t mp4_write_ctts(struct mp4_mux *mux, struct mp4_track *track)
  912. {
  913. struct serializer *s = mux->serializer;
  914. uint32_t num = (uint32_t)track->offsets.num;
  915. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  916. /* 16 byte FullBox header + 8-bytes (u32+u32/i32) per offset entry */
  917. uint32_t size = 16 + 8 * num;
  918. write_fullbox(s, size, "ctts", version, 0);
  919. s_wb32(s, num); // entry_count
  920. for (size_t idx = 0; idx < num; idx++) {
  921. int64_t offset = (int64_t)track->offsets.array[idx].offset *
  922. (int64_t)track->timescale /
  923. (int64_t)track->timebase_den;
  924. s_wb32(s, track->offsets.array[idx].count); // sample_count
  925. s_wb32(s, (uint32_t)offset); // sample_offset
  926. }
  927. return size;
  928. }
  929. /// 8.7.4 Sample To Chunk Box
  930. static size_t mp4_write_stsc(struct mp4_mux *mux, struct mp4_track *track,
  931. bool fragmented)
  932. {
  933. struct serializer *s = mux->serializer;
  934. if (fragmented) {
  935. write_fullbox(s, 16, "stsc", 0, 0);
  936. s_wb32(s, 0); // entry_count
  937. return 16;
  938. }
  939. struct chunk *arr = track->chunks.array;
  940. size_t arr_num = track->chunks.num;
  941. /* Compress into array with counter for repeating chunk sizes */
  942. DARRAY(struct chunk_run {
  943. uint32_t first;
  944. uint32_t samples;
  945. }) chunk_runs;
  946. da_init(chunk_runs);
  947. for (size_t idx = 0; idx < arr_num; idx++) {
  948. struct chunk *chk = &arr[idx];
  949. if (!chunk_runs.num ||
  950. chunk_runs.array[chunk_runs.num - 1].samples !=
  951. chk->samples) {
  952. struct chunk_run *cr = da_push_back_new(chunk_runs);
  953. cr->samples = chk->samples;
  954. cr->first = (uint32_t)idx + 1; // ISO-BMFF is 1-indexed
  955. }
  956. }
  957. uint32_t num = (uint32_t)chunk_runs.num;
  958. /* 16 byte FullBox header + 12-bytes (u32+u32+u32) per chunk run */
  959. uint32_t size = 16 + 12 * num;
  960. write_fullbox(s, size, "stsc", 0, 0);
  961. s_wb32(s, num); // entry_count
  962. for (size_t idx = 0; idx < num; idx++) {
  963. struct chunk_run *cr = &chunk_runs.array[idx];
  964. s_wb32(s, cr->first); // first_chunk
  965. s_wb32(s, cr->samples); // samples_per_chunk
  966. s_wb32(s, 1); // sample_description_index
  967. }
  968. da_free(chunk_runs);
  969. return size;
  970. }
  971. /// 8.7.3 Sample Size Boxes
  972. static size_t mp4_write_stsz(struct mp4_mux *mux, struct mp4_track *track,
  973. bool fragmented)
  974. {
  975. struct serializer *s = mux->serializer;
  976. if (fragmented) {
  977. write_fullbox(s, 20, "stsz", 0, 0);
  978. s_wb32(s, 0); // sample_size
  979. s_wb32(s, 0); // sample_count
  980. return 20;
  981. }
  982. int64_t start = serializer_get_pos(s);
  983. /* This should only ever happen when recording > 24 hours of
  984. * 48 kHz PCM audio or 828 days of 60 FPS video. */
  985. if (track->samples > UINT32_MAX) {
  986. warn("Track %u has too many samples, its duration may not be "
  987. "read correctly. Remuxing the file to another format such "
  988. "as MKV may be required.",
  989. track->track_id);
  990. }
  991. write_fullbox(s, 0, "stsz", 0, 0);
  992. if (track->sample_size) {
  993. /* Fixed size samples mean we don't need an array */
  994. s_wb32(s, track->sample_size); // sample_size
  995. s_wb32(s, (uint32_t)track->samples); // sample_count
  996. } else {
  997. s_wb32(s, 0); // sample_size
  998. s_wb32(s, (uint32_t)track->sample_sizes.num); // sample_count
  999. for (size_t idx = 0; idx < track->sample_sizes.num; idx++) {
  1000. s_wb32(s, track->sample_sizes.array[idx]); // entry_size
  1001. }
  1002. }
  1003. return write_box_size(s, start);
  1004. }
  1005. /// 8.7.5 Chunk Offset Box
  1006. static size_t mp4_write_stco(struct mp4_mux *mux, struct mp4_track *track,
  1007. bool fragmented)
  1008. {
  1009. struct serializer *s = mux->serializer;
  1010. if (fragmented) {
  1011. write_fullbox(s, 16, "stco", 0, 0);
  1012. s_wb32(s, 0); // entry_count
  1013. return 16;
  1014. }
  1015. struct chunk *arr = track->chunks.array;
  1016. uint32_t num = (uint32_t)track->chunks.num;
  1017. uint64_t last_off = arr[num - 1].offset;
  1018. uint32_t size;
  1019. bool co64 = last_off > UINT32_MAX;
  1020. /* When using 64-bit offsets we write 8-bytes (u64) per chunk,
  1021. * otherwise 4-bytes (u32). */
  1022. if (co64) {
  1023. size = 16 + 8 * num;
  1024. write_fullbox(s, size, "co64", 0, 0);
  1025. } else {
  1026. size = 16 + 4 * num;
  1027. write_fullbox(s, size, "stco", 0, 0);
  1028. }
  1029. s_wb32(s, num); // entry_count
  1030. for (size_t idx = 0; idx < num; idx++) {
  1031. if (co64)
  1032. s_wb64(s, arr[idx].offset); // chunk_offset
  1033. else
  1034. s_wb32(s, (uint32_t)arr[idx].offset); // chunk_offset
  1035. }
  1036. return size;
  1037. }
  1038. /// 8.9.3 Sample Group Description Box
  1039. static size_t mp4_write_sgpd_aac(struct mp4_mux *mux)
  1040. {
  1041. struct serializer *s = mux->serializer;
  1042. int64_t start = serializer_get_pos(s);
  1043. write_fullbox(s, 0, "sgpd", 1, 0);
  1044. s_write(s, "roll", 4); // grouping_tpye
  1045. s_wb32(s, 2); // default_length (i16)
  1046. s_wb32(s, 1); // entry_count
  1047. // AudioRollRecoveryEntry
  1048. s_wb16(s, -1); // roll_distance
  1049. return write_box_size(s, start);
  1050. }
  1051. /// 8.9.2 Sample to Group Box
  1052. static size_t mp4_write_sbgp_aac(struct mp4_mux *mux, struct mp4_track *track)
  1053. {
  1054. struct serializer *s = mux->serializer;
  1055. int64_t start = serializer_get_pos(s);
  1056. write_fullbox(s, 0, "sbgp", 0, 0);
  1057. /// 10.1 AudioRollRecoveryEntry
  1058. s_write(s, "roll", 4); // grouping_tpye
  1059. s_wb32(s, 1); // entry_count
  1060. s_wb32(s, (uint32_t)track->samples); // sample_count
  1061. s_wb32(s, 1); // group_description_index
  1062. return write_box_size(s, start);
  1063. }
  1064. static size_t mp4_write_sbgp_sbgp_opus(struct mp4_mux *mux,
  1065. struct mp4_track *track)
  1066. {
  1067. struct serializer *s = mux->serializer;
  1068. int64_t start = serializer_get_pos(s);
  1069. /// 8.9.3 Sample Group Description Box
  1070. write_fullbox(s, 0, "sgpd", 1, 0);
  1071. s_write(s, "roll", 4); // grouping_tpye
  1072. s_wb32(s, 2); // default_length (i16)
  1073. /* Opus requires 80 ms of preroll, which at 48 kHz is 3840 PCM samples */
  1074. const int64_t opus_preroll = 3840;
  1075. /* Compute the preroll samples (should be 4, each being 20 ms) */
  1076. uint16_t preroll_count = 0;
  1077. int64_t preroll_remaining = opus_preroll;
  1078. for (size_t i = 0; i < track->deltas.num && preroll_remaining > 0;
  1079. i++) {
  1080. for (uint32_t j = 0;
  1081. j < track->deltas.array[i].count && preroll_remaining > 0;
  1082. j++) {
  1083. preroll_remaining -= track->deltas.array[i].delta;
  1084. preroll_count++;
  1085. }
  1086. }
  1087. s_wb32(s, 1); // entry_count
  1088. /// 10.1 AudioRollRecoveryEntry
  1089. s_wb16(s, -preroll_count); // roll_distance
  1090. size_t size_sgpd = write_box_size(s, start);
  1091. /* --------------- */
  1092. /// 8.9.2 Sample to Group Box
  1093. start = serializer_get_pos(s);
  1094. write_fullbox(s, 0, "sbgp", 0, 0);
  1095. s_write(s, "roll", 4); // grouping_tpye
  1096. s_wb32(s, 2); // entry_count
  1097. // entry 0
  1098. s_wb32(s, preroll_count); // sample_count
  1099. s_wb32(s, 0); // group_description_index
  1100. // entry 1
  1101. s_wb32(s, (uint32_t)track->samples - preroll_count); // sample_count
  1102. s_wb32(s, 1); // group_description_index
  1103. return size_sgpd + write_box_size(s, start);
  1104. }
  1105. /// 8.5.1 Sample Table Box
  1106. static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track,
  1107. bool fragmented)
  1108. {
  1109. struct serializer *s = mux->serializer;
  1110. int64_t start = serializer_get_pos(s);
  1111. write_box(s, 0, "stbl");
  1112. // stsd
  1113. mp4_write_stsd(mux, track);
  1114. // stts
  1115. mp4_write_stts(mux, track, fragmented);
  1116. // stss (non-fragmented only)
  1117. if (track->type == TRACK_VIDEO && !fragmented)
  1118. mp4_write_stss(mux, track);
  1119. // ctts (non-fragmented only)
  1120. if (track->needs_ctts && !fragmented)
  1121. mp4_write_ctts(mux, track);
  1122. // stsc
  1123. mp4_write_stsc(mux, track, fragmented);
  1124. // stsz
  1125. mp4_write_stsz(mux, track, fragmented);
  1126. // stco
  1127. mp4_write_stco(mux, track, fragmented);
  1128. if (!fragmented) {
  1129. /* AAC and Opus require a pre-roll to get correct decoder
  1130. * output, sgpd and sbgp are used to create a "roll" group. */
  1131. if (track->codec == CODEC_AAC) {
  1132. // sgpd
  1133. mp4_write_sgpd_aac(mux);
  1134. // sbgp
  1135. mp4_write_sbgp_aac(mux, track);
  1136. } else if (track->codec == CODEC_OPUS) {
  1137. // sgpd + sbgp
  1138. mp4_write_sbgp_sbgp_opus(mux, track);
  1139. }
  1140. }
  1141. return write_box_size(s, start);
  1142. }
  1143. /// 8.7.2.2 DataEntryUrlBox
  1144. static size_t mp4_write_url(struct mp4_mux *mux)
  1145. {
  1146. struct serializer *s = mux->serializer;
  1147. int64_t start = serializer_get_pos(s);
  1148. write_fullbox(s, 0, "url ", 0, 1);
  1149. /* empty, flag 1 means data is in this file */
  1150. return write_box_size(s, start);
  1151. }
  1152. /// 8.7.2 Data Reference Box
  1153. static size_t mp4_write_dref(struct mp4_mux *mux)
  1154. {
  1155. struct serializer *s = mux->serializer;
  1156. int64_t start = serializer_get_pos(s);
  1157. write_fullbox(s, 0, "dref ", 0, 0);
  1158. s_wb32(s, 1); // entry_count
  1159. mp4_write_url(mux);
  1160. return write_box_size(s, start);
  1161. }
  1162. /// 8.7.1 Data Information Box
  1163. static size_t mp4_write_dinf(struct mp4_mux *mux)
  1164. {
  1165. struct serializer *s = mux->serializer;
  1166. int64_t start = serializer_get_pos(s);
  1167. write_box(s, 0, "dinf");
  1168. mp4_write_dref(mux);
  1169. return write_box_size(s, start);
  1170. }
  1171. /// 8.4.4 Media Information Box
  1172. static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track,
  1173. bool fragmented)
  1174. {
  1175. struct serializer *s = mux->serializer;
  1176. int64_t start = serializer_get_pos(s);
  1177. write_box(s, 0, "minf");
  1178. // vmhd/smhd/gmhd
  1179. if (track->type == TRACK_VIDEO)
  1180. mp4_write_vmhd(mux);
  1181. else if (track->type == TRACK_CHAPTERS)
  1182. mp4_write_gmhd(mux);
  1183. else
  1184. mp4_write_smhd(mux);
  1185. // dinf, unnecessary but mandatory
  1186. mp4_write_dinf(mux);
  1187. // stbl
  1188. mp4_write_stbl(mux, track, fragmented);
  1189. return write_box_size(s, start);
  1190. }
  1191. /// 8.4.1 Media Box
  1192. static size_t mp4_write_mdia(struct mp4_mux *mux, struct mp4_track *track,
  1193. bool fragmented)
  1194. {
  1195. struct serializer *s = mux->serializer;
  1196. int64_t start = serializer_get_pos(s);
  1197. write_box(s, 0, "mdia");
  1198. // mdhd
  1199. mp4_write_mdhd(mux, track);
  1200. // hdlr
  1201. mp4_write_hdlr(mux, track);
  1202. // minf
  1203. mp4_write_minf(mux, track, fragmented);
  1204. return write_box_size(s, start);
  1205. }
  1206. /// (QTFF/Apple) User data atom
  1207. static size_t mp4_write_udta_atom(struct mp4_mux *mux, const char tag[4],
  1208. const char *val)
  1209. {
  1210. struct serializer *s = mux->serializer;
  1211. int64_t start = serializer_get_pos(s);
  1212. write_box(s, 0, tag);
  1213. s_write(s, val, strlen(val));
  1214. return write_box_size(s, start);
  1215. }
  1216. /// 8.10.1 User Data Box
  1217. static size_t mp4_write_track_udta(struct mp4_mux *mux, struct mp4_track *track)
  1218. {
  1219. struct serializer *s = mux->serializer;
  1220. int64_t start = serializer_get_pos(s);
  1221. write_box(s, 0, "udta");
  1222. /* Our udta box contains QuickTime format user data atoms, which are
  1223. * simple key-value pairs. Some are prefixed with 0xa9. */
  1224. const char *name = obs_encoder_get_name(track->encoder);
  1225. if (name)
  1226. mp4_write_udta_atom(mux, "name", name);
  1227. if (mux->flags & MP4_WRITE_ENCODER_INFO) {
  1228. const char *id = obs_encoder_get_id(track->encoder);
  1229. if (name)
  1230. mp4_write_udta_atom(mux, "\251enc", id);
  1231. obs_data_t *settings = obs_encoder_get_settings(track->encoder);
  1232. if (settings) {
  1233. const char *json =
  1234. obs_data_get_json_with_defaults(settings);
  1235. mp4_write_udta_atom(mux, "json", json);
  1236. obs_data_release(settings);
  1237. }
  1238. }
  1239. return write_box_size(s, start);
  1240. }
  1241. /// 8.6.6 Edit List Box
  1242. static size_t mp4_write_elst(struct mp4_mux *mux, struct mp4_track *track)
  1243. {
  1244. struct serializer *s = mux->serializer;
  1245. int64_t start = serializer_get_pos(s);
  1246. write_fullbox(s, 0, "elst", 0, 0);
  1247. s_wb32(s, 1); // entry count
  1248. uint64_t duration =
  1249. util_mul_div64(track->duration, 1000, track->timebase_den);
  1250. uint64_t delay = 0;
  1251. if (track->type == TRACK_VIDEO &&
  1252. !(mux->flags & MP4_USE_NEGATIVE_CTS)) {
  1253. /* Compensate for frame-reordering delay (for example, when
  1254. * using b-frames). */
  1255. int64_t dts_offset = 0;
  1256. if (track->offsets.num) {
  1257. struct sample_offset sample = track->offsets.array[0];
  1258. dts_offset = sample.offset;
  1259. } else if (track->packets.size) {
  1260. /* If no offset data exists yet (i.e. when writing the
  1261. * incomplete moov in a fragmented file) use the raw
  1262. * data from the current queued packets instead. */
  1263. struct encoder_packet pkt;
  1264. deque_peek_front(&track->packets, &pkt, sizeof(pkt));
  1265. dts_offset = pkt.pts - pkt.dts;
  1266. }
  1267. delay = util_mul_div64(dts_offset, track->timescale,
  1268. track->timebase_den);
  1269. } else if (track->type == TRACK_AUDIO && track->first_pts < 0) {
  1270. delay = util_mul_div64(llabs(track->first_pts),
  1271. track->timescale, track->timebase_den);
  1272. /* Subtract priming delay from total duration */
  1273. duration -= util_mul_div64(delay, 1000, track->timescale);
  1274. }
  1275. s_wb32(s, (uint32_t)duration); // segment_duration (movie timescale)
  1276. s_wb32(s, (uint32_t)delay); // media_time (track timescale)
  1277. s_wb32(s, 1 << 16); // media_rate
  1278. return write_box_size(s, start);
  1279. }
  1280. /// 8.6.5 Edit Box
  1281. static size_t mp4_write_edts(struct mp4_mux *mux, struct mp4_track *track)
  1282. {
  1283. struct serializer *s = mux->serializer;
  1284. int64_t start = serializer_get_pos(s);
  1285. write_box(s, 0, "edts");
  1286. mp4_write_elst(mux, track);
  1287. return write_box_size(s, start);
  1288. }
  1289. /// 8.3.3.2 TrackReferenceTypeBox
  1290. static size_t mp4_write_chap(struct mp4_mux *mux)
  1291. {
  1292. struct serializer *s = mux->serializer;
  1293. int64_t start = serializer_get_pos(s);
  1294. /// QTFF/Apple chapter track reference
  1295. write_box(s, 0, "chap");
  1296. s_wb32(s, mux->chapter_track->track_id);
  1297. return write_box_size(s, start);
  1298. }
  1299. /// 8.3.3 Track Reference Box
  1300. static size_t mp4_write_tref(struct mp4_mux *mux)
  1301. {
  1302. struct serializer *s = mux->serializer;
  1303. int64_t start = serializer_get_pos(s);
  1304. write_box(s, 0, "tref");
  1305. mp4_write_chap(mux);
  1306. return write_box_size(s, start);
  1307. }
  1308. /// 8.3.1 Track Box
  1309. static size_t mp4_write_trak(struct mp4_mux *mux, struct mp4_track *track,
  1310. bool fragmented)
  1311. {
  1312. struct serializer *s = mux->serializer;
  1313. int64_t start = serializer_get_pos(s);
  1314. write_box(s, 0, "trak");
  1315. // tkhd
  1316. mp4_write_tkhd(mux, track);
  1317. // edts
  1318. mp4_write_edts(mux, track);
  1319. // tref
  1320. if (mux->chapter_track && track->type != TRACK_CHAPTERS)
  1321. mp4_write_tref(mux);
  1322. // mdia
  1323. mp4_write_mdia(mux, track, fragmented);
  1324. // udta (audio track name mainly)
  1325. mp4_write_track_udta(mux, track);
  1326. return write_box_size(s, start);
  1327. }
  1328. /// 8.8.3 Track Extends Box
  1329. static size_t mp4_write_trex(struct mp4_mux *mux, uint32_t track_id)
  1330. {
  1331. struct serializer *s = mux->serializer;
  1332. write_fullbox(s, 32, "trex", 0, 0);
  1333. s_wb32(s, track_id); // track_ID
  1334. s_wb32(s, 1); // default_sample_description_index
  1335. s_wb32(s, 0); // default_sample_duration
  1336. s_wb32(s, 0); // default_sample_size
  1337. s_wb32(s, 0); // default_sample_flags
  1338. return 32;
  1339. }
  1340. /// 8.8.1 Movie Extends Box
  1341. static size_t mp4_write_mvex(struct mp4_mux *mux)
  1342. {
  1343. struct serializer *s = mux->serializer;
  1344. int64_t start = serializer_get_pos(s);
  1345. write_box(s, 0, "mvex");
  1346. for (size_t track_id = 0; track_id < mux->tracks.num; track_id++)
  1347. mp4_write_trex(mux, (uint32_t)(track_id + 1));
  1348. return write_box_size(s, start);
  1349. }
  1350. /// (QTFF/Apple) Undocumented QuickTime/iTunes metadata handler
  1351. static size_t mp4_write_itunes_hdlr(struct mp4_mux *mux)
  1352. {
  1353. struct serializer *s = mux->serializer;
  1354. write_fullbox(s, 33, "hdlr", 0, 0);
  1355. s_wb32(s, 0); // pre_defined
  1356. s_write(s, "mdir", 4); // handler_type
  1357. // reserved
  1358. s_write(s, "appl", 4);
  1359. s_wb32(s, 0);
  1360. s_wb32(s, 0);
  1361. s_w8(s, 0); // name (NULL)
  1362. return 33;
  1363. }
  1364. /// (QTFF/Apple) Data atom
  1365. static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
  1366. {
  1367. struct serializer *s = mux->serializer;
  1368. size_t len = strlen(data);
  1369. uint32_t size = 16 + (uint32_t)len;
  1370. write_box(s, size, "data");
  1371. s_wb32(s, 1); // type, 1 = utf-8 string
  1372. s_wb32(s, 0); // locale, 0 = default
  1373. s_write(s, data, len);
  1374. return size;
  1375. }
  1376. /// (QTFF/Apple) Metadata item atom
  1377. static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4],
  1378. const char *value)
  1379. {
  1380. struct serializer *s = mux->serializer;
  1381. int64_t start = serializer_get_pos(s);
  1382. write_box(s, 0, name);
  1383. mp4_write_data_atom(mux, value);
  1384. return write_box_size(s, start);
  1385. }
  1386. /// (QTFF/Apple) Metadata item list atom
  1387. static size_t mp4_write_ilst(struct mp4_mux *mux)
  1388. {
  1389. struct serializer *s = mux->serializer;
  1390. struct dstr value = {0};
  1391. int64_t start = serializer_get_pos(s);
  1392. write_box(s, 0, "ilst");
  1393. /* Encoder name */
  1394. dstr_cat(&value, "OBS Studio (");
  1395. dstr_cat(&value, obs_get_version_string());
  1396. dstr_cat(&value, ")");
  1397. /* Some QuickTime keys are prefixed with 0xa9 */
  1398. mp4_write_ilst_item_atom(mux, "\251too", value.array);
  1399. dstr_free(&value);
  1400. return write_box_size(s, start);
  1401. }
  1402. /// (QTFF/Apple) Key value metadata handler
  1403. static size_t mp4_write_mdta_hdlr(struct mp4_mux *mux)
  1404. {
  1405. struct serializer *s = mux->serializer;
  1406. write_fullbox(s, 33, "hdlr", 0, 0);
  1407. s_wb32(s, 0); // pre_defined
  1408. s_write(s, "mdta", 4); // handler_type
  1409. // reserved
  1410. s_wb32(s, 0);
  1411. s_wb32(s, 0);
  1412. s_wb32(s, 0);
  1413. s_w8(s, 0); // name (NULL)
  1414. return 33;
  1415. }
  1416. /// (QTFF/Apple) Metadata item keys atom
  1417. static size_t mp4_write_mdta_keys(struct mp4_mux *mux, obs_data_t *meta)
  1418. {
  1419. struct serializer *s = mux->serializer;
  1420. int64_t start = serializer_get_pos(s);
  1421. write_fullbox(s, 0, "keys", 0, 0);
  1422. uint32_t count = 0;
  1423. int64_t count_pos = serializer_get_pos(s);
  1424. s_wb32(s, count); // count
  1425. obs_data_item_t *item = obs_data_first(meta);
  1426. for (; item != NULL; obs_data_item_next(&item)) {
  1427. const char *name = obs_data_item_get_name(item);
  1428. size_t len = strlen(name);
  1429. /* name is key type, can be udta or mdta */
  1430. write_box(s, len + 8, "mdta");
  1431. s_write(s, name, len); // key name
  1432. count++;
  1433. }
  1434. int64_t end = serializer_get_pos(s);
  1435. /* Overwrite count with correct value */
  1436. serializer_seek(s, count_pos, SERIALIZE_SEEK_START);
  1437. s_wb32(s, count);
  1438. serializer_seek(s, end, SERIALIZE_SEEK_START);
  1439. return write_box_size(s, start);
  1440. }
  1441. /// (QTFF/Apple) Metadata item atom, but name is an index instead
  1442. static inline void write_key_entry(struct mp4_mux *mux, obs_data_item_t *item,
  1443. uint32_t idx)
  1444. {
  1445. struct serializer *s = mux->serializer;
  1446. int64_t start = serializer_get_pos(s);
  1447. s_wb32(s, 0); // size
  1448. s_wb32(s, idx); // index
  1449. mp4_write_data_atom(mux, obs_data_item_get_string(item));
  1450. write_box_size(s, start);
  1451. }
  1452. /// (QTFF/Apple) Metadata item list atom
  1453. static size_t mp4_write_mdta_ilst(struct mp4_mux *mux, obs_data_t *meta)
  1454. {
  1455. struct serializer *s = mux->serializer;
  1456. int64_t start = serializer_get_pos(s);
  1457. write_box(s, 0, "ilst");
  1458. /* indices start with 1 */
  1459. uint32_t key_idx = 1;
  1460. obs_data_item_t *item = obs_data_first(meta);
  1461. for (; item != NULL; obs_data_item_next(&item)) {
  1462. write_key_entry(mux, item, key_idx);
  1463. key_idx++;
  1464. }
  1465. return write_box_size(s, start);
  1466. }
  1467. static void mp4_write_mdta_kv(struct mp4_mux *mux)
  1468. {
  1469. struct dstr value = {0};
  1470. obs_data_t *meta = obs_data_create();
  1471. dstr_cat(&value, "OBS Studio (");
  1472. dstr_cat(&value, obs_get_version_string());
  1473. dstr_cat(&value, ")");
  1474. // ToDo figure out what else we could put in here for fun and profit :)
  1475. obs_data_set_string(meta, "tool", value.array);
  1476. /* Write keys */
  1477. mp4_write_mdta_keys(mux, meta);
  1478. /* Write values */
  1479. mp4_write_mdta_ilst(mux, meta);
  1480. obs_data_release(meta);
  1481. dstr_free(&value);
  1482. }
  1483. /// 8.11.1 The Meta box
  1484. static size_t mp4_write_meta(struct mp4_mux *mux)
  1485. {
  1486. struct serializer *s = mux->serializer;
  1487. int64_t start = serializer_get_pos(s);
  1488. write_fullbox(s, 0, "meta", 0, 0);
  1489. if (mux->flags & MP4_USE_MDTA_KEY_VALUE) {
  1490. mp4_write_mdta_hdlr(mux);
  1491. mp4_write_mdta_kv(mux);
  1492. } else {
  1493. mp4_write_itunes_hdlr(mux);
  1494. mp4_write_ilst(mux);
  1495. }
  1496. return write_box_size(s, start);
  1497. }
  1498. /// 8.10.1 User Data Box
  1499. static size_t mp4_write_udta(struct mp4_mux *mux)
  1500. {
  1501. struct serializer *s = mux->serializer;
  1502. int64_t start = serializer_get_pos(s);
  1503. write_box(s, 0, "udta");
  1504. /* Normally metadata would be directly in the moov, but since this is
  1505. * Apple/QTFF format metadata it is inside udta. */
  1506. // meta
  1507. mp4_write_meta(mux);
  1508. return write_box_size(s, start);
  1509. }
  1510. /// Movie Box (8.2.1)
  1511. static size_t mp4_write_moov(struct mp4_mux *mux, bool fragmented)
  1512. {
  1513. struct serializer *s = mux->serializer;
  1514. int64_t start = serializer_get_pos(s);
  1515. write_box(s, 0, "moov");
  1516. mp4_write_mvhd(mux);
  1517. // trak(s)
  1518. for (size_t i = 0; i < mux->tracks.num; i++) {
  1519. struct mp4_track *track = &mux->tracks.array[i];
  1520. mp4_write_trak(mux, track, fragmented);
  1521. }
  1522. if (!fragmented && mux->chapter_track)
  1523. mp4_write_trak(mux, mux->chapter_track, false);
  1524. // mvex
  1525. if (fragmented)
  1526. mp4_write_mvex(mux);
  1527. // udta (metadata)
  1528. mp4_write_udta(mux);
  1529. return write_box_size(s, start);
  1530. }
  1531. /* ========================================================================== */
  1532. /* moof (fragment header) stuff */
  1533. /// 8.8.5 Movie Fragment Header Box
  1534. static size_t mp4_write_mfhd(struct mp4_mux *mux)
  1535. {
  1536. struct serializer *s = mux->serializer;
  1537. write_fullbox(s, 16, "mfhd", 0, 0);
  1538. s_wb32(s, mux->fragments_written); // sequence_number
  1539. return 16;
  1540. }
  1541. /// 8.8.7 Track Fragment Header Box
  1542. static size_t mp4_write_tfhd(struct mp4_mux *mux, struct mp4_track *track,
  1543. size_t moof_start)
  1544. {
  1545. struct serializer *s = mux->serializer;
  1546. int64_t start = serializer_get_pos(s);
  1547. uint32_t flags = BASE_DATA_OFFSET_PRESENT |
  1548. DEFAULT_SAMPLE_FLAGS_PRESENT;
  1549. /* Add default size/duration if all samples match. */
  1550. bool durations_match = true;
  1551. bool sizes_match = true;
  1552. uint32_t duration;
  1553. uint32_t sample_size;
  1554. if (track->sample_size) {
  1555. duration = 1;
  1556. sample_size = track->sample_size;
  1557. } else {
  1558. duration = track->fragment_samples.array[0].duration;
  1559. sample_size = track->fragment_samples.array[0].size;
  1560. for (size_t idx = 1; idx < track->fragment_samples.num; idx++) {
  1561. uint32_t frag_duration =
  1562. track->fragment_samples.array[idx].duration;
  1563. uint32_t frag_size =
  1564. track->fragment_samples.array[idx].size;
  1565. durations_match = frag_duration == duration;
  1566. sizes_match = frag_size == sample_size;
  1567. }
  1568. }
  1569. if (durations_match)
  1570. flags |= DEFAULT_SAMPLE_DURATION_PRESENT;
  1571. if (sizes_match)
  1572. flags |= DEFAULT_SAMPLE_SIZE_PRESENT;
  1573. write_fullbox(s, 0, "tfhd", 0, flags);
  1574. s_wb32(s, track->track_id); // track_ID
  1575. s_wb64(s, moof_start); // base_data_offset
  1576. // default_sample_duration
  1577. if (durations_match) {
  1578. if (track->type == TRACK_VIDEO) {
  1579. /* Convert duration to track timescale */
  1580. duration = (uint32_t)util_mul_div64(
  1581. duration, track->timescale,
  1582. track->timebase_den);
  1583. }
  1584. s_wb32(s, duration);
  1585. }
  1586. // default_sample_size
  1587. if (sizes_match)
  1588. s_wb32(s, sample_size);
  1589. // default_sample_flags
  1590. if (track->type == TRACK_VIDEO) {
  1591. s_wb32(s, SAMPLE_FLAG_DEPENDS_YES | SAMPLE_FLAG_IS_NON_SYNC);
  1592. } else {
  1593. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO);
  1594. }
  1595. return write_box_size(s, start);
  1596. }
  1597. /// 8.8.12 Track fragment decode time
  1598. static size_t mp4_write_tfdt(struct mp4_mux *mux, struct mp4_track *track)
  1599. {
  1600. struct serializer *s = mux->serializer;
  1601. write_fullbox(s, 20, "tfdt", 1, 0);
  1602. /* Subtract samples that are not written yet */
  1603. uint64_t duration_written = track->duration;
  1604. for (size_t i = 0; i < track->fragment_samples.num; i++)
  1605. duration_written -= track->fragment_samples.array[i].duration;
  1606. if (track->type == TRACK_VIDEO) {
  1607. /* Convert to track timescale */
  1608. duration_written = util_mul_div64(duration_written,
  1609. track->timescale,
  1610. track->timebase_den);
  1611. }
  1612. s_wb64(s, duration_written); // baseMediaDecodeTime
  1613. return 20;
  1614. }
  1615. /// 8.8.8 Track Fragment Run Box
  1616. static size_t mp4_write_trun(struct mp4_mux *mux, struct mp4_track *track,
  1617. uint32_t moof_size, uint64_t *samples_mdat_offset)
  1618. {
  1619. struct serializer *s = mux->serializer;
  1620. int64_t start = serializer_get_pos(s);
  1621. uint32_t flags = DATA_OFFSET_PRESENT;
  1622. if (!track->sample_size)
  1623. flags |= SAMPLE_SIZE_PRESENT;
  1624. if (track->type == TRACK_VIDEO) {
  1625. flags |= FIRST_SAMPLE_FLAGS_PRESENT;
  1626. flags |= SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT;
  1627. }
  1628. uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
  1629. write_fullbox(s, 0, "trun", version, flags);
  1630. /* moof_size + 8 bytes for mdat header + offset into mdat box data */
  1631. size_t data_offset = moof_size + 8 + *samples_mdat_offset;
  1632. size_t sample_count = track->fragment_samples.num;
  1633. if (track->sample_size) {
  1634. /* Update count based on fixed size */
  1635. size_t total_size = 0;
  1636. for (size_t i = 0; i < sample_count; i++)
  1637. total_size += track->fragment_samples.array[i].size;
  1638. *samples_mdat_offset += total_size;
  1639. sample_count = total_size / track->sample_size;
  1640. }
  1641. s_wb32(s, (uint32_t)sample_count); // sample_count
  1642. s_wb32(s, (uint32_t)data_offset); // data_offset
  1643. /* If we have a fixed sample size (PCM audio) we only need to write
  1644. * the sample count and offset. */
  1645. if (track->sample_size)
  1646. return write_box_size(s, start);
  1647. if (track->type == TRACK_VIDEO)
  1648. s_wb32(s, SAMPLE_FLAG_DEPENDS_NO); // first_sample_flags
  1649. for (size_t idx = 0; idx < sample_count; idx++) {
  1650. struct fragment_sample *smp =
  1651. &track->fragment_samples.array[idx];
  1652. s_wb32(s, smp->size); // sample_size
  1653. if (track->type == TRACK_VIDEO) {
  1654. // sample_composition_time_offset
  1655. int64_t offset = (int64_t)smp->offset *
  1656. (int64_t)track->timescale /
  1657. (int64_t)track->timebase_den;
  1658. s_wb32(s, (uint32_t)offset);
  1659. }
  1660. *samples_mdat_offset += smp->size;
  1661. }
  1662. return write_box_size(s, start);
  1663. }
  1664. /// 8.8.6 Track Fragment Box
  1665. static size_t mp4_write_traf(struct mp4_mux *mux, struct mp4_track *track,
  1666. int64_t moof_start, uint32_t moof_size,
  1667. uint64_t *samples_mdat_offset)
  1668. {
  1669. struct serializer *s = mux->serializer;
  1670. int64_t start = serializer_get_pos(s);
  1671. write_box(s, 0, "traf");
  1672. // tfhd
  1673. mp4_write_tfhd(mux, track, moof_start);
  1674. // tfdt
  1675. mp4_write_tfdt(mux, track);
  1676. // trun
  1677. mp4_write_trun(mux, track, moof_size, samples_mdat_offset);
  1678. return write_box_size(s, start);
  1679. }
  1680. /// 8.8.4 Movie Fragment Box
  1681. static size_t mp4_write_moof(struct mp4_mux *mux, uint32_t moof_size,
  1682. int64_t moof_start)
  1683. {
  1684. struct serializer *s = mux->serializer;
  1685. int64_t start = serializer_get_pos(s);
  1686. write_box(s, 0, "moof");
  1687. mp4_write_mfhd(mux);
  1688. /* Track current mdat offset across tracks */
  1689. uint64_t samples_mdat_offset = 0;
  1690. // traf boxes
  1691. for (size_t i = 0; i < mux->tracks.num; i++) {
  1692. struct mp4_track *track = &mux->tracks.array[i];
  1693. /* Skip tracks that do not have any samples */
  1694. if (!track->fragment_samples.num)
  1695. continue;
  1696. mp4_write_traf(mux, track, moof_start, moof_size,
  1697. &samples_mdat_offset);
  1698. }
  1699. return write_box_size(s, start);
  1700. }
  1701. /* ========================================================================== */
  1702. /* Chapter packets */
  1703. static void mp4_create_chapter_pkt(struct encoder_packet *pkt, int64_t dts_usec,
  1704. const char *name)
  1705. {
  1706. int64_t dts = dts_usec / 1000; // chapter track uses a ms timebase
  1707. pkt->pts = dts;
  1708. pkt->dts = dts;
  1709. pkt->dts_usec = dts_usec;
  1710. pkt->timebase_num = 1;
  1711. pkt->timebase_den = 1000;
  1712. /* Serialize with data with ref count */
  1713. struct serializer s;
  1714. struct array_output_data ao;
  1715. array_output_serializer_init(&s, &ao);
  1716. size_t len = min(strlen(name), UINT16_MAX);
  1717. long refs = 1;
  1718. /* encoder_packet refs */
  1719. s_write(&s, &refs, sizeof(refs));
  1720. /* actual packet data */
  1721. s_wb16(&s, (uint16_t)len);
  1722. s_write(&s, name, len);
  1723. s_write(&s, &CHAPTER_PKT_FOOTER, sizeof(CHAPTER_PKT_FOOTER));
  1724. pkt->data = (void *)(ao.bytes.array + sizeof(long));
  1725. pkt->size = ao.bytes.num - sizeof(long);
  1726. }
  1727. /* ========================================================================== */
  1728. /* Encoder packet processing and fragment writer */
  1729. static inline int64_t packet_pts_usec(struct encoder_packet *packet)
  1730. {
  1731. return packet->pts * 1000000 / packet->timebase_den;
  1732. }
  1733. static inline struct encoder_packet *get_pkt_at(struct deque *dq, size_t idx)
  1734. {
  1735. return deque_data(dq, idx * sizeof(struct encoder_packet));
  1736. }
  1737. static inline uint64_t get_longest_track_duration(struct mp4_mux *mux)
  1738. {
  1739. uint64_t dur = 0;
  1740. for (size_t i = 0; i < mux->tracks.num; i++) {
  1741. struct mp4_track *track = &mux->tracks.array[i];
  1742. uint64_t track_dur = util_mul_div64(track->duration, 1000,
  1743. track->timebase_den);
  1744. if (track_dur > dur)
  1745. dur = track_dur;
  1746. }
  1747. return dur;
  1748. }
  1749. static void process_packets(struct mp4_mux *mux, struct mp4_track *track,
  1750. uint64_t *mdat_size)
  1751. {
  1752. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1753. if (!count)
  1754. return;
  1755. /* Only iterate upt to penultimate packet so we can determine duration
  1756. * for all processed packets. */
  1757. for (size_t i = 0; i < count - 1; i++) {
  1758. struct encoder_packet *pkt = get_pkt_at(&track->packets, i);
  1759. if (mux->next_frag_pts &&
  1760. packet_pts_usec(pkt) >= mux->next_frag_pts)
  1761. break;
  1762. struct encoder_packet *next =
  1763. get_pkt_at(&track->packets, i + 1);
  1764. /* Duration is just distance between current and next DTS. */
  1765. uint32_t duration = (uint32_t)(next->dts - pkt->dts);
  1766. uint32_t sample_count = 1;
  1767. uint32_t size = (uint32_t)pkt->size;
  1768. int32_t offset = (int32_t)(pkt->pts - pkt->dts);
  1769. /* When using negative CTS, subtract DTS-PTS offset. */
  1770. if (track->type == TRACK_VIDEO &&
  1771. mux->flags & MP4_USE_NEGATIVE_CTS) {
  1772. if (!track->offsets.num)
  1773. track->dts_offset = offset;
  1774. offset -= track->dts_offset;
  1775. }
  1776. /* Create temporary sample information for moof */
  1777. struct fragment_sample *smp =
  1778. da_push_back_new(track->fragment_samples);
  1779. smp->size = size;
  1780. smp->offset = offset;
  1781. smp->duration = duration;
  1782. *mdat_size += size;
  1783. /* Update global sample information for full moov */
  1784. track->duration += duration;
  1785. if (track->sample_size) {
  1786. /* Adjust duration/count for fixed sample size */
  1787. sample_count = size / track->sample_size;
  1788. duration = 1;
  1789. }
  1790. if (!track->samples)
  1791. track->first_pts = pkt->pts;
  1792. track->samples += sample_count;
  1793. /* If delta (duration) matche sprevious, increment counter,
  1794. * otherwise create a new entry. */
  1795. if (track->deltas.num == 0 ||
  1796. track->deltas.array[track->deltas.num - 1].delta !=
  1797. duration) {
  1798. struct sample_delta *new =
  1799. da_push_back_new(track->deltas);
  1800. new->delta = duration;
  1801. new->count = sample_count;
  1802. } else {
  1803. track->deltas.array[track->deltas.num - 1].count +=
  1804. sample_count;
  1805. }
  1806. if (!track->sample_size)
  1807. da_push_back(track->sample_sizes, &size);
  1808. if (track->type != TRACK_VIDEO)
  1809. continue;
  1810. if (pkt->keyframe)
  1811. da_push_back(track->sync_samples, &track->samples);
  1812. /* Only require ctts box if offet is non-zero */
  1813. if (offset && !track->needs_ctts)
  1814. track->needs_ctts = true;
  1815. /* If dts-pts offset matche sprevious, increment counter,
  1816. * otherwise create a new entry. */
  1817. if (track->offsets.num == 0 ||
  1818. track->offsets.array[track->offsets.num - 1].offset !=
  1819. offset) {
  1820. struct sample_offset *new =
  1821. da_push_back_new(track->offsets);
  1822. new->offset = offset;
  1823. new->count = 1;
  1824. } else {
  1825. track->offsets.array[track->offsets.num - 1].count += 1;
  1826. }
  1827. }
  1828. }
  1829. /* Write track data to file */
  1830. static void write_packets(struct mp4_mux *mux, struct mp4_track *track)
  1831. {
  1832. struct serializer *s = mux->serializer;
  1833. size_t count = track->packets.size / sizeof(struct encoder_packet);
  1834. if (!count || !track->fragment_samples.num)
  1835. return;
  1836. struct chunk *chk = da_push_back_new(track->chunks);
  1837. chk->offset = serializer_get_pos(s);
  1838. chk->samples = (uint32_t)track->fragment_samples.num;
  1839. for (size_t i = 0; i < track->fragment_samples.num; i++) {
  1840. struct encoder_packet pkt;
  1841. deque_pop_front(&track->packets, &pkt,
  1842. sizeof(struct encoder_packet));
  1843. s_write(s, pkt.data, pkt.size);
  1844. obs_encoder_packet_release(&pkt);
  1845. }
  1846. chk->size = (uint32_t)(serializer_get_pos(s) - chk->offset);
  1847. /* Fixup sample count for fixed-size codecs */
  1848. if (track->sample_size)
  1849. chk->samples = chk->size / track->sample_size;
  1850. da_clear(track->fragment_samples);
  1851. }
  1852. static void mp4_flush_fragment(struct mp4_mux *mux)
  1853. {
  1854. struct serializer *s = mux->serializer;
  1855. // Write file header if not already done
  1856. if (!mux->fragments_written) {
  1857. mp4_write_ftyp(mux, true);
  1858. /* Placeholder to write mdat header during soft-remux */
  1859. mux->placeholder_offset = serializer_get_pos(s);
  1860. mp4_write_free(mux);
  1861. }
  1862. // Array output as temporary buffer to avoid sending seeks to disk
  1863. struct serializer as;
  1864. struct array_output_data aod;
  1865. array_output_serializer_init(&as, &aod);
  1866. mux->serializer = &as;
  1867. // Write initial incomplete moov (because fragmentation)
  1868. if (!mux->fragments_written) {
  1869. mp4_write_moov(mux, true);
  1870. s_write(s, aod.bytes.array, aod.bytes.num);
  1871. array_output_serializer_reset(&aod);
  1872. }
  1873. mux->fragments_written++;
  1874. /* --------------------------------------------------------- */
  1875. /* Analyse packets and create fragment moof. */
  1876. uint64_t mdat_size = 8;
  1877. for (size_t idx = 0; idx < mux->tracks.num; idx++) {
  1878. struct mp4_track *track = &mux->tracks.array[idx];
  1879. process_packets(mux, track, &mdat_size);
  1880. }
  1881. if (!mux->next_frag_pts && mux->chapter_track) {
  1882. // Create dummy chapter marker at the end so duration is correct
  1883. uint64_t duration = get_longest_track_duration(mux);
  1884. struct encoder_packet pkt;
  1885. mp4_create_chapter_pkt(&pkt, (int64_t)duration * 1000, "Dummy");
  1886. deque_push_back(&mux->chapter_track->packets, &pkt,
  1887. sizeof(struct encoder_packet));
  1888. process_packets(mux, mux->chapter_track, &mdat_size);
  1889. }
  1890. // write moof once to get size
  1891. int64_t moof_start = serializer_get_pos(s);
  1892. size_t moof_size = mp4_write_moof(mux, 0, moof_start);
  1893. array_output_serializer_reset(&aod);
  1894. // write moof again with known size
  1895. mp4_write_moof(mux, (uint32_t)moof_size, moof_start);
  1896. // Write to output and restore real serializer
  1897. s_write(s, aod.bytes.array, aod.bytes.num);
  1898. mux->serializer = s;
  1899. array_output_serializer_free(&aod);
  1900. /* --------------------------------------------------------- */
  1901. /* Write audio and video samples (in chunks). Also update */
  1902. /* global chunk and sample information for final moov. */
  1903. if (mdat_size > UINT32_MAX) {
  1904. s_wb32(s, 1);
  1905. s_write(s, "mdat", 4);
  1906. s_wb64(s, mdat_size + 8);
  1907. } else {
  1908. s_wb32(s, (uint32_t)mdat_size);
  1909. s_write(s, "mdat", 4);
  1910. }
  1911. for (size_t i = 0; i < mux->tracks.num; i++) {
  1912. struct mp4_track *track = &mux->tracks.array[i];
  1913. write_packets(mux, track);
  1914. }
  1915. /* Only write chapter packets on final flush. */
  1916. if (!mux->next_frag_pts && mux->chapter_track)
  1917. write_packets(mux, mux->chapter_track);
  1918. mux->next_frag_pts = 0;
  1919. }
  1920. /* ========================================================================== */
  1921. /* Track object functions */
  1922. static inline void track_insert_packet(struct mp4_track *track,
  1923. struct encoder_packet *pkt)
  1924. {
  1925. int64_t pts_usec = packet_pts_usec(pkt);
  1926. if (pts_usec > track->last_pts_usec)
  1927. track->last_pts_usec = pts_usec;
  1928. deque_push_back(&track->packets, pkt, sizeof(struct encoder_packet));
  1929. }
  1930. static inline uint32_t get_sample_size(struct mp4_track *track)
  1931. {
  1932. audio_t *audio = obs_encoder_audio(track->encoder);
  1933. if (!audio)
  1934. return 0;
  1935. const struct audio_output_info *info = audio_output_get_info(audio);
  1936. uint32_t channels = get_audio_channels(info->speakers);
  1937. switch (track->codec) {
  1938. case CODEC_PCM_F32:
  1939. return channels * 4; // 4 bytes per sample (32-bit)
  1940. case CODEC_PCM_I24:
  1941. return channels * 3; // 3 bytes per sample (24-bit)
  1942. case CODEC_PCM_I16:
  1943. return channels * 2; // 2 bytes per sample (16-bit)
  1944. default:
  1945. return 0;
  1946. }
  1947. }
  1948. static inline enum mp4_codec get_codec(obs_encoder_t *enc)
  1949. {
  1950. const char *codec = obs_encoder_get_codec(enc);
  1951. if (strcmp(codec, "h264") == 0)
  1952. return CODEC_H264;
  1953. if (strcmp(codec, "hevc") == 0)
  1954. return CODEC_HEVC;
  1955. if (strcmp(codec, "av1") == 0)
  1956. return CODEC_AV1;
  1957. if (strcmp(codec, "aac") == 0)
  1958. return CODEC_AAC;
  1959. if (strcmp(codec, "opus") == 0)
  1960. return CODEC_OPUS;
  1961. if (strcmp(codec, "flac") == 0)
  1962. return CODEC_FLAC;
  1963. if (strcmp(codec, "alac") == 0)
  1964. return CODEC_ALAC;
  1965. if (strcmp(codec, "pcm_s16le") == 0)
  1966. return CODEC_PCM_I16;
  1967. if (strcmp(codec, "pcm_s24le") == 0)
  1968. return CODEC_PCM_I24;
  1969. if (strcmp(codec, "pcm_f32le") == 0)
  1970. return CODEC_PCM_F32;
  1971. return CODEC_UNKNOWN;
  1972. }
  1973. static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc)
  1974. {
  1975. struct mp4_track *track = da_push_back_new(mux->tracks);
  1976. track->type = obs_encoder_get_type(enc) == OBS_ENCODER_VIDEO
  1977. ? TRACK_VIDEO
  1978. : TRACK_AUDIO;
  1979. track->encoder = obs_encoder_get_ref(enc);
  1980. track->codec = get_codec(enc);
  1981. track->track_id = ++mux->track_ctr;
  1982. /* Set timebase/timescale */
  1983. if (track->type == TRACK_VIDEO) {
  1984. video_t *video = obs_encoder_video(enc);
  1985. const struct video_output_info *info =
  1986. video_output_get_info(video);
  1987. track->timebase_num = info->fps_den;
  1988. track->timebase_den = info->fps_num;
  1989. track->timescale = track->timebase_den;
  1990. /* FFmpeg does this to compensate for non-monotonic timestamps,
  1991. * we probably don't need it, but let's stick to what they do
  1992. * for maximum compatibility. */
  1993. while (track->timescale < 10000)
  1994. track->timescale *= 2;
  1995. } else {
  1996. uint32_t sample_rate = obs_encoder_get_sample_rate(enc);
  1997. /* Opus is always 48 kHz */
  1998. if (track->codec == CODEC_OPUS)
  1999. sample_rate = 48000;
  2000. track->timebase_num = 1;
  2001. track->timebase_den = sample_rate;
  2002. track->timescale = sample_rate;
  2003. }
  2004. /* Set sample size (if fixed) */
  2005. if (track->type == TRACK_AUDIO)
  2006. track->sample_size = get_sample_size(track);
  2007. }
  2008. static inline void add_chapter_track(struct mp4_mux *mux)
  2009. {
  2010. mux->chapter_track = bzalloc(sizeof(struct mp4_track));
  2011. mux->chapter_track->type = TRACK_CHAPTERS;
  2012. mux->chapter_track->codec = CODEC_TEXT;
  2013. mux->chapter_track->timescale = 1000;
  2014. mux->chapter_track->timebase_num = 1;
  2015. mux->chapter_track->timebase_den = 1000;
  2016. mux->chapter_track->track_id = ++mux->track_ctr;
  2017. }
  2018. static inline void free_packets(struct deque *dq)
  2019. {
  2020. size_t num = dq->size / sizeof(struct encoder_packet);
  2021. for (size_t i = 0; i < num; i++) {
  2022. struct encoder_packet pkt;
  2023. deque_pop_front(dq, &pkt, sizeof(struct encoder_packet));
  2024. obs_encoder_packet_release(&pkt);
  2025. }
  2026. }
  2027. static inline void free_track(struct mp4_track *track)
  2028. {
  2029. if (!track)
  2030. return;
  2031. obs_encoder_release(track->encoder);
  2032. free_packets(&track->packets);
  2033. deque_free(&track->packets);
  2034. da_free(track->sample_sizes);
  2035. da_free(track->chunks);
  2036. da_free(track->deltas);
  2037. da_free(track->offsets);
  2038. da_free(track->sync_samples);
  2039. da_free(track->fragment_samples);
  2040. }
  2041. /* ===========================================================================*/
  2042. /* API */
  2043. struct mp4_mux *mp4_mux_create(obs_output_t *output,
  2044. struct serializer *serializer,
  2045. enum mp4_mux_flags flags)
  2046. {
  2047. struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));
  2048. mux->output = output;
  2049. mux->serializer = serializer;
  2050. mux->flags = flags;
  2051. /* Timestamp is based on 1904 rather than 1970. */
  2052. mux->creation_time = time(NULL) + 0x7C25B080;
  2053. for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) {
  2054. obs_encoder_t *enc = obs_output_get_video_encoder2(output, i);
  2055. if (!enc)
  2056. continue;
  2057. add_track(mux, enc);
  2058. }
  2059. for (size_t i = 0; i < MAX_OUTPUT_AUDIO_ENCODERS; i++) {
  2060. obs_encoder_t *enc = obs_output_get_audio_encoder(output, i);
  2061. if (!enc)
  2062. continue;
  2063. add_track(mux, enc);
  2064. }
  2065. return mux;
  2066. }
  2067. void mp4_mux_destroy(struct mp4_mux *mux)
  2068. {
  2069. for (size_t i = 0; i < mux->tracks.num; i++)
  2070. free_track(&mux->tracks.array[i]);
  2071. free_track(mux->chapter_track);
  2072. bfree(mux->chapter_track);
  2073. da_free(mux->tracks);
  2074. bfree(mux);
  2075. }
  2076. bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
  2077. {
  2078. struct mp4_track *track = NULL;
  2079. struct encoder_packet parsed_packet;
  2080. enum obs_encoder_type type = pkt->type;
  2081. bool fragment_ready = mux->next_frag_pts > 0;
  2082. for (size_t i = 0; i < mux->tracks.num; i++) {
  2083. struct mp4_track *tmp = &mux->tracks.array[i];
  2084. fragment_ready = fragment_ready &&
  2085. tmp->last_pts_usec >= mux->next_frag_pts;
  2086. if (tmp->encoder == pkt->encoder)
  2087. track = tmp;
  2088. }
  2089. if (!track) {
  2090. warn("Could not find track for packet of type %s with "
  2091. "track id %zu!",
  2092. type == OBS_ENCODER_VIDEO ? "video" : "audio",
  2093. pkt->track_idx);
  2094. return false;
  2095. }
  2096. /* If all tracks have caught up to the keyframe we want to fragment on,
  2097. * flush the current fragment to disk. */
  2098. if (fragment_ready)
  2099. mp4_flush_fragment(mux);
  2100. if (type == OBS_ENCODER_AUDIO) {
  2101. obs_encoder_packet_ref(&parsed_packet, pkt);
  2102. } else {
  2103. if (track->codec == CODEC_H264)
  2104. obs_parse_avc_packet(&parsed_packet, pkt);
  2105. else if (track->codec == CODEC_HEVC)
  2106. obs_parse_hevc_packet(&parsed_packet, pkt);
  2107. else if (track->codec == CODEC_AV1)
  2108. obs_parse_av1_packet(&parsed_packet, pkt);
  2109. /* Set fragmentation PTS if packet is keyframe and PTS > 0 */
  2110. if (parsed_packet.keyframe && parsed_packet.pts > 0) {
  2111. mux->next_frag_pts = packet_pts_usec(&parsed_packet);
  2112. }
  2113. }
  2114. track_insert_packet(track, &parsed_packet);
  2115. return true;
  2116. }
  2117. bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec,
  2118. const char *name)
  2119. {
  2120. if (dts_usec < 0)
  2121. return false;
  2122. if (!mux->chapter_track)
  2123. add_chapter_track(mux);
  2124. /* To work correctly there needs to be a chapter at PTS 0,
  2125. * create that here if necessary. */
  2126. if (dts_usec > 0 && mux->chapter_track->packets.size == 0) {
  2127. mp4_mux_add_chapter(mux, 0,
  2128. obs_module_text("MP4Output.StartChapter"));
  2129. }
  2130. /* Create packets that will be muxed on final flush */
  2131. struct encoder_packet pkt;
  2132. mp4_create_chapter_pkt(&pkt, dts_usec, name);
  2133. track_insert_packet(mux->chapter_track, &pkt);
  2134. return true;
  2135. }
  2136. bool mp4_mux_finalise(struct mp4_mux *mux)
  2137. {
  2138. struct serializer *s = mux->serializer;
  2139. /* Flush remaining audio/video samples as final fragment. */
  2140. info("Flushing final fragment...");
  2141. /* Set target PTS to zero to indicate that we want to flush all
  2142. * the remaining packets */
  2143. mux->next_frag_pts = 0;
  2144. mp4_flush_fragment(mux);
  2145. info("Number of fragments: %u", mux->fragments_written);
  2146. if (mux->flags & MP4_SKIP_FINALISATION) {
  2147. warn("Skipping MP4 finalization!");
  2148. return true;
  2149. }
  2150. int64_t data_end = serializer_get_pos(s);
  2151. /* ---------------------------------------- */
  2152. /* Write full moov box */
  2153. /* Use array serializer for moov data as this will do a lot
  2154. * of seeks to write size values of variable-size boxes. */
  2155. struct serializer fs;
  2156. struct array_output_data ao;
  2157. array_output_serializer_init(&fs, &ao);
  2158. mux->serializer = &fs;
  2159. mp4_write_moov(mux, false);
  2160. s_write(s, ao.bytes.array, ao.bytes.num);
  2161. info("Full moov size: %zu KiB", ao.bytes.num / 1024);
  2162. mux->serializer = s; // restore real serializer
  2163. array_output_serializer_free(&ao);
  2164. /* ---------------------------------------- */
  2165. /* Overwrite file header (ftyp + free/moov) */
  2166. serializer_seek(s, 0, SERIALIZE_SEEK_START);
  2167. mp4_write_ftyp(mux, false);
  2168. size_t data_size = data_end - mux->placeholder_offset;
  2169. serializer_seek(s, (int64_t)mux->placeholder_offset,
  2170. SERIALIZE_SEEK_START);
  2171. /* If data is more than 4 GiB the mdat header becomes 16 bytes, hence
  2172. * why we create a 16-byte placeholder "free" box at the start. */
  2173. if (data_size > UINT32_MAX) {
  2174. s_wb32(s, 1); // 1 = use "largesize" field instead
  2175. s_write(s, "mdat", 4);
  2176. s_wb64(s, data_size); // largesize (64-bit)
  2177. } else {
  2178. s_wb32(s, (uint32_t)data_size);
  2179. s_write(s, "mdat", 4);
  2180. }
  2181. info("Final mdat size: %zu KiB", data_size / 1024);
  2182. return true;
  2183. }