encoder.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202
  1. #include <util/darray.h>
  2. #include <util/dstr.hpp>
  3. #include <obs-module.h>
  4. #include <initializer_list>
  5. #include <memory>
  6. #include <vector>
  7. #ifndef _WIN32
  8. #include <AudioToolbox/AudioToolbox.h>
  9. #endif
  /* Logs through blog() with the generic "[CoreAudio encoder]: " prefix. */
  #define CA_LOG(lvl, fmt, ...) \
      blog(lvl, "[CoreAudio encoder]: " fmt, ##__VA_ARGS__)

  /* Logs through blog() with a prefix built from a codec name and the name
   * of the given OBS encoder. */
  #define CA_LOG_ENCODER(codec_name, enc, lvl, fmt, ...) \
      blog(lvl, "[CoreAudio %s: '%s']: " fmt, \
           codec_name, obs_encoder_get_name(enc), \
           ##__VA_ARGS__)

  /* CA_LOG_ENCODER for a `ca` pointer that must be in scope at the point of
   * expansion (it is dereferenced for format_name and encoder). */
  #define CA_BLOG(lvl, fmt, ...) \
      CA_LOG_ENCODER(ca->format_name, ca->encoder, lvl, fmt, \
                     ##__VA_ARGS__)

  /* Uses CA_BLOG when the in-scope `ca` is non-null and CA_LOG otherwise. */
  #define CA_CO_LOG(lvl, fmt, ...) \
      do { \
          if (ca) \
              CA_BLOG(lvl, fmt, ##__VA_ARGS__); \
          else \
              CA_LOG(lvl, fmt, ##__VA_ARGS__); \
      } while (false)
  26. #ifdef _WIN32
  27. #include "windows-imports.h"
  28. #endif
  29. using namespace std;
  30. namespace {
  31. struct asbd_builder {
  32. AudioStreamBasicDescription asbd;
  33. asbd_builder &sample_rate(Float64 rate)
  34. {
  35. asbd.mSampleRate = rate;
  36. return *this;
  37. }
  38. asbd_builder &format_id(UInt32 format)
  39. {
  40. asbd.mFormatID = format;
  41. return *this;
  42. }
  43. asbd_builder &format_flags(UInt32 flags)
  44. {
  45. asbd.mFormatFlags = flags;
  46. return *this;
  47. }
  48. asbd_builder &bytes_per_packet(UInt32 bytes)
  49. {
  50. asbd.mBytesPerPacket = bytes;
  51. return *this;
  52. }
  53. asbd_builder &frames_per_packet(UInt32 frames)
  54. {
  55. asbd.mFramesPerPacket = frames;
  56. return *this;
  57. }
  58. asbd_builder &bytes_per_frame(UInt32 bytes)
  59. {
  60. asbd.mBytesPerFrame = bytes;
  61. return *this;
  62. }
  63. asbd_builder &channels_per_frame(UInt32 channels)
  64. {
  65. asbd.mChannelsPerFrame = channels;
  66. return *this;
  67. }
  68. asbd_builder &bits_per_channel(UInt32 bits)
  69. {
  70. asbd.mBitsPerChannel = bits;
  71. return *this;
  72. }
  73. };
  74. struct ca_encoder {
  75. obs_encoder_t *encoder;
  76. const char *format_name;
  77. UInt32 format_id;
  78. const initializer_list<UInt32> *allowed_formats;
  79. AudioConverterRef converter;
  80. size_t output_buffer_size;
  81. vector<uint8_t> output_buffer;
  82. size_t out_frames_per_packet;
  83. size_t in_packets;
  84. size_t in_frame_size;
  85. size_t in_bytes_required;
  86. DARRAY(uint8_t) input_buffer;
  87. size_t bytes_read;
  88. uint64_t total_samples;
  89. uint64_t samples_per_second;
  90. vector<uint8_t> extra_data;
  91. size_t channels;
  92. ~ca_encoder()
  93. {
  94. if (converter)
  95. AudioConverterDispose(converter);
  96. da_free(input_buffer);
  97. }
  98. };
  99. typedef struct ca_encoder ca_encoder;
  100. }
  101. namespace std {
  102. #ifndef _WIN32
  103. template <>
  104. struct default_delete<remove_pointer<CFErrorRef>::type> {
  105. void operator()(remove_pointer<CFErrorRef>::type *err)
  106. {
  107. CFRelease(err);
  108. }
  109. };
  110. template <>
  111. struct default_delete<remove_pointer<CFStringRef>::type> {
  112. void operator()(remove_pointer<CFStringRef>::type *str)
  113. {
  114. CFRelease(str);
  115. }
  116. };
  117. #endif
  118. template <>
  119. struct default_delete<remove_pointer<AudioConverterRef>::type> {
  120. void operator()(AudioConverterRef converter)
  121. {
  122. AudioConverterDispose(converter);
  123. }
  124. };
  125. }
  /* unique_ptr owning the object behind a handle type T (a pointer type such
   * as CFStringRef); cleanup goes through std::default_delete of the
   * pointee type. */
  template <typename T>
  using cf_ptr = std::unique_ptr<typename std::remove_pointer<T>::type>;
  128. #ifndef _MSC_VER
  129. __attribute__((__format__(__printf__, 3, 4)))
  130. #endif
  131. static void log_to_dstr(DStr &str, ca_encoder *ca, const char *fmt, ...)
  132. {
  133. dstr prev_str = *static_cast<dstr*>(str);
  134. va_list args;
  135. va_start(args, fmt);
  136. dstr_vcatf(str, fmt, args);
  137. va_end(args);
  138. if (str->array)
  139. return;
  140. char array[4096];
  141. va_start(args, fmt);
  142. vsnprintf(array, 4096, fmt, args);
  143. va_end(args);
  144. array[4095] = 0;
  145. if (!prev_str.array && !prev_str.len)
  146. CA_CO_LOG(LOG_ERROR, "Could not allocate buffer for logging:"
  147. "\n'%s'", array);
  148. else
  149. CA_CO_LOG(LOG_ERROR, "Could not allocate buffer for logging:"
  150. "\n'%s'\nPrevious log entries:\n%s",
  151. array, prev_str.array);
  152. bfree(prev_str.array);
  153. }
  154. static const char *flush_log(DStr &log)
  155. {
  156. if (!log->array || !log->len)
  157. return "";
  158. if (log->array[log->len - 1] == '\n') {
  159. log->array[log->len - 1] = 0; //Get rid of last newline
  160. log->len -= 1;
  161. }
  162. return log->array;
  163. }
  /* Logs `fmt` followed by the contents of the in-scope DStr `log`; the
   * ":\n" separator is only emitted when `log` has a buffer. This variant
   * takes no format arguments. */
  #define CA_CO_DLOG_(lvl, fmt) \
      CA_CO_LOG(lvl, fmt "%s%s", \
                log->array ? ":\n" : "", flush_log(log))

  /* Same as CA_CO_DLOG_, for format strings that take arguments. */
  #define CA_CO_DLOG(lvl, fmt, ...) \
      CA_CO_LOG(lvl, fmt "%s%s", ##__VA_ARGS__, \
                log->array ? ":\n" : "", flush_log(log))
  170. static const char *aac_get_name(void)
  171. {
  172. return obs_module_text("CoreAudioAAC");
  173. }
  174. static const char *code_to_str(OSStatus code)
  175. {
  176. switch (code) {
  177. #define HANDLE_CODE(c) case c: return #c
  178. HANDLE_CODE(kAudio_UnimplementedError);
  179. HANDLE_CODE(kAudio_FileNotFoundError);
  180. HANDLE_CODE(kAudio_FilePermissionError);
  181. HANDLE_CODE(kAudio_TooManyFilesOpenError);
  182. HANDLE_CODE(kAudio_BadFilePathError);
  183. HANDLE_CODE(kAudio_ParamError);
  184. HANDLE_CODE(kAudio_MemFullError);
  185. HANDLE_CODE(kAudioConverterErr_FormatNotSupported);
  186. HANDLE_CODE(kAudioConverterErr_OperationNotSupported);
  187. HANDLE_CODE(kAudioConverterErr_PropertyNotSupported);
  188. HANDLE_CODE(kAudioConverterErr_InvalidInputSize);
  189. HANDLE_CODE(kAudioConverterErr_InvalidOutputSize);
  190. HANDLE_CODE(kAudioConverterErr_UnspecifiedError);
  191. HANDLE_CODE(kAudioConverterErr_BadPropertySizeError);
  192. HANDLE_CODE(kAudioConverterErr_RequiresPacketDescriptionsError);
  193. HANDLE_CODE(kAudioConverterErr_InputSampleRateOutOfRange);
  194. HANDLE_CODE(kAudioConverterErr_OutputSampleRateOutOfRange);
  195. #undef HANDLE_CODE
  196. default: break;
  197. }
  198. return NULL;
  199. }
  200. static DStr osstatus_to_dstr(OSStatus code)
  201. {
  202. DStr result;
  203. #ifndef _WIN32
  204. cf_ptr<CFErrorRef> err{CFErrorCreate(kCFAllocatorDefault,
  205. kCFErrorDomainOSStatus, code, NULL)};
  206. cf_ptr<CFStringRef> str{CFErrorCopyDescription(err.get())};
  207. CFIndex length = CFStringGetLength(str.get());
  208. CFIndex max_size = CFStringGetMaximumSizeForEncoding(length,
  209. kCFStringEncodingUTF8);
  210. dstr_ensure_capacity(result, max_size);
  211. if (result->array && CFStringGetCString(str.get(), result->array,
  212. max_size, kCFStringEncodingUTF8)) {
  213. dstr_resize(result, strlen(result->array));
  214. return result;
  215. }
  216. #endif
  217. const char *code_str = code_to_str(code);
  218. dstr_printf(result, "%s%s%d%s",
  219. code_str ? code_str : "",
  220. code_str ? " (" : "",
  221. static_cast<int>(code),
  222. code_str ? ")" : "");
  223. return result;
  224. }
  225. static void log_osstatus(int log_level, ca_encoder *ca, const char *context,
  226. OSStatus code)
  227. {
  228. DStr str = osstatus_to_dstr(code);
  229. if (ca)
  230. CA_BLOG(log_level, "Error in %s: %s", context, str->array);
  231. else
  232. CA_LOG(log_level, "Error in %s: %s", context, str->array);
  233. }
  234. static const char *format_id_to_str(UInt32 format_id)
  235. {
  236. #define FORMAT_TO_STR(x) case x: return #x
  237. switch (format_id) {
  238. FORMAT_TO_STR(kAudioFormatLinearPCM);
  239. FORMAT_TO_STR(kAudioFormatAC3);
  240. FORMAT_TO_STR(kAudioFormat60958AC3);
  241. FORMAT_TO_STR(kAudioFormatAppleIMA4);
  242. FORMAT_TO_STR(kAudioFormatMPEG4AAC);
  243. FORMAT_TO_STR(kAudioFormatMPEG4CELP);
  244. FORMAT_TO_STR(kAudioFormatMPEG4HVXC);
  245. FORMAT_TO_STR(kAudioFormatMPEG4TwinVQ);
  246. FORMAT_TO_STR(kAudioFormatMACE3);
  247. FORMAT_TO_STR(kAudioFormatMACE6);
  248. FORMAT_TO_STR(kAudioFormatULaw);
  249. FORMAT_TO_STR(kAudioFormatALaw);
  250. FORMAT_TO_STR(kAudioFormatQDesign);
  251. FORMAT_TO_STR(kAudioFormatQDesign2);
  252. FORMAT_TO_STR(kAudioFormatQUALCOMM);
  253. FORMAT_TO_STR(kAudioFormatMPEGLayer1);
  254. FORMAT_TO_STR(kAudioFormatMPEGLayer2);
  255. FORMAT_TO_STR(kAudioFormatMPEGLayer3);
  256. FORMAT_TO_STR(kAudioFormatTimeCode);
  257. FORMAT_TO_STR(kAudioFormatMIDIStream);
  258. FORMAT_TO_STR(kAudioFormatParameterValueStream);
  259. FORMAT_TO_STR(kAudioFormatAppleLossless);
  260. FORMAT_TO_STR(kAudioFormatMPEG4AAC_HE);
  261. FORMAT_TO_STR(kAudioFormatMPEG4AAC_LD);
  262. FORMAT_TO_STR(kAudioFormatMPEG4AAC_ELD);
  263. FORMAT_TO_STR(kAudioFormatMPEG4AAC_ELD_SBR);
  264. FORMAT_TO_STR(kAudioFormatMPEG4AAC_HE_V2);
  265. FORMAT_TO_STR(kAudioFormatMPEG4AAC_Spatial);
  266. FORMAT_TO_STR(kAudioFormatAMR);
  267. FORMAT_TO_STR(kAudioFormatAudible);
  268. FORMAT_TO_STR(kAudioFormatiLBC);
  269. FORMAT_TO_STR(kAudioFormatDVIIntelIMA);
  270. FORMAT_TO_STR(kAudioFormatMicrosoftGSM);
  271. FORMAT_TO_STR(kAudioFormatAES3);
  272. }
  273. #undef FORMAT_TO_STR
  274. return "Unknown format";
  275. }
  276. static void aac_destroy(void *data)
  277. {
  278. ca_encoder *ca = static_cast<ca_encoder*>(data);
  279. delete ca;
  280. }
  281. template <typename Func>
  282. static bool query_converter_property_raw(DStr &log, ca_encoder *ca,
  283. AudioFormatPropertyID property,
  284. const char *get_property_info, const char *get_property,
  285. AudioConverterRef converter, Func &&func)
  286. {
  287. UInt32 size = 0;
  288. OSStatus code = AudioConverterGetPropertyInfo(converter, property,
  289. &size, nullptr);
  290. if (code) {
  291. log_to_dstr(log, ca, "%s: %s\n", get_property_info,
  292. osstatus_to_dstr(code)->array);
  293. return false;
  294. }
  295. if (!size) {
  296. log_to_dstr(log, ca, "%s returned 0 size\n", get_property_info);
  297. return false;
  298. }
  299. vector<uint8_t> buffer;
  300. try {
  301. buffer.resize(size);
  302. } catch (...) {
  303. log_to_dstr(log, ca, "Failed to allocate %u bytes for %s\n",
  304. static_cast<uint32_t>(size), get_property);
  305. return false;
  306. }
  307. code = AudioConverterGetProperty(converter, property, &size,
  308. buffer.data());
  309. if (code) {
  310. log_to_dstr(log, ca, "%s: %s\n", get_property,
  311. osstatus_to_dstr(code)->array);
  312. return false;
  313. }
  314. func(size, static_cast<void*>(buffer.data()));
  315. return true;
  316. }
  /* Expands a property constant into three arguments: the constant itself
   * and the two call labels expected by query_converter_property_raw(). */
  #define EXPAND_CONVERTER_NAMES(prop) \
      prop, \
      "AudioConverterGetPropertyInfo(" #prop ")", \
      "AudioConverterGetProperty(" #prop ")"
  320. template <typename Func>
  321. static bool enumerate_bitrates(DStr &log, ca_encoder *ca,
  322. AudioConverterRef converter, Func &&func)
  323. {
  324. auto helper = [&](UInt32 size, void *data)
  325. {
  326. auto range = static_cast<AudioValueRange*>(data);
  327. size_t num_ranges = size / sizeof(AudioValueRange);
  328. for (size_t i = 0; i < num_ranges; i++)
  329. func(static_cast<UInt32>(range[i].mMinimum),
  330. static_cast<UInt32>(range[i].mMaximum));
  331. };
  332. return query_converter_property_raw(log, ca, EXPAND_CONVERTER_NAMES(
  333. kAudioConverterApplicableEncodeBitRates),
  334. converter, helper);
  335. }
  336. typedef void (*bitrate_enumeration_func)(void *data, UInt32 min, UInt32 max);
  337. static bool enumerate_bitrates(ca_encoder *ca, AudioConverterRef converter,
  338. bitrate_enumeration_func enum_func, void *data)
  339. {
  340. if (!converter && ca)
  341. converter = ca->converter;
  342. UInt32 size;
  343. OSStatus code = AudioConverterGetPropertyInfo(converter,
  344. kAudioConverterApplicableEncodeBitRates,
  345. &size, NULL);
  346. if (code) {
  347. log_osstatus(LOG_WARNING, ca,
  348. "AudioConverterGetPropertyInfo(bitrates)",
  349. code);
  350. return false;
  351. }
  352. if (!size) {
  353. if (ca)
  354. CA_BLOG(LOG_WARNING, "Query for applicable bitrates "
  355. "returned 0 size");
  356. else
  357. CA_LOG(LOG_WARNING, "Query for applicable bitrates "
  358. "returned 0 size");
  359. return false;
  360. }
  361. size_t num_bitrates = (size + sizeof(AudioValueRange) - 1) /
  362. sizeof(AudioValueRange);
  363. vector<AudioValueRange> bitrates;
  364. try {
  365. bitrates.resize(num_bitrates);
  366. } catch (...) {
  367. if (ca)
  368. CA_BLOG(LOG_WARNING, "Could not allocate buffer while "
  369. "enumerating bitrates");
  370. else
  371. CA_LOG(LOG_WARNING, "Could not allocate buffer while "
  372. "enumerating bitrates");
  373. return false;
  374. }
  375. code = AudioConverterGetProperty(converter,
  376. kAudioConverterApplicableEncodeBitRates,
  377. &size, bitrates.data());
  378. if (code) {
  379. log_osstatus(LOG_WARNING, ca,
  380. "AudioConverterGetProperty(bitrates)", code);
  381. return false;
  382. }
  383. for (size_t i = 0; i < num_bitrates; i++)
  384. enum_func(data, (UInt32)bitrates[i].mMinimum,
  385. (UInt32)bitrates[i].mMaximum);
  386. return num_bitrates > 0;
  387. }
  388. static bool bitrate_valid(DStr &log, ca_encoder *ca,
  389. AudioConverterRef converter, UInt32 bitrate)
  390. {
  391. bool valid = false;
  392. auto helper = [&](UInt32 min_, UInt32 max_)
  393. {
  394. if (min_ == bitrate || max_ == bitrate)
  395. valid = true;
  396. };
  397. enumerate_bitrates(log, ca, converter, helper);
  398. return valid;
  399. }
  400. static bool create_encoder(DStr &log, ca_encoder *ca,
  401. AudioStreamBasicDescription *in,
  402. AudioStreamBasicDescription *out,
  403. UInt32 format_id, UInt32 bitrate, UInt32 rate_control)
  404. {
  405. #define STATUS_CHECK(c) \
  406. code = c; \
  407. if (code) { \
  408. log_to_dstr(log, ca, #c " returned %s", \
  409. osstatus_to_dstr(code)->array); \
  410. return false; \
  411. }
  412. auto out_ = asbd_builder()
  413. .sample_rate((Float64)ca->samples_per_second)
  414. .channels_per_frame((UInt32)ca->channels)
  415. .format_id(format_id)
  416. .asbd;
  417. UInt32 size = sizeof(*out);
  418. OSStatus code;
  419. STATUS_CHECK(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
  420. 0, NULL, &size, &out_));
  421. *out = out_;
  422. STATUS_CHECK(AudioConverterNew(in, out, &ca->converter))
  423. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  424. kAudioCodecPropertyBitRateControlMode,
  425. sizeof(rate_control), &rate_control));
  426. if (!bitrate_valid(log, ca, ca->converter, bitrate)) {
  427. log_to_dstr(log, ca, "Encoder does not support bitrate %u "
  428. "for format %s (0x%x)\n",
  429. (uint32_t)bitrate, format_id_to_str(format_id),
  430. (uint32_t)format_id);
  431. return false;
  432. }
  433. ca->format_id = format_id;
  434. return true;
  435. #undef STATUS_CHECK
  436. }
  437. static const initializer_list<UInt32> aac_formats = {
  438. kAudioFormatMPEG4AAC_HE_V2,
  439. kAudioFormatMPEG4AAC_HE,
  440. kAudioFormatMPEG4AAC,
  441. };
  442. static const initializer_list<UInt32> aac_lc_formats = {
  443. kAudioFormatMPEG4AAC,
  444. };
  445. static void *aac_create(obs_data_t *settings, obs_encoder_t *encoder)
  446. {
  447. #define STATUS_CHECK(c) \
  448. code = c; \
  449. if (code) { \
  450. log_osstatus(LOG_ERROR, ca.get(), #c, code); \
  451. return nullptr; \
  452. }
  453. UInt32 bitrate = (UInt32)obs_data_get_int(settings, "bitrate") * 1000;
  454. if (!bitrate) {
  455. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  456. "Invalid bitrate specified");
  457. return NULL;
  458. }
  459. const enum audio_format format = AUDIO_FORMAT_FLOAT;
  460. if (is_audio_planar(format)) {
  461. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  462. "Got non-interleaved audio format %d", format);
  463. return NULL;
  464. }
  465. unique_ptr<ca_encoder> ca;
  466. try {
  467. ca.reset(new ca_encoder());
  468. } catch (...) {
  469. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  470. "Could not allocate encoder");
  471. return nullptr;
  472. }
  473. ca->encoder = encoder;
  474. ca->format_name = "AAC";
  475. audio_t *audio = obs_encoder_audio(encoder);
  476. const struct audio_output_info *aoi = audio_output_get_info(audio);
  477. ca->channels = audio_output_get_channels(audio);
  478. ca->samples_per_second = audio_output_get_sample_rate(audio);
  479. size_t bytes_per_frame = get_audio_size(format, aoi->speakers, 1);
  480. size_t bits_per_channel = get_audio_bytes_per_channel(format) * 8;
  481. auto in = asbd_builder()
  482. .sample_rate((Float64)ca->samples_per_second)
  483. .channels_per_frame((UInt32)ca->channels)
  484. .bytes_per_frame((UInt32)bytes_per_frame)
  485. .frames_per_packet(1)
  486. .bytes_per_packet((UInt32)(1 * bytes_per_frame))
  487. .bits_per_channel((UInt32)bits_per_channel)
  488. .format_id(kAudioFormatLinearPCM)
  489. .format_flags(kAudioFormatFlagsNativeEndian |
  490. kAudioFormatFlagIsPacked |
  491. kAudioFormatFlagIsFloat |
  492. 0)
  493. .asbd;
  494. AudioStreamBasicDescription out;
  495. UInt32 rate_control = kAudioCodecBitRateControlMode_Constant;
  496. if (obs_data_get_bool(settings, "allow he-aac")) {
  497. ca->allowed_formats = &aac_formats;
  498. } else {
  499. ca->allowed_formats = &aac_lc_formats;
  500. }
  501. DStr log;
  502. bool encoder_created = false;
  503. for (UInt32 format_id : *ca->allowed_formats) {
  504. log_to_dstr(log, ca.get(), "Trying format %s (0x%x)\n",
  505. format_id_to_str(format_id),
  506. (uint32_t)format_id);
  507. if (!create_encoder(log, ca.get(), &in, &out, format_id,
  508. bitrate, rate_control))
  509. continue;
  510. encoder_created = true;
  511. break;
  512. }
  513. if (!encoder_created) {
  514. CA_CO_DLOG(LOG_ERROR, "Could not create encoder for "
  515. "selected format%s",
  516. ca->allowed_formats->size() == 1 ? "" : "s");
  517. return nullptr;
  518. }
  519. if (log->len)
  520. CA_CO_DLOG_(LOG_DEBUG, "Encoder created");
  521. OSStatus code;
  522. UInt32 converter_quality = kAudioConverterQuality_Max;
  523. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  524. kAudioConverterCodecQuality,
  525. sizeof(converter_quality), &converter_quality));
  526. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  527. kAudioConverterEncodeBitRate,
  528. sizeof(bitrate), &bitrate));
  529. UInt32 size = sizeof(in);
  530. STATUS_CHECK(AudioConverterGetProperty(ca->converter,
  531. kAudioConverterCurrentInputStreamDescription,
  532. &size, &in));
  533. size = sizeof(out);
  534. STATUS_CHECK(AudioConverterGetProperty(ca->converter,
  535. kAudioConverterCurrentOutputStreamDescription,
  536. &size, &out));
  537. ca->in_frame_size = in.mBytesPerFrame;
  538. ca->in_packets = out.mFramesPerPacket / in.mFramesPerPacket;
  539. ca->in_bytes_required = ca->in_packets * ca->in_frame_size;
  540. ca->out_frames_per_packet = out.mFramesPerPacket;
  541. da_init(ca->input_buffer);
  542. ca->output_buffer_size = out.mBytesPerPacket;
  543. if (out.mBytesPerPacket == 0) {
  544. UInt32 max_packet_size = 0;
  545. size = sizeof(max_packet_size);
  546. code = AudioConverterGetProperty(ca->converter,
  547. kAudioConverterPropertyMaximumOutputPacketSize,
  548. &size, &max_packet_size);
  549. if (code) {
  550. log_osstatus(LOG_WARNING, ca.get(),
  551. "AudioConverterGetProperty(PacketSz)",
  552. code);
  553. ca->output_buffer_size = 32768;
  554. } else {
  555. ca->output_buffer_size = max_packet_size;
  556. }
  557. }
  558. try {
  559. ca->output_buffer.resize(ca->output_buffer_size);
  560. } catch (...) {
  561. CA_BLOG(LOG_ERROR, "Failed to allocate output buffer");
  562. return nullptr;
  563. }
  564. const char *format_name =
  565. out.mFormatID == kAudioFormatMPEG4AAC_HE_V2 ? "HE-AAC v2" :
  566. out.mFormatID == kAudioFormatMPEG4AAC_HE ? "HE-AAC" : "AAC";
  567. CA_BLOG(LOG_INFO, "settings:\n"
  568. "\tmode: %s\n"
  569. "\tbitrate: %u\n"
  570. "\tsample rate: %llu\n"
  571. "\tcbr: %s\n"
  572. "\toutput buffer: %lu",
  573. format_name, (unsigned int)bitrate / 1000,
  574. ca->samples_per_second,
  575. rate_control == kAudioCodecBitRateControlMode_Constant ?
  576. "on" : "off",
  577. (unsigned long)ca->output_buffer_size);
  578. return ca.release();
  579. }
  580. static OSStatus complex_input_data_proc(AudioConverterRef inAudioConverter,
  581. UInt32 *ioNumberDataPackets, AudioBufferList *ioData,
  582. AudioStreamPacketDescription **outDataPacketDescription,
  583. void *inUserData)
  584. {
  585. UNUSED_PARAMETER(inAudioConverter);
  586. UNUSED_PARAMETER(outDataPacketDescription);
  587. ca_encoder *ca = static_cast<ca_encoder*>(inUserData);
  588. if (ca->bytes_read) {
  589. da_erase_range(ca->input_buffer, 0, ca->bytes_read);
  590. ca->bytes_read = 0;
  591. }
  592. if (ca->input_buffer.num < ca->in_bytes_required) {
  593. *ioNumberDataPackets = 0;
  594. ioData->mBuffers[0].mData = NULL;
  595. return 1;
  596. }
  597. *ioNumberDataPackets =
  598. (UInt32)(ca->in_bytes_required / ca->in_frame_size);
  599. ioData->mNumberBuffers = 1;
  600. ioData->mBuffers[0].mData = ca->input_buffer.array;
  601. ioData->mBuffers[0].mNumberChannels = (UInt32)ca->channels;
  602. ioData->mBuffers[0].mDataByteSize = (UInt32)ca->in_bytes_required;
  603. ca->bytes_read += ca->in_packets * ca->in_frame_size;
  604. return 0;
  605. }
  606. #ifdef _MSC_VER
  607. // disable warning that recommends if ((foo = bar > 0) == false) over
  608. // if (!(foo = bar > 0))
  609. #pragma warning(push)
  610. #pragma warning(disable: 4706)
  611. #endif
  612. static bool aac_encode(void *data, struct encoder_frame *frame,
  613. struct encoder_packet *packet, bool *received_packet)
  614. {
  615. ca_encoder *ca = static_cast<ca_encoder*>(data);
  616. da_push_back_array(ca->input_buffer, frame->data[0],
  617. frame->linesize[0]);
  618. if ((ca->input_buffer.num - ca->bytes_read) < ca->in_bytes_required)
  619. return true;
  620. UInt32 packets = 1;
  621. AudioBufferList buffer_list = { 0 };
  622. buffer_list.mNumberBuffers = 1;
  623. buffer_list.mBuffers[0].mNumberChannels = (UInt32)ca->channels;
  624. buffer_list.mBuffers[0].mDataByteSize = (UInt32)ca->output_buffer_size;
  625. buffer_list.mBuffers[0].mData = ca->output_buffer.data();
  626. AudioStreamPacketDescription out_desc = { 0 };
  627. OSStatus code = AudioConverterFillComplexBuffer(ca->converter,
  628. complex_input_data_proc, ca, &packets,
  629. &buffer_list, &out_desc);
  630. if (code && code != 1) {
  631. log_osstatus(LOG_ERROR, ca, "AudioConverterFillComplexBuffer",
  632. code);
  633. return false;
  634. }
  635. if (!(*received_packet = packets > 0))
  636. return true;
  637. packet->pts = ca->total_samples;
  638. packet->dts = ca->total_samples;
  639. packet->timebase_num = 1;
  640. packet->timebase_den = (uint32_t)ca->samples_per_second;
  641. packet->type = OBS_ENCODER_AUDIO;
  642. packet->size = out_desc.mDataByteSize;
  643. packet->data =
  644. (uint8_t*)buffer_list.mBuffers[0].mData + out_desc.mStartOffset;
  645. ca->total_samples += ca->bytes_read / ca->in_frame_size;
  646. return true;
  647. }
  648. #ifdef _MSC_VER
  649. #pragma warning(pop)
  650. #endif
  651. static void aac_audio_info(void *data, struct audio_convert_info *info)
  652. {
  653. UNUSED_PARAMETER(data);
  654. info->format = AUDIO_FORMAT_FLOAT;
  655. }
  656. static size_t aac_frame_size(void *data)
  657. {
  658. ca_encoder *ca = static_cast<ca_encoder*>(data);
  659. return ca->out_frames_per_packet;
  660. }
  /* The following code was extracted from encca_aac.c in HandBrake's libhb */

  /* Descriptor tag values checked by read_esds_desc_ext(). */
  #define MP4ESDescrTag          0x03
  #define MP4DecConfigDescrTag   0x04
  #define MP4DecSpecificDescrTag 0x05
  // based off of mov_mp4_read_descr_len from mov.c in ffmpeg's libavformat
  //
  // Decodes a variable-length size: each byte contributes its low 7 bits
  // (most significant group first) and a clear high bit ends the value.
  // At most 4 bytes are read; *buffer is advanced past the bytes consumed.
  static int read_descr_len(uint8_t **buffer)
  {
      int length = 0;

      for (int remaining = 4; remaining > 0; --remaining) {
          const int byte = *(*buffer)++;
          length = (length << 7) | (byte & 0x7f);

          if ((byte & 0x80) == 0)
              break;
      }

      return length;
  }
  679. // based off of mov_mp4_read_descr from mov.c in ffmpeg's libavformat
  680. static int read_descr(uint8_t **buffer, int *tag)
  681. {
  682. *tag = *(*buffer)++;
  683. return read_descr_len(buffer);
  684. }
  685. // based off of mov_read_esds from mov.c in ffmpeg's libavformat
  686. static void read_esds_desc_ext(uint8_t* desc_ext, vector<uint8_t> &buffer,
  687. bool version_flags)
  688. {
  689. uint8_t *esds = desc_ext;
  690. int tag, len;
  691. if (version_flags)
  692. esds += 4; // version + flags
  693. read_descr(&esds, &tag);
  694. esds += 2; // ID
  695. if (tag == MP4ESDescrTag)
  696. esds++; // priority
  697. read_descr(&esds, &tag);
  698. if (tag == MP4DecConfigDescrTag) {
  699. esds++; // object type id
  700. esds++; // stream type
  701. esds += 3; // buffer size db
  702. esds += 4; // max bitrate
  703. esds += 4; // average bitrate
  704. len = read_descr(&esds, &tag);
  705. if (tag == MP4DecSpecificDescrTag)
  706. try {
  707. buffer.assign(esds, esds + len);
  708. } catch (...) {
  709. //leave buffer empty
  710. }
  711. }
  712. }
  713. /* extracted code ends here */
  714. static void query_extra_data(ca_encoder *ca)
  715. {
  716. UInt32 size = 0;
  717. OSStatus code;
  718. code = AudioConverterGetPropertyInfo(ca->converter,
  719. kAudioConverterCompressionMagicCookie,
  720. &size, NULL);
  721. if (code) {
  722. log_osstatus(LOG_ERROR, ca,
  723. "AudioConverterGetPropertyInfo(magic_cookie)",
  724. code);
  725. return;
  726. }
  727. if (!size) {
  728. CA_BLOG(LOG_WARNING, "Got 0 data size info for magic_cookie");
  729. return;
  730. }
  731. vector<uint8_t> extra_data;
  732. try {
  733. extra_data.resize(size);
  734. } catch (...) {
  735. CA_BLOG(LOG_WARNING, "Could not allocate extra data buffer");
  736. return;
  737. }
  738. code = AudioConverterGetProperty(ca->converter,
  739. kAudioConverterCompressionMagicCookie,
  740. &size, extra_data.data());
  741. if (code) {
  742. log_osstatus(LOG_ERROR, ca,
  743. "AudioConverterGetProperty(magic_cookie)",
  744. code);
  745. return;
  746. }
  747. if (!size) {
  748. CA_BLOG(LOG_WARNING, "Got 0 data size for magic_cookie");
  749. return;
  750. }
  751. read_esds_desc_ext(extra_data.data(), ca->extra_data, false);
  752. }
  753. static bool aac_extra_data(void *data, uint8_t **extra_data, size_t *size)
  754. {
  755. ca_encoder *ca = static_cast<ca_encoder*>(data);
  756. if (!ca->extra_data.size())
  757. query_extra_data(ca);
  758. if (!ca->extra_data.size())
  759. return false;
  760. *extra_data = ca->extra_data.data();
  761. *size = ca->extra_data.size();
  762. return true;
  763. }
  764. static asbd_builder fill_common_asbd_fields(asbd_builder builder,
  765. bool in=false)
  766. {
  767. UInt32 bytes_per_frame = 8;
  768. UInt32 channels = 2;
  769. UInt32 bits_per_channel = bytes_per_frame / channels * 8;
  770. builder.channels_per_frame(channels);
  771. if (in) {
  772. builder
  773. .bytes_per_frame(bytes_per_frame)
  774. .frames_per_packet(1)
  775. .bytes_per_packet(1 * bytes_per_frame)
  776. .bits_per_channel(bits_per_channel);
  777. }
  778. return builder;
  779. }
  780. static AudioStreamBasicDescription get_default_in_asbd()
  781. {
  782. return fill_common_asbd_fields(asbd_builder(), true)
  783. .sample_rate(44100)
  784. .format_id(kAudioFormatLinearPCM)
  785. .format_flags(kAudioFormatFlagsNativeEndian |
  786. kAudioFormatFlagIsPacked |
  787. kAudioFormatFlagIsFloat |
  788. 0)
  789. .asbd;
  790. }
  791. static asbd_builder get_default_out_asbd_builder()
  792. {
  793. return fill_common_asbd_fields(asbd_builder())
  794. .sample_rate(44100);
  795. }
  796. static cf_ptr<AudioConverterRef> get_converter(DStr &log, ca_encoder *ca,
  797. AudioStreamBasicDescription out,
  798. AudioStreamBasicDescription in = get_default_in_asbd())
  799. {
  800. UInt32 size = sizeof(out);
  801. OSStatus code;
  802. #define STATUS_CHECK(x) \
  803. code = x; \
  804. if (code) { \
  805. log_to_dstr(log, ca, "%s: %s\n", #x, \
  806. osstatus_to_dstr(code)->array); \
  807. return nullptr; \
  808. }
  809. STATUS_CHECK(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
  810. 0, NULL, &size, &out));
  811. AudioConverterRef converter;
  812. STATUS_CHECK(AudioConverterNew(&in, &out, &converter));
  813. return cf_ptr<AudioConverterRef>{converter};
  814. #undef STATUS_CHECK
  815. }
  816. static AudioConverterRef get_default_converter(UInt32 format_id)
  817. {
  818. auto out = get_default_out_asbd_builder()
  819. .format_id(format_id)
  820. .asbd;
  821. DStr log;
  822. auto converter = get_converter(log, nullptr, out);
  823. if (!converter) {
  824. CA_LOG(LOG_ERROR, "Couldn't get default converter for format "
  825. "%s (0x%x):\n%s",
  826. format_id_to_str(format_id),
  827. static_cast<uint32_t>(format_id),
  828. flush_log(log));
  829. return nullptr;
  830. }
  831. return converter.release();
  832. }
  833. static AudioConverterRef aac_default_converter(void)
  834. {
  835. return get_default_converter(kAudioFormatMPEG4AAC);
  836. }
  837. static AudioConverterRef he_aac_default_converter(void)
  838. {
  839. return get_default_converter(kAudioFormatMPEG4AAC_HE);
  840. }
  841. struct find_matching_bitrate_helper {
  842. UInt32 bitrate;
  843. UInt32 best_match;
  844. int diff;
  845. };
  846. typedef struct find_matching_bitrate_helper find_matching_bitrate_helper;
  847. static void find_matching_bitrate_func(void *data, UInt32 min, UInt32 max)
  848. {
  849. find_matching_bitrate_helper *helper =
  850. static_cast<find_matching_bitrate_helper*>(data);
  851. int min_diff = abs((int)helper->bitrate - (int)min);
  852. int max_diff = abs((int)helper->bitrate - (int)max);
  853. if (min_diff < helper->diff) {
  854. helper->best_match = min;
  855. helper->diff = min_diff;
  856. }
  857. if (max_diff < helper->diff) {
  858. helper->best_match = max;
  859. helper->diff = max_diff;
  860. }
  861. }
  862. static UInt32 find_matching_bitrate(UInt32 bitrate)
  863. {
  864. find_matching_bitrate_helper helper;
  865. helper.bitrate = bitrate * 1000;
  866. helper.best_match = 0;
  867. helper.diff = INT_MAX;
  868. AudioConverterRef converter = aac_default_converter();
  869. if (!converter) {
  870. CA_LOG(LOG_ERROR, "Could not get converter to match "
  871. "default bitrate");
  872. return bitrate;
  873. }
  874. bool has_bitrates = enumerate_bitrates(NULL, converter,
  875. find_matching_bitrate_func, &helper);
  876. AudioConverterDispose(converter);
  877. if (!has_bitrates) {
  878. CA_LOG(LOG_ERROR, "No bitrates found while matching "
  879. "default bitrate");
  880. AudioConverterDispose(converter);
  881. return bitrate;
  882. }
  883. if (helper.best_match != helper.bitrate)
  884. CA_LOG(LOG_INFO, "Returning closest matching bitrate %u "
  885. "instead of requested bitrate %u",
  886. (uint32_t)helper.best_match / 1000,
  887. (uint32_t)bitrate);
  888. return helper.best_match / 1000;
  889. }
  890. static void aac_defaults(obs_data_t *settings)
  891. {
  892. obs_data_set_default_int(settings, "bitrate",
  893. find_matching_bitrate(128));
  894. obs_data_set_default_bool(settings, "allow he-aac", true);
  895. }
  896. struct add_bitrates_helper {
  897. DARRAY(UInt32) bitrates;
  898. };
  899. typedef struct add_bitrates_helper add_bitrates_helper;
  900. static void add_bitrates_func(void *data, UInt32 min, UInt32 max)
  901. {
  902. add_bitrates_helper *helper =
  903. static_cast<add_bitrates_helper*>(data);
  904. if (da_find(helper->bitrates, &min, 0) == DARRAY_INVALID)
  905. da_push_back(helper->bitrates, &min);
  906. if (da_find(helper->bitrates, &max, 0) == DARRAY_INVALID)
  907. da_push_back(helper->bitrates, &max);
  908. }
  909. static int bitrate_compare(const void *data1, const void *data2)
  910. {
  911. const UInt32 *bitrate1 = static_cast<const UInt32*>(data1);
  912. const UInt32 *bitrate2 = static_cast<const UInt32*>(data2);
  913. return (int)*bitrate1 - (int)*bitrate2;
  914. }
  915. static void add_bitrates(obs_property_t *prop, ca_encoder *ca)
  916. {
  917. add_bitrates_helper helper = { { {0} } };
  918. for (UInt32 format_id : (ca ? *ca->allowed_formats : aac_formats))
  919. enumerate_bitrates(ca,
  920. get_default_converter(format_id),
  921. add_bitrates_func, &helper);
  922. if (!helper.bitrates.num) {
  923. CA_BLOG(LOG_ERROR, "Enumeration found no available bitrates");
  924. return;
  925. }
  926. qsort(helper.bitrates.array, helper.bitrates.num, sizeof(UInt32),
  927. bitrate_compare);
  928. struct dstr str = { 0 };
  929. for (size_t i = 0; i < helper.bitrates.num; i++) {
  930. dstr_printf(&str, "%u",
  931. (uint32_t)helper.bitrates.array[i]/1000);
  932. obs_property_list_add_int(prop, str.array,
  933. helper.bitrates.array[i]/1000);
  934. }
  935. dstr_free(&str);
  936. da_free(helper.bitrates);
  937. }
  938. static obs_properties_t *aac_properties(void *data)
  939. {
  940. ca_encoder *ca = static_cast<ca_encoder*>(data);
  941. obs_properties_t *props = obs_properties_create();
  942. obs_property_t *p = obs_properties_add_list(props, "bitrate",
  943. obs_module_text("Bitrate"),
  944. OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
  945. add_bitrates(p, ca);
  946. obs_properties_add_bool(props, "allow he-aac",
  947. obs_module_text("AllowHEAAC"));
  948. return props;
  949. }
/*
 * Module-level boilerplate provided by macros: declares this file as an OBS
 * module and sets up default locale handling with "coreaudio-encoder" and
 * "en-US" as arguments.
 * NOTE(review): the exact expansion is defined outside this file - confirm
 * in the libobs module headers.
 */
OBS_DECLARE_MODULE()
OBS_MODULE_USE_DEFAULT_LOCALE("coreaudio-encoder", "en-US")
  952. bool obs_module_load(void)
  953. {
  954. #ifdef _WIN32
  955. if (!load_core_audio()) {
  956. CA_LOG(LOG_WARNING, "Couldn't load CoreAudio AAC encoder");
  957. return true;
  958. }
  959. CA_LOG(LOG_INFO, "Adding CoreAudio AAC encoder");
  960. #endif
  961. struct obs_encoder_info aac_info;
  962. aac_info.id = "CoreAudio_AAC";
  963. aac_info.type = OBS_ENCODER_AUDIO;
  964. aac_info.codec = "AAC";
  965. aac_info.get_name = aac_get_name;
  966. aac_info.destroy = aac_destroy;
  967. aac_info.create = aac_create;
  968. aac_info.encode = aac_encode;
  969. aac_info.get_frame_size = aac_frame_size;
  970. aac_info.get_audio_info = aac_audio_info;
  971. aac_info.get_extra_data = aac_extra_data;
  972. aac_info.get_defaults = aac_defaults;
  973. aac_info.get_properties = aac_properties;
  974. obs_register_encoder(&aac_info);
  975. return true;
  976. }
  977. #ifdef _WIN32
  978. void obs_module_unload(void)
  979. {
  980. unload_core_audio();
  981. }
  982. #endif