encoder.cpp 33 KB


  1. #include <util/dstr.hpp>
  2. #include <obs-module.h>
  3. #include <algorithm>
  4. #include <cstdlib>
  5. #include <initializer_list>
  6. #include <memory>
  7. #include <mutex>
  8. #include <vector>
  9. #ifndef _WIN32
  10. #include <AudioToolbox/AudioToolbox.h>
  11. #endif
  12. #define CA_LOG(level, format, ...) \
  13. blog(level, "[CoreAudio encoder]: " format, ##__VA_ARGS__)
  14. #define CA_LOG_ENCODER(format_name, encoder, level, format, ...) \
  15. blog(level, "[CoreAudio %s: '%s']: " format, \
  16. format_name, obs_encoder_get_name(encoder), \
  17. ##__VA_ARGS__)
  18. #define CA_BLOG(level, format, ...) \
  19. CA_LOG_ENCODER(ca->format_name, ca->encoder, level, format, \
  20. ##__VA_ARGS__)
  21. #define CA_CO_LOG(level, format, ...) \
  22. do { \
  23. if (ca) \
  24. CA_BLOG(level, format, ##__VA_ARGS__); \
  25. else \
  26. CA_LOG(level, format, ##__VA_ARGS__); \
  27. } while (false)
  28. #ifdef _WIN32
  29. #include "windows-imports.h"
  30. #endif
  31. using namespace std;
  32. namespace {
  33. struct asbd_builder {
  34. AudioStreamBasicDescription asbd;
  35. asbd_builder &sample_rate(Float64 rate)
  36. {
  37. asbd.mSampleRate = rate;
  38. return *this;
  39. }
  40. asbd_builder &format_id(UInt32 format)
  41. {
  42. asbd.mFormatID = format;
  43. return *this;
  44. }
  45. asbd_builder &format_flags(UInt32 flags)
  46. {
  47. asbd.mFormatFlags = flags;
  48. return *this;
  49. }
  50. asbd_builder &bytes_per_packet(UInt32 bytes)
  51. {
  52. asbd.mBytesPerPacket = bytes;
  53. return *this;
  54. }
  55. asbd_builder &frames_per_packet(UInt32 frames)
  56. {
  57. asbd.mFramesPerPacket = frames;
  58. return *this;
  59. }
  60. asbd_builder &bytes_per_frame(UInt32 bytes)
  61. {
  62. asbd.mBytesPerFrame = bytes;
  63. return *this;
  64. }
  65. asbd_builder &channels_per_frame(UInt32 channels)
  66. {
  67. asbd.mChannelsPerFrame = channels;
  68. return *this;
  69. }
  70. asbd_builder &bits_per_channel(UInt32 bits)
  71. {
  72. asbd.mBitsPerChannel = bits;
  73. return *this;
  74. }
  75. };
  76. struct ca_encoder {
  77. obs_encoder_t *encoder = nullptr;
  78. const char *format_name = nullptr;
  79. UInt32 format_id = 0;
  80. const initializer_list<UInt32> *allowed_formats = nullptr;
  81. AudioConverterRef converter = nullptr;
  82. size_t output_buffer_size = 0;
  83. vector<uint8_t> output_buffer;
  84. size_t out_frames_per_packet = 0;
  85. size_t in_packets = 0;
  86. size_t in_frame_size = 0;
  87. size_t in_bytes_required = 0;
  88. vector<uint8_t> input_buffer;
  89. vector<uint8_t> encode_buffer;
  90. uint64_t total_samples = 0;
  91. uint64_t samples_per_second = 0;
  92. vector<uint8_t> extra_data;
  93. size_t channels = 0;
  94. ~ca_encoder()
  95. {
  96. if (converter)
  97. AudioConverterDispose(converter);
  98. }
  99. };
  100. typedef struct ca_encoder ca_encoder;
  101. }
  102. namespace std {
  103. #ifndef _WIN32
  104. template <>
  105. struct default_delete<remove_pointer<CFErrorRef>::type> {
  106. void operator()(remove_pointer<CFErrorRef>::type *err)
  107. {
  108. CFRelease(err);
  109. }
  110. };
  111. template <>
  112. struct default_delete<remove_pointer<CFStringRef>::type> {
  113. void operator()(remove_pointer<CFStringRef>::type *str)
  114. {
  115. CFRelease(str);
  116. }
  117. };
  118. #endif
  119. template <>
  120. struct default_delete<remove_pointer<AudioConverterRef>::type> {
  121. void operator()(AudioConverterRef converter)
  122. {
  123. AudioConverterDispose(converter);
  124. }
  125. };
  126. }
  127. template <typename T>
  128. using cf_ptr = unique_ptr<typename remove_pointer<T>::type>;
  129. #ifndef _MSC_VER
  130. __attribute__((__format__(__printf__, 3, 4)))
  131. #endif
  132. static void log_to_dstr(DStr &str, ca_encoder *ca, const char *fmt, ...)
  133. {
  134. dstr prev_str = *static_cast<dstr*>(str);
  135. va_list args;
  136. va_start(args, fmt);
  137. dstr_vcatf(str, fmt, args);
  138. va_end(args);
  139. if (str->array)
  140. return;
  141. char array[4096];
  142. va_start(args, fmt);
  143. vsnprintf(array, 4096, fmt, args);
  144. va_end(args);
  145. array[4095] = 0;
  146. if (!prev_str.array && !prev_str.len)
  147. CA_CO_LOG(LOG_ERROR, "Could not allocate buffer for logging:"
  148. "\n'%s'", array);
  149. else
  150. CA_CO_LOG(LOG_ERROR, "Could not allocate buffer for logging:"
  151. "\n'%s'\nPrevious log entries:\n%s",
  152. array, prev_str.array);
  153. bfree(prev_str.array);
  154. }
  155. static const char *flush_log(DStr &log)
  156. {
  157. if (!log->array || !log->len)
  158. return "";
  159. if (log->array[log->len - 1] == '\n') {
  160. log->array[log->len - 1] = 0; //Get rid of last newline
  161. log->len -= 1;
  162. }
  163. return log->array;
  164. }
  165. #define CA_CO_DLOG_(level, format) \
  166. CA_CO_LOG(level, format "%s%s", \
  167. log->array ? ":\n" : "", flush_log(log))
  168. #define CA_CO_DLOG(level, format, ...) \
  169. CA_CO_LOG(level, format "%s%s", ##__VA_ARGS__, \
  170. log->array ? ":\n" : "", flush_log(log))
  171. static const char *aac_get_name(void*)
  172. {
  173. return obs_module_text("CoreAudioAAC");
  174. }
  175. static const char *code_to_str(OSStatus code)
  176. {
  177. switch (code) {
  178. #define HANDLE_CODE(c) case c: return #c
  179. HANDLE_CODE(kAudio_UnimplementedError);
  180. HANDLE_CODE(kAudio_FileNotFoundError);
  181. HANDLE_CODE(kAudio_FilePermissionError);
  182. HANDLE_CODE(kAudio_TooManyFilesOpenError);
  183. HANDLE_CODE(kAudio_BadFilePathError);
  184. HANDLE_CODE(kAudio_ParamError);
  185. HANDLE_CODE(kAudio_MemFullError);
  186. HANDLE_CODE(kAudioConverterErr_FormatNotSupported);
  187. HANDLE_CODE(kAudioConverterErr_OperationNotSupported);
  188. HANDLE_CODE(kAudioConverterErr_PropertyNotSupported);
  189. HANDLE_CODE(kAudioConverterErr_InvalidInputSize);
  190. HANDLE_CODE(kAudioConverterErr_InvalidOutputSize);
  191. HANDLE_CODE(kAudioConverterErr_UnspecifiedError);
  192. HANDLE_CODE(kAudioConverterErr_BadPropertySizeError);
  193. HANDLE_CODE(kAudioConverterErr_RequiresPacketDescriptionsError);
  194. HANDLE_CODE(kAudioConverterErr_InputSampleRateOutOfRange);
  195. HANDLE_CODE(kAudioConverterErr_OutputSampleRateOutOfRange);
  196. #undef HANDLE_CODE
  197. default: break;
  198. }
  199. return NULL;
  200. }
  201. static DStr osstatus_to_dstr(OSStatus code)
  202. {
  203. DStr result;
  204. #ifndef _WIN32
  205. cf_ptr<CFErrorRef> err{CFErrorCreate(kCFAllocatorDefault,
  206. kCFErrorDomainOSStatus, code, NULL)};
  207. cf_ptr<CFStringRef> str{CFErrorCopyDescription(err.get())};
  208. CFIndex length = CFStringGetLength(str.get());
  209. CFIndex max_size = CFStringGetMaximumSizeForEncoding(length,
  210. kCFStringEncodingUTF8);
  211. dstr_ensure_capacity(result, max_size);
  212. if (result->array && CFStringGetCString(str.get(), result->array,
  213. max_size, kCFStringEncodingUTF8)) {
  214. dstr_resize(result, strlen(result->array));
  215. return result;
  216. }
  217. #endif
  218. const char *code_str = code_to_str(code);
  219. dstr_printf(result, "%s%s%d%s",
  220. code_str ? code_str : "",
  221. code_str ? " (" : "",
  222. static_cast<int>(code),
  223. code_str ? ")" : "");
  224. return result;
  225. }
  226. static void log_osstatus(int log_level, ca_encoder *ca, const char *context,
  227. OSStatus code)
  228. {
  229. DStr str = osstatus_to_dstr(code);
  230. if (ca)
  231. CA_BLOG(log_level, "Error in %s: %s", context, str->array);
  232. else
  233. CA_LOG(log_level, "Error in %s: %s", context, str->array);
  234. }
  235. static const char *format_id_to_str(UInt32 format_id)
  236. {
  237. #define FORMAT_TO_STR(x) case x: return #x
  238. switch (format_id) {
  239. FORMAT_TO_STR(kAudioFormatLinearPCM);
  240. FORMAT_TO_STR(kAudioFormatAC3);
  241. FORMAT_TO_STR(kAudioFormat60958AC3);
  242. FORMAT_TO_STR(kAudioFormatAppleIMA4);
  243. FORMAT_TO_STR(kAudioFormatMPEG4AAC);
  244. FORMAT_TO_STR(kAudioFormatMPEG4CELP);
  245. FORMAT_TO_STR(kAudioFormatMPEG4HVXC);
  246. FORMAT_TO_STR(kAudioFormatMPEG4TwinVQ);
  247. FORMAT_TO_STR(kAudioFormatMACE3);
  248. FORMAT_TO_STR(kAudioFormatMACE6);
  249. FORMAT_TO_STR(kAudioFormatULaw);
  250. FORMAT_TO_STR(kAudioFormatALaw);
  251. FORMAT_TO_STR(kAudioFormatQDesign);
  252. FORMAT_TO_STR(kAudioFormatQDesign2);
  253. FORMAT_TO_STR(kAudioFormatQUALCOMM);
  254. FORMAT_TO_STR(kAudioFormatMPEGLayer1);
  255. FORMAT_TO_STR(kAudioFormatMPEGLayer2);
  256. FORMAT_TO_STR(kAudioFormatMPEGLayer3);
  257. FORMAT_TO_STR(kAudioFormatTimeCode);
  258. FORMAT_TO_STR(kAudioFormatMIDIStream);
  259. FORMAT_TO_STR(kAudioFormatParameterValueStream);
  260. FORMAT_TO_STR(kAudioFormatAppleLossless);
  261. FORMAT_TO_STR(kAudioFormatMPEG4AAC_HE);
  262. FORMAT_TO_STR(kAudioFormatMPEG4AAC_LD);
  263. FORMAT_TO_STR(kAudioFormatMPEG4AAC_ELD);
  264. FORMAT_TO_STR(kAudioFormatMPEG4AAC_ELD_SBR);
  265. FORMAT_TO_STR(kAudioFormatMPEG4AAC_HE_V2);
  266. FORMAT_TO_STR(kAudioFormatMPEG4AAC_Spatial);
  267. FORMAT_TO_STR(kAudioFormatAMR);
  268. FORMAT_TO_STR(kAudioFormatAudible);
  269. FORMAT_TO_STR(kAudioFormatiLBC);
  270. FORMAT_TO_STR(kAudioFormatDVIIntelIMA);
  271. FORMAT_TO_STR(kAudioFormatMicrosoftGSM);
  272. FORMAT_TO_STR(kAudioFormatAES3);
  273. }
  274. #undef FORMAT_TO_STR
  275. return "Unknown format";
  276. }
  277. static void aac_destroy(void *data)
  278. {
  279. ca_encoder *ca = static_cast<ca_encoder*>(data);
  280. delete ca;
  281. }
  282. template <typename Func>
  283. static bool query_converter_property_raw(DStr &log, ca_encoder *ca,
  284. AudioFormatPropertyID property,
  285. const char *get_property_info, const char *get_property,
  286. AudioConverterRef converter, Func &&func)
  287. {
  288. UInt32 size = 0;
  289. OSStatus code = AudioConverterGetPropertyInfo(converter, property,
  290. &size, nullptr);
  291. if (code) {
  292. log_to_dstr(log, ca, "%s: %s\n", get_property_info,
  293. osstatus_to_dstr(code)->array);
  294. return false;
  295. }
  296. if (!size) {
  297. log_to_dstr(log, ca, "%s returned 0 size\n", get_property_info);
  298. return false;
  299. }
  300. vector<uint8_t> buffer;
  301. try {
  302. buffer.resize(size);
  303. } catch (...) {
  304. log_to_dstr(log, ca, "Failed to allocate %u bytes for %s\n",
  305. static_cast<uint32_t>(size), get_property);
  306. return false;
  307. }
  308. code = AudioConverterGetProperty(converter, property, &size,
  309. buffer.data());
  310. if (code) {
  311. log_to_dstr(log, ca, "%s: %s\n", get_property,
  312. osstatus_to_dstr(code)->array);
  313. return false;
  314. }
  315. func(size, static_cast<void*>(buffer.data()));
  316. return true;
  317. }
  318. #define EXPAND_CONVERTER_NAMES(x) x, \
  319. "AudioConverterGetPropertyInfo(" #x ")", \
  320. "AudioConverterGetProperty(" #x ")"
  321. template <typename Func>
  322. static bool enumerate_bitrates(DStr &log, ca_encoder *ca,
  323. AudioConverterRef converter, Func &&func)
  324. {
  325. auto helper = [&](UInt32 size, void *data)
  326. {
  327. auto range = static_cast<AudioValueRange*>(data);
  328. size_t num_ranges = size / sizeof(AudioValueRange);
  329. for (size_t i = 0; i < num_ranges; i++)
  330. func(static_cast<UInt32>(range[i].mMinimum),
  331. static_cast<UInt32>(range[i].mMaximum));
  332. };
  333. return query_converter_property_raw(log, ca, EXPAND_CONVERTER_NAMES(
  334. kAudioConverterApplicableEncodeBitRates),
  335. converter, helper);
  336. }
  337. static bool bitrate_valid(DStr &log, ca_encoder *ca,
  338. AudioConverterRef converter, UInt32 bitrate)
  339. {
  340. bool valid = false;
  341. auto helper = [&](UInt32 min_, UInt32 max_)
  342. {
  343. if (min_ == bitrate || max_ == bitrate)
  344. valid = true;
  345. };
  346. enumerate_bitrates(log, ca, converter, helper);
  347. return valid;
  348. }
  349. static bool create_encoder(DStr &log, ca_encoder *ca,
  350. AudioStreamBasicDescription *in,
  351. AudioStreamBasicDescription *out,
  352. UInt32 format_id, UInt32 bitrate, UInt32 samplerate,
  353. UInt32 rate_control)
  354. {
  355. #define STATUS_CHECK(c) \
  356. code = c; \
  357. if (code) { \
  358. log_to_dstr(log, ca, #c " returned %s", \
  359. osstatus_to_dstr(code)->array); \
  360. return false; \
  361. }
  362. Float64 srate = samplerate ?
  363. (Float64)samplerate :
  364. (Float64)ca->samples_per_second;
  365. auto out_ = asbd_builder()
  366. .sample_rate(srate)
  367. .channels_per_frame((UInt32)ca->channels)
  368. .format_id(format_id)
  369. .asbd;
  370. UInt32 size = sizeof(*out);
  371. OSStatus code;
  372. STATUS_CHECK(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
  373. 0, NULL, &size, &out_));
  374. *out = out_;
  375. STATUS_CHECK(AudioConverterNew(in, out, &ca->converter))
  376. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  377. kAudioCodecPropertyBitRateControlMode,
  378. sizeof(rate_control), &rate_control));
  379. if (!bitrate_valid(log, ca, ca->converter, bitrate)) {
  380. log_to_dstr(log, ca, "Encoder does not support bitrate %u "
  381. "for format %s (0x%x)\n",
  382. (uint32_t)bitrate, format_id_to_str(format_id),
  383. (uint32_t)format_id);
  384. return false;
  385. }
  386. ca->format_id = format_id;
  387. return true;
  388. #undef STATUS_CHECK
  389. }
  390. static const initializer_list<UInt32> aac_formats = {
  391. kAudioFormatMPEG4AAC_HE_V2,
  392. kAudioFormatMPEG4AAC_HE,
  393. kAudioFormatMPEG4AAC,
  394. };
  395. static const initializer_list<UInt32> aac_lc_formats = {
  396. kAudioFormatMPEG4AAC,
  397. };
  398. static void *aac_create(obs_data_t *settings, obs_encoder_t *encoder)
  399. {
  400. #define STATUS_CHECK(c) \
  401. code = c; \
  402. if (code) { \
  403. log_osstatus(LOG_ERROR, ca.get(), #c, code); \
  404. return nullptr; \
  405. }
  406. UInt32 bitrate = (UInt32)obs_data_get_int(settings, "bitrate") * 1000;
  407. if (!bitrate) {
  408. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  409. "Invalid bitrate specified");
  410. return NULL;
  411. }
  412. const enum audio_format format = AUDIO_FORMAT_FLOAT;
  413. if (is_audio_planar(format)) {
  414. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  415. "Got non-interleaved audio format %d", format);
  416. return NULL;
  417. }
  418. unique_ptr<ca_encoder> ca;
  419. try {
  420. ca.reset(new ca_encoder());
  421. } catch (...) {
  422. CA_LOG_ENCODER("AAC", encoder, LOG_ERROR,
  423. "Could not allocate encoder");
  424. return nullptr;
  425. }
  426. ca->encoder = encoder;
  427. ca->format_name = "AAC";
  428. audio_t *audio = obs_encoder_audio(encoder);
  429. const struct audio_output_info *aoi = audio_output_get_info(audio);
  430. ca->channels = audio_output_get_channels(audio);
  431. ca->samples_per_second = audio_output_get_sample_rate(audio);
  432. size_t bytes_per_frame = get_audio_size(format, aoi->speakers, 1);
  433. size_t bits_per_channel = get_audio_bytes_per_channel(format) * 8;
  434. auto in = asbd_builder()
  435. .sample_rate((Float64)ca->samples_per_second)
  436. .channels_per_frame((UInt32)ca->channels)
  437. .bytes_per_frame((UInt32)bytes_per_frame)
  438. .frames_per_packet(1)
  439. .bytes_per_packet((UInt32)(1 * bytes_per_frame))
  440. .bits_per_channel((UInt32)bits_per_channel)
  441. .format_id(kAudioFormatLinearPCM)
  442. .format_flags(kAudioFormatFlagsNativeEndian |
  443. kAudioFormatFlagIsPacked |
  444. kAudioFormatFlagIsFloat |
  445. 0)
  446. .asbd;
  447. AudioStreamBasicDescription out;
  448. UInt32 rate_control = kAudioCodecBitRateControlMode_Constant;
  449. if (obs_data_get_bool(settings, "allow he-aac")) {
  450. ca->allowed_formats = &aac_formats;
  451. } else {
  452. ca->allowed_formats = &aac_lc_formats;
  453. }
  454. auto samplerate =
  455. static_cast<UInt32>(obs_data_get_int(settings, "samplerate"));
  456. DStr log;
  457. bool encoder_created = false;
  458. for (UInt32 format_id : *ca->allowed_formats) {
  459. log_to_dstr(log, ca.get(), "Trying format %s (0x%x)\n",
  460. format_id_to_str(format_id),
  461. (uint32_t)format_id);
  462. if (!create_encoder(log, ca.get(), &in, &out, format_id,
  463. bitrate, samplerate, rate_control))
  464. continue;
  465. encoder_created = true;
  466. break;
  467. }
  468. if (!encoder_created) {
  469. CA_CO_DLOG(LOG_ERROR, "Could not create encoder for "
  470. "selected format%s",
  471. ca->allowed_formats->size() == 1 ? "" : "s");
  472. return nullptr;
  473. }
  474. if (log->len)
  475. CA_CO_DLOG_(LOG_DEBUG, "Encoder created");
  476. OSStatus code;
  477. UInt32 converter_quality = kAudioConverterQuality_Max;
  478. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  479. kAudioConverterCodecQuality,
  480. sizeof(converter_quality), &converter_quality));
  481. STATUS_CHECK(AudioConverterSetProperty(ca->converter,
  482. kAudioConverterEncodeBitRate,
  483. sizeof(bitrate), &bitrate));
  484. UInt32 size = sizeof(in);
  485. STATUS_CHECK(AudioConverterGetProperty(ca->converter,
  486. kAudioConverterCurrentInputStreamDescription,
  487. &size, &in));
  488. size = sizeof(out);
  489. STATUS_CHECK(AudioConverterGetProperty(ca->converter,
  490. kAudioConverterCurrentOutputStreamDescription,
  491. &size, &out));
  492. ca->in_frame_size = in.mBytesPerFrame;
  493. ca->in_packets = out.mFramesPerPacket / in.mFramesPerPacket;
  494. ca->in_bytes_required = ca->in_packets * ca->in_frame_size;
  495. ca->out_frames_per_packet = out.mFramesPerPacket;
  496. ca->output_buffer_size = out.mBytesPerPacket;
  497. if (out.mBytesPerPacket == 0) {
  498. UInt32 max_packet_size = 0;
  499. size = sizeof(max_packet_size);
  500. code = AudioConverterGetProperty(ca->converter,
  501. kAudioConverterPropertyMaximumOutputPacketSize,
  502. &size, &max_packet_size);
  503. if (code) {
  504. log_osstatus(LOG_WARNING, ca.get(),
  505. "AudioConverterGetProperty(PacketSz)",
  506. code);
  507. ca->output_buffer_size = 32768;
  508. } else {
  509. ca->output_buffer_size = max_packet_size;
  510. }
  511. }
  512. try {
  513. ca->output_buffer.resize(ca->output_buffer_size);
  514. } catch (...) {
  515. CA_BLOG(LOG_ERROR, "Failed to allocate output buffer");
  516. return nullptr;
  517. }
  518. const char *format_name =
  519. out.mFormatID == kAudioFormatMPEG4AAC_HE_V2 ? "HE-AAC v2" :
  520. out.mFormatID == kAudioFormatMPEG4AAC_HE ? "HE-AAC" : "AAC";
  521. CA_BLOG(LOG_INFO, "settings:\n"
  522. "\tmode: %s\n"
  523. "\tbitrate: %u\n"
  524. "\tsample rate: %llu\n"
  525. "\tcbr: %s\n"
  526. "\toutput buffer: %lu",
  527. format_name, (unsigned int)bitrate / 1000,
  528. ca->samples_per_second,
  529. rate_control == kAudioCodecBitRateControlMode_Constant ?
  530. "on" : "off",
  531. (unsigned long)ca->output_buffer_size);
  532. return ca.release();
  533. #undef STATUS_CHECK
  534. }
  535. static OSStatus complex_input_data_proc(AudioConverterRef inAudioConverter,
  536. UInt32 *ioNumberDataPackets, AudioBufferList *ioData,
  537. AudioStreamPacketDescription **outDataPacketDescription,
  538. void *inUserData)
  539. {
  540. UNUSED_PARAMETER(inAudioConverter);
  541. UNUSED_PARAMETER(outDataPacketDescription);
  542. ca_encoder *ca = static_cast<ca_encoder*>(inUserData);
  543. if (ca->input_buffer.size() < ca->in_bytes_required) {
  544. *ioNumberDataPackets = 0;
  545. ioData->mBuffers[0].mData = NULL;
  546. return 1;
  547. }
  548. auto start = begin(ca->input_buffer);
  549. auto stop = begin(ca->input_buffer) + ca->in_bytes_required;
  550. ca->encode_buffer.assign(start, stop);
  551. ca->input_buffer.erase(start, stop);
  552. *ioNumberDataPackets =
  553. (UInt32)(ca->in_bytes_required / ca->in_frame_size);
  554. ioData->mNumberBuffers = 1;
  555. ioData->mBuffers[0].mData = ca->encode_buffer.data();
  556. ioData->mBuffers[0].mNumberChannels = (UInt32)ca->channels;
  557. ioData->mBuffers[0].mDataByteSize = (UInt32)ca->in_bytes_required;
  558. return 0;
  559. }
  560. #ifdef _MSC_VER
  561. // disable warning that recommends if ((foo = bar > 0) == false) over
  562. // if (!(foo = bar > 0))
  563. #pragma warning(push)
  564. #pragma warning(disable: 4706)
  565. #endif
  566. static bool aac_encode(void *data, struct encoder_frame *frame,
  567. struct encoder_packet *packet, bool *received_packet)
  568. {
  569. ca_encoder *ca = static_cast<ca_encoder*>(data);
  570. ca->input_buffer.insert(end(ca->input_buffer),
  571. frame->data[0], frame->data[0] + frame->linesize[0]);
  572. if (ca->input_buffer.size() < ca->in_bytes_required)
  573. return true;
  574. UInt32 packets = 1;
  575. AudioBufferList buffer_list = { 0 };
  576. buffer_list.mNumberBuffers = 1;
  577. buffer_list.mBuffers[0].mNumberChannels = (UInt32)ca->channels;
  578. buffer_list.mBuffers[0].mDataByteSize = (UInt32)ca->output_buffer_size;
  579. buffer_list.mBuffers[0].mData = ca->output_buffer.data();
  580. AudioStreamPacketDescription out_desc = { 0 };
  581. OSStatus code = AudioConverterFillComplexBuffer(ca->converter,
  582. complex_input_data_proc, ca, &packets,
  583. &buffer_list, &out_desc);
  584. if (code && code != 1) {
  585. log_osstatus(LOG_ERROR, ca, "AudioConverterFillComplexBuffer",
  586. code);
  587. return false;
  588. }
  589. if (!(*received_packet = packets > 0))
  590. return true;
  591. packet->pts = ca->total_samples;
  592. packet->dts = ca->total_samples;
  593. packet->timebase_num = 1;
  594. packet->timebase_den = (uint32_t)ca->samples_per_second;
  595. packet->type = OBS_ENCODER_AUDIO;
  596. packet->size = out_desc.mDataByteSize;
  597. packet->data =
  598. (uint8_t*)buffer_list.mBuffers[0].mData + out_desc.mStartOffset;
  599. ca->total_samples += ca->in_bytes_required / ca->in_frame_size;
  600. return true;
  601. }
  602. #ifdef _MSC_VER
  603. #pragma warning(pop)
  604. #endif
  605. static void aac_audio_info(void *data, struct audio_convert_info *info)
  606. {
  607. UNUSED_PARAMETER(data);
  608. info->format = AUDIO_FORMAT_FLOAT;
  609. }
  610. static size_t aac_frame_size(void *data)
  611. {
  612. ca_encoder *ca = static_cast<ca_encoder*>(data);
  613. return ca->out_frames_per_packet;
  614. }
  615. /* The following code was extracted from encca_aac.c in HandBrake's libhb */
  616. #define MP4ESDescrTag 0x03
  617. #define MP4DecConfigDescrTag 0x04
  618. #define MP4DecSpecificDescrTag 0x05
  619. // based off of mov_mp4_read_descr_len from mov.c in ffmpeg's libavformat
  620. static int read_descr_len(uint8_t **buffer)
  621. {
  622. int len = 0;
  623. int count = 4;
  624. while (count--)
  625. {
  626. int c = *(*buffer)++;
  627. len = (len << 7) | (c & 0x7f);
  628. if (!(c & 0x80))
  629. break;
  630. }
  631. return len;
  632. }
  633. // based off of mov_mp4_read_descr from mov.c in ffmpeg's libavformat
  634. static int read_descr(uint8_t **buffer, int *tag)
  635. {
  636. *tag = *(*buffer)++;
  637. return read_descr_len(buffer);
  638. }
  639. // based off of mov_read_esds from mov.c in ffmpeg's libavformat
  640. static void read_esds_desc_ext(uint8_t* desc_ext, vector<uint8_t> &buffer,
  641. bool version_flags)
  642. {
  643. uint8_t *esds = desc_ext;
  644. int tag, len;
  645. if (version_flags)
  646. esds += 4; // version + flags
  647. read_descr(&esds, &tag);
  648. esds += 2; // ID
  649. if (tag == MP4ESDescrTag)
  650. esds++; // priority
  651. read_descr(&esds, &tag);
  652. if (tag == MP4DecConfigDescrTag) {
  653. esds++; // object type id
  654. esds++; // stream type
  655. esds += 3; // buffer size db
  656. esds += 4; // max bitrate
  657. esds += 4; // average bitrate
  658. len = read_descr(&esds, &tag);
  659. if (tag == MP4DecSpecificDescrTag)
  660. try {
  661. buffer.assign(esds, esds + len);
  662. } catch (...) {
  663. //leave buffer empty
  664. }
  665. }
  666. }
  667. /* extracted code ends here */
  668. static void query_extra_data(ca_encoder *ca)
  669. {
  670. UInt32 size = 0;
  671. OSStatus code;
  672. code = AudioConverterGetPropertyInfo(ca->converter,
  673. kAudioConverterCompressionMagicCookie,
  674. &size, NULL);
  675. if (code) {
  676. log_osstatus(LOG_ERROR, ca,
  677. "AudioConverterGetPropertyInfo(magic_cookie)",
  678. code);
  679. return;
  680. }
  681. if (!size) {
  682. CA_BLOG(LOG_WARNING, "Got 0 data size info for magic_cookie");
  683. return;
  684. }
  685. vector<uint8_t> extra_data;
  686. try {
  687. extra_data.resize(size);
  688. } catch (...) {
  689. CA_BLOG(LOG_WARNING, "Could not allocate extra data buffer");
  690. return;
  691. }
  692. code = AudioConverterGetProperty(ca->converter,
  693. kAudioConverterCompressionMagicCookie,
  694. &size, extra_data.data());
  695. if (code) {
  696. log_osstatus(LOG_ERROR, ca,
  697. "AudioConverterGetProperty(magic_cookie)",
  698. code);
  699. return;
  700. }
  701. if (!size) {
  702. CA_BLOG(LOG_WARNING, "Got 0 data size for magic_cookie");
  703. return;
  704. }
  705. read_esds_desc_ext(extra_data.data(), ca->extra_data, false);
  706. }
  707. static bool aac_extra_data(void *data, uint8_t **extra_data, size_t *size)
  708. {
  709. ca_encoder *ca = static_cast<ca_encoder*>(data);
  710. if (!ca->extra_data.size())
  711. query_extra_data(ca);
  712. if (!ca->extra_data.size())
  713. return false;
  714. *extra_data = ca->extra_data.data();
  715. *size = ca->extra_data.size();
  716. return true;
  717. }
  718. static asbd_builder fill_common_asbd_fields(asbd_builder builder,
  719. bool in=false)
  720. {
  721. UInt32 bytes_per_frame = 8;
  722. UInt32 channels = 2;
  723. UInt32 bits_per_channel = bytes_per_frame / channels * 8;
  724. builder.channels_per_frame(channels);
  725. if (in) {
  726. builder
  727. .bytes_per_frame(bytes_per_frame)
  728. .frames_per_packet(1)
  729. .bytes_per_packet(1 * bytes_per_frame)
  730. .bits_per_channel(bits_per_channel);
  731. }
  732. return builder;
  733. }
  734. static AudioStreamBasicDescription get_default_in_asbd()
  735. {
  736. return fill_common_asbd_fields(asbd_builder(), true)
  737. .sample_rate(44100)
  738. .format_id(kAudioFormatLinearPCM)
  739. .format_flags(kAudioFormatFlagsNativeEndian |
  740. kAudioFormatFlagIsPacked |
  741. kAudioFormatFlagIsFloat |
  742. 0)
  743. .asbd;
  744. }
  745. static asbd_builder get_default_out_asbd_builder()
  746. {
  747. return fill_common_asbd_fields(asbd_builder())
  748. .sample_rate(44100);
  749. }
  750. static cf_ptr<AudioConverterRef> get_converter(DStr &log, ca_encoder *ca,
  751. AudioStreamBasicDescription out,
  752. AudioStreamBasicDescription in = get_default_in_asbd())
  753. {
  754. UInt32 size = sizeof(out);
  755. OSStatus code;
  756. #define STATUS_CHECK(x) \
  757. code = x; \
  758. if (code) { \
  759. log_to_dstr(log, ca, "%s: %s\n", #x, \
  760. osstatus_to_dstr(code)->array); \
  761. return nullptr; \
  762. }
  763. STATUS_CHECK(AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
  764. 0, NULL, &size, &out));
  765. AudioConverterRef converter;
  766. STATUS_CHECK(AudioConverterNew(&in, &out, &converter));
  767. return cf_ptr<AudioConverterRef>{converter};
  768. #undef STATUS_CHECK
  769. }
  770. static bool find_best_match(DStr &log, ca_encoder *ca, UInt32 bitrate,
  771. UInt32 &best_match)
  772. {
  773. UInt32 actual_bitrate = bitrate * 1000;
  774. bool found_match = false;
  775. auto handle_bitrate = [&](UInt32 candidate)
  776. {
  777. if (abs(static_cast<intmax_t>(actual_bitrate - candidate)) <
  778. abs(static_cast<intmax_t>(actual_bitrate - best_match))) {
  779. log_to_dstr(log, ca, "Found new best match %u\n",
  780. static_cast<uint32_t>(candidate));
  781. found_match = true;
  782. best_match = candidate;
  783. }
  784. };
  785. auto helper = [&](UInt32 min_, UInt32 max_)
  786. {
  787. handle_bitrate(min_);
  788. if (min_ == max_)
  789. return;
  790. log_to_dstr(log, ca, "Got actual bit rate range: %u<->%u\n",
  791. static_cast<uint32_t>(min_),
  792. static_cast<uint32_t>(max_));
  793. handle_bitrate(max_);
  794. };
  795. for (UInt32 format_id : aac_formats) {
  796. log_to_dstr(log, ca, "Trying %s (0x%x)\n",
  797. format_id_to_str(format_id), format_id);
  798. auto out = get_default_out_asbd_builder()
  799. .format_id(format_id)
  800. .asbd;
  801. auto converter = get_converter(log, ca, out);
  802. if (converter)
  803. enumerate_bitrates(log, ca, converter.get(),
  804. helper);
  805. else
  806. log_to_dstr(log, ca, "Could not get converter\n");
  807. }
  808. best_match /= 1000;
  809. return found_match;
  810. }
  811. static UInt32 find_matching_bitrate(UInt32 bitrate)
  812. {
  813. static UInt32 match = bitrate;
  814. static once_flag once;
  815. call_once(once, [&]()
  816. {
  817. DStr log;
  818. ca_encoder *ca = nullptr;
  819. if (!find_best_match(log, ca, bitrate, match)) {
  820. CA_CO_DLOG(LOG_ERROR, "No matching bitrates found for "
  821. "target bitrate %u",
  822. static_cast<uint32_t>(bitrate));
  823. match = bitrate;
  824. return;
  825. }
  826. if (match != bitrate) {
  827. CA_CO_DLOG(LOG_INFO, "Default bitrate (%u) isn't "
  828. "supported, returning %u as closest match",
  829. static_cast<uint32_t>(bitrate),
  830. static_cast<uint32_t>(match));
  831. return;
  832. }
  833. if (log->len)
  834. CA_CO_DLOG(LOG_DEBUG, "Default bitrate matching log "
  835. "for bitrate %u",
  836. static_cast<uint32_t>(bitrate));
  837. });
  838. return match;
  839. }
  840. static void aac_defaults(obs_data_t *settings)
  841. {
  842. obs_data_set_default_int(settings, "samplerate", 0); //match input
  843. obs_data_set_default_int(settings, "bitrate",
  844. find_matching_bitrate(128));
  845. obs_data_set_default_bool(settings, "allow he-aac", true);
  846. }
  847. template <typename Func>
  848. static bool query_property_raw(DStr &log, ca_encoder *ca,
  849. AudioFormatPropertyID property,
  850. const char *get_property_info, const char *get_property,
  851. AudioStreamBasicDescription &desc, Func &&func)
  852. {
  853. UInt32 size = 0;
  854. OSStatus code = AudioFormatGetPropertyInfo(property,
  855. sizeof(AudioStreamBasicDescription), &desc, &size);
  856. if (code) {
  857. log_to_dstr(log, ca, "%s: %s\n", get_property_info,
  858. osstatus_to_dstr(code)->array);
  859. return false;
  860. }
  861. if (!size) {
  862. log_to_dstr(log, ca, "%s returned 0 size\n", get_property_info);
  863. return false;
  864. }
  865. vector<uint8_t> buffer;
  866. try {
  867. buffer.resize(size);
  868. } catch (...) {
  869. log_to_dstr(log, ca, "Failed to allocate %u bytes for %s\n",
  870. static_cast<uint32_t>(size), get_property);
  871. return false;
  872. }
  873. code = AudioFormatGetProperty(property,
  874. sizeof(AudioStreamBasicDescription), &desc, &size,
  875. buffer.data());
  876. if (code) {
  877. log_to_dstr(log, ca, "%s: %s\n", get_property,
  878. osstatus_to_dstr(code)->array);
  879. return false;
  880. }
  881. func(size, static_cast<void*>(buffer.data()));
  882. return true;
  883. }
  884. #define EXPAND_PROPERTY_NAMES(x) x, \
  885. "AudioFormatGetPropertyInfo(" #x ")", \
  886. "AudioFormatGetProperty(" #x ")"
  887. template <typename Func>
  888. static bool enumerate_samplerates(DStr &log, ca_encoder *ca,
  889. AudioStreamBasicDescription &desc, Func &&func)
  890. {
  891. auto helper = [&](UInt32 size, void *data)
  892. {
  893. auto range = static_cast<AudioValueRange*>(data);
  894. size_t num_ranges = size / sizeof(AudioValueRange);
  895. for (size_t i = 0; i < num_ranges; i++)
  896. func(range[i]);
  897. };
  898. return query_property_raw(log, ca, EXPAND_PROPERTY_NAMES(
  899. kAudioFormatProperty_AvailableEncodeSampleRates),
  900. desc, helper);
  901. }
  #if 0
  // Disabled on purpose: this query yields the "available" bitrates, which are
  // not all actually usable (i.e. available bitrates vs. applicable bitrates).
  template <typename Func>
  static bool enumerate_bitrates(DStr &log, ca_encoder *ca,
          AudioStreamBasicDescription &desc, Func &&func)
  {
      auto visit_ranges = [&](UInt32 byte_count, void *payload)
      {
          // The reply is a packed array of AudioValueRange entries.
          const size_t count = byte_count / sizeof(AudioValueRange);
          auto first = static_cast<AudioValueRange*>(payload);
          for (auto cur = first; cur != first + count; ++cur)
              func(*cur);
      };

      return query_property_raw(log, ca,
              EXPAND_PROPERTY_NAMES(
                  kAudioFormatProperty_AvailableEncodeBitRates),
              desc, visit_ranges);
  }
  #endif
  921. static vector<UInt32> get_samplerates(DStr &log, ca_encoder *ca)
  922. {
  923. vector<UInt32> samplerates;
  924. auto handle_samplerate = [&](UInt32 rate)
  925. {
  926. if (find(begin(samplerates), end(samplerates), rate) ==
  927. end(samplerates)) {
  928. log_to_dstr(log, ca, "Adding sample rate %u\n",
  929. static_cast<uint32_t>(rate));
  930. samplerates.push_back(rate);
  931. } else {
  932. log_to_dstr(log, ca, "Sample rate %u already added\n",
  933. static_cast<uint32_t>(rate));
  934. }
  935. };
  936. auto helper = [&](const AudioValueRange &range)
  937. {
  938. auto min_ = static_cast<UInt32>(range.mMinimum);
  939. auto max_ = static_cast<UInt32>(range.mMaximum);
  940. handle_samplerate(min_);
  941. if (min_ == max_)
  942. return;
  943. log_to_dstr(log, ca, "Got actual sample rate range: %u<->%u\n",
  944. static_cast<uint32_t>(min_),
  945. static_cast<uint32_t>(max_));
  946. handle_samplerate(max_);
  947. };
  948. for (UInt32 format : (ca ? *ca->allowed_formats : aac_formats)) {
  949. log_to_dstr(log, ca, "Trying %s (0x%x)\n",
  950. format_id_to_str(format),
  951. static_cast<uint32_t>(format));
  952. auto asbd = asbd_builder()
  953. .format_id(format)
  954. .asbd;
  955. enumerate_samplerates(log, ca, asbd, helper);
  956. }
  957. return samplerates;
  958. }
  959. static void add_samplerates(obs_property_t *prop, ca_encoder *ca)
  960. {
  961. obs_property_list_add_int(prop,
  962. obs_module_text("UseInputSampleRate"), 0);
  963. DStr log;
  964. auto samplerates = get_samplerates(log, ca);
  965. if (!samplerates.size()) {
  966. CA_CO_DLOG_(LOG_ERROR, "Couldn't find available sample rates");
  967. return;
  968. }
  969. if (log->len)
  970. CA_CO_DLOG_(LOG_DEBUG, "Sample rate enumeration log");
  971. sort(begin(samplerates), end(samplerates));
  972. DStr buffer;
  973. for (UInt32 samplerate : samplerates) {
  974. dstr_printf(buffer, "%d", static_cast<uint32_t>(samplerate));
  975. obs_property_list_add_int(prop, buffer->array, samplerate);
  976. }
  977. }
  978. #define NBSP "\xC2\xA0"
  979. static vector<UInt32> get_bitrates(DStr &log, ca_encoder *ca,
  980. Float64 samplerate)
  981. {
  982. vector<UInt32> bitrates;
  983. auto handle_bitrate = [&](UInt32 bitrate)
  984. {
  985. if (find(begin(bitrates), end(bitrates), bitrate) ==
  986. end(bitrates)) {
  987. log_to_dstr(log, ca, "Adding bitrate %u\n",
  988. static_cast<uint32_t>(bitrate));
  989. bitrates.push_back(bitrate);
  990. } else {
  991. log_to_dstr(log, ca, "Bitrate %u already added\n",
  992. static_cast<uint32_t>(bitrate));
  993. }
  994. };
  995. auto helper = [&](UInt32 min_, UInt32 max_)
  996. {
  997. handle_bitrate(min_);
  998. if (min_ == max_)
  999. return;
  1000. log_to_dstr(log, ca, "Got actual bitrate range: %u<->%u\n",
  1001. static_cast<uint32_t>(min_),
  1002. static_cast<uint32_t>(max_));
  1003. handle_bitrate(max_);
  1004. };
  1005. for (UInt32 format_id : (ca ? *ca->allowed_formats : aac_formats)) {
  1006. log_to_dstr(log, ca, "Trying %s (0x%x) at %g" NBSP "hz\n",
  1007. format_id_to_str(format_id),
  1008. static_cast<uint32_t>(format_id),
  1009. samplerate);
  1010. auto out = get_default_out_asbd_builder()
  1011. .format_id(format_id)
  1012. .sample_rate(samplerate)
  1013. .asbd;
  1014. auto converter = get_converter(log, ca, out);
  1015. if (converter)
  1016. enumerate_bitrates(log, ca, converter.get(), helper);
  1017. }
  1018. return bitrates;
  1019. }
  1020. static void add_bitrates(obs_property_t *prop, ca_encoder *ca,
  1021. Float64 samplerate=44100., UInt32 *selected=nullptr)
  1022. {
  1023. obs_property_list_clear(prop);
  1024. DStr log;
  1025. auto bitrates = get_bitrates(log, ca, samplerate);
  1026. if (!bitrates.size()) {
  1027. CA_CO_DLOG_(LOG_ERROR, "Couldn't find available bitrates");
  1028. return;
  1029. }
  1030. if (log->len)
  1031. CA_CO_DLOG_(LOG_DEBUG, "Bitrate enumeration log");
  1032. bool selected_in_range = true;
  1033. if (selected) {
  1034. selected_in_range = find(begin(bitrates), end(bitrates),
  1035. *selected * 1000) != end(bitrates);
  1036. if (!selected_in_range)
  1037. bitrates.push_back(*selected * 1000);
  1038. }
  1039. sort(begin(bitrates), end(bitrates));
  1040. DStr buffer;
  1041. for (UInt32 bitrate : bitrates) {
  1042. dstr_printf(buffer, "%u", (uint32_t)bitrate / 1000);
  1043. size_t idx = obs_property_list_add_int(prop, buffer->array,
  1044. bitrate / 1000);
  1045. if (selected_in_range || bitrate / 1000 != *selected)
  1046. continue;
  1047. obs_property_list_item_disable(prop, idx, true);
  1048. }
  1049. }
  1050. static bool samplerate_updated(obs_properties_t *props, obs_property_t *prop,
  1051. obs_data_t *settings)
  1052. {
  1053. auto samplerate =
  1054. static_cast<UInt32>(obs_data_get_int(settings, "samplerate"));
  1055. if (!samplerate)
  1056. samplerate = 44100;
  1057. prop = obs_properties_get(props, "bitrate");
  1058. if (prop) {
  1059. auto bitrate = static_cast<UInt32>(
  1060. obs_data_get_int(settings, "bitrate"));
  1061. add_bitrates(prop, nullptr, samplerate, &bitrate);
  1062. return true;
  1063. }
  1064. return false;
  1065. }
  1066. static obs_properties_t *aac_properties(void *data)
  1067. {
  1068. ca_encoder *ca = static_cast<ca_encoder*>(data);
  1069. obs_properties_t *props = obs_properties_create();
  1070. obs_property_t *p = obs_properties_add_list(props, "samplerate",
  1071. obs_module_text("OutputSamplerate"),
  1072. OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
  1073. add_samplerates(p, ca);
  1074. obs_property_set_modified_callback(p, samplerate_updated);
  1075. p = obs_properties_add_list(props, "bitrate",
  1076. obs_module_text("Bitrate"),
  1077. OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_INT);
  1078. add_bitrates(p, ca);
  1079. obs_properties_add_bool(props, "allow he-aac",
  1080. obs_module_text("AllowHEAAC"));
  1081. return props;
  1082. }
  1083. OBS_DECLARE_MODULE()
  1084. OBS_MODULE_USE_DEFAULT_LOCALE("coreaudio-encoder", "en-US")
  1085. bool obs_module_load(void)
  1086. {
  1087. #ifdef _WIN32
  1088. if (!load_core_audio()) {
  1089. CA_LOG(LOG_WARNING, "CoreAudio AAC encoder not installed on "
  1090. "the system or couldn't be loaded");
  1091. return true;
  1092. }
  1093. CA_LOG(LOG_INFO, "Adding CoreAudio AAC encoder");
  1094. #endif
  1095. struct obs_encoder_info aac_info{};
  1096. aac_info.id = "CoreAudio_AAC";
  1097. aac_info.type = OBS_ENCODER_AUDIO;
  1098. aac_info.codec = "AAC";
  1099. aac_info.get_name = aac_get_name;
  1100. aac_info.destroy = aac_destroy;
  1101. aac_info.create = aac_create;
  1102. aac_info.encode = aac_encode;
  1103. aac_info.get_frame_size = aac_frame_size;
  1104. aac_info.get_audio_info = aac_audio_info;
  1105. aac_info.get_extra_data = aac_extra_data;
  1106. aac_info.get_defaults = aac_defaults;
  1107. aac_info.get_properties = aac_properties;
  1108. obs_register_encoder(&aac_info);
  1109. return true;
  1110. }
  #ifdef _WIN32
  // Module exit point (Windows builds only): calls unload_core_audio().
  void obs_module_unload()
  {
      unload_core_audio();
  }
  #endif