obs-ffmpeg-audio-encoders.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. /******************************************************************************
  2. Copyright (C) 2023 by Lain Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #include <util/base.h>
  15. #include <util/deque.h>
  16. #include <util/darray.h>
  17. #include <util/dstr.h>
  18. #include <obs-module.h>
  19. #include <libavutil/channel_layout.h>
  20. #include <libavformat/avformat.h>
  21. #include "obs-ffmpeg-formats.h"
  22. #include "obs-ffmpeg-compat.h"
  /*
   * Logging helpers. Each message is prefixed with the encoder type and the
   * OBS encoder name, so a variable named `enc` exposing `type` and `encoder`
   * members must be in scope wherever these macros are used.
   */
  #define do_log(level, format, ...)                                  \
      blog(level, "[FFmpeg %s encoder: '%s'] " format, enc->type,     \
           obs_encoder_get_name(enc->encoder), ##__VA_ARGS__)

  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  #define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
  28. struct enc_encoder {
  29. obs_encoder_t *encoder;
  30. const char *type;
  31. const AVCodec *codec;
  32. AVCodecContext *context;
  33. uint8_t *samples[MAX_AV_PLANES];
  34. AVFrame *aframe;
  35. int64_t total_samples;
  36. DARRAY(uint8_t) packet_buffer;
  37. size_t audio_planes;
  38. size_t audio_size;
  39. int frame_size; /* pretty much always 1024 for AAC */
  40. int frame_size_bytes;
  41. };
  /* Name callback: returns the module text stored under "FFmpegAAC". */
  static const char *aac_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegAAC");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegOpus". */
  static const char *opus_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegOpus");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegPCM16Bit". */
  static const char *pcm_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegPCM16Bit");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegPCM24Bit". */
  static const char *pcm24_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegPCM24Bit");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegPCM32BitFloat". */
  static const char *pcm32_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegPCM32BitFloat");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegALAC". */
  static const char *alac_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegALAC");
      return name;
  }
  /* Name callback: returns the module text stored under "FFmpegFLAC". */
  static const char *flac_getname(void *unused)
  {
      const char *name;

      UNUSED_PARAMETER(unused);

      name = obs_module_text("FFmpegFLAC");
      return name;
  }
  77. static void enc_destroy(void *data)
  78. {
  79. struct enc_encoder *enc = data;
  80. if (enc->samples[0])
  81. av_freep(&enc->samples[0]);
  82. if (enc->context)
  83. avcodec_free_context(&enc->context);
  84. if (enc->aframe)
  85. av_frame_free(&enc->aframe);
  86. da_free(enc->packet_buffer);
  87. bfree(enc);
  88. }
  89. static bool initialize_codec(struct enc_encoder *enc)
  90. {
  91. int ret;
  92. int channels;
  93. enc->aframe = av_frame_alloc();
  94. if (!enc->aframe) {
  95. warn("Failed to allocate audio frame");
  96. return false;
  97. }
  98. ret = avcodec_open2(enc->context, enc->codec, NULL);
  99. if (ret < 0) {
  100. struct dstr error_message = {0};
  101. dstr_printf(&error_message, "Failed to open AAC codec: %s", av_err2str(ret));
  102. obs_encoder_set_last_error(enc->encoder, error_message.array);
  103. dstr_free(&error_message);
  104. warn("Failed to open AAC codec: %s", av_err2str(ret));
  105. return false;
  106. }
  107. enc->aframe->format = enc->context->sample_fmt;
  108. channels = enc->context->ch_layout.nb_channels;
  109. enc->aframe->ch_layout = enc->context->ch_layout;
  110. enc->aframe->sample_rate = enc->context->sample_rate;
  111. enc->frame_size = enc->context->frame_size;
  112. if (!enc->frame_size)
  113. enc->frame_size = 1024;
  114. enc->frame_size_bytes = enc->frame_size * (int)enc->audio_size;
  115. ret = av_samples_alloc(enc->samples, NULL, channels, enc->frame_size, enc->context->sample_fmt, 0);
  116. if (ret < 0) {
  117. warn("Failed to create audio buffer: %s", av_err2str(ret));
  118. return false;
  119. }
  120. return true;
  121. }
  122. static void init_sizes(struct enc_encoder *enc, audio_t *audio)
  123. {
  124. const struct audio_output_info *aoi;
  125. enum audio_format format;
  126. aoi = audio_output_get_info(audio);
  127. format = convert_ffmpeg_sample_format(enc->context->sample_fmt);
  128. enc->audio_planes = get_audio_planes(format, aoi->speakers);
  129. enc->audio_size = get_audio_size(format, aoi->speakers, 1);
  130. }
  /* Provide MIN only when no earlier header has defined it. */
  #if !defined(MIN)
  #define MIN(x, y) ((x) < (y) ? (x) : (y))
  #endif
  134. static void *enc_create(obs_data_t *settings, obs_encoder_t *encoder, const char *type, const char *alt,
  135. enum AVSampleFormat sample_format)
  136. {
  137. struct enc_encoder *enc;
  138. int bitrate = (int)obs_data_get_int(settings, "bitrate");
  139. audio_t *audio = obs_encoder_audio(encoder);
  140. enc = bzalloc(sizeof(struct enc_encoder));
  141. enc->encoder = encoder;
  142. enc->codec = avcodec_find_encoder_by_name(type);
  143. enc->type = type;
  144. if (!enc->codec && alt) {
  145. enc->codec = avcodec_find_encoder_by_name(alt);
  146. enc->type = alt;
  147. }
  148. blog(LOG_INFO, "---------------------------------");
  149. if (!enc->codec) {
  150. warn("Couldn't find encoder");
  151. goto fail;
  152. }
  153. const AVCodecDescriptor *codec_desc = avcodec_descriptor_get(enc->codec->id);
  154. if (!codec_desc) {
  155. warn("Failed to get codec descriptor");
  156. goto fail;
  157. }
  158. if (!bitrate && !(codec_desc->props & AV_CODEC_PROP_LOSSLESS)) {
  159. warn("Invalid bitrate specified");
  160. goto fail;
  161. }
  162. enc->context = avcodec_alloc_context3(enc->codec);
  163. if (!enc->context) {
  164. warn("Failed to create codec context");
  165. goto fail;
  166. }
  167. if (codec_desc->props & AV_CODEC_PROP_LOSSLESS)
  168. // Set by encoder on init, not known at this time
  169. enc->context->bit_rate = 0;
  170. else
  171. enc->context->bit_rate = bitrate * 1000;
  172. const struct audio_output_info *aoi;
  173. aoi = audio_output_get_info(audio);
  174. av_channel_layout_default(&enc->context->ch_layout, (int)audio_output_get_channels(audio));
  175. /* The avutil default channel layout for 5 channels is 5.0, which OBS
  176. * does not support. Manually set 5 channels to 4.1. */
  177. if (aoi->speakers == SPEAKERS_4POINT1)
  178. enc->context->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_4POINT1;
  179. /* AAC, ALAC, & FLAC default to 3.0 for 3 channels instead of 2.1.
  180. * Tell the encoder to deal with 2.1 as if it were 3.0. */
  181. if (aoi->speakers == SPEAKERS_2POINT1)
  182. enc->context->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_SURROUND;
  183. // ALAC supports 7.1 wide instead of regular 7.1.
  184. if (aoi->speakers == SPEAKERS_7POINT1 && astrcmpi(enc->type, "alac") == 0)
  185. enc->context->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK;
  186. enc->context->sample_rate = audio_output_get_sample_rate(audio);
  187. const enum AVSampleFormat *sample_fmts = NULL;
  188. const int *supported_samplerates = NULL;
  189. #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(61, 13, 100)
  190. sample_fmts = enc->codec->sample_fmts;
  191. supported_samplerates = enc->codec->supported_samplerates;
  192. #else
  193. avcodec_get_supported_config(enc->context, enc->codec, AV_CODEC_CONFIG_SAMPLE_FORMAT, 0,
  194. (const void **)&sample_fmts, NULL);
  195. avcodec_get_supported_config(enc->context, enc->codec, AV_CODEC_CONFIG_SAMPLE_RATE, 0,
  196. (const void **)&supported_samplerates, NULL);
  197. #endif
  198. if (sample_fmts) {
  199. /* Check if the requested format is actually available for the specified
  200. * encoder. This may not always be the case due to FFmpeg changes or a
  201. * fallback being used (for example, when libopus is unavailable). */
  202. const enum AVSampleFormat *fmt = sample_fmts;
  203. while (*fmt != AV_SAMPLE_FMT_NONE) {
  204. if (*fmt == sample_format) {
  205. enc->context->sample_fmt = *fmt;
  206. break;
  207. }
  208. fmt++;
  209. }
  210. /* Fall back to default if requested format was not found. */
  211. if (enc->context->sample_fmt == AV_SAMPLE_FMT_NONE)
  212. enc->context->sample_fmt = sample_fmts[0];
  213. } else {
  214. /* Fall back to planar float if codec does not specify formats. */
  215. enc->context->sample_fmt = AV_SAMPLE_FMT_FLTP;
  216. }
  217. /* check to make sure sample rate is supported */
  218. if (supported_samplerates) {
  219. const int *rate = supported_samplerates;
  220. int cur_rate = enc->context->sample_rate;
  221. int closest = 0;
  222. while (*rate) {
  223. int dist = abs(cur_rate - *rate);
  224. int closest_dist = abs(cur_rate - closest);
  225. if (dist < closest_dist)
  226. closest = *rate;
  227. rate++;
  228. }
  229. if (closest)
  230. enc->context->sample_rate = closest;
  231. }
  232. char buf[256];
  233. av_channel_layout_describe(&enc->context->ch_layout, buf, 256);
  234. info("bitrate: %" PRId64 ", channels: %d, channel_layout: %s, track: %d\n",
  235. (int64_t)enc->context->bit_rate / 1000, (int)enc->context->ch_layout.nb_channels, buf,
  236. (int)obs_encoder_get_mixer_index(enc->encoder) + 1);
  237. init_sizes(enc, audio);
  238. /* enable experimental FFmpeg encoder if the only one available */
  239. enc->context->strict_std_compliance = -2;
  240. enc->context->flags = AV_CODEC_FLAG_GLOBAL_HEADER;
  241. if (initialize_codec(enc))
  242. return enc;
  243. fail:
  244. enc_destroy(enc);
  245. return NULL;
  246. }
  247. static void *aac_create(obs_data_t *settings, obs_encoder_t *encoder)
  248. {
  249. return enc_create(settings, encoder, "aac", NULL, AV_SAMPLE_FMT_NONE);
  250. }
  251. static void *opus_create(obs_data_t *settings, obs_encoder_t *encoder)
  252. {
  253. return enc_create(settings, encoder, "libopus", "opus", AV_SAMPLE_FMT_FLT);
  254. }
  255. static void *pcm_create(obs_data_t *settings, obs_encoder_t *encoder)
  256. {
  257. return enc_create(settings, encoder, "pcm_s16le", NULL, AV_SAMPLE_FMT_NONE);
  258. }
  259. static void *pcm24_create(obs_data_t *settings, obs_encoder_t *encoder)
  260. {
  261. return enc_create(settings, encoder, "pcm_s24le", NULL, AV_SAMPLE_FMT_NONE);
  262. }
  263. static void *pcm32_create(obs_data_t *settings, obs_encoder_t *encoder)
  264. {
  265. return enc_create(settings, encoder, "pcm_f32le", NULL, AV_SAMPLE_FMT_NONE);
  266. }
  267. static void *alac_create(obs_data_t *settings, obs_encoder_t *encoder)
  268. {
  269. return enc_create(settings, encoder, "alac", NULL, AV_SAMPLE_FMT_S32P);
  270. }
  271. static void *flac_create(obs_data_t *settings, obs_encoder_t *encoder)
  272. {
  273. return enc_create(settings, encoder, "flac", NULL, AV_SAMPLE_FMT_S16);
  274. }
  275. static bool do_encode(struct enc_encoder *enc, struct encoder_packet *packet, bool *received_packet)
  276. {
  277. AVRational time_base = {1, enc->context->sample_rate};
  278. AVPacket avpacket = {0};
  279. int got_packet;
  280. int ret;
  281. int channels;
  282. enc->aframe->nb_samples = enc->frame_size;
  283. enc->aframe->pts =
  284. av_rescale_q(enc->total_samples, (AVRational){1, enc->context->sample_rate}, enc->context->time_base);
  285. enc->aframe->ch_layout = enc->context->ch_layout;
  286. channels = enc->context->ch_layout.nb_channels;
  287. ret = avcodec_fill_audio_frame(enc->aframe, channels, enc->context->sample_fmt, enc->samples[0],
  288. enc->frame_size_bytes * channels, 1);
  289. if (ret < 0) {
  290. warn("avcodec_fill_audio_frame failed: %s", av_err2str(ret));
  291. return false;
  292. }
  293. enc->total_samples += enc->frame_size;
  294. ret = avcodec_send_frame(enc->context, enc->aframe);
  295. if (ret == 0)
  296. ret = avcodec_receive_packet(enc->context, &avpacket);
  297. got_packet = (ret == 0);
  298. if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
  299. ret = 0;
  300. if (ret < 0) {
  301. warn("avcodec_encode_audio2 failed: %s", av_err2str(ret));
  302. return false;
  303. }
  304. *received_packet = !!got_packet;
  305. if (!got_packet)
  306. return true;
  307. da_resize(enc->packet_buffer, 0);
  308. da_push_back_array(enc->packet_buffer, avpacket.data, avpacket.size);
  309. packet->pts = rescale_ts(avpacket.pts, enc->context, time_base);
  310. packet->dts = rescale_ts(avpacket.dts, enc->context, time_base);
  311. packet->data = enc->packet_buffer.array;
  312. packet->size = avpacket.size;
  313. packet->type = OBS_ENCODER_AUDIO;
  314. packet->keyframe = true;
  315. packet->timebase_num = 1;
  316. packet->timebase_den = (int32_t)enc->context->sample_rate;
  317. av_packet_unref(&avpacket);
  318. return true;
  319. }
  320. static bool enc_encode(void *data, struct encoder_frame *frame, struct encoder_packet *packet, bool *received_packet)
  321. {
  322. struct enc_encoder *enc = data;
  323. for (size_t i = 0; i < enc->audio_planes; i++)
  324. memcpy(enc->samples[i], frame->data[i], enc->frame_size_bytes);
  325. return do_encode(enc, packet, received_packet);
  326. }
  327. static void enc_defaults(obs_data_t *settings)
  328. {
  329. obs_data_set_default_int(settings, "bitrate", 128);
  330. }
  331. static obs_properties_t *enc_properties(void *unused)
  332. {
  333. UNUSED_PARAMETER(unused);
  334. obs_properties_t *props = obs_properties_create();
  335. obs_properties_add_int(props, "bitrate", obs_module_text("Bitrate"), 64, 1024, 32);
  336. return props;
  337. }
  338. static bool enc_extra_data(void *data, uint8_t **extra_data, size_t *size)
  339. {
  340. struct enc_encoder *enc = data;
  341. *extra_data = enc->context->extradata;
  342. *size = enc->context->extradata_size;
  343. return true;
  344. }
  345. static void enc_audio_info(void *data, struct audio_convert_info *info)
  346. {
  347. struct enc_encoder *enc = data;
  348. int channels;
  349. channels = enc->context->ch_layout.nb_channels;
  350. info->format = convert_ffmpeg_sample_format(enc->context->sample_fmt);
  351. info->samples_per_sec = (uint32_t)enc->context->sample_rate;
  352. if (channels != 7 && channels <= 8)
  353. info->speakers = (enum speaker_layout)(channels);
  354. else
  355. info->speakers = SPEAKERS_UNKNOWN;
  356. }
  357. static void enc_audio_info_float(void *data, struct audio_convert_info *info)
  358. {
  359. enc_audio_info(data, info);
  360. info->allow_clipping = true;
  361. }
  362. static size_t enc_frame_size(void *data)
  363. {
  364. struct enc_encoder *enc = data;
  365. return enc->frame_size;
  366. }
  367. struct obs_encoder_info aac_encoder_info = {
  368. .id = "ffmpeg_aac",
  369. .type = OBS_ENCODER_AUDIO,
  370. .codec = "aac",
  371. .get_name = aac_getname,
  372. .create = aac_create,
  373. .destroy = enc_destroy,
  374. .encode = enc_encode,
  375. .get_frame_size = enc_frame_size,
  376. .get_defaults = enc_defaults,
  377. .get_properties = enc_properties,
  378. .get_extra_data = enc_extra_data,
  379. .get_audio_info = enc_audio_info,
  380. };
  381. struct obs_encoder_info opus_encoder_info = {
  382. .id = "ffmpeg_opus",
  383. .type = OBS_ENCODER_AUDIO,
  384. .codec = "opus",
  385. .get_name = opus_getname,
  386. .create = opus_create,
  387. .destroy = enc_destroy,
  388. .encode = enc_encode,
  389. .get_frame_size = enc_frame_size,
  390. .get_defaults = enc_defaults,
  391. .get_properties = enc_properties,
  392. .get_extra_data = enc_extra_data,
  393. .get_audio_info = enc_audio_info,
  394. };
  395. struct obs_encoder_info pcm_encoder_info = {
  396. .id = "ffmpeg_pcm_s16le",
  397. .type = OBS_ENCODER_AUDIO,
  398. .codec = "pcm_s16le",
  399. .get_name = pcm_getname,
  400. .create = pcm_create,
  401. .destroy = enc_destroy,
  402. .encode = enc_encode,
  403. .get_frame_size = enc_frame_size,
  404. .get_defaults = enc_defaults,
  405. .get_properties = enc_properties,
  406. .get_extra_data = enc_extra_data,
  407. .get_audio_info = enc_audio_info,
  408. };
  409. struct obs_encoder_info pcm24_encoder_info = {
  410. .id = "ffmpeg_pcm_s24le",
  411. .type = OBS_ENCODER_AUDIO,
  412. .codec = "pcm_s24le",
  413. .get_name = pcm24_getname,
  414. .create = pcm24_create,
  415. .destroy = enc_destroy,
  416. .encode = enc_encode,
  417. .get_frame_size = enc_frame_size,
  418. .get_defaults = enc_defaults,
  419. .get_properties = enc_properties,
  420. .get_extra_data = enc_extra_data,
  421. .get_audio_info = enc_audio_info,
  422. };
  423. struct obs_encoder_info pcm32_encoder_info = {
  424. .id = "ffmpeg_pcm_f32le",
  425. .type = OBS_ENCODER_AUDIO,
  426. .codec = "pcm_f32le",
  427. .get_name = pcm32_getname,
  428. .create = pcm32_create,
  429. .destroy = enc_destroy,
  430. .encode = enc_encode,
  431. .get_frame_size = enc_frame_size,
  432. .get_defaults = enc_defaults,
  433. .get_properties = enc_properties,
  434. .get_extra_data = enc_extra_data,
  435. .get_audio_info = enc_audio_info_float,
  436. };
  437. struct obs_encoder_info alac_encoder_info = {
  438. .id = "ffmpeg_alac",
  439. .type = OBS_ENCODER_AUDIO,
  440. .codec = "alac",
  441. .get_name = alac_getname,
  442. .create = alac_create,
  443. .destroy = enc_destroy,
  444. .encode = enc_encode,
  445. .get_frame_size = enc_frame_size,
  446. .get_defaults = enc_defaults,
  447. .get_properties = enc_properties,
  448. .get_extra_data = enc_extra_data,
  449. .get_audio_info = enc_audio_info,
  450. };
  451. struct obs_encoder_info flac_encoder_info = {
  452. .id = "ffmpeg_flac",
  453. .type = OBS_ENCODER_AUDIO,
  454. .codec = "flac",
  455. .get_name = flac_getname,
  456. .create = flac_create,
  457. .destroy = enc_destroy,
  458. .encode = enc_encode,
  459. .get_frame_size = enc_frame_size,
  460. .get_defaults = enc_defaults,
  461. .get_properties = enc_properties,
  462. .get_extra_data = enc_extra_data,
  463. .get_audio_info = enc_audio_info,
  464. };