| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463 | #include "../../media-io/audio-resampler.h"#include "../../util/deque.h"#include "../../util/platform.h"#include "../../util/darray.h"#include "../../util/util_uint64.h"#include "../../obs-internal.h"#include "wasapi-output.h"#define ACTUALLY_DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \	EXTERN_C const GUID DECLSPEC_SELECTANY name = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}#define do_log(level, format, ...) \	blog(level, "[audio monitoring: '%s'] " format, obs_source_get_name(monitor->source), ##__VA_ARGS__)#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)ACTUALLY_DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xBCDE0395, 0xE52F, 0x467C, 0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69,		     0x2E);ACTUALLY_DEFINE_GUID(IID_IMMDeviceEnumerator, 0xA95664D2, 0x9614, 0x4F35, 0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17,		     0xE6);ACTUALLY_DEFINE_GUID(IID_IAudioClient, 0x1CB9AD4C, 0xDBFA, 0x4C32, 0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2);ACTUALLY_DEFINE_GUID(IID_IAudioRenderClient, 0xF294ACFC, 0x3146, 0x4483, 0xA7, 0xBF, 0xAD, 0xDC, 0xA7, 0xC2, 0x60,		     0xE2);struct audio_monitor {	obs_source_t *source;	IAudioClient *client;	IAudioRenderClient *render;	uint64_t last_recv_time;	uint64_t prev_video_ts;	uint64_t time_since_prev;	audio_resampler_t *resampler;	uint32_t sample_rate;	uint32_t channels;	bool source_has_video;	bool ignore;	int64_t lowest_audio_offset;	struct deque delay_buffer;	uint32_t delay_size;	DARRAY(float) buf;	SRWLOCK playback_mutex;};/* #define DEBUG_AUDIO */static bool process_audio_delay(struct audio_monitor *monitor, float **data, uint32_t *frames, uint64_t ts,				uint32_t pad){	obs_source_t *s = monitor->source;	uint64_t last_frame_ts = s->last_frame_ts;	uint64_t cur_time = os_gettime_ns();	uint64_t front_ts;	uint64_t cur_ts;	int64_t diff;	uint32_t blocksize = monitor->channels * sizeof(float);	/* cut off audio if long-since leftover audio in delay buffer */	if (cur_time - monitor->last_recv_time > 1000000000)		deque_free(&monitor->delay_buffer);	monitor->last_recv_time = cur_time;	ts += monitor->source->sync_offset;	deque_push_back(&monitor->delay_buffer, &ts, sizeof(ts));	deque_push_back(&monitor->delay_buffer, frames, sizeof(*frames));	deque_push_back(&monitor->delay_buffer, *data, *frames * blocksize);	if (!monitor->prev_video_ts) {		monitor->prev_video_ts = last_frame_ts;	} else if (monitor->prev_video_ts == last_frame_ts) {		monitor->time_since_prev += util_mul_div64(*frames, 1000000000ULL, monitor->sample_rate);	} else {		monitor->time_since_prev = 0;	}	while (monitor->delay_buffer.size != 0) {		size_t size;		bool bad_diff;		deque_peek_front(&monitor->delay_buffer, &cur_ts, sizeof(ts));		front_ts = cur_ts - util_mul_div64(pad, 1000000000ULL, monitor->sample_rate);		diff = (int64_t)front_ts - (int64_t)last_frame_ts;		bad_diff = !last_frame_ts || llabs(diff) > 5000000000 || monitor->time_since_prev > 100000000ULL;		/* delay audio if rushing */		if (!bad_diff && diff > 75000000) {#ifdef DEBUG_AUDIO			blog(LOG_INFO,			     "audio rushing, cutting audio, "			     "diff: %lld, delay buffer size: %lu, "			     "v: %llu: a: %llu",			     diff, (int)monitor->delay_buffer.size, last_frame_ts, front_ts);#endif			return false;		}		deque_pop_front(&monitor->delay_buffer, NULL, sizeof(ts));		deque_pop_front(&monitor->delay_buffer, frames, sizeof(*frames));		size = *frames * blocksize;		da_resize(monitor->buf, size);		deque_pop_front(&monitor->delay_buffer, monitor->buf.array, size);		/* cut audio if dragging */		if (!bad_diff && diff < -75000000 && monitor->delay_buffer.size > 0) {#ifdef DEBUG_AUDIO			blog(LOG_INFO,			     "audio dragging, cutting audio, "			     "diff: %lld, delay buffer size: %lu, "			     "v: %llu: a: %llu",			     diff, (int)monitor->delay_buffer.size, last_frame_ts, front_ts);#endif			continue;		}		*data = monitor->buf.array;		return true;	}	return false;}static enum speaker_layout convert_speaker_layout(DWORD layout, WORD channels){	switch (layout) {	case KSAUDIO_SPEAKER_2POINT1:		return SPEAKERS_2POINT1;	case KSAUDIO_SPEAKER_SURROUND:		return SPEAKERS_4POINT0;	case KSAUDIO_SPEAKER_4POINT1:		return SPEAKERS_4POINT1;	case KSAUDIO_SPEAKER_5POINT1:		return SPEAKERS_5POINT1;	case KSAUDIO_SPEAKER_7POINT1:		return SPEAKERS_7POINT1;	}	return (enum speaker_layout)channels;}static bool audio_monitor_init_wasapi(struct audio_monitor *monitor){	bool success = false;	IMMDeviceEnumerator *immde = NULL;	WAVEFORMATEX *wfex = NULL;	UINT32 frames;	HRESULT hr;	/* ------------------------------------------ *	 * Init device                                */	hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (void **)&immde);	if (FAILED(hr)) {		warn("%s: Failed to create IMMDeviceEnumerator: %08lX", __FUNCTION__, hr);		return false;	}	IMMDevice *device = NULL;	const char *const id = obs->audio.monitoring_device_id;	if (strcmp(id, "default") == 0) {		hr = immde->lpVtbl->GetDefaultAudioEndpoint(immde, eRender, eConsole, &device);	} else {		wchar_t w_id[512];		os_utf8_to_wcs(id, 0, w_id, 512);		hr = immde->lpVtbl->GetDevice(immde, w_id, &device);	}	if (FAILED(hr)) {		warn("%s: Failed to get device: %08lX", __FUNCTION__, hr);		goto fail;	}	/* ------------------------------------------ *	 * Init client                                */	hr = device->lpVtbl->Activate(device, &IID_IAudioClient, CLSCTX_ALL, NULL, (void **)&monitor->client);	device->lpVtbl->Release(device);	if (FAILED(hr)) {		warn("%s: Failed to activate device: %08lX", __FUNCTION__, hr);		goto fail;	}	hr = monitor->client->lpVtbl->GetMixFormat(monitor->client, &wfex);	if (FAILED(hr)) {		warn("%s: Failed to get mix format: %08lX", __FUNCTION__, hr);		goto fail;	}	hr = monitor->client->lpVtbl->Initialize(monitor->client, AUDCLNT_SHAREMODE_SHARED, 0, 10000000, 0, wfex, NULL);	if (FAILED(hr)) {		warn("%s: Failed to initialize: %08lX", __FUNCTION__, hr);		goto fail;	}	/* ------------------------------------------ *	 * Init resampler                             */	const struct audio_output_info *info = audio_output_get_info(obs->audio.audio);	WAVEFORMATEXTENSIBLE *ext = (WAVEFORMATEXTENSIBLE *)wfex;	struct resample_info from;	struct resample_info to;	from.samples_per_sec = info->samples_per_sec;	from.speakers = info->speakers;	from.format = AUDIO_FORMAT_FLOAT_PLANAR;	to.samples_per_sec = (uint32_t)wfex->nSamplesPerSec;	to.speakers = convert_speaker_layout(ext->dwChannelMask, wfex->nChannels);	to.format = AUDIO_FORMAT_FLOAT;	monitor->sample_rate = (uint32_t)wfex->nSamplesPerSec;	monitor->channels = wfex->nChannels;	monitor->resampler = audio_resampler_create(&to, &from);	if (!monitor->resampler) {		goto fail;	}	/* ------------------------------------------ *	 * Init client                                */	hr = monitor->client->lpVtbl->GetBufferSize(monitor->client, &frames);	if (FAILED(hr)) {		warn("%s: Failed to get buffer size: %08lX", __FUNCTION__, hr);		goto fail;	}	hr = monitor->client->lpVtbl->GetService(monitor->client, &IID_IAudioRenderClient, (void **)&monitor->render);	if (FAILED(hr)) {		warn("%s: Failed to get IAudioRenderClient: %08lX", __FUNCTION__, hr);		goto fail;	}	hr = monitor->client->lpVtbl->Start(monitor->client);	if (FAILED(hr)) {		warn("%s: Failed to start audio: %08lX", __FUNCTION__, hr);		goto fail;	}	success = true;fail:	safe_release(immde);	if (wfex)		CoTaskMemFree(wfex);	return success;}static void audio_monitor_free_for_reconnect(struct audio_monitor *monitor){	if (monitor->client)		monitor->client->lpVtbl->Stop(monitor->client);	if (monitor->render) {		monitor->render->lpVtbl->Release(monitor->render);		monitor->render = NULL;	}	if (monitor->client) {		monitor->client->lpVtbl->Stop(monitor->client);		monitor->client->lpVtbl->Release(monitor->client);		monitor->client = NULL;	}	audio_resampler_destroy(monitor->resampler);	monitor->resampler = NULL;	deque_free(&monitor->delay_buffer);	da_free(monitor->buf);}static void on_audio_playback(void *param, obs_source_t *source, const struct audio_data *audio_data, bool muted){	struct audio_monitor *monitor = param;	uint8_t *resample_data[MAX_AV_PLANES];	float vol = source->user_volume;	uint32_t resample_frames;	uint64_t ts_offset;	bool success;	BYTE *output;	if (!TryAcquireSRWLockExclusive(&monitor->playback_mutex)) {		return;	}	if (os_atomic_load_long(&source->activate_refs) == 0) {		goto unlock;	}	if (!monitor->client && !audio_monitor_init_wasapi(monitor)) {		goto free_for_reconnect;	}	success = audio_resampler_resample(monitor->resampler, resample_data, &resample_frames, &ts_offset,					   (const uint8_t *const *)audio_data->data, (uint32_t)audio_data->frames);	if (!success) {		goto unlock;	}	UINT32 pad = 0;	HRESULT hr = monitor->client->lpVtbl->GetCurrentPadding(monitor->client, &pad);	if (FAILED(hr)) {		goto free_for_reconnect;	}	bool decouple_audio = source->async_unbuffered && source->async_decoupled;	if (monitor->source_has_video && !decouple_audio) {		uint64_t ts = audio_data->timestamp - ts_offset;		if (!process_audio_delay(monitor, (float **)(&resample_data[0]), &resample_frames, ts, pad)) {			goto unlock;		}	}	IAudioRenderClient *const render = monitor->render;	hr = render->lpVtbl->GetBuffer(render, resample_frames, &output);	if (FAILED(hr)) {		goto free_for_reconnect;	}	if (!muted) {		/* apply volume */		if (!close_float(vol, 1.0f, EPSILON)) {			register float *cur = (float *)resample_data[0];			register float *end = cur + resample_frames * monitor->channels;			while (cur < end)				*(cur++) *= vol;		}		memcpy(output, resample_data[0], resample_frames * monitor->channels * sizeof(float));	}	hr = render->lpVtbl->ReleaseBuffer(render, resample_frames, muted ? AUDCLNT_BUFFERFLAGS_SILENT : 0);	if (FAILED(hr)) {		goto free_for_reconnect;	}	goto unlock;free_for_reconnect:	audio_monitor_free_for_reconnect(monitor);unlock:	ReleaseSRWLockExclusive(&monitor->playback_mutex);}static inline void audio_monitor_free(struct audio_monitor *monitor){	if (monitor->ignore)		return;	if (monitor->source) {		obs_source_remove_audio_capture_callback(monitor->source, on_audio_playback, monitor);	}	if (monitor->client)		monitor->client->lpVtbl->Stop(monitor->client);	safe_release(monitor->client);	safe_release(monitor->render);	audio_resampler_destroy(monitor->resampler);	deque_free(&monitor->delay_buffer);	da_free(monitor->buf);}extern bool devices_match(const char *id1, const char *id2);static bool audio_monitor_init(struct audio_monitor *monitor, obs_source_t *source){	monitor->source = source;	const char *id = obs->audio.monitoring_device_id;	if (!id) {		warn("%s: No device ID set", __FUNCTION__);		return false;	}	if (source->info.output_flags & OBS_SOURCE_DO_NOT_SELF_MONITOR) {		obs_data_t *s = obs_source_get_settings(source);		const char *s_dev_id = obs_data_get_string(s, "device_id");		bool match = devices_match(s_dev_id, id);		obs_data_release(s);		if (match) {			monitor->ignore = true;			return true;		}	}	InitializeSRWLock(&monitor->playback_mutex);	return audio_monitor_init_wasapi(monitor);}static void audio_monitor_init_final(struct audio_monitor *monitor){	if (monitor->ignore)		return;	monitor->source_has_video = (monitor->source->info.output_flags & OBS_SOURCE_VIDEO) != 0;	obs_source_add_audio_capture_callback(monitor->source, on_audio_playback, monitor);}struct audio_monitor *audio_monitor_create(obs_source_t *source){	struct audio_monitor monitor = {0};	struct audio_monitor *out;	if (!audio_monitor_init(&monitor, source)) {		goto fail;	}	out = bmemdup(&monitor, sizeof(monitor));	pthread_mutex_lock(&obs->audio.monitoring_mutex);	da_push_back(obs->audio.monitors, &out);	pthread_mutex_unlock(&obs->audio.monitoring_mutex);	audio_monitor_init_final(out);	return out;fail:	audio_monitor_free(&monitor);	return NULL;}void audio_monitor_reset(struct audio_monitor *monitor){	struct audio_monitor new_monitor = {0};	bool success;	AcquireSRWLockExclusive(&monitor->playback_mutex);	success = audio_monitor_init(&new_monitor, monitor->source);	ReleaseSRWLockExclusive(&monitor->playback_mutex);	if (success) {		obs_source_t *source = monitor->source;		audio_monitor_free(monitor);		*monitor = new_monitor;		audio_monitor_init_final(monitor);	} else {		audio_monitor_free(&new_monitor);	}}void audio_monitor_destroy(struct audio_monitor *monitor){	if (monitor) {		audio_monitor_free(monitor);		pthread_mutex_lock(&obs->audio.monitoring_mutex);		da_erase_item(obs->audio.monitors, &monitor);		pthread_mutex_unlock(&obs->audio.monitoring_mutex);		bfree(monitor);	}}
 |