|
@@ -1,47 +1,54 @@
|
|
|
+#include <QMessageBox>
|
|
|
+
|
|
|
+#include <windows.h>
|
|
|
#include <obs-frontend-api.h>
|
|
|
-#include "captions-mssapi-stream.hpp"
|
|
|
#include "captions.hpp"
|
|
|
+#include "captions-handler.hpp"
|
|
|
#include "tool-helpers.hpp"
|
|
|
-#include <sphelper.h>
|
|
|
#include <util/dstr.hpp>
|
|
|
#include <util/platform.h>
|
|
|
-#include <util/windows/HRError.hpp>
|
|
|
+#include <util/windows/WinHandle.hpp>
|
|
|
#include <util/windows/ComPtr.hpp>
|
|
|
-#include <util/windows/CoTaskMemPtr.hpp>
|
|
|
-#include <util/threading.h>
|
|
|
#include <obs-module.h>
|
|
|
+#include <sphelper.h>
|
|
|
|
|
|
+#include <unordered_map>
|
|
|
+#include <vector>
|
|
|
#include <string>
|
|
|
#include <thread>
|
|
|
#include <mutex>
|
|
|
|
|
|
+#include "captions-mssapi.hpp"
|
|
|
+
|
|
|
#define do_log(type, format, ...) blog(type, "[Captions] " format, \
|
|
|
##__VA_ARGS__)
|
|
|
|
|
|
-#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
|
|
|
+#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
|
|
|
#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
-struct obs_captions {
|
|
|
- thread th;
|
|
|
- recursive_mutex m;
|
|
|
- WinHandle stop_event;
|
|
|
+#define DEFAULT_HANDLER "mssapi"
|
|
|
|
|
|
+struct obs_captions {
|
|
|
+ string handler_id = DEFAULT_HANDLER;
|
|
|
string source_name;
|
|
|
OBSWeakSource source;
|
|
|
- LANGID lang_id;
|
|
|
+ unique_ptr<captions_handler> handler;
|
|
|
+ LANGID lang_id = GetUserDefaultUILanguage();
|
|
|
|
|
|
- void main_thread();
|
|
|
- void start();
|
|
|
- void stop();
|
|
|
+ std::unordered_map<std::string, captions_handler_info&> handler_types;
|
|
|
|
|
|
- inline obs_captions() :
|
|
|
- stop_event(CreateEvent(nullptr, false, false, nullptr)),
|
|
|
- lang_id(GetUserDefaultUILanguage())
|
|
|
+ inline void register_handler(const char *id,
|
|
|
+ captions_handler_info &info)
|
|
|
{
|
|
|
+ handler_types.emplace(id, info);
|
|
|
}
|
|
|
|
|
|
+ void start();
|
|
|
+ void stop();
|
|
|
+
|
|
|
+ obs_captions();
|
|
|
inline ~obs_captions() {stop();}
|
|
|
};
|
|
|
|
|
@@ -72,8 +79,6 @@ CaptionsDialog::CaptionsDialog(QWidget *parent) :
|
|
|
{
|
|
|
ui->setupUi(this);
|
|
|
|
|
|
- lock_guard<recursive_mutex> lock(captions->m);
|
|
|
-
|
|
|
auto cb = [this] (obs_source_t *source)
|
|
|
{
|
|
|
uint32_t caps = obs_source_get_output_flags(source);
|
|
@@ -97,8 +102,19 @@ CaptionsDialog::CaptionsDialog(QWidget *parent) :
|
|
|
return (*static_cast<cb_t*>(data))(source);}, &cb);
|
|
|
ui->source->blockSignals(false);
|
|
|
|
|
|
+ for (auto &ht : captions->handler_types) {
|
|
|
+ QString name = ht.second.name().c_str();
|
|
|
+ QString id = ht.first.c_str();
|
|
|
+ ui->provider->addItem(name, id);
|
|
|
+ }
|
|
|
+
|
|
|
+ QString qhandler_id = captions->handler_id.c_str();
|
|
|
+ int idx = ui->provider->findData(qhandler_id);
|
|
|
+ if (idx != -1)
|
|
|
+ ui->provider->setCurrentIndex(idx);
|
|
|
+
|
|
|
ui->enable->blockSignals(true);
|
|
|
- ui->enable->setChecked(captions->th.joinable());
|
|
|
+ ui->enable->setChecked(!!captions->handler);
|
|
|
ui->enable->blockSignals(false);
|
|
|
|
|
|
vector<locale_info> locales;
|
|
@@ -129,13 +145,11 @@ CaptionsDialog::CaptionsDialog(QWidget *parent) :
|
|
|
ui->language->setEnabled(false);
|
|
|
|
|
|
} else if (!set_language) {
|
|
|
- bool started = captions->th.joinable();
|
|
|
+ bool started = !!captions->handler;
|
|
|
if (started)
|
|
|
captions->stop();
|
|
|
|
|
|
- captions->m.lock();
|
|
|
captions->lang_id = locales[0].id;
|
|
|
- captions->m.unlock();
|
|
|
|
|
|
if (started)
|
|
|
captions->start();
|
|
@@ -144,14 +158,12 @@ CaptionsDialog::CaptionsDialog(QWidget *parent) :
|
|
|
|
|
|
void CaptionsDialog::on_source_currentIndexChanged(int)
|
|
|
{
|
|
|
- bool started = captions->th.joinable();
|
|
|
+ bool started = !!captions->handler;
|
|
|
if (started)
|
|
|
captions->stop();
|
|
|
|
|
|
- captions->m.lock();
|
|
|
captions->source_name = ui->source->currentText().toUtf8().constData();
|
|
|
captions->source = GetWeakSourceByName(captions->source_name.c_str());
|
|
|
- captions->m.unlock();
|
|
|
|
|
|
if (started)
|
|
|
captions->start();
|
|
@@ -159,205 +171,122 @@ void CaptionsDialog::on_source_currentIndexChanged(int)
|
|
|
|
|
|
void CaptionsDialog::on_enable_clicked(bool checked)
|
|
|
{
|
|
|
- if (checked)
|
|
|
+ if (checked) {
|
|
|
captions->start();
|
|
|
- else
|
|
|
+ if (!captions->handler) {
|
|
|
+ ui->enable->blockSignals(true);
|
|
|
+ ui->enable->setChecked(false);
|
|
|
+ ui->enable->blockSignals(false);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
captions->stop();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
void CaptionsDialog::on_language_currentIndexChanged(int)
|
|
|
{
|
|
|
- bool started = captions->th.joinable();
|
|
|
+ bool started = !!captions->handler;
|
|
|
if (started)
|
|
|
captions->stop();
|
|
|
|
|
|
- captions->m.lock();
|
|
|
captions->lang_id = (LANGID)ui->language->currentData().toInt();
|
|
|
- captions->m.unlock();
|
|
|
|
|
|
if (started)
|
|
|
captions->start();
|
|
|
}
|
|
|
|
|
|
-/* ------------------------------------------------------------------------- */
|
|
|
-
|
|
|
-void obs_captions::main_thread()
|
|
|
-try {
|
|
|
- ComPtr<CaptionStream> audio;
|
|
|
- ComPtr<ISpObjectToken> token;
|
|
|
- ComPtr<ISpRecoGrammar> grammar;
|
|
|
- ComPtr<ISpRecognizer> recognizer;
|
|
|
- ComPtr<ISpRecoContext> context;
|
|
|
- HRESULT hr;
|
|
|
-
|
|
|
- auto cb = [&] (const struct audio_data *audio_data,
|
|
|
- bool muted)
|
|
|
- {
|
|
|
- audio->PushAudio(audio_data, muted);
|
|
|
- };
|
|
|
-
|
|
|
- using cb_t = decltype(cb);
|
|
|
-
|
|
|
- auto pre_cb = [] (void *param, obs_source_t*,
|
|
|
- const struct audio_data *audio_data, bool muted)
|
|
|
- {
|
|
|
- return (*static_cast<cb_t*>(param))(audio_data, muted);
|
|
|
- };
|
|
|
-
|
|
|
- os_set_thread_name(__FUNCTION__);
|
|
|
-
|
|
|
- CoInitialize(nullptr);
|
|
|
-
|
|
|
- wchar_t lang_str[32];
|
|
|
- _snwprintf(lang_str, 31, L"language=%x", (int)captions->lang_id);
|
|
|
-
|
|
|
- hr = SpFindBestToken(SPCAT_RECOGNIZERS, lang_str, nullptr, &token);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SpFindBestToken failed", hr);
|
|
|
-
|
|
|
- hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_ALL,
|
|
|
- __uuidof(ISpRecognizer), (void**)&recognizer);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("CoCreateInstance for recognizer failed", hr);
|
|
|
-
|
|
|
- hr = recognizer->SetRecognizer(token);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetRecognizer failed", hr);
|
|
|
-
|
|
|
- hr = recognizer->SetRecoState(SPRST_INACTIVE);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetRecoState(SPRST_INACTIVE) failed", hr);
|
|
|
-
|
|
|
- hr = recognizer->CreateRecoContext(&context);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("CreateRecoContext failed", hr);
|
|
|
-
|
|
|
- ULONGLONG interest = SPFEI(SPEI_RECOGNITION) |
|
|
|
- SPFEI(SPEI_END_SR_STREAM);
|
|
|
- hr = context->SetInterest(interest, interest);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetInterest failed", hr);
|
|
|
-
|
|
|
- HANDLE notify;
|
|
|
-
|
|
|
- hr = context->SetNotifyWin32Event();
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetNotifyWin32Event", hr);
|
|
|
-
|
|
|
- notify = context->GetNotifyEventHandle();
|
|
|
- if (notify == INVALID_HANDLE_VALUE)
|
|
|
- throw HRError("GetNotifyEventHandle failed", E_NOINTERFACE);
|
|
|
-
|
|
|
- size_t sample_rate = audio_output_get_sample_rate(obs_get_audio());
|
|
|
- audio = new CaptionStream((DWORD)sample_rate);
|
|
|
- audio->Release();
|
|
|
-
|
|
|
- hr = recognizer->SetInput(audio, false);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetInput failed", hr);
|
|
|
-
|
|
|
- hr = context->CreateGrammar(1, &grammar);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("CreateGrammar failed", hr);
|
|
|
-
|
|
|
- hr = grammar->LoadDictation(nullptr, SPLO_STATIC);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("LoadDictation failed", hr);
|
|
|
+void CaptionsDialog::on_provider_currentIndexChanged(int idx)
|
|
|
+{
|
|
|
+ bool started = !!captions->handler;
|
|
|
+ if (started)
|
|
|
+ captions->stop();
|
|
|
|
|
|
- hr = grammar->SetDictationState(SPRS_ACTIVE);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetDictationState failed", hr);
|
|
|
+ captions->handler_id =
|
|
|
+ ui->provider->itemData(idx).toString().toUtf8().constData();
|
|
|
|
|
|
- hr = recognizer->SetRecoState(SPRST_ACTIVE);
|
|
|
- if (FAILED(hr))
|
|
|
- throw HRError("SetRecoState(SPRST_ACTIVE) failed", hr);
|
|
|
+ if (started)
|
|
|
+ captions->start();
|
|
|
+}
|
|
|
|
|
|
- HANDLE events[] = {notify, stop_event};
|
|
|
+/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
- {
|
|
|
- captions->source = GetWeakSourceByName(
|
|
|
- captions->source_name.c_str());
|
|
|
- OBSSource strong = OBSGetStrongRef(source);
|
|
|
- if (strong)
|
|
|
- obs_source_add_audio_capture_callback(strong,
|
|
|
- pre_cb, &cb);
|
|
|
+static void caption_text(const std::string &text)
|
|
|
+{
|
|
|
+ obs_output *output = obs_frontend_get_streaming_output();
|
|
|
+ if (output) {
|
|
|
+ obs_output_output_caption_text1(output, text.c_str());
|
|
|
+ obs_output_release(output);
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
- for (;;) {
|
|
|
- DWORD ret = WaitForMultipleObjects(2, events, false, INFINITE);
|
|
|
- if (ret != WAIT_OBJECT_0)
|
|
|
- break;
|
|
|
-
|
|
|
- CSpEvent event;
|
|
|
- bool exit = false;
|
|
|
-
|
|
|
- while (event.GetFrom(context) == S_OK) {
|
|
|
- if (event.eEventId == SPEI_RECOGNITION) {
|
|
|
- ISpRecoResult *result = event.RecoResult();
|
|
|
+static void audio_capture(void*, obs_source_t*,
|
|
|
+ const struct audio_data *audio, bool)
|
|
|
+{
|
|
|
+ captions->handler->push_audio(audio);
|
|
|
+}
|
|
|
|
|
|
- CoTaskMemPtr<wchar_t> text;
|
|
|
- hr = result->GetText((ULONG)-1, (ULONG)-1,
|
|
|
- true, &text, nullptr);
|
|
|
- if (FAILED(hr))
|
|
|
- continue;
|
|
|
+void obs_captions::start()
|
|
|
+{
|
|
|
+ if (!captions->handler && valid_lang(lang_id)) {
|
|
|
+ wchar_t wname[256];
|
|
|
+
|
|
|
+ auto pair = handler_types.find(handler_id);
|
|
|
+ if (pair == handler_types.end()) {
|
|
|
+ warn("Failed to find handler '%s'",
|
|
|
+ handler_id.c_str());
|
|
|
+ return;
|
|
|
+ }
|
|
|
|
|
|
- char text_utf8[512];
|
|
|
- os_wcs_to_utf8(text, 0, text_utf8, 512);
|
|
|
+ if (!LCIDToLocaleName(lang_id, wname, 256, 0)) {
|
|
|
+ warn("Failed to get locale name: %d",
|
|
|
+ (int)GetLastError());
|
|
|
+ return;
|
|
|
+ }
|
|
|
|
|
|
- obs_output_t *output =
|
|
|
- obs_frontend_get_streaming_output();
|
|
|
- if (output)
|
|
|
- obs_output_output_caption_text1(output,
|
|
|
- text_utf8);
|
|
|
+ size_t len = (size_t)wcslen(wname);
|
|
|
|
|
|
- debug("\"%s\"", text_utf8);
|
|
|
+ string lang_name;
|
|
|
+ lang_name.resize(len);
|
|
|
|
|
|
- obs_output_release(output);
|
|
|
+ for (size_t i = 0; i < len; i++)
|
|
|
+ lang_name[i] = (char)wname[i];
|
|
|
|
|
|
- } else if (event.eEventId == SPEI_END_SR_STREAM) {
|
|
|
- exit = true;
|
|
|
- break;
|
|
|
- }
|
|
|
+ OBSSource s = OBSGetStrongRef(source);
|
|
|
+ if (!s) {
|
|
|
+ warn("Source invalid");
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
- if (exit)
|
|
|
- break;
|
|
|
- }
|
|
|
+ try {
|
|
|
+ captions_handler *h = pair->second.create(caption_text,
|
|
|
+ lang_name);
|
|
|
+ handler.reset(h);
|
|
|
|
|
|
- {
|
|
|
- OBSSource strong = OBSGetStrongRef(source);
|
|
|
- if (strong)
|
|
|
- obs_source_remove_audio_capture_callback(strong,
|
|
|
- pre_cb, &cb);
|
|
|
- }
|
|
|
-
|
|
|
- audio->Stop();
|
|
|
+ OBSSource s = OBSGetStrongRef(source);
|
|
|
+ obs_source_add_audio_capture_callback(s,
|
|
|
+ audio_capture, nullptr);
|
|
|
|
|
|
- CoUninitialize();
|
|
|
+ } catch (std::string text) {
|
|
|
+ QWidget *window =
|
|
|
+ (QWidget*)obs_frontend_get_main_window();
|
|
|
|
|
|
-} catch (HRError err) {
|
|
|
- error("%s failed: %s (%lX)", __FUNCTION__, err.str, err.hr);
|
|
|
- CoUninitialize();
|
|
|
- captions->th.detach();
|
|
|
-}
|
|
|
+ warn("Failed to create handler: %s", text.c_str());
|
|
|
|
|
|
-void obs_captions::start()
|
|
|
-{
|
|
|
- if (!captions->th.joinable()) {
|
|
|
- ResetEvent(captions->stop_event);
|
|
|
+ QMessageBox::warning(window,
|
|
|
+ obs_module_text("Captions.Error.GenericFail"),
|
|
|
+ text.c_str());
|
|
|
|
|
|
- if (valid_lang(captions->lang_id))
|
|
|
- captions->th = thread([] () {captions->main_thread();});
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
void obs_captions::stop()
|
|
|
{
|
|
|
- if (!captions->th.joinable())
|
|
|
- return;
|
|
|
-
|
|
|
- SetEvent(captions->stop_event);
|
|
|
- captions->th.join();
|
|
|
+ OBSSource s = OBSGetStrongRef(source);
|
|
|
+ if (s)
|
|
|
+ obs_source_remove_audio_capture_callback(s,
|
|
|
+ audio_capture, nullptr);
|
|
|
+ handler.reset();
|
|
|
}
|
|
|
|
|
|
static bool get_locale_name(LANGID id, char *out)
|
|
@@ -455,6 +384,15 @@ static void get_valid_locale_names(vector<locale_info> &locales)
|
|
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
+extern captions_handler_info mssapi_info;
|
|
|
+
|
|
|
+obs_captions::obs_captions()
|
|
|
+{
|
|
|
+ register_handler("mssapi", mssapi_info);
|
|
|
+}
|
|
|
+
|
|
|
+/* ------------------------------------------------------------------------- */
|
|
|
+
|
|
|
extern "C" void FreeCaptions()
|
|
|
{
|
|
|
delete captions;
|
|
@@ -470,37 +408,36 @@ static void obs_event(enum obs_frontend_event event, void *)
|
|
|
static void save_caption_data(obs_data_t *save_data, bool saving, void*)
|
|
|
{
|
|
|
if (saving) {
|
|
|
- lock_guard<recursive_mutex> lock(captions->m);
|
|
|
obs_data_t *obj = obs_data_create();
|
|
|
|
|
|
obs_data_set_string(obj, "source",
|
|
|
captions->source_name.c_str());
|
|
|
- obs_data_set_bool(obj, "enabled", captions->th.joinable());
|
|
|
+ obs_data_set_bool(obj, "enabled", !!captions->handler);
|
|
|
obs_data_set_int(obj, "lang_id", captions->lang_id);
|
|
|
+ obs_data_set_string(obj, "provider",
|
|
|
+ captions->handler_id.c_str());
|
|
|
|
|
|
obs_data_set_obj(save_data, "captions", obj);
|
|
|
obs_data_release(obj);
|
|
|
} else {
|
|
|
captions->stop();
|
|
|
|
|
|
- captions->m.lock();
|
|
|
-
|
|
|
obs_data_t *obj = obs_data_get_obj(save_data, "captions");
|
|
|
if (!obj)
|
|
|
obj = obs_data_create();
|
|
|
|
|
|
obs_data_set_default_int(obj, "lang_id",
|
|
|
GetUserDefaultUILanguage());
|
|
|
+ obs_data_set_default_string(obj, "provider", DEFAULT_HANDLER);
|
|
|
|
|
|
bool enabled = obs_data_get_bool(obj, "enabled");
|
|
|
captions->source_name = obs_data_get_string(obj, "source");
|
|
|
captions->lang_id = (int)obs_data_get_int(obj, "lang_id");
|
|
|
+ captions->handler_id = obs_data_get_string(obj, "provider");
|
|
|
captions->source = GetWeakSourceByName(
|
|
|
captions->source_name.c_str());
|
|
|
obs_data_release(obj);
|
|
|
|
|
|
- captions->m.unlock();
|
|
|
-
|
|
|
if (enabled)
|
|
|
captions->start();
|
|
|
}
|