d3d11-shader.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. /******************************************************************************
  2. Copyright (C) 2023 by Lain Bailey <[email protected]>
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation, either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ******************************************************************************/
  14. #include "d3d11-subsystem.hpp"
  15. #include "d3d11-shaderprocessor.hpp"
  16. #include <graphics/vec2.h>
  17. #include <graphics/vec3.h>
  18. #include <graphics/matrix3.h>
  19. #include <graphics/matrix4.h>
  20. #include <util/platform.h>
  21. #include <util/util.hpp>
  22. #include <filesystem>
  23. #include <fstream>
  24. void gs_vertex_shader::GetBuffersExpected(
  25. const vector<D3D11_INPUT_ELEMENT_DESC> &inputs)
  26. {
  27. for (size_t i = 0; i < inputs.size(); i++) {
  28. const D3D11_INPUT_ELEMENT_DESC &input = inputs[i];
  29. if (strcmp(input.SemanticName, "NORMAL") == 0)
  30. hasNormals = true;
  31. else if (strcmp(input.SemanticName, "TANGENT") == 0)
  32. hasTangents = true;
  33. else if (strcmp(input.SemanticName, "COLOR") == 0)
  34. hasColors = true;
  35. else if (strcmp(input.SemanticName, "TEXCOORD") == 0)
  36. nTexUnits++;
  37. }
  38. }
  39. gs_vertex_shader::gs_vertex_shader(gs_device_t *device, const char *file,
  40. const char *shaderString)
  41. : gs_shader(device, gs_type::gs_vertex_shader, GS_SHADER_VERTEX),
  42. hasNormals(false),
  43. hasColors(false),
  44. hasTangents(false),
  45. nTexUnits(0)
  46. {
  47. ShaderProcessor processor(device);
  48. ComPtr<ID3D10Blob> shaderBlob;
  49. string outputString;
  50. HRESULT hr;
  51. processor.Process(shaderString, file);
  52. processor.BuildString(outputString);
  53. processor.BuildParams(params);
  54. processor.BuildInputLayout(layoutData);
  55. GetBuffersExpected(layoutData);
  56. BuildConstantBuffer();
  57. Compile(outputString.c_str(), file, "vs_4_0", shaderBlob.Assign());
  58. data.resize(shaderBlob->GetBufferSize());
  59. memcpy(&data[0], shaderBlob->GetBufferPointer(), data.size());
  60. hr = device->device->CreateVertexShader(data.data(), data.size(), NULL,
  61. shader.Assign());
  62. if (FAILED(hr))
  63. throw HRError("Failed to create vertex shader", hr);
  64. const UINT layoutSize = (UINT)layoutData.size();
  65. if (layoutSize > 0) {
  66. hr = device->device->CreateInputLayout(layoutData.data(),
  67. (UINT)layoutSize,
  68. data.data(), data.size(),
  69. layout.Assign());
  70. if (FAILED(hr))
  71. throw HRError("Failed to create input layout", hr);
  72. }
  73. viewProj = gs_shader_get_param_by_name(this, "ViewProj");
  74. world = gs_shader_get_param_by_name(this, "World");
  75. }
  76. gs_pixel_shader::gs_pixel_shader(gs_device_t *device, const char *file,
  77. const char *shaderString)
  78. : gs_shader(device, gs_type::gs_pixel_shader, GS_SHADER_PIXEL)
  79. {
  80. ShaderProcessor processor(device);
  81. ComPtr<ID3D10Blob> shaderBlob;
  82. string outputString;
  83. HRESULT hr;
  84. processor.Process(shaderString, file);
  85. processor.BuildString(outputString);
  86. processor.BuildParams(params);
  87. processor.BuildSamplers(samplers);
  88. BuildConstantBuffer();
  89. Compile(outputString.c_str(), file, "ps_4_0", shaderBlob.Assign());
  90. data.resize(shaderBlob->GetBufferSize());
  91. memcpy(&data[0], shaderBlob->GetBufferPointer(), data.size());
  92. hr = device->device->CreatePixelShader(data.data(), data.size(), NULL,
  93. shader.Assign());
  94. if (FAILED(hr))
  95. throw HRError("Failed to create pixel shader", hr);
  96. }
  97. /*
  98. * Shader compilers will pack constants in to single registers when possible.
  99. * For example:
  100. *
  101. * uniform float3 test1;
  102. * uniform float test2;
  103. *
  104. * will inhabit a single constant register (c0.xyz for 'test1', and c0.w for
  105. * 'test2')
  106. *
  107. * However, if two constants cannot inhabit the same register, the second one
  108. * must begin at a new register, for example:
  109. *
  110. * uniform float2 test1;
  111. * uniform float3 test2;
  112. *
  113. * 'test1' will inhabit register constant c0.xy. However, because there's no
  114. * room for 'test2, it must use a new register constant entirely (c1.xyz).
  115. *
  116. * So if we want to calculate the position of the constants in the constant
  117. * buffer, we must take this in to account.
  118. */
  119. void gs_shader::BuildConstantBuffer()
  120. {
  121. for (size_t i = 0; i < params.size(); i++) {
  122. gs_shader_param &param = params[i];
  123. size_t size = 0;
  124. switch (param.type) {
  125. case GS_SHADER_PARAM_BOOL:
  126. case GS_SHADER_PARAM_INT:
  127. case GS_SHADER_PARAM_FLOAT:
  128. size = sizeof(float);
  129. break;
  130. case GS_SHADER_PARAM_INT2:
  131. case GS_SHADER_PARAM_VEC2:
  132. size = sizeof(vec2);
  133. break;
  134. case GS_SHADER_PARAM_INT3:
  135. case GS_SHADER_PARAM_VEC3:
  136. size = sizeof(float) * 3;
  137. break;
  138. case GS_SHADER_PARAM_INT4:
  139. case GS_SHADER_PARAM_VEC4:
  140. size = sizeof(vec4);
  141. break;
  142. case GS_SHADER_PARAM_MATRIX4X4:
  143. size = sizeof(float) * 4 * 4;
  144. break;
  145. case GS_SHADER_PARAM_TEXTURE:
  146. case GS_SHADER_PARAM_STRING:
  147. case GS_SHADER_PARAM_UNKNOWN:
  148. continue;
  149. }
  150. if (param.arrayCount)
  151. size *= param.arrayCount;
  152. /* checks to see if this constant needs to start at a new
  153. * register */
  154. if (size && (constantSize & 15) != 0) {
  155. size_t alignMax = (constantSize + 15) & ~15;
  156. if ((size + constantSize) > alignMax)
  157. constantSize = alignMax;
  158. }
  159. param.pos = constantSize;
  160. constantSize += size;
  161. }
  162. memset(&bd, 0, sizeof(bd));
  163. if (constantSize) {
  164. HRESULT hr;
  165. bd.ByteWidth = (constantSize + 15) & 0xFFFFFFF0; /* align */
  166. bd.Usage = D3D11_USAGE_DYNAMIC;
  167. bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
  168. bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  169. hr = device->device->CreateBuffer(&bd, NULL,
  170. constants.Assign());
  171. if (FAILED(hr))
  172. throw HRError("Failed to create constant buffer", hr);
  173. }
  174. for (size_t i = 0; i < params.size(); i++)
  175. gs_shader_set_default(&params[i]);
  176. }
  177. static uint64_t fnv1a_hash(const char *str, size_t len)
  178. {
  179. const uint64_t FNV_OFFSET = 14695981039346656037ULL;
  180. const uint64_t FNV_PRIME = 1099511628211ULL;
  181. uint64_t hash = FNV_OFFSET;
  182. for (size_t i = 0; i < len; i++) {
  183. hash ^= (uint64_t)str[i];
  184. hash *= FNV_PRIME;
  185. }
  186. return hash;
  187. }
  188. void gs_shader::Compile(const char *shaderString, const char *file,
  189. const char *target, ID3D10Blob **shader)
  190. {
  191. ComPtr<ID3D10Blob> errorsBlob;
  192. HRESULT hr;
  193. bool is_cached = false;
  194. char hashstr[20];
  195. if (!shaderString)
  196. throw "No shader string specified";
  197. size_t shaderStrLen = strlen(shaderString);
  198. uint64_t hash = fnv1a_hash(shaderString, shaderStrLen);
  199. snprintf(hashstr, sizeof(hashstr), "%02llx", hash);
  200. BPtr program_data =
  201. os_get_program_data_path_ptr("obs-studio/shader-cache");
  202. auto cachePath = filesystem::u8path(program_data.Get()) / hashstr;
  203. // Increment if on-disk format changes
  204. cachePath += ".v2";
  205. std::fstream cacheFile;
  206. cacheFile.exceptions(fstream::badbit | fstream::eofbit);
  207. if (filesystem::exists(cachePath) && !filesystem::is_empty(cachePath))
  208. cacheFile.open(cachePath, ios::in | ios::binary | ios::ate);
  209. if (cacheFile.is_open()) {
  210. uint64_t checksum;
  211. try {
  212. streampos len = cacheFile.tellg();
  213. // Not enough data for checksum + shader
  214. if (len <= sizeof(checksum))
  215. throw length_error("File truncated");
  216. cacheFile.seekg(0, ios::beg);
  217. len -= sizeof(checksum);
  218. device->d3dCreateBlob(len, shader);
  219. cacheFile.read((char *)(*shader)->GetBufferPointer(),
  220. len);
  221. uint64_t calculated_checksum = fnv1a_hash(
  222. (char *)(*shader)->GetBufferPointer(), len);
  223. cacheFile.read((char *)&checksum, sizeof(checksum));
  224. if (calculated_checksum != checksum)
  225. throw exception("Checksum mismatch");
  226. is_cached = true;
  227. } catch (const exception &e) {
  228. // Something went wrong reading the cache file, delete it
  229. blog(LOG_WARNING,
  230. "Loading shader cache file failed with \"%s\": %s",
  231. e.what(), file);
  232. cacheFile.close();
  233. filesystem::remove(cachePath);
  234. }
  235. }
  236. if (!is_cached) {
  237. hr = device->d3dCompile(shaderString, shaderStrLen, file, NULL,
  238. NULL, "main", target,
  239. D3D10_SHADER_OPTIMIZATION_LEVEL3, 0,
  240. shader, errorsBlob.Assign());
  241. if (FAILED(hr)) {
  242. if (errorsBlob != NULL && errorsBlob->GetBufferSize())
  243. throw ShaderError(errorsBlob, hr);
  244. else
  245. throw HRError("Failed to compile shader", hr);
  246. }
  247. cacheFile.open(cachePath, ios::out | ios::binary);
  248. if (cacheFile.is_open()) {
  249. try {
  250. uint64_t calculated_checksum = fnv1a_hash(
  251. (char *)(*shader)->GetBufferPointer(),
  252. (*shader)->GetBufferSize());
  253. cacheFile.write(
  254. (char *)(*shader)->GetBufferPointer(),
  255. (*shader)->GetBufferSize());
  256. cacheFile.write((char *)&calculated_checksum,
  257. sizeof(calculated_checksum));
  258. } catch (const exception &e) {
  259. blog(LOG_WARNING,
  260. "Writing shader cache file failed with \"%s\": %s",
  261. e.what(), file);
  262. cacheFile.close();
  263. filesystem::remove(cachePath);
  264. }
  265. }
  266. }
  267. #ifdef DISASSEMBLE_SHADERS
  268. ComPtr<ID3D10Blob> asmBlob;
  269. if (!device->d3dDisassemble)
  270. return;
  271. hr = device->d3dDisassemble((*shader)->GetBufferPointer(),
  272. (*shader)->GetBufferSize(), 0, nullptr,
  273. &asmBlob);
  274. if (SUCCEEDED(hr) && !!asmBlob && asmBlob->GetBufferSize()) {
  275. blog(LOG_INFO, "=============================================");
  276. blog(LOG_INFO, "Disassembly output for shader '%s':\n%s", file,
  277. asmBlob->GetBufferPointer());
  278. }
  279. #endif
  280. }
  281. inline void gs_shader::UpdateParam(vector<uint8_t> &constData,
  282. gs_shader_param &param, bool &upload)
  283. {
  284. if (param.type != GS_SHADER_PARAM_TEXTURE) {
  285. if (!param.curValue.size())
  286. throw "Not all shader parameters were set";
  287. /* padding in case the constant needs to start at a new
  288. * register */
  289. if (param.pos > constData.size()) {
  290. uint8_t zero = 0;
  291. constData.insert(constData.end(),
  292. param.pos - constData.size(), zero);
  293. }
  294. constData.insert(constData.end(), param.curValue.begin(),
  295. param.curValue.end());
  296. if (param.changed) {
  297. upload = true;
  298. param.changed = false;
  299. }
  300. } else if (param.curValue.size() == sizeof(struct gs_shader_texture)) {
  301. struct gs_shader_texture shader_tex;
  302. memcpy(&shader_tex, param.curValue.data(), sizeof(shader_tex));
  303. if (shader_tex.srgb)
  304. device_load_texture_srgb(device, shader_tex.tex,
  305. param.textureID);
  306. else
  307. device_load_texture(device, shader_tex.tex,
  308. param.textureID);
  309. if (param.nextSampler) {
  310. ID3D11SamplerState *state = param.nextSampler->state;
  311. device->context->PSSetSamplers(param.textureID, 1,
  312. &state);
  313. param.nextSampler = nullptr;
  314. }
  315. }
  316. }
  317. void gs_shader::UploadParams()
  318. {
  319. vector<uint8_t> constData;
  320. bool upload = false;
  321. constData.reserve(constantSize);
  322. for (size_t i = 0; i < params.size(); i++)
  323. UpdateParam(constData, params[i], upload);
  324. if (constData.size() != constantSize)
  325. throw "Invalid constant data size given to shader";
  326. if (upload) {
  327. D3D11_MAPPED_SUBRESOURCE map;
  328. HRESULT hr;
  329. hr = device->context->Map(constants, 0, D3D11_MAP_WRITE_DISCARD,
  330. 0, &map);
  331. if (FAILED(hr))
  332. throw HRError("Could not lock constant buffer", hr);
  333. memcpy(map.pData, constData.data(), constData.size());
  334. device->context->Unmap(constants, 0);
  335. }
  336. }
  337. void gs_shader_destroy(gs_shader_t *shader)
  338. {
  339. if (shader && shader->device->lastVertexShader == shader)
  340. shader->device->lastVertexShader = nullptr;
  341. delete shader;
  342. }
  343. int gs_shader_get_num_params(const gs_shader_t *shader)
  344. {
  345. return (int)shader->params.size();
  346. }
  347. gs_sparam_t *gs_shader_get_param_by_idx(gs_shader_t *shader, uint32_t param)
  348. {
  349. return &shader->params[param];
  350. }
  351. gs_sparam_t *gs_shader_get_param_by_name(gs_shader_t *shader, const char *name)
  352. {
  353. for (size_t i = 0; i < shader->params.size(); i++) {
  354. gs_shader_param &param = shader->params[i];
  355. if (strcmp(param.name.c_str(), name) == 0)
  356. return &param;
  357. }
  358. return NULL;
  359. }
  360. gs_sparam_t *gs_shader_get_viewproj_matrix(const gs_shader_t *shader)
  361. {
  362. if (shader->type != GS_SHADER_VERTEX)
  363. return NULL;
  364. return static_cast<const gs_vertex_shader *>(shader)->viewProj;
  365. }
  366. gs_sparam_t *gs_shader_get_world_matrix(const gs_shader_t *shader)
  367. {
  368. if (shader->type != GS_SHADER_VERTEX)
  369. return NULL;
  370. return static_cast<const gs_vertex_shader *>(shader)->world;
  371. }
  372. void gs_shader_get_param_info(const gs_sparam_t *param,
  373. struct gs_shader_param_info *info)
  374. {
  375. if (!param)
  376. return;
  377. info->name = param->name.c_str();
  378. info->type = param->type;
  379. }
  380. static inline void shader_setval_inline(gs_shader_param *param,
  381. const void *data, size_t size)
  382. {
  383. assert(param);
  384. if (!param)
  385. return;
  386. bool size_changed = param->curValue.size() != size;
  387. if (size_changed)
  388. param->curValue.resize(size);
  389. if (size_changed || memcmp(param->curValue.data(), data, size) != 0) {
  390. memcpy(param->curValue.data(), data, size);
  391. param->changed = true;
  392. }
  393. }
  394. void gs_shader_set_bool(gs_sparam_t *param, bool val)
  395. {
  396. int b_val = (int)val;
  397. shader_setval_inline(param, &b_val, sizeof(int));
  398. }
  399. void gs_shader_set_float(gs_sparam_t *param, float val)
  400. {
  401. shader_setval_inline(param, &val, sizeof(float));
  402. }
  403. void gs_shader_set_int(gs_sparam_t *param, int val)
  404. {
  405. shader_setval_inline(param, &val, sizeof(int));
  406. }
  407. void gs_shader_set_matrix3(gs_sparam_t *param, const struct matrix3 *val)
  408. {
  409. struct matrix4 mat;
  410. matrix4_from_matrix3(&mat, val);
  411. shader_setval_inline(param, &mat, sizeof(matrix4));
  412. }
  413. void gs_shader_set_matrix4(gs_sparam_t *param, const struct matrix4 *val)
  414. {
  415. shader_setval_inline(param, val, sizeof(matrix4));
  416. }
  417. void gs_shader_set_vec2(gs_sparam_t *param, const struct vec2 *val)
  418. {
  419. shader_setval_inline(param, val, sizeof(vec2));
  420. }
  421. void gs_shader_set_vec3(gs_sparam_t *param, const struct vec3 *val)
  422. {
  423. shader_setval_inline(param, val, sizeof(float) * 3);
  424. }
  425. void gs_shader_set_vec4(gs_sparam_t *param, const struct vec4 *val)
  426. {
  427. shader_setval_inline(param, val, sizeof(vec4));
  428. }
  429. void gs_shader_set_texture(gs_sparam_t *param, gs_texture_t *val)
  430. {
  431. shader_setval_inline(param, &val, sizeof(gs_texture_t *));
  432. }
  433. void gs_shader_set_val(gs_sparam_t *param, const void *val, size_t size)
  434. {
  435. shader_setval_inline(param, val, size);
  436. }
  437. void gs_shader_set_default(gs_sparam_t *param)
  438. {
  439. if (param->defaultValue.size())
  440. shader_setval_inline(param, param->defaultValue.data(),
  441. param->defaultValue.size());
  442. }
  443. void gs_shader_set_next_sampler(gs_sparam_t *param, gs_samplerstate_t *sampler)
  444. {
  445. param->nextSampler = sampler;
  446. }