// CentralDB.cpp
  1. /*
  2. * Copyright (c)2019 ZeroTier, Inc.
  3. *
  4. * Use of this software is governed by the Business Source License included
  5. * in the LICENSE.TXT file in the project's root directory.
  6. *
  7. * Change Date: 2026-01-01
  8. *
  9. * On the date above, in accordance with the Business Source License, use
  10. * of this software will be governed by version 2.0 of the Apache License.
  11. */
  12. /****/
  13. #include "CentralDB.hpp"
  14. #ifdef ZT_CONTROLLER_USE_LIBPQ
  15. #include "../../node/Constants.hpp"
  16. #include "../../node/SHA512.hpp"
  17. #include "../../version.h"
  18. #include "BigTableStatusWriter.hpp"
  19. #include "ControllerConfig.hpp"
  20. #include "CtlUtil.hpp"
  21. #include "EmbeddedNetworkController.hpp"
  22. #include "PostgresStatusWriter.hpp"
  23. #include "PubSubListener.hpp"
  24. #include "PubSubWriter.hpp"
  25. #include "Redis.hpp"
  26. #include "RedisListener.hpp"
  27. #include "RedisStatusWriter.hpp"
  28. #include "opentelemetry/trace/provider.h"
  29. #include <chrono>
  30. #include <climits>
  31. #include <iomanip>
  32. #include <libpq-fe.h>
  33. #include <optional>
  34. #include <pqxx/pqxx>
  35. #include <rustybits.h>
  36. #include <sstream>
  37. // #define REDIS_TRACE 1
  38. using json = nlohmann::json;
  39. using namespace ZeroTier;
  40. using Attrs = std::vector<std::pair<std::string, std::string> >;
  41. using Item = std::pair<std::string, Attrs>;
  42. using ItemStream = std::vector<Item>;
  43. CentralDB::CentralDB(
  44. const Identity& myId,
  45. const char* connString,
  46. int listenPort,
  47. CentralDB::ListenerMode listenMode,
  48. CentralDB::StatusWriterMode statusMode,
  49. const ControllerConfig* cc)
  50. : DB()
  51. , _listenerMode(listenMode)
  52. , _statusWriterMode(statusMode)
  53. , _cc(cc)
  54. , _pool()
  55. , _myId(myId)
  56. , _myAddress(myId.address())
  57. , _ready(0)
  58. , _connected(1)
  59. , _run(1)
  60. , _waitNoticePrinted(false)
  61. , _listenPort(listenPort)
  62. , _redis(NULL)
  63. , _cluster(NULL)
  64. , _redisMemberStatus(false)
  65. , _smee(NULL)
  66. {
  67. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  68. auto tracer = provider->GetTracer("CentralDB");
  69. auto span = tracer->StartSpan("CentralDB::CentralDB");
  70. auto scope = tracer->WithActiveSpan(span);
  71. rustybits::init_async_runtime();
  72. char myAddress[64];
  73. _myAddressStr = myId.address().toString(myAddress);
  74. _connString = std::string(connString);
  75. auto f = std::make_shared<PostgresConnFactory>(_connString);
  76. _pool =
  77. std::make_shared<ConnectionPool<PostgresConnection> >(15, 5, std::static_pointer_cast<ConnectionFactory>(f));
  78. memset(_ssoPsk, 0, sizeof(_ssoPsk));
  79. char* const ssoPskHex = getenv("ZT_SSO_PSK");
  80. #ifdef ZT_TRACE
  81. fprintf(stderr, "ZT_SSO_PSK: %s\n", ssoPskHex);
  82. #endif
  83. if (ssoPskHex) {
  84. // SECURITY: note that ssoPskHex will always be null-terminated if libc actually
  85. // returns something non-NULL. If the hex encodes something shorter than 48 bytes,
  86. // it will be padded at the end with zeroes. If longer, it'll be truncated.
  87. Utils::unhex(ssoPskHex, _ssoPsk, sizeof(_ssoPsk));
  88. }
  89. const char* redisMemberStatus = getenv("ZT_REDIS_MEMBER_STATUS");
  90. if (redisMemberStatus && (strcmp(redisMemberStatus, "true") == 0)) {
  91. _redisMemberStatus = true;
  92. fprintf(stderr, "Using redis for member status\n");
  93. }
  94. if ((listenMode == LISTENER_MODE_REDIS || statusMode == STATUS_WRITER_MODE_REDIS) && _cc->redisConfig != NULL) {
  95. auto innerspan = tracer->StartSpan("CentralDB::CentralDB::configureRedis");
  96. auto innerscope = tracer->WithActiveSpan(innerspan);
  97. sw::redis::ConnectionOptions opts;
  98. sw::redis::ConnectionPoolOptions poolOpts;
  99. opts.host = _cc->redisConfig->hostname;
  100. opts.port = _cc->redisConfig->port;
  101. opts.password = _cc->redisConfig->password;
  102. opts.db = 0;
  103. opts.keep_alive = true;
  104. opts.connect_timeout = std::chrono::seconds(3);
  105. poolOpts.size = 25;
  106. poolOpts.wait_timeout = std::chrono::seconds(5);
  107. poolOpts.connection_lifetime = std::chrono::minutes(3);
  108. poolOpts.connection_idle_time = std::chrono::minutes(1);
  109. if (_cc->redisConfig->clusterMode) {
  110. innerspan->SetAttribute("cluster_mode", "true");
  111. fprintf(stderr, "Using Redis in Cluster Mode\n");
  112. _cluster = std::make_shared<sw::redis::RedisCluster>(opts, poolOpts);
  113. }
  114. else {
  115. innerspan->SetAttribute("cluster_mode", "false");
  116. fprintf(stderr, "Using Redis in Standalone Mode\n");
  117. _redis = std::make_shared<sw::redis::Redis>(opts, poolOpts);
  118. }
  119. }
  120. _readyLock.lock();
  121. fprintf(
  122. stderr, "[%s] NOTICE: %.10llx controller PostgreSQL waiting for initial data download..." ZT_EOL_S,
  123. ::_timestr(), (unsigned long long)_myAddress.toInt());
  124. _waitNoticePrinted = true;
  125. initializeNetworks();
  126. initializeMembers();
  127. _heartbeatThread = std::thread(&CentralDB::heartbeat, this);
  128. switch (listenMode) {
  129. case LISTENER_MODE_REDIS:
  130. fprintf(stderr, "Using Redis for change listeners\n");
  131. if (_cc->redisConfig != NULL) {
  132. if (_cc->redisConfig->clusterMode) {
  133. _membersDbWatcher = std::make_shared<RedisMemberListener>(_myAddressStr, _cluster, this);
  134. _networksDbWatcher = std::make_shared<RedisNetworkListener>(_myAddressStr, _cluster, this);
  135. }
  136. else {
  137. _membersDbWatcher = std::make_shared<RedisMemberListener>(_myAddressStr, _redis, this);
  138. _networksDbWatcher = std::make_shared<RedisNetworkListener>(_myAddressStr, _redis, this);
  139. }
  140. }
  141. else {
  142. throw std::runtime_error("CentralDB: Redis listener mode selected but no Redis configuration provided");
  143. }
  144. case LISTENER_MODE_PUBSUB:
  145. fprintf(stderr, "Using PubSub for change listeners\n");
  146. if (cc->pubSubConfig != NULL) {
  147. _membersDbWatcher =
  148. std::make_shared<PubSubMemberListener>(_myAddressStr, cc->pubSubConfig->project_id, this);
  149. _networksDbWatcher =
  150. std::make_shared<PubSubNetworkListener>(_myAddressStr, cc->pubSubConfig->project_id, this);
  151. }
  152. else {
  153. throw std::runtime_error(
  154. "CentralDB: PubSub listener mode selected but no PubSub configuration provided");
  155. }
  156. break;
  157. case LISTENER_MODE_PGSQL:
  158. default:
  159. fprintf(stderr, "Using PostgreSQL for change listeners\n");
  160. _membersDbWatcher = std::make_shared<PostgresMemberListener>(this, _pool, "member_" + _myAddressStr, 5);
  161. _networksDbWatcher = std::make_shared<PostgresNetworkListener>(this, _pool, "network_" + _myAddressStr, 5);
  162. break;
  163. }
  164. std::shared_ptr<PubSubWriter> pubsubWriter;
  165. switch (statusMode) {
  166. case STATUS_WRITER_MODE_REDIS:
  167. fprintf(stderr, "Using Redis for status writer\n");
  168. if (_cc->redisConfig != NULL) {
  169. if (_cc->redisConfig->clusterMode) {
  170. _statusWriter = std::make_shared<RedisStatusWriter>(_cluster, _myAddressStr);
  171. }
  172. else {
  173. _statusWriter = std::make_shared<RedisStatusWriter>(_redis, _myAddressStr);
  174. }
  175. }
  176. else {
  177. throw std::runtime_error("CentralDB: Redis status mode selected but no Redis configuration provided");
  178. }
  179. break;
  180. case STATUS_WRITER_MODE_BIGTABLE:
  181. fprintf(stderr, "Using BigTable for status writer\n");
  182. if (cc->bigTableConfig == NULL) {
  183. throw std::runtime_error(
  184. "CentralDB: BigTable status mode selected but no BigTable configuration provided");
  185. }
  186. if (cc->pubSubConfig == NULL) {
  187. throw std::runtime_error(
  188. "CentralDB: BigTable status mode selected but no PubSub configuration provided");
  189. }
  190. pubsubWriter = std::make_shared<PubSubWriter>(
  191. cc->pubSubConfig->project_id, "ctl-member-status-update-stream", _myAddressStr);
  192. _statusWriter = std::make_shared<BigTableStatusWriter>(
  193. cc->bigTableConfig->project_id, cc->bigTableConfig->instance_id, cc->bigTableConfig->table_id,
  194. pubsubWriter);
  195. break;
  196. case STATUS_WRITER_MODE_PGSQL:
  197. default:
  198. fprintf(stderr, "Using PostgreSQL for status writer\n");
  199. _statusWriter = std::make_shared<PostgresStatusWriter>(_pool);
  200. break;
  201. }
  202. for (int i = 0; i < ZT_CENTRAL_CONTROLLER_COMMIT_THREADS; ++i) {
  203. _commitThread[i] = std::thread(&CentralDB::commitThread, this);
  204. }
  205. _onlineNotificationThread = std::thread(&CentralDB::onlineNotificationThread, this);
  206. configureSmee();
  207. }
  208. CentralDB::~CentralDB()
  209. {
  210. if (_smee != NULL) {
  211. rustybits::smee_client_delete(_smee);
  212. _smee = NULL;
  213. }
  214. rustybits::shutdown_async_runtime();
  215. _run = 0;
  216. std::this_thread::sleep_for(std::chrono::milliseconds(100));
  217. _heartbeatThread.join();
  218. _commitQueue.stop();
  219. for (int i = 0; i < ZT_CENTRAL_CONTROLLER_COMMIT_THREADS; ++i) {
  220. _commitThread[i].join();
  221. }
  222. _onlineNotificationThread.join();
  223. }
  224. void CentralDB::configureSmee()
  225. {
  226. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  227. auto tracer = provider->GetTracer("CentralDB");
  228. auto span = tracer->StartSpan("CentralDB::configureSmee");
  229. auto scope = tracer->WithActiveSpan(span);
  230. const char* TEMPORAL_SCHEME = "ZT_TEMPORAL_SCHEME";
  231. const char* TEMPORAL_HOST = "ZT_TEMPORAL_HOST";
  232. const char* TEMPORAL_PORT = "ZT_TEMPORAL_PORT";
  233. const char* TEMPORAL_NAMESPACE = "ZT_TEMPORAL_NAMESPACE";
  234. const char* SMEE_TASK_QUEUE = "ZT_SMEE_TASK_QUEUE";
  235. const char* scheme = getenv(TEMPORAL_SCHEME);
  236. if (scheme == NULL) {
  237. scheme = "http";
  238. }
  239. const char* host = getenv(TEMPORAL_HOST);
  240. const char* port = getenv(TEMPORAL_PORT);
  241. const char* ns = getenv(TEMPORAL_NAMESPACE);
  242. const char* task_queue = getenv(SMEE_TASK_QUEUE);
  243. if (scheme != NULL && host != NULL && port != NULL && ns != NULL && task_queue != NULL) {
  244. fprintf(stderr, "creating smee client\n");
  245. std::string hostPort =
  246. std::string(scheme) + std::string("://") + std::string(host) + std::string(":") + std::string(port);
  247. this->_smee = rustybits::smee_client_new(hostPort.c_str(), ns, task_queue);
  248. }
  249. else {
  250. fprintf(stderr, "Smee client not configured\n");
  251. }
  252. }
  253. bool CentralDB::waitForReady()
  254. {
  255. while (_ready < 2) {
  256. _readyLock.lock();
  257. _readyLock.unlock();
  258. }
  259. return true;
  260. }
  261. bool CentralDB::isReady()
  262. {
  263. return ((_ready == 2) && (_connected));
  264. }
  265. bool CentralDB::save(nlohmann::json& record, bool notifyListeners)
  266. {
  267. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  268. auto tracer = provider->GetTracer("CentralDB");
  269. auto span = tracer->StartSpan("CentralDB::save");
  270. auto scope = tracer->WithActiveSpan(span);
  271. bool modified = false;
  272. try {
  273. if (! record.is_object()) {
  274. fprintf(stderr, "record is not an object?!?\n");
  275. return false;
  276. }
  277. const std::string objtype = record["objtype"];
  278. if (objtype == "network") {
  279. // fprintf(stderr, "network save\n");
  280. const uint64_t nwid = OSUtils::jsonIntHex(record["id"], 0ULL);
  281. if (nwid) {
  282. nlohmann::json old;
  283. get(nwid, old);
  284. if ((! old.is_object()) || (! _compareRecords(old, record))) {
  285. record["revision"] = OSUtils::jsonInt(record["revision"], 0ULL) + 1ULL;
  286. _commitQueue.post(std::pair<nlohmann::json, bool>(record, notifyListeners));
  287. modified = true;
  288. }
  289. }
  290. }
  291. else if (objtype == "member") {
  292. std::string networkId = record["nwid"];
  293. std::string memberId = record["id"];
  294. const uint64_t nwid = OSUtils::jsonIntHex(record["nwid"], 0ULL);
  295. const uint64_t id = OSUtils::jsonIntHex(record["id"], 0ULL);
  296. // fprintf(stderr, "member save %s-%s\n", networkId.c_str(), memberId.c_str());
  297. if ((id) && (nwid)) {
  298. nlohmann::json network, old;
  299. get(nwid, network, id, old);
  300. if ((! old.is_object()) || (! _compareRecords(old, record))) {
  301. // fprintf(stderr, "commit queue post\n");
  302. record["revision"] = OSUtils::jsonInt(record["revision"], 0ULL) + 1ULL;
  303. _commitQueue.post(std::pair<nlohmann::json, bool>(record, notifyListeners));
  304. modified = true;
  305. }
  306. else {
  307. // fprintf(stderr, "no change\n");
  308. }
  309. }
  310. }
  311. else {
  312. fprintf(stderr, "uhh waaat\n");
  313. }
  314. }
  315. catch (std::exception& e) {
  316. fprintf(stderr, "Error on PostgreSQL::save: %s\n", e.what());
  317. }
  318. catch (...) {
  319. fprintf(stderr, "Unknown error on PostgreSQL::save\n");
  320. }
  321. return modified;
  322. }
  323. void CentralDB::eraseNetwork(const uint64_t networkId)
  324. {
  325. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  326. auto tracer = provider->GetTracer("CentralDB");
  327. auto span = tracer->StartSpan("CentralDB::eraseNetwork");
  328. auto scope = tracer->WithActiveSpan(span);
  329. char networkIdStr[17];
  330. span->SetAttribute("network_id", Utils::hex(networkId, networkIdStr));
  331. fprintf(stderr, "PostgreSQL::eraseNetwork\n");
  332. char tmp2[24];
  333. waitForReady();
  334. Utils::hex(networkId, tmp2);
  335. std::pair<nlohmann::json, bool> tmp;
  336. tmp.first["id"] = tmp2;
  337. tmp.first["objtype"] = "_delete_network";
  338. tmp.second = true;
  339. _commitQueue.post(tmp);
  340. nlohmann::json nullJson;
  341. _networkChanged(tmp.first, nullJson, true);
  342. }
  343. void CentralDB::eraseMember(const uint64_t networkId, const uint64_t memberId)
  344. {
  345. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  346. auto tracer = provider->GetTracer("CentralDB");
  347. auto span = tracer->StartSpan("CentralDB::eraseMember");
  348. auto scope = tracer->WithActiveSpan(span);
  349. char networkIdStr[17];
  350. char memberIdStr[11];
  351. span->SetAttribute("network_id", Utils::hex(networkId, networkIdStr));
  352. span->SetAttribute("member_id", Utils::hex10(memberId, memberIdStr));
  353. fprintf(stderr, "PostgreSQL::eraseMember\n");
  354. char tmp2[24];
  355. waitForReady();
  356. std::pair<nlohmann::json, bool> tmp, nw;
  357. Utils::hex(networkId, tmp2);
  358. tmp.first["nwid"] = tmp2;
  359. Utils::hex(memberId, tmp2);
  360. tmp.first["id"] = tmp2;
  361. tmp.first["objtype"] = "_delete_member";
  362. tmp.second = true;
  363. _commitQueue.post(tmp);
  364. nlohmann::json nullJson;
  365. _memberChanged(tmp.first, nullJson, true);
  366. }
  367. void CentralDB::nodeIsOnline(
  368. const uint64_t networkId,
  369. const uint64_t memberId,
  370. const InetAddress& physicalAddress,
  371. const char* osArch)
  372. {
  373. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  374. auto tracer = provider->GetTracer("CentralDB");
  375. auto span = tracer->StartSpan("CentralDB::nodeIsOnline");
  376. auto scope = tracer->WithActiveSpan(span);
  377. char networkIdStr[17];
  378. char memberIdStr[11];
  379. char ipStr[INET6_ADDRSTRLEN];
  380. span->SetAttribute("network_id", Utils::hex(networkId, networkIdStr));
  381. span->SetAttribute("member_id", Utils::hex10(memberId, memberIdStr));
  382. span->SetAttribute("physical_address", physicalAddress.toString(ipStr));
  383. span->SetAttribute("os_arch", osArch);
  384. std::lock_guard<std::mutex> l(_lastOnline_l);
  385. NodeOnlineRecord& i = _lastOnline[std::pair<uint64_t, uint64_t>(networkId, memberId)];
  386. i.lastSeen = OSUtils::now();
  387. if (physicalAddress) {
  388. i.physicalAddress = physicalAddress;
  389. }
  390. i.osArch = std::string(osArch);
  391. }
  392. void CentralDB::nodeIsOnline(const uint64_t networkId, const uint64_t memberId, const InetAddress& physicalAddress)
  393. {
  394. this->nodeIsOnline(networkId, memberId, physicalAddress, "unknown/unknown");
  395. }
  396. AuthInfo CentralDB::getSSOAuthInfo(const nlohmann::json& member, const std::string& redirectURL)
  397. {
  398. if (_cc->ssoEnabled) {
  399. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  400. auto tracer = provider->GetTracer("CentralDB");
  401. auto span = tracer->StartSpan("CentralDB::getSSOAuthInfo");
  402. auto scope = tracer->WithActiveSpan(span);
  403. Metrics::db_get_sso_info++;
  404. // NONCE is just a random character string. no semantic meaning
  405. // state = HMAC SHA384 of Nonce based on shared sso key
  406. //
  407. // need nonce timeout in database? make sure it's used within X time
  408. // X is 5 minutes for now. Make configurable later?
  409. //
  410. // how do we tell when a nonce is used? if auth_expiration_time is set
  411. std::string networkId = member["nwid"];
  412. std::string memberId = member["id"];
  413. char authenticationURL[4096] = { 0 };
  414. AuthInfo info;
  415. info.enabled = true;
  416. // if (memberId == "a10dccea52" && networkId == "8056c2e21c24673d") {
  417. // fprintf(stderr, "invalid authinfo for grant's machine\n");
  418. // info.version=1;
  419. // return info;
  420. // }
  421. // fprintf(stderr, "PostgreSQL::updateMemberOnLoad: %s-%s\n", networkId.c_str(), memberId.c_str());
  422. std::shared_ptr<PostgresConnection> c;
  423. try {
  424. c = _pool->borrow();
  425. pqxx::work w(*c->c);
  426. char nonceBytes[16] = { 0 };
  427. std::string nonce = "";
  428. // check if the member exists first.
  429. pqxx::row count = w.exec_params1(
  430. "SELECT count(id) FROM ztc_member WHERE id = $1 AND network_id = $2 AND deleted = false", memberId,
  431. networkId);
  432. if (count[0].as<int>() == 1) {
  433. // get active nonce, if exists.
  434. pqxx::result r = w.exec_params(
  435. "SELECT nonce FROM ztc_sso_expiry "
  436. "WHERE network_id = $1 AND member_id = $2 "
  437. "AND ((NOW() AT TIME ZONE 'UTC') <= authentication_expiry_time) AND ((NOW() AT TIME ZONE 'UTC') <= "
  438. "nonce_expiration)",
  439. networkId, memberId);
  440. if (r.size() == 0) {
  441. // no active nonce.
  442. // find an unused nonce, if one exists.
  443. pqxx::result r = w.exec_params(
  444. "SELECT nonce FROM ztc_sso_expiry "
  445. "WHERE network_id = $1 AND member_id = $2 "
  446. "AND authentication_expiry_time IS NULL AND ((NOW() AT TIME ZONE 'UTC') <= nonce_expiration)",
  447. networkId, memberId);
  448. if (r.size() == 1) {
  449. // we have an existing nonce. Use it
  450. nonce = r.at(0)[0].as<std::string>();
  451. Utils::unhex(nonce.c_str(), nonceBytes, sizeof(nonceBytes));
  452. }
  453. else if (r.empty()) {
  454. // create a nonce
  455. Utils::getSecureRandom(nonceBytes, 16);
  456. char nonceBuf[64] = { 0 };
  457. Utils::hex(nonceBytes, sizeof(nonceBytes), nonceBuf);
  458. nonce = std::string(nonceBuf);
  459. pqxx::result ir = w.exec_params0(
  460. "INSERT INTO ztc_sso_expiry "
  461. "(nonce, nonce_expiration, network_id, member_id) VALUES "
  462. "($1, TO_TIMESTAMP($2::double precision/1000), $3, $4)",
  463. nonce, OSUtils::now() + 300000, networkId, memberId);
  464. w.commit();
  465. }
  466. else {
  467. // > 1 ?!? Thats an error!
  468. fprintf(stderr, "> 1 unused nonce!\n");
  469. exit(6);
  470. }
  471. }
  472. else if (r.size() == 1) {
  473. nonce = r.at(0)[0].as<std::string>();
  474. Utils::unhex(nonce.c_str(), nonceBytes, sizeof(nonceBytes));
  475. }
  476. else {
  477. // more than 1 nonce in use? Uhhh...
  478. fprintf(stderr, "> 1 nonce in use for network member?!?\n");
  479. exit(7);
  480. }
  481. r = w.exec_params(
  482. "SELECT oc.client_id, oc.authorization_endpoint, oc.issuer, oc.provider, oc.sso_impl_version "
  483. "FROM ztc_network AS n "
  484. "INNER JOIN ztc_org o "
  485. " ON o.owner_id = n.owner_id "
  486. "LEFT OUTER JOIN ztc_network_oidc_config noc "
  487. " ON noc.network_id = n.id "
  488. "LEFT OUTER JOIN ztc_oidc_config oc "
  489. " ON noc.client_id = oc.client_id AND oc.org_id = o.org_id "
  490. "WHERE n.id = $1 AND n.sso_enabled = true",
  491. networkId);
  492. std::string client_id = "";
  493. std::string authorization_endpoint = "";
  494. std::string issuer = "";
  495. std::string provider = "";
  496. uint64_t sso_version = 0;
  497. if (r.size() == 1) {
  498. client_id = r.at(0)[0].as<std::optional<std::string> >().value_or("");
  499. authorization_endpoint = r.at(0)[1].as<std::optional<std::string> >().value_or("");
  500. issuer = r.at(0)[2].as<std::optional<std::string> >().value_or("");
  501. provider = r.at(0)[3].as<std::optional<std::string> >().value_or("");
  502. sso_version = r.at(0)[4].as<std::optional<uint64_t> >().value_or(1);
  503. }
  504. else if (r.size() > 1) {
  505. fprintf(
  506. stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n",
  507. networkId.c_str());
  508. }
  509. else {
  510. fprintf(stderr, "No client or auth endpoint?!?\n");
  511. }
  512. info.version = sso_version;
  513. // no catch all else because we don't actually care if no records exist here. just continue as normal.
  514. if ((! client_id.empty()) && (! authorization_endpoint.empty())) {
  515. uint8_t state[48];
  516. HMACSHA384(_ssoPsk, nonceBytes, sizeof(nonceBytes), state);
  517. char state_hex[256];
  518. Utils::hex(state, 48, state_hex);
  519. if (info.version == 0) {
  520. char url[2048] = { 0 };
  521. OSUtils::ztsnprintf(
  522. url, sizeof(authenticationURL),
  523. "%s?response_type=id_token&response_mode=form_post&scope=openid+email+profile&redirect_uri="
  524. "%s&nonce=%s&state=%s&client_id=%s",
  525. authorization_endpoint.c_str(), url_encode(redirectURL).c_str(), nonce.c_str(), state_hex,
  526. client_id.c_str());
  527. info.authenticationURL = std::string(url);
  528. }
  529. else if (info.version == 1) {
  530. info.ssoClientID = client_id;
  531. info.issuerURL = issuer;
  532. info.ssoProvider = provider;
  533. info.ssoNonce = nonce;
  534. info.ssoState = std::string(state_hex) + "_" + networkId;
  535. info.centralAuthURL = redirectURL;
  536. #ifdef ZT_DEBUG
  537. fprintf(
  538. stderr,
  539. "ssoClientID: %s\nissuerURL: %s\nssoNonce: %s\nssoState: %s\ncentralAuthURL: %s\nprovider: "
  540. "%s\n",
  541. info.ssoClientID.c_str(), info.issuerURL.c_str(), info.ssoNonce.c_str(),
  542. info.ssoState.c_str(), info.centralAuthURL.c_str(), provider.c_str());
  543. #endif
  544. }
  545. }
  546. else {
  547. fprintf(
  548. stderr, "client_id: %s\nauthorization_endpoint: %s\n", client_id.c_str(),
  549. authorization_endpoint.c_str());
  550. }
  551. }
  552. _pool->unborrow(c);
  553. }
  554. catch (std::exception& e) {
  555. span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  556. fprintf(stderr, "ERROR: Error updating member on load for network %s: %s\n", networkId.c_str(), e.what());
  557. }
  558. return info; // std::string(authenticationURL);
  559. }
  560. return AuthInfo();
  561. }
  562. void CentralDB::initializeNetworks()
  563. {
  564. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  565. auto tracer = provider->GetTracer("CentralDB");
  566. auto span = tracer->StartSpan("CentralDB::initializeNetworks");
  567. auto scope = tracer->WithActiveSpan(span);
  568. fprintf(stderr, "Initializing networks...\n");
  569. try {
  570. char qbuf[2048];
  571. sprintf(
  572. qbuf,
  573. "SELECT id, name, configuration , (EXTRACT(EPOCH FROM creation_time AT TIME ZONE 'UTC')*1000)::bigint, "
  574. "(EXTRACT(EPOCH FROM last_modified AT TIME ZONE 'UTC')*1000)::bigint, revision, frontend "
  575. "FROM networks_ctl WHERE controller_id = '%s'",
  576. _myAddressStr.c_str());
  577. auto c = _pool->borrow();
  578. pqxx::work w(*c->c);
  579. fprintf(stderr, "Load networks from psql...\n");
  580. auto stream = pqxx::stream_from::query(w, qbuf);
  581. std::tuple<
  582. std::string // network ID
  583. ,
  584. std::optional<std::string> // name
  585. ,
  586. std::string // configuration
  587. ,
  588. std::optional<uint64_t> // creation_time
  589. ,
  590. std::optional<uint64_t> // last_modified
  591. ,
  592. std::optional<uint64_t> // revision
  593. ,
  594. std::string // frontend
  595. >
  596. row;
  597. uint64_t count = 0;
  598. uint64_t total = 0;
  599. while (stream >> row) {
  600. auto start = std::chrono::high_resolution_clock::now();
  601. json empty;
  602. json config;
  603. initNetwork(config);
  604. std::string nwid = std::get<0>(row);
  605. std::string name = std::get<1>(row).value_or("");
  606. json cfgtmp = json::parse(std::get<2>(row));
  607. std::optional<uint64_t> created_at = std::get<3>(row);
  608. std::optional<uint64_t> last_modified = std::get<4>(row);
  609. std::optional<uint64_t> revision = std::get<5>(row);
  610. config["id"] = nwid;
  611. config["name"] = name;
  612. config["creationTime"] = created_at.value_or(0);
  613. config["lastModified"] = last_modified.value_or(0);
  614. config["revision"] = revision.value_or(0);
  615. config["capabilities"] = cfgtmp["capabilities"].is_array() ? cfgtmp["capabilities"] : json::array();
  616. config["enableBroadcast"] =
  617. cfgtmp["enableBroadcast"].is_boolean() ? cfgtmp["enableBroadcast"].get<bool>() : false;
  618. config["mtu"] = cfgtmp["mtu"].is_number() ? cfgtmp["mtu"].get<int32_t>() : 2800;
  619. config["multicastLimit"] =
  620. cfgtmp["multicastLimit"].is_number() ? cfgtmp["multicastLimit"].get<int32_t>() : 64;
  621. config["private"] = cfgtmp["private"].is_boolean() ? cfgtmp["private"].get<bool>() : true;
  622. config["remoteTraceLevel"] =
  623. cfgtmp["remoteTraceLevel"].is_number() ? cfgtmp["remoteTraceLevel"].get<int32_t>() : 0;
  624. config["remoteTraceTarget"] =
  625. cfgtmp["remoteTraceTarget"].is_string() ? cfgtmp["remoteTraceTarget"].get<std::string>() : "";
  626. config["revision"] = revision.value_or(0);
  627. config["rules"] = cfgtmp["rules"].is_array() ? cfgtmp["rules"] : json::array();
  628. config["tags"] = cfgtmp["tags"].is_array() ? cfgtmp["tags"] : json::array();
  629. if (cfgtmp["v4AssignMode"].is_object()) {
  630. config["v4AssignMode"] = cfgtmp["v4AssignMode"];
  631. }
  632. else {
  633. config["v4AssignMode"] = json::object();
  634. config["v4AssignMode"]["zt"] = true;
  635. }
  636. if (cfgtmp["v6AssignMode"].is_object()) {
  637. config["v6AssignMode"] = cfgtmp["v6AssignMode"];
  638. }
  639. else {
  640. config["v6AssignMode"] = json::object();
  641. config["v6AssignMode"]["zt"] = true;
  642. config["v6AssignMode"]["6plane"] = true;
  643. config["v6AssignMode"]["rfc4193"] = false;
  644. }
  645. config["ssoEnabled"] = cfgtmp["ssoEnabled"].is_boolean() ? cfgtmp["ssoEnabled"].get<bool>() : false;
  646. config["objtype"] = "network";
  647. config["routes"] = cfgtmp["routes"].is_array() ? cfgtmp["routes"] : json::array();
  648. config["clientId"] = cfgtmp["clientId"].is_string() ? cfgtmp["clientId"].get<std::string>() : "";
  649. config["authorizationEndpoint"] = cfgtmp["authorizationEndpoint"].is_string()
  650. ? cfgtmp["authorizationEndpoint"].get<std::string>()
  651. : nullptr;
  652. config["provider"] = cfgtmp["ssoProvider"].is_string() ? cfgtmp["ssoProvider"].get<std::string>() : "";
  653. if (! cfgtmp["dns"].is_object()) {
  654. cfgtmp["dns"] = json::object();
  655. cfgtmp["dns"]["domain"] = "";
  656. cfgtmp["dns"]["servers"] = json::array();
  657. }
  658. else {
  659. config["dns"] = cfgtmp["dns"];
  660. }
  661. config["ipAssignmentPools"] =
  662. cfgtmp["ipAssignmentPools"].is_array() ? cfgtmp["ipAssignmentPools"] : json::array();
  663. config["frontend"] = std::get<6>(row);
  664. Metrics::network_count++;
  665. _networkChanged(empty, config, false);
  666. auto end = std::chrono::high_resolution_clock::now();
  667. auto dur = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  668. ;
  669. total += dur.count();
  670. ++count;
  671. if (count > 0 && count % 10000 == 0) {
  672. fprintf(stderr, "Averaging %lu us per network\n", (total / count));
  673. }
  674. }
  675. w.commit();
  676. _pool->unborrow(c);
  677. fprintf(stderr, "done.\n");
  678. if (++this->_ready == 2) {
  679. if (_waitNoticePrinted) {
  680. fprintf(
  681. stderr, "[%s] NOTICE: %.10llx controller PostgreSQL data download complete." ZT_EOL_S, _timestr(),
  682. (unsigned long long)_myAddress.toInt());
  683. }
  684. _readyLock.unlock();
  685. }
  686. fprintf(stderr, "network init done\n");
  687. }
  688. catch (std::exception& e) {
  689. fprintf(stderr, "ERROR: Error initializing networks: %s\n", e.what());
  690. span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  691. std::this_thread::sleep_for(std::chrono::milliseconds(5000));
  692. exit(-1);
  693. }
  694. }
  695. void CentralDB::initializeMembers()
  696. {
  697. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  698. auto tracer = provider->GetTracer("CentralDB");
  699. auto span = tracer->StartSpan("CentralDB::initializeMembers");
  700. auto scope = tracer->WithActiveSpan(span);
  701. std::string memberId;
  702. std::string networkId;
  703. try {
  704. std::unordered_map<std::string, std::string> networkMembers;
  705. fprintf(stderr, "Initializing Members...\n");
  706. std::string setKeyBase = "network-nodes-all:{" + _myAddressStr + "}:";
  707. if (_redisMemberStatus) {
  708. fprintf(stderr, "Initialize Redis for members...\n");
  709. std::unique_lock<std::shared_mutex> l(_networks_l);
  710. std::unordered_set<std::string> deletes;
  711. for (auto it : _networks) {
  712. uint64_t nwid_i = it.first;
  713. char nwidTmp[64] = { 0 };
  714. OSUtils::ztsnprintf(nwidTmp, sizeof(nwidTmp), "%.16llx", nwid_i);
  715. std::string nwid(nwidTmp);
  716. std::string key = setKeyBase + nwid;
  717. deletes.insert(key);
  718. }
  719. if (! deletes.empty()) {
  720. try {
  721. if (_cc->redisConfig->clusterMode) {
  722. auto tx = _cluster->transaction(_myAddressStr, true, false);
  723. for (std::string k : deletes) {
  724. tx.del(k);
  725. }
  726. tx.exec();
  727. }
  728. else {
  729. auto tx = _redis->transaction(true, false);
  730. for (std::string k : deletes) {
  731. tx.del(k);
  732. }
  733. tx.exec();
  734. }
  735. }
  736. catch (sw::redis::Error& e) {
  737. // ignore
  738. }
  739. }
  740. }
  741. char qbuf[2048];
  742. sprintf(
  743. qbuf,
  744. "SELECT nm.device_id, nm.network_id, nm.authorized, nm.active_bridge, nm.ip_assignments, "
  745. "nm.no_auto_assign_ips, "
  746. "nm.sso_exempt, (EXTRACT(EPOCH FROM nm.authentication_expiry_time AT TIME ZONE 'UTC')*1000)::bigint, "
  747. "(EXTRACT(EPOCH FROM nm.creation_time AT TIME ZONE 'UTC')*1000)::bigint, nm.identity, "
  748. "(EXTRACT(EPOCH FROM nm.last_authorized_time AT TIME ZONE 'UTC')*1000)::bigint, "
  749. "(EXTRACT(EPOCH FROM nm.last_deauthorized_time AT TIME ZONE 'UTC')*1000)::bigint, "
  750. "nm.remote_trace_level, nm.remote_trace_target, nm.revision, nm.capabilities, nm.tags, "
  751. "nm.frontend "
  752. "FROM network_memberships_ctl nm "
  753. "INNER JOIN networks_ctl n "
  754. " ON nm.network_id = n.id "
  755. "WHERE n.controller_id = '%s'",
  756. _myAddressStr.c_str());
  757. auto c = _pool->borrow();
  758. pqxx::work w(*c->c);
  759. fprintf(stderr, "Load members from psql...\n");
  760. auto stream = pqxx::stream_from::query(w, qbuf);
  761. std::tuple<
  762. std::string // device ID
  763. ,
  764. std::string // network ID
  765. ,
  766. bool // authorized
  767. ,
  768. std::optional<bool> // active_bridge
  769. ,
  770. std::optional<std::string> // ip_assignments
  771. ,
  772. std::optional<bool> // no_auto_assign_ips
  773. ,
  774. std::optional<bool> // sso_exempt
  775. ,
  776. std::optional<uint64_t> // authentication_expiry_time
  777. ,
  778. std::optional<uint64_t> // creation_time
  779. ,
  780. std::optional<std::string> // identity
  781. ,
  782. std::optional<uint64_t> // last_authorized_time
  783. ,
  784. std::optional<uint64_t> // last_deauthorized_time
  785. ,
  786. std::optional<int32_t> // remote_trace_level
  787. ,
  788. std::optional<std::string> // remote_trace_target
  789. ,
  790. std::optional<uint64_t> // revision
  791. ,
  792. std::optional<std::string> // capabilities
  793. ,
  794. std::optional<std::string> // tags
  795. ,
  796. std::string // frontend
  797. >
  798. row;
  799. auto tmp = std::chrono::high_resolution_clock::now();
  800. uint64_t count = 0;
  801. uint64_t total = 0;
  802. while (stream >> row) {
  803. auto start = std::chrono::high_resolution_clock::now();
  804. json empty;
  805. json config;
  806. initMember(config);
  807. memberId = std::get<0>(row);
  808. networkId = std::get<1>(row);
  809. bool authorized = std::get<2>(row);
  810. std::optional<bool> active_bridge = std::get<3>(row);
  811. std::string ip_assignments = std::get<4>(row).value_or("");
  812. std::optional<bool> no_auto_assign_ips = std::get<5>(row);
  813. std::optional<bool> sso_exempt = std::get<6>(row);
  814. std::optional<uint64_t> authentication_expiry_time = std::get<7>(row);
  815. std::optional<uint64_t> creation_time = std::get<8>(row);
  816. std::optional<std::string> identity = std::get<9>(row);
  817. std::optional<uint64_t> last_authorized_time = std::get<10>(row);
  818. std::optional<uint64_t> last_deauthorized_time = std::get<11>(row);
  819. std::optional<int32_t> remote_trace_level = std::get<12>(row);
  820. std::optional<std::string> remote_trace_target = std::get<13>(row);
  821. std::optional<uint64_t> revision = std::get<14>(row);
  822. std::optional<std::string> capabilities = std::get<15>(row);
  823. std::optional<std::string> tags = std::get<16>(row);
  824. networkMembers.insert(std::pair<std::string, std::string>(setKeyBase + networkId, memberId));
  825. config["objtype"] = "member";
  826. config["id"] = memberId;
  827. config["address"] = identity.value_or("");
  828. config["nwid"] = networkId;
  829. config["authorized"] = authorized;
  830. config["activeBridge"] = active_bridge.value_or(false);
  831. config["ipAssignments"] = json::array();
  832. if (ip_assignments != "{}") {
  833. std::string tmp = ip_assignments.substr(1, ip_assignments.length() - 2);
  834. std::vector<std::string> addrs = split(tmp, ',');
  835. for (auto it = addrs.begin(); it != addrs.end(); ++it) {
  836. config["ipAssignments"].push_back(*it);
  837. }
  838. }
  839. config["capabilities"] = json::parse(capabilities.value_or("[]"));
  840. config["creationTime"] = creation_time.value_or(0);
  841. config["lastAuthorizedTime"] = last_authorized_time.value_or(0);
  842. config["lastDeauthorizedTime"] = last_deauthorized_time.value_or(0);
  843. config["noAutoAssignIPs"] = no_auto_assign_ips.value_or(false);
  844. config["remoteTraceLevel"] = remote_trace_level.value_or(0);
  845. config["remoteTraceTarget"] = remote_trace_target.value_or(nullptr);
  846. config["revision"] = revision.value_or(0);
  847. config["ssoExempt"] = sso_exempt.value_or(false);
  848. config["authenticationExpiryTime"] = authentication_expiry_time.value_or(0);
  849. config["tags"] = json::parse(tags.value_or("[]"));
  850. config["ipAssignments"] = json::array();
  851. config["frontend"] = std::get<17>(row);
  852. Metrics::member_count++;
  853. _memberChanged(empty, config, false);
  854. memberId = "";
  855. networkId = "";
  856. auto end = std::chrono::high_resolution_clock::now();
  857. auto dur = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  858. total += dur.count();
  859. ++count;
  860. if (count > 0 && count % 10000 == 0) {
  861. fprintf(stderr, "Averaging %llu us per member\n", (total / count));
  862. }
  863. }
  864. if (count > 0) {
  865. fprintf(stderr, "Took %llu us per member to load\n", (total / count));
  866. }
  867. stream.complete();
  868. w.commit();
  869. _pool->unborrow(c);
  870. fprintf(stderr, "done.\n");
  871. if (_listenerMode == LISTENER_MODE_REDIS)
  872. if (! networkMembers.empty()) {
  873. if (_redisMemberStatus) {
  874. fprintf(stderr, "Load member data into redis...\n");
  875. if (_cc->redisConfig->clusterMode) {
  876. auto tx = _cluster->transaction(_myAddressStr, true, false);
  877. uint64_t count = 0;
  878. for (auto it : networkMembers) {
  879. tx.sadd(it.first, it.second);
  880. if (++count % 30000 == 0) {
  881. tx.exec();
  882. tx = _cluster->transaction(_myAddressStr, true, false);
  883. }
  884. }
  885. tx.exec();
  886. }
  887. else {
  888. auto tx = _redis->transaction(true, false);
  889. uint64_t count = 0;
  890. for (auto it : networkMembers) {
  891. tx.sadd(it.first, it.second);
  892. if (++count % 30000 == 0) {
  893. tx.exec();
  894. tx = _redis->transaction(true, false);
  895. }
  896. }
  897. tx.exec();
  898. }
  899. fprintf(stderr, "done.\n");
  900. }
  901. }
  902. fprintf(stderr, "Done loading members...\n");
  903. if (++this->_ready == 2) {
  904. if (_waitNoticePrinted) {
  905. fprintf(
  906. stderr, "[%s] NOTICE: %.10llx controller PostgreSQL data download complete." ZT_EOL_S, _timestr(),
  907. (unsigned long long)_myAddress.toInt());
  908. }
  909. _readyLock.unlock();
  910. }
  911. }
  912. catch (sw::redis::Error& e) {
  913. span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  914. fprintf(stderr, "ERROR: Error initializing members (redis): %s\n", e.what());
  915. exit(-1);
  916. }
  917. catch (std::exception& e) {
  918. span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  919. fprintf(stderr, "ERROR: Error initializing member: %s-%s %s\n", networkId.c_str(), memberId.c_str(), e.what());
  920. exit(-1);
  921. }
  922. }
  923. void CentralDB::heartbeat()
  924. {
  925. char publicId[1024];
  926. char hostnameTmp[1024];
  927. _myId.toString(false, publicId);
  928. if (gethostname(hostnameTmp, sizeof(hostnameTmp)) != 0) {
  929. hostnameTmp[0] = (char)0;
  930. }
  931. else {
  932. for (int i = 0; i < (int)sizeof(hostnameTmp); ++i) {
  933. if ((hostnameTmp[i] == '.') || (hostnameTmp[i] == 0)) {
  934. hostnameTmp[i] = (char)0;
  935. break;
  936. }
  937. }
  938. }
  939. const char* controllerId = _myAddressStr.c_str();
  940. const char* publicIdentity = publicId;
  941. const char* hostname = hostnameTmp;
  942. while (_run == 1) {
  943. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  944. auto tracer = provider->GetTracer("CentralDB");
  945. auto span = tracer->StartSpan("CentralDB::heartbeat");
  946. auto scope = tracer->WithActiveSpan(span);
  947. // fprintf(stderr, "%s: heartbeat\n", controllerId);
  948. auto c = _pool->borrow();
  949. int64_t ts = OSUtils::now();
  950. if (c->c) {
  951. std::string major = std::to_string(ZEROTIER_ONE_VERSION_MAJOR);
  952. std::string minor = std::to_string(ZEROTIER_ONE_VERSION_MINOR);
  953. std::string rev = std::to_string(ZEROTIER_ONE_VERSION_REVISION);
  954. std::string version = major + "." + minor + "." + rev;
  955. std::string versionStr = "v" + version;
  956. try {
  957. pqxx::work w { *c->c };
  958. w.exec_params0(
  959. "INSERT INTO controllers_ctl (id, hostname, last_heartbeat, public_identity, version) VALUES "
  960. "($1, $2, TO_TIMESTAMP($3::double precision/1000), $4, $5) "
  961. "ON CONFLICT (id) DO UPDATE SET hostname = EXCLUDED.hostname, last_heartbeat = "
  962. "EXCLUDED.last_heartbeat, "
  963. "public_identity = EXCLUDED.public_identity, version = EXCLUDED.version",
  964. controllerId, hostname, ts, publicIdentity, versionStr);
  965. w.commit();
  966. }
  967. catch (std::exception& e) {
  968. fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
  969. span->End();
  970. std::this_thread::sleep_for(std::chrono::milliseconds(1000));
  971. continue;
  972. }
  973. }
  974. _pool->unborrow(c);
  975. try {
  976. if (_listenerMode == LISTENER_MODE_REDIS && _redisMemberStatus) {
  977. if (_cc->redisConfig->clusterMode) {
  978. _cluster->zadd("controllers", "controllerId", ts);
  979. }
  980. else {
  981. _redis->zadd("controllers", "controllerId", ts);
  982. }
  983. }
  984. }
  985. catch (sw::redis::Error& e) {
  986. fprintf(stderr, "ERROR: Redis error in heartbeat thread: %s\n", e.what());
  987. }
  988. span->End();
  989. std::this_thread::sleep_for(std::chrono::milliseconds(1000));
  990. }
  991. fprintf(stderr, "Exited heartbeat thread\n");
  992. }
  993. void CentralDB::commitThread()
  994. {
  995. fprintf(stderr, "%s: commitThread start\n", _myAddressStr.c_str());
  996. std::pair<nlohmann::json, bool> qitem;
  997. while (_commitQueue.get(qitem) & (_run == 1)) {
  998. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  999. auto tracer = provider->GetTracer("CentralDB");
  1000. auto span = tracer->StartSpan("CentralDB::commitThread");
  1001. auto scope = tracer->WithActiveSpan(span);
  1002. // fprintf(stderr, "commitThread tick\n");
  1003. if (! qitem.first.is_object()) {
  1004. fprintf(stderr, "not an object\n");
  1005. continue;
  1006. }
  1007. std::shared_ptr<PostgresConnection> c;
  1008. try {
  1009. c = _pool->borrow();
  1010. }
  1011. catch (std::exception& e) {
  1012. fprintf(stderr, "ERROR: %s\n", e.what());
  1013. continue;
  1014. }
  1015. if (! c) {
  1016. fprintf(stderr, "Error getting database connection\n");
  1017. continue;
  1018. }
  1019. Metrics::pgsql_commit_ticks++;
  1020. try {
  1021. nlohmann::json& config = (qitem.first);
  1022. const std::string objtype = config["objtype"];
  1023. if (objtype == "member") {
  1024. auto mspan = tracer->StartSpan("CentralDB::commitThread::member");
  1025. auto mscope = tracer->WithActiveSpan(mspan);
  1026. // fprintf(stderr, "%s: commitThread: member\n", _myAddressStr.c_str());
  1027. std::string memberId;
  1028. std::string networkId;
  1029. try {
  1030. pqxx::work w(*c->c);
  1031. memberId = config["id"];
  1032. networkId = config["nwid"];
  1033. std::string target = "NULL";
  1034. if (! config["remoteTraceTarget"].is_null()) {
  1035. target = config["remoteTraceTarget"];
  1036. }
  1037. pqxx::row nwrow = w.exec_params1("SELECT COUNT(id) FROM networks_ctl WHERE id = $1", networkId);
  1038. int nwcount = nwrow[0].as<int>();
  1039. if (nwcount != 1) {
  1040. fprintf(stderr, "network %s does not exist. skipping member upsert\n", networkId.c_str());
  1041. w.abort();
  1042. _pool->unborrow(c);
  1043. continue;
  1044. }
  1045. pqxx::row mrow = w.exec_params1(
  1046. "SELECT COUNT(device_id) FROM network_memberships_ctl WHERE device_id = $1 AND network_id = $2",
  1047. memberId, networkId);
  1048. int membercount = mrow[0].as<int>();
  1049. bool isNewMember = (membercount == 0);
  1050. pqxx::result res = w.exec_params0(
  1051. "INSERT INTO network_memberships_ctl (device_id, network_id, authorized, active_bridge, "
  1052. "ip_assignments, "
  1053. "no_auto_assign_ips, sso_exempt, authentication_expiry_time, capabilities, creation_time, "
  1054. "identity, last_authorized_time, last_deauthorized_time, "
  1055. "remote_trace_level, remote_trace_target, revision, tags, version_major, version_minor, "
  1056. "version_revision, version_protocol) "
  1057. "VALUES ($1, $2, $3, $4, $5, $6, $7, TO_TIMESTAMP($8::double precision/1000), $9, "
  1058. "TO_TIMESTAMP($10::double precision/1000), $11, TO_TIMESTAMP($12::double precision/1000), "
  1059. "TO_TIMESTAMP($13::double precision/1000), $14, $15, $16, $17, $18, $19, $20, $21) "
  1060. "ON CONFLICT (device_id, network_id) DO UPDATE SET "
  1061. "authorized = EXCLUDED.authorized, active_bridge = EXCLUDED.active_bridge, "
  1062. "ip_assignments = EXCLUDED.ip_assignments, no_auto_assign_ips = EXCLUDED.no_auto_assign_ips, "
  1063. "sso_exempt = EXCLUDED.sso_exempt, authentication_expiry_time = "
  1064. "EXCLUDED.authentication_expiry_time, "
  1065. "capabilities = EXCLUDED.capabilities, creation_time = EXCLUDED.creation_time, "
  1066. "identity = EXCLUDED.identity, last_authorized_time = EXCLUDED.last_authorized_time, "
  1067. "last_deauthorized_time = EXCLUDED.last_deauthorized_time, "
  1068. "remote_trace_level = EXCLUDED.remote_trace_level, remote_trace_target = "
  1069. "EXCLUDED.remote_trace_target, "
  1070. "revision = EXCLUDED.revision, tags = EXCLUDED.tags, version_major = EXCLUDED.version_major, "
  1071. "version_minor = EXCLUDED.version_minor, version_revision = EXCLUDED.version_revision, "
  1072. "version_protocol = EXCLUDED.version_protocol",
  1073. memberId, networkId, (bool)config["authorized"], (bool)config["activeBridge"],
  1074. config["ipAssignments"].get<std::vector<std::string> >(), (bool)config["noAutoAssignIps"],
  1075. (bool)config["ssoExempt"], (uint64_t)config["authenticationExpiryTime"],
  1076. OSUtils::jsonDump(config["capabilities"], -1), (uint64_t)config["creationTime"],
  1077. OSUtils::jsonString(config["identity"], ""), (uint64_t)config["lastAuthorizedTime"],
  1078. (uint64_t)config["lastDeauthorizedTime"], (int)config["remoteTraceLevel"], target,
  1079. (uint64_t)config["revision"], OSUtils::jsonDump(config["tags"], -1), (int)config["vMajor"],
  1080. (int)config["vMinor"], (int)config["vRev"], (int)config["vProto"]);
  1081. w.commit();
  1082. if (_smee != NULL && isNewMember) {
  1083. // TODO: Smee Notifications for New Members
  1084. // pqxx::row row = w.exec_params1(
  1085. // "SELECT "
  1086. // " count(h.hook_id) "
  1087. // "FROM "
  1088. // " ztc_hook h "
  1089. // " INNER JOIN ztc_org o ON o.org_id = h.org_id "
  1090. // " INNER JOIN ztc_network n ON n.owner_id = o.owner_id "
  1091. // " WHERE "
  1092. // "n.id = $1 ",
  1093. // networkId);
  1094. // int64_t hookCount = row[0].as<int64_t>();
  1095. // if (hookCount > 0) {
  1096. // notifyNewMember(networkId, memberId);
  1097. // }
  1098. }
  1099. const uint64_t nwidInt = OSUtils::jsonIntHex(config["nwid"], 0ULL);
  1100. const uint64_t memberidInt = OSUtils::jsonIntHex(config["id"], 0ULL);
  1101. if (nwidInt && memberidInt) {
  1102. nlohmann::json nwOrig;
  1103. nlohmann::json memOrig;
  1104. nlohmann::json memNew(config);
  1105. get(nwidInt, nwOrig, memberidInt, memOrig);
  1106. _memberChanged(memOrig, memNew, qitem.second);
  1107. }
  1108. else {
  1109. fprintf(
  1110. stderr, "%s: Can't notify of change. Error parsing nwid or memberid: %llu-%llu\n",
  1111. _myAddressStr.c_str(), (unsigned long long)nwidInt, (unsigned long long)memberidInt);
  1112. }
  1113. }
  1114. catch (std::exception& e) {
  1115. fprintf(
  1116. stderr, "%s ERROR: Error updating member %s-%s: %s\n", _myAddressStr.c_str(), networkId.c_str(),
  1117. memberId.c_str(), e.what());
  1118. mspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1119. }
  1120. }
  1121. else if (objtype == "network") {
  1122. auto nspan = tracer->StartSpan("CentralDB::commitThread::network");
  1123. auto nscope = tracer->WithActiveSpan(nspan);
  1124. try {
  1125. // fprintf(stderr, "%s: commitThread: network\n", _myAddressStr.c_str());
  1126. pqxx::work w(*c->c);
  1127. std::string id = config["id"];
  1128. pqxx::result res = w.exec_params0(
  1129. "INSERT INTO networks_ctl (id, name, configuration, controller_id, revision) "
  1130. "VALUES ($1, $2, $3, $4, $5) "
  1131. "ON CONFLICT (id) DO UPDATE SET "
  1132. "name = EXCLUDED.name, configuration = EXCLUDED.configuration, revision = EXCLUDED.revision+1",
  1133. id, OSUtils::jsonString(config["name"], ""), OSUtils::jsonDump(config, -1), _myAddressStr,
  1134. ((uint64_t)config["revision"]));
  1135. w.commit();
  1136. // res = w.exec_params0("DELETE FROM ztc_network_assignment_pool WHERE network_id = $1", 0);
  1137. // auto pool = config["ipAssignmentPools"];
  1138. // bool err = false;
  1139. // for (auto i = pool.begin(); i != pool.end(); ++i) {
  1140. // std::string start = (*i)["ipRangeStart"];
  1141. // std::string end = (*i)["ipRangeEnd"];
  1142. // res = w.exec_params0(
  1143. // "INSERT INTO ztc_network_assignment_pool (network_id, ip_range_start, ip_range_end) "
  1144. // "VALUES ($1, $2, $3)",
  1145. // id, start, end);
  1146. // }
  1147. const uint64_t nwidInt = OSUtils::jsonIntHex(config["nwid"], 0ULL);
  1148. if (nwidInt) {
  1149. nlohmann::json nwOrig;
  1150. nlohmann::json nwNew(config);
  1151. get(nwidInt, nwOrig);
  1152. _networkChanged(nwOrig, nwNew, qitem.second);
  1153. }
  1154. else {
  1155. fprintf(
  1156. stderr, "%s: Can't notify network changed: %llu\n", _myAddressStr.c_str(),
  1157. (unsigned long long)nwidInt);
  1158. }
  1159. }
  1160. catch (std::exception& e) {
  1161. nspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1162. fprintf(stderr, "%s ERROR: Error updating network: %s\n", _myAddressStr.c_str(), e.what());
  1163. }
  1164. if (_listenerMode == LISTENER_MODE_REDIS && _redisMemberStatus) {
  1165. try {
  1166. std::string id = config["id"];
  1167. std::string controllerId = _myAddressStr.c_str();
  1168. std::string key = "networks:{" + controllerId + "}";
  1169. if (_cc->redisConfig->clusterMode) {
  1170. _cluster->sadd(key, id);
  1171. }
  1172. else {
  1173. _redis->sadd(key, id);
  1174. }
  1175. }
  1176. catch (sw::redis::Error& e) {
  1177. nspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1178. fprintf(stderr, "ERROR: Error adding network to Redis: %s\n", e.what());
  1179. }
  1180. }
  1181. }
  1182. else if (objtype == "_delete_network") {
  1183. auto dspan = tracer->StartSpan("CentralDB::commitThread::_delete_network");
  1184. auto dscope = tracer->WithActiveSpan(dspan);
  1185. // fprintf(stderr, "%s: commitThread: delete network\n", _myAddressStr.c_str());
  1186. try {
  1187. pqxx::work w(*c->c);
  1188. std::string networkId = config["id"];
  1189. fprintf(stderr, "Deleting network %s\n", networkId.c_str());
  1190. w.exec_params0("DELETE FROM network_memberships_ctl WHERE network_id = $1", networkId);
  1191. w.exec_params0("DELETE FROM networks_ctl WHERE id = $1", networkId);
  1192. w.commit();
  1193. uint64_t nwidInt = OSUtils::jsonIntHex(config["nwid"], 0ULL);
  1194. json oldConfig;
  1195. get(nwidInt, oldConfig);
  1196. json empty;
  1197. _networkChanged(oldConfig, empty, qitem.second);
  1198. }
  1199. catch (std::exception& e) {
  1200. dspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1201. fprintf(stderr, "%s ERROR: Error deleting network: %s\n", _myAddressStr.c_str(), e.what());
  1202. }
  1203. if (_listenerMode == LISTENER_MODE_REDIS && _redisMemberStatus) {
  1204. try {
  1205. std::string id = config["id"];
  1206. std::string controllerId = _myAddressStr.c_str();
  1207. std::string key = "networks:{" + controllerId + "}";
  1208. if (_cc->redisConfig->clusterMode) {
  1209. _cluster->srem(key, id);
  1210. _cluster->del("network-nodes-online:{" + controllerId + "}:" + id);
  1211. }
  1212. else {
  1213. _redis->srem(key, id);
  1214. _redis->del("network-nodes-online:{" + controllerId + "}:" + id);
  1215. }
  1216. }
  1217. catch (sw::redis::Error& e) {
  1218. dspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1219. fprintf(stderr, "ERROR: Error adding network to Redis: %s\n", e.what());
  1220. }
  1221. }
  1222. }
  1223. else if (objtype == "_delete_member") {
  1224. auto mspan = tracer->StartSpan("CentralDB::commitThread::_delete_member");
  1225. auto mscope = tracer->WithActiveSpan(mspan);
  1226. // fprintf(stderr, "%s commitThread: delete member\n", _myAddressStr.c_str());
  1227. try {
  1228. pqxx::work w(*c->c);
  1229. std::string memberId = config["id"];
  1230. std::string networkId = config["nwid"];
  1231. pqxx::result res = w.exec_params0(
  1232. "DELETE FROM network_memberships_ctl WHERE device_id = $1 AND network_id = $2", memberId,
  1233. networkId);
  1234. w.commit();
  1235. uint64_t nwidInt = OSUtils::jsonIntHex(config["nwid"], 0ULL);
  1236. uint64_t memberidInt = OSUtils::jsonIntHex(config["id"], 0ULL);
  1237. nlohmann::json networkConfig;
  1238. nlohmann::json oldConfig;
  1239. get(nwidInt, networkConfig, memberidInt, oldConfig);
  1240. json empty;
  1241. _memberChanged(oldConfig, empty, qitem.second);
  1242. }
  1243. catch (std::exception& e) {
  1244. mspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1245. fprintf(stderr, "%s ERROR: Error deleting member: %s\n", _myAddressStr.c_str(), e.what());
  1246. }
  1247. if (_listenerMode == LISTENER_MODE_REDIS && _redisMemberStatus) {
  1248. try {
  1249. std::string memberId = config["id"];
  1250. std::string networkId = config["nwid"];
  1251. std::string controllerId = _myAddressStr.c_str();
  1252. std::string key = "network-nodes-all:{" + controllerId + "}:" + networkId;
  1253. if (_cc->redisConfig->clusterMode) {
  1254. _cluster->srem(key, memberId);
  1255. _cluster->del("member:{" + controllerId + "}:" + networkId + ":" + memberId);
  1256. }
  1257. else {
  1258. _redis->srem(key, memberId);
  1259. _redis->del("member:{" + controllerId + "}:" + networkId + ":" + memberId);
  1260. }
  1261. }
  1262. catch (sw::redis::Error& e) {
  1263. mspan->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1264. fprintf(stderr, "ERROR: Error deleting member from Redis: %s\n", e.what());
  1265. }
  1266. }
  1267. }
  1268. else {
  1269. fprintf(stderr, "%s ERROR: unknown objtype\n", _myAddressStr.c_str());
  1270. }
  1271. }
  1272. catch (std::exception& e) {
  1273. span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
  1274. fprintf(stderr, "%s ERROR: Error getting objtype: %s\n", _myAddressStr.c_str(), e.what());
  1275. }
  1276. _pool->unborrow(c);
  1277. c.reset();
  1278. }
  1279. fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str());
  1280. }
  1281. void CentralDB::notifyNewMember(const std::string& networkID, const std::string& memberID)
  1282. {
  1283. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  1284. auto tracer = provider->GetTracer("CentralDB");
  1285. auto span = tracer->StartSpan("CentralDB::notifyNewMember");
  1286. auto scope = tracer->WithActiveSpan(span);
  1287. rustybits::smee_client_notify_network_joined(_smee, networkID.c_str(), memberID.c_str());
  1288. }
  1289. void CentralDB::onlineNotificationThread()
  1290. {
  1291. waitForReady();
  1292. while (_run == 1) {
  1293. auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  1294. auto tracer = provider->GetTracer("CentralDB");
  1295. auto span = tracer->StartSpan("CentralDB::onlineNotificationThread");
  1296. auto scope = tracer->WithActiveSpan(span);
  1297. try {
  1298. std::unordered_map<std::pair<uint64_t, uint64_t>, NodeOnlineRecord, _PairHasher> lastOnline;
  1299. {
  1300. std::lock_guard<std::mutex> l(_lastOnline_l);
  1301. lastOnline.swap(_lastOnline);
  1302. }
  1303. uint64_t updateCount = 0;
  1304. auto c = _pool->borrow();
  1305. pqxx::work w(*c->c);
  1306. for (auto i = lastOnline.begin(); i != lastOnline.end(); ++i) {
  1307. updateCount += 1;
  1308. uint64_t nwid_i = i->first.first;
  1309. char nwidTmp[64];
  1310. char memTmp[64];
  1311. char ipTmp[64];
  1312. OSUtils::ztsnprintf(nwidTmp, sizeof(nwidTmp), "%.16llx", nwid_i);
  1313. OSUtils::ztsnprintf(memTmp, sizeof(memTmp), "%.10llx", i->first.second);
  1314. nlohmann::json network, member;
  1315. if (! get(nwid_i, network, i->first.second, member)) {
  1316. continue; // skip non existent networks/members
  1317. }
  1318. std::string networkId(nwidTmp);
  1319. std::string memberId(memTmp);
  1320. try {
  1321. // check if the member exists first.
  1322. //
  1323. // exec_params1 will throw pqxx::unexpected_rows if not exactly one row is returned. If that's the
  1324. // case, skip this record and move on.
  1325. pqxx::row r = w.exec_params1(
  1326. "SELECT device_id, network_id FROM network_memberships_ctl WHERE network_id = $1 AND device_id "
  1327. "= $2",
  1328. networkId, memberId);
  1329. }
  1330. catch (pqxx::unexpected_rows& e) {
  1331. continue;
  1332. }
  1333. int64_t ts = i->second.lastSeen;
  1334. std::string ipAddr = i->second.physicalAddress.toIpString(ipTmp);
  1335. std::string timestamp = std::to_string(ts);
  1336. std::string osArch = i->second.osArch;
  1337. std::vector<std::string> osArchSplit = split(osArch, '/');
  1338. std::string os = "unknown";
  1339. std::string arch = "unknown";
  1340. std::string frontend = member["frontend"].get<std::string>();
  1341. std::string vMajor = OSUtils::jsonString(member["vMajor"], "0");
  1342. std::string vMinor = OSUtils::jsonString(member["vMinor"], "0");
  1343. std::string vRev = OSUtils::jsonString(member["vRev"], "0");
  1344. std::string version = "v" + vMajor + "." + vMinor + "." + vRev;
  1345. if (osArchSplit.size() == 2) {
  1346. os = osArchSplit[0];
  1347. arch = osArchSplit[1];
  1348. }
  1349. _statusWriter->updateNodeStatus(
  1350. networkId, memberId, os, arch, version, i->second.physicalAddress, ts, frontend);
  1351. fprintf(stderr, "sent node status update\n");
  1352. }
  1353. _statusWriter->writePending();
  1354. w.commit();
  1355. _pool->unborrow(c);
  1356. }
  1357. catch (std::exception& e) {
  1358. fprintf(stderr, "%s: error in onlinenotification thread: %s\n", _myAddressStr.c_str(), e.what());
  1359. }
  1360. std::this_thread::sleep_for(std::chrono::seconds(10));
  1361. }
  1362. }
  1363. #endif // ZT_CONTROLLER_USE_LIBPQ