threads_pthread.c

/*
 * Copyright 2016-2025 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>
#endif

#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL but which expects only one parameter for
 * __atomic_is_lock_free() rather than two, i.e. it has the signature
 * __atomic_is_lock_free(sizeof(_Atomic(T))).
 * All of this makes it impossible to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif

#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif

/*
 * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
 * other compilers.
 * Unfortunately, we can't do that with some "generic type", because there's no
 * guarantee that the chosen generic type is large enough to cover all cases.
 * Therefore, we implement fallbacks for each applicable type, with composed
 * names that include the type they handle.
 *
 * (an anecdote: we previously tried to use |void *| as the generic type, with
 * the thought that the pointer itself is the largest type. However, this is
 * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
 *
 * All applicable ATOMIC_ macros take the intended type as first parameter, so
 * they can map to the correct fallback function. In the GNU/clang case, that
 * parameter is simply ignored.
 */

/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.
 */
typedef void *pvoid;
typedef struct rcu_cb_item *prcu_cb_item;

# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
    && !defined(USE_ATOMIC_FALLBACKS)
#  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__) && defined(__LP64__)
/*
 * For pointers, Apple M1 virtualized cpu seems to have some problem using the
 * ldapr instruction (see https://github.com/openssl/openssl/pull/23974)
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad. So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__) AND
 * 4) We are building with 64 bit pointers
 * Then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}

/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_COMPARE_EXCHANGE_N(t, p, e, d, s, f) __atomic_compare_exchange_n(p, e, d, 0, s, f)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

#  define IMPL_fallback_atomic_load_n(t)                            \
    static ossl_inline t fallback_atomic_load_n_##t(t *p)           \
    {                                                               \
        t ret;                                                      \
                                                                    \
        pthread_mutex_lock(&atomic_sim_lock);                       \
        ret = *p;                                                   \
        pthread_mutex_unlock(&atomic_sim_lock);                     \
        return ret;                                                 \
    }
IMPL_fallback_atomic_load_n(uint64_t)
IMPL_fallback_atomic_load_n(pvoid)

#  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)

#  define IMPL_fallback_atomic_store_n(t)                           \
    static ossl_inline t fallback_atomic_store_n_##t(t *p, t v)     \
    {                                                               \
        t ret;                                                      \
                                                                    \
        pthread_mutex_lock(&atomic_sim_lock);                       \
        ret = *p;                                                   \
        *p = v;                                                     \
        pthread_mutex_unlock(&atomic_sim_lock);                     \
        return ret;                                                 \
    }
IMPL_fallback_atomic_store_n(uint64_t)

#  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)

#  define IMPL_fallback_atomic_store(t)                             \
    static ossl_inline void fallback_atomic_store_##t(t *p, t *v)   \
    {                                                               \
        pthread_mutex_lock(&atomic_sim_lock);                       \
        *p = *v;                                                    \
        pthread_mutex_unlock(&atomic_sim_lock);                     \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)

#  define IMPL_fallback_atomic_exchange_n(t)                        \
    static ossl_inline t fallback_atomic_exchange_n_##t(t *p, t v)  \
    {                                                               \
        t ret;                                                      \
                                                                    \
        pthread_mutex_lock(&atomic_sim_lock);                       \
        ret = *p;                                                   \
        *p = v;                                                     \
        pthread_mutex_unlock(&atomic_sim_lock);                     \
        return ret;                                                 \
    }
IMPL_fallback_atomic_exchange_n(uint64_t)
IMPL_fallback_atomic_exchange_n(prcu_cb_item)

#  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)

#  define IMPL_fallback_atomic_compare_exchange_n(t)                           \
    static ossl_inline int fallback_atomic_compare_exchange_n_##t(t *p, t *e,  \
                                                                  t d, int s,  \
                                                                  int f)       \
    {                                                                          \
        int ret = 1;                                                           \
                                                                               \
        pthread_mutex_lock(&atomic_sim_lock);                                  \
        if (*p == *e)                                                          \
            *p = d;                                                            \
        else                                                                   \
            ret = 0;                                                           \
        pthread_mutex_unlock(&atomic_sim_lock);                                \
        return ret;                                                            \
    }
IMPL_fallback_atomic_compare_exchange_n(uint64_t)

#  define ATOMIC_COMPARE_EXCHANGE_N(t, p, e, d, s, f) fallback_atomic_compare_exchange_n_##t(p, e, d, s, f)

/*
 * The fallbacks that follow don't need any per type implementation, as
 * they are designed for uint64_t only. If there comes a time when multiple
 * types need to be covered, it's relatively easy to refactor them the same
 * way as the fallbacks above.
 */

static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)

static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static ossl_inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif
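
/*
 * A minimal usage sketch of the ATOMIC_ macros above (not compiled into the
 * library; atomic_macro_usage_sketch and its counter argument are purely
 * illustrative). The leading type argument only matters when the mutex based
 * fallbacks are selected, where it picks fallback_atomic_load_n_uint64_t and
 * friends; with the GNU/clang builtins it is simply dropped.
 */
#if 0
static uint64_t atomic_macro_usage_sketch(uint64_t *counter)
{
    uint64_t snapshot;

    /* acquire-load the current value, then publish an increment */
    snapshot = ATOMIC_LOAD_N(uint64_t, counter, __ATOMIC_ACQUIRE);
    ATOMIC_ADD_FETCH(counter, 1, __ATOMIC_RELEASE);
    return snapshot;
}
#endif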
/*
 * users is broken up into 2 parts
 * bits 0-15: current readers
 * bits 32-63: ID
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
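
/*
 * A worked example of the layout above (illustrative only): a qp whose ID is
 * 7 and which currently has 3 readers holds
 *
 *     users = VAL_ID(7) | (3 * VAL_READER) == 0x0000000700000003
 *
 * so READER_COUNT(users) == 3 and ID_VAL(users) == 7. Each read side
 * acquisition adds VAL_READER and each release subtracts it.
 */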
/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};

/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* The context we are being created against */
    OSSL_LIB_CTX *ctx;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};

/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation. Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         *    of the lock is flushed from a local cpu cache so that we see any
         *    updates prior to the load. This is a non-issue on cache coherent
         *    systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during the spin loop read of users, as it waits for
         * the reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}

static void ossl_rcu_free_local_data(void *arg)
{
    OSSL_LIB_CTX *ctx = arg;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(lkey, NULL);
}

void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(lkey);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(lkey, data);
        ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}

void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx);
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
                                       __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }

    /*
     * If we get here, we're trying to unlock a lock that we never acquired -
     * that's fatal.
     */
    assert(0);
}
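
/*
 * A minimal read side usage sketch (not compiled; struct shared_conf,
 * global_conf and read_shared_value are hypothetical). Readers bracket their
 * access with ossl_rcu_read_lock/ossl_rcu_read_unlock and dereference shared
 * pointers via ossl_rcu_uptr_deref so the load carries acquire semantics.
 */
#if 0
struct shared_conf {
    int value;
};

static struct shared_conf *global_conf; /* only written under the write lock */

static int read_shared_value(CRYPTO_RCU_LOCK *rcu)
{
    struct shared_conf *conf;
    int v;

    ossl_rcu_read_lock(rcu);
    conf = ossl_rcu_uptr_deref((void **)&global_conf);
    v = (conf != NULL) ? conf->value : 0;
    ossl_rcu_read_unlock(rcu);
    return v;
}
#endif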
/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t update;
    uint64_t ret;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);

    /*
     * Even though we are under a write side lock here, we need to use atomic
     * instructions to ensure that the results of this update are published to
     * the read side prior to updating the reader idx below
     */
try_again:
    ret = ATOMIC_LOAD_N(uint64_t, &lock->qp_group[current_idx].users, __ATOMIC_ACQUIRE);
    update = ret & ID_MASK;
    update |= new_id;
    if (!ATOMIC_COMPARE_EXCHANGE_N(uint64_t, &lock->qp_group[current_idx].users, &ret, update,
                                   __ATOMIC_ACQ_REL, __ATOMIC_RELAXED))
        goto try_again;

    /*
     * Update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}

static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}

static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}

void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}

void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
                                 __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}
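
/*
 * A minimal write side usage sketch (not compiled; it reuses the hypothetical
 * struct shared_conf and global_conf from the read side sketch above). The
 * writer publishes a new object with ossl_rcu_assign_uptr under the write
 * lock, then waits for all readers of the old object to drain with
 * ossl_synchronize_rcu before freeing it.
 */
#if 0
static int update_shared_value(CRYPTO_RCU_LOCK *rcu, int value)
{
    struct shared_conf *newconf, *oldconf;

    if ((newconf = OPENSSL_zalloc(sizeof(*newconf))) == NULL)
        return 0;
    newconf->value = value;

    ossl_rcu_write_lock(rcu);
    oldconf = global_conf;          /* safe: we hold the write lock */
    /* release-store the new pointer so readers see a fully built object */
    ossl_rcu_assign_uptr((void **)&global_conf, (void **)&newconf);
    ossl_rcu_write_unlock(rcu);

    /* wait for all readers still holding the old pointer to finish */
    ossl_synchronize_rcu(rcu);
    OPENSSL_free(oldconf);
    return 1;
}
#endif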
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
                                  __ATOMIC_ACQ_REL);

    return 1;
}
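
/*
 * A sketch of deferred reclamation via ossl_rcu_call (not compiled;
 * free_old_conf and replace_conf_deferred are hypothetical, and the callback
 * is assumed to match the rcu_cb_fn shape of taking a single void *).
 * Instead of blocking in ossl_synchronize_rcu, a writer can queue a callback
 * that runs once the next synchronize pass has observed a full grace period.
 */
#if 0
static void free_old_conf(void *data)
{
    OPENSSL_free(data);
}

static void replace_conf_deferred(CRYPTO_RCU_LOCK *rcu,
                                  struct shared_conf *newconf)
{
    struct shared_conf *oldconf;

    ossl_rcu_write_lock(rcu);
    oldconf = global_conf;
    ossl_rcu_assign_uptr((void **)&global_conf, (void **)&newconf);
    /* the old object is freed after the next ossl_synchronize_rcu() completes */
    ossl_rcu_call(rcu, free_old_conf, oldconf);
    ossl_rcu_write_unlock(rcu);
}
#endif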
void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}

CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
{
    struct rcu_lock_st *new;

    /*
     * We need a minimum of 3 qp's
     */
    if (num_writers < 3)
        num_writers = 3;

    ctx = ossl_lib_ctx_get_concrete(ctx);
    if (ctx == NULL)
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    new->ctx = ctx;
    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);

    /* By default our first writer is already alloced */
    new->writers_alloced = 1;

    new->qp_group = allocate_new_qp_group(new, num_writers);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }

    return new;
}
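
/*
 * Lifecycle sketch (not compiled; make_rcu_lock and drop_rcu_lock are
 * hypothetical). An rcu lock is created against a library context, and the
 * num_writers hint sizes the qp group; values below 3 are raised to 3 above.
 */
#if 0
static CRYPTO_RCU_LOCK *make_rcu_lock(OSSL_LIB_CTX *libctx)
{
    /* allow a few concurrent ossl_synchronize_rcu() callers */
    return ossl_rcu_lock_new(4, libctx);
}

static void drop_rcu_lock(CRYPTO_RCU_LOCK *rcu)
{
    /* ossl_rcu_lock_free() runs a final synchronize before releasing memory */
    ossl_rcu_lock_free(rcu);
}
#endif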
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    OPENSSL_free(rlock);
}

CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined(__TANDEM) && !defined(_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}
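
/*
 * A usage sketch for the CRYPTO_THREAD_* lock API implemented here (not
 * compiled; bump_counter and shared_counter are hypothetical). Callers must
 * check the lock/unlock return values, since both the rwlock and the mutex
 * paths can fail.
 */
#if 0
static int bump_counter(CRYPTO_RWLOCK *lock, int *shared_counter)
{
    int ok;

    if (!CRYPTO_THREAD_write_lock(lock))
        return 0;
    (*shared_counter)++;
    ok = 1;
    if (!CRYPTO_THREAD_unlock(lock))
        ok = 0;
    return ok;
}
#endif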
__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);

    return;
}

int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;
    return 1;
}
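
/*
 * A sketch of one-time initialization with CRYPTO_THREAD_run_once (not
 * compiled; do_init, init_ok and ensure_initialized are hypothetical).
 * CRYPTO_ONCE_STATIC_INIT comes from <openssl/crypto.h>, already included
 * above.
 */
#if 0
static CRYPTO_ONCE init_once = CRYPTO_ONCE_STATIC_INIT;
static int init_ok;

static void do_init(void)
{
    init_ok = 1;
}

static int ensure_initialized(void)
{
    if (!CRYPTO_THREAD_run_once(&init_once, do_init))
        return 0;
    return init_ok;
}
#endif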
int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}
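
/*
 * A thread-local storage usage sketch (not compiled; state_key, state_free,
 * init_state_key and get_per_thread_state are hypothetical). The cleanup
 * callback registered with CRYPTO_THREAD_init_local runs on thread exit for
 * any thread that left a non-NULL value set.
 */
#if 0
static CRYPTO_THREAD_LOCAL state_key;

static void state_free(void *ptr)
{
    OPENSSL_free(ptr);
}

static int init_state_key(void)
{
    return CRYPTO_THREAD_init_local(&state_key, state_free);
}

static void *get_per_thread_state(void)
{
    void *state = CRYPTO_THREAD_get_local(&state_key);

    if (state == NULL) {
        state = OPENSSL_zalloc(64);
        if (state == NULL || !CRYPTO_THREAD_set_local(&state_key, state)) {
            OPENSSL_free(state);
            return NULL;
        }
    }
    return state;
}
#endif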
CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}

int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
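
/*
 * A reference-count style usage sketch for CRYPTO_atomic_add (not compiled;
 * up_ref, refcount and refcount_lock are hypothetical). The lock argument is
 * only used when no lock-free native atomic path is available.
 */
#if 0
static int up_ref(int *refcount, CRYPTO_RWLOCK *refcount_lock)
{
    int new_count;

    if (!CRYPTO_atomic_add(refcount, 1, &new_count, refcount_lock))
        return 0;
    return new_count > 1;
}
#endif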
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}

# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif