cl5_clcache.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079
  1. /** BEGIN COPYRIGHT BLOCK
  2. * Copyright (C) 2005 Red Hat, Inc.
  3. * All rights reserved.
  4. *
  5. * License: GPL (version 3 or any later version).
  6. * See LICENSE for details.
  7. * END COPYRIGHT BLOCK **/
  8. #ifdef HAVE_CONFIG_H
  9. # include <config.h>
  10. #endif
  11. #include "errno.h" /* ENOMEM, EVAL used by Berkeley DB */
  12. #include "db.h" /* Berkeley DB */
  13. #include "cl5.h" /* changelog5Config */
  14. #include "cl5_clcache.h"
  15. /* newer bdb uses DB_BUFFER_SMALL instead of ENOMEM as the
  16. error return if the given buffer in which to load a
  17. key or value is too small - if it is not defined, define
  18. it here to ENOMEM
  19. */
  20. #ifndef DB_BUFFER_SMALL
  21. #define DB_BUFFER_SMALL ENOMEM
  22. #endif
  /*
   * Constants for the buffer pool:
   *
   * DEFAULT_CLC_BUFFER_COUNT_MIN / DEFAULT_CLC_BUFFER_COUNT_MAX
   *     Initial values for the pool's minimum and maximum buffer counts.
   *
   * DEFAULT_CLC_BUFFER_PAGE_COUNT
   *     Number of pages in a buffer. Little performance boost
   *     if it is too small.
   *
   * DEFAULT_CLC_BUFFER_PAGE_SIZE
   *     Its value is determined based on the DB requirement that
   *     the buffer size should be a multiple of 1024.
   */
  #define DEFAULT_CLC_BUFFER_COUNT_MIN  10
  #define DEFAULT_CLC_BUFFER_COUNT_MAX  0
  #define DEFAULT_CLC_BUFFER_PAGE_COUNT 32
  #define DEFAULT_CLC_BUFFER_PAGE_SIZE  1024
  /* State values used by a buffer (buf_state) and by each csn sequence control block (state). */
  enum {
      CLC_STATE_READY = 0,             /* ready to iterate */
      CLC_STATE_UP_TO_DATE = 1,        /* remote RUV already covers the CSN */
      CLC_STATE_CSN_GT_RUV = 2,        /* local RUV doesn't cover the CSN */
      CLC_STATE_NEW_RID = 3,           /* unknown RID to local RUVs */
      CLC_STATE_UNSAFE_RUV_CHANGE = 4, /* (RUV1 < maxcsn-in-buffer) && (RUV1 < RUV1') */
      CLC_STATE_DONE = 5,              /* no more change */
      CLC_STATE_ABORTING = 6           /* abort replication session */
  };
  46. typedef struct clc_busy_list CLC_Busy_List;
  47. struct csn_seq_ctrl_block {
  48. ReplicaId rid; /* RID this block serves */
  49. CSN *consumer_maxcsn; /* Don't send CSN <= this */
  50. CSN *local_maxcsn; /* Don't send CSN > this */
  51. CSN *prev_local_maxcsn; /* */
  52. int state; /* CLC_STATE_* */
  53. };
  54. /*
  55. * Each cl5replayiterator acquires a buffer from the buffer pool
  56. * at the beginning of a replication session, and returns it back
  57. * at the end.
  58. */
  59. struct clc_buffer {
  60. char *buf_agmt_name; /* agreement acquired this buffer */
  61. ReplicaId buf_consumer_rid; /* help checking threshold csn */
  62. const RUV *buf_consumer_ruv; /* used to skip change */
  63. const RUV *buf_local_ruv; /* used to refresh local_maxcsn */
  64. /*
  65. * fields for retriving data from DB
  66. */
  67. int buf_state;
  68. CSN *buf_current_csn;
  69. int buf_load_flag; /* db flag DB_MULTIPLE_KEY, DB_SET, DB_NEXT */
  70. DBC *buf_cursor;
  71. DBT buf_key; /* current csn string */
  72. DBT buf_data; /* data retrived from db */
  73. void *buf_record_ptr; /* ptr to the current record in data */
  74. CSN *buf_missing_csn; /* used to detect persistent missing of CSN */
  75. CSN *buf_prev_missing_csn; /* used to surpress the repeated messages */
  76. /* fields for control the CSN sequence sent to the consumer */
  77. struct csn_seq_ctrl_block **buf_cscbs;
  78. int buf_num_cscbs; /* number of csn sequence ctrl blocks */
  79. int buf_max_cscbs;
  80. /* fields for debugging stat */
  81. int buf_load_cnt; /* number of loads for session */
  82. int buf_record_cnt; /* number of changes for session */
  83. int buf_record_skipped; /* number of changes skipped */
  84. int buf_skipped_new_rid; /* number of changes skipped due to new_rid */
  85. int buf_skipped_csn_gt_cons_maxcsn; /* number of changes skipped due to csn greater than consumer maxcsn */
  86. int buf_skipped_up_to_date; /* number of changes skipped due to consumer being up-to-date for the given rid */
  87. int buf_skipped_csn_gt_ruv; /* number of changes skipped due to preceedents are not covered by local RUV snapshot */
  88. int buf_skipped_csn_covered; /* number of changes skipped due to CSNs already covered by consumer RUV */
  89. /*
  90. * fields that should be accessed via bl_lock or pl_lock
  91. */
  92. CLC_Buffer *buf_next; /* next buffer in the same list */
  93. CLC_Busy_List *buf_busy_list; /* which busy list I'm in */
  94. };
  95. /*
  96. * Each changelog has a busy buffer list
  97. */
  98. struct clc_busy_list {
  99. PRLock *bl_lock;
  100. DB *bl_db; /* changelog db handle */
  101. CLC_Buffer *bl_buffers; /* busy buffers of this list */
  102. CLC_Busy_List *bl_next; /* next busy list in the pool */
  103. };
  104. /*
  105. * Each process has a buffer pool
  106. */
  107. struct clc_pool {
  108. Slapi_RWLock *pl_lock; /* cl writer and agreements */
  109. DB_ENV **pl_dbenv; /* pointer to DB_ENV for all the changelog files */
  110. CLC_Busy_List *pl_busy_lists; /* busy buffer lists, one list per changelog file */
  111. int pl_buffer_cnt_now; /* total number of buffers */
  112. int pl_buffer_cnt_min; /* free a newly returned buffer if _now > _min */
  113. int pl_buffer_cnt_max; /* no use */
  114. int pl_buffer_default_pages; /* num of pages in a new buffer */
  115. };
  116. /* static variables */
  117. static struct clc_pool *_pool = NULL; /* process's buffer pool */
  118. /* static prototypes */
  119. static int clcache_adjust_anchorcsn ( CLC_Buffer *buf );
  120. static void clcache_refresh_consumer_maxcsns ( CLC_Buffer *buf );
  121. static int clcache_refresh_local_maxcsns ( CLC_Buffer *buf );
  122. static int clcache_skip_change ( CLC_Buffer *buf );
  123. static int clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag );
  124. static int clcache_open_cursor ( DB_TXN *txn, CLC_Buffer *buf, DBC **cursor );
  125. static int clcache_cursor_get ( DBC *cursor, CLC_Buffer *buf, int flag );
  126. static struct csn_seq_ctrl_block *clcache_new_cscb ();
  127. static void clcache_free_cscb ( struct csn_seq_ctrl_block ** cscb );
  128. static CLC_Buffer *clcache_new_buffer ( ReplicaId consumer_rid );
  129. static void clcache_delete_buffer ( CLC_Buffer **buf );
  130. static CLC_Busy_List *clcache_new_busy_list ();
  131. static void clcache_delete_busy_list ( CLC_Busy_List **bl );
  132. static int clcache_enqueue_busy_list( DB *db, CLC_Buffer *buf );
  133. static void csn_dup_or_init_by_csn ( CSN **csn1, CSN *csn2 );
  134. /*
  135. * Initiates the process buffer pool. This should be done
  136. * once and only once when process starts.
  137. */
  138. int
  139. clcache_init ( DB_ENV **dbenv )
  140. {
  141. if (_pool) {
  142. return 0; /* already initialized */
  143. }
  144. if (NULL == dbenv) {
  145. return -1;
  146. }
  147. _pool = (struct clc_pool*) slapi_ch_calloc ( 1, sizeof ( struct clc_pool ));
  148. _pool->pl_dbenv = dbenv;
  149. _pool->pl_buffer_cnt_min = DEFAULT_CLC_BUFFER_COUNT_MIN;
  150. _pool->pl_buffer_cnt_max = DEFAULT_CLC_BUFFER_COUNT_MAX;
  151. _pool->pl_buffer_default_pages = DEFAULT_CLC_BUFFER_COUNT_MAX;
  152. _pool->pl_lock = slapi_new_rwlock ();
  153. return 0;
  154. }
  155. /*
  156. * This is part of a callback function when changelog configuration
  157. * is read or updated.
  158. */
  159. void
  160. clcache_set_config ()
  161. {
  162. slapi_rwlock_wrlock ( _pool->pl_lock );
  163. _pool->pl_buffer_cnt_max = CL5_DEFAULT_CONFIG_CACHESIZE;
  164. /*
  165. * According to http://www.sleepycat.com/docs/api_c/dbc_get.html,
  166. * data buffer should be a multiple of 1024 bytes in size
  167. * for DB_MULTIPLE_KEY operation.
  168. */
  169. _pool->pl_buffer_default_pages = CL5_DEFAULT_CONFIG_CACHEMEMSIZE / DEFAULT_CLC_BUFFER_PAGE_SIZE + 1;
  170. if ( _pool->pl_buffer_default_pages <= 0 ) { /* this never be true... */
  171. _pool->pl_buffer_default_pages = DEFAULT_CLC_BUFFER_PAGE_COUNT;
  172. }
  173. slapi_rwlock_unlock ( _pool->pl_lock );
  174. }
  175. /*
  176. * Gets the pointer to a thread dedicated buffer, or allocates
  177. * a new buffer if there is no buffer allocated yet for this thread.
  178. *
  179. * This is called when a cl5replayiterator is created for
  180. * a replication session.
  181. */
  182. int
  183. clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV *consumer_ruv, const RUV *local_ruv )
  184. {
  185. int rc = 0;
  186. int need_new;
  187. if ( buf == NULL ) return CL5_BAD_DATA;
  188. *buf = NULL;
  189. /* if the pool was re-initialized, the thread private cache will be invalid,
  190. so we must get a new one */
  191. need_new = (!_pool || !_pool->pl_busy_lists || !_pool->pl_busy_lists->bl_buffers);
  192. if ( (!need_new) && (NULL != ( *buf = (CLC_Buffer*) get_thread_private_cache())) ) {
  193. slapi_log_error ( SLAPI_LOG_REPL, get_thread_private_agmtname(),
  194. "clcache_get_buffer: found thread private buffer cache %p\n", *buf);
  195. slapi_log_error ( SLAPI_LOG_REPL, get_thread_private_agmtname(),
  196. "clcache_get_buffer: _pool is %p _pool->pl_busy_lists is %p _pool->pl_busy_lists->bl_buffers is %p\n",
  197. _pool, _pool ? _pool->pl_busy_lists : NULL,
  198. (_pool && _pool->pl_busy_lists) ? _pool->pl_busy_lists->bl_buffers : NULL);
  199. (*buf)->buf_state = CLC_STATE_READY;
  200. (*buf)->buf_load_cnt = 0;
  201. (*buf)->buf_record_cnt = 0;
  202. (*buf)->buf_record_skipped = 0;
  203. (*buf)->buf_cursor = NULL;
  204. (*buf)->buf_skipped_new_rid = 0;
  205. (*buf)->buf_skipped_csn_gt_cons_maxcsn = 0;
  206. (*buf)->buf_skipped_up_to_date = 0;
  207. (*buf)->buf_skipped_csn_gt_ruv = 0;
  208. (*buf)->buf_skipped_csn_covered = 0;
  209. (*buf)->buf_cscbs = (struct csn_seq_ctrl_block **) slapi_ch_calloc(MAX_NUM_OF_MASTERS + 1,
  210. sizeof(struct csn_seq_ctrl_block *));
  211. (*buf)->buf_num_cscbs = 0;
  212. (*buf)->buf_max_cscbs = MAX_NUM_OF_MASTERS;
  213. }
  214. else {
  215. *buf = clcache_new_buffer ( consumer_rid );
  216. if ( *buf ) {
  217. if ( 0 == clcache_enqueue_busy_list ( db, *buf ) ) {
  218. set_thread_private_cache ( (void*) (*buf) );
  219. }
  220. else {
  221. clcache_delete_buffer ( buf );
  222. }
  223. }
  224. }
  225. if ( NULL != *buf ) {
  226. (*buf)->buf_consumer_ruv = consumer_ruv;
  227. (*buf)->buf_local_ruv = local_ruv;
  228. }
  229. else {
  230. slapi_log_error ( SLAPI_LOG_FATAL, get_thread_private_agmtname(),
  231. "clcache_get_buffer: can't allocate new buffer\n" );
  232. rc = CL5_MEMORY_ERROR;
  233. }
  234. return rc;
  235. }
  236. /*
  237. * Returns a buffer back to the buffer pool.
  238. */
  239. void
  240. clcache_return_buffer ( CLC_Buffer **buf )
  241. {
  242. int i;
  243. slapi_log_error ( SLAPI_LOG_REPL, (*buf)->buf_agmt_name,
  244. "session end: state=%d load=%d sent=%d skipped=%d skipped_new_rid=%d "
  245. "skipped_csn_gt_cons_maxcsn=%d skipped_up_to_date=%d "
  246. "skipped_csn_gt_ruv=%d skipped_csn_covered=%d\n",
  247. (*buf)->buf_state,
  248. (*buf)->buf_load_cnt,
  249. (*buf)->buf_record_cnt - (*buf)->buf_record_skipped,
  250. (*buf)->buf_record_skipped, (*buf)->buf_skipped_new_rid,
  251. (*buf)->buf_skipped_csn_gt_cons_maxcsn,
  252. (*buf)->buf_skipped_up_to_date, (*buf)->buf_skipped_csn_gt_ruv,
  253. (*buf)->buf_skipped_csn_covered);
  254. for ( i = 0; i < (*buf)->buf_num_cscbs; i++ ) {
  255. clcache_free_cscb ( &(*buf)->buf_cscbs[i] );
  256. }
  257. slapi_ch_free((void **)&(*buf)->buf_cscbs);
  258. if ( (*buf)->buf_cursor ) {
  259. (*buf)->buf_cursor->c_close ( (*buf)->buf_cursor );
  260. (*buf)->buf_cursor = NULL;
  261. }
  262. }
  263. /*
  264. * Loads a buffer from DB.
  265. *
  266. * anchorcsn - passed in for the first load of a replication session;
  267. * flag - DB_SET to load in the key CSN record.
  268. * DB_NEXT to load in the records greater than key CSN.
  269. * return - DB error code instead of cl5 one because of the
  270. * historic reason.
  271. */
  272. int
  273. clcache_load_buffer ( CLC_Buffer *buf, CSN *anchorcsn, int flag )
  274. {
  275. int rc = 0;
  276. clcache_refresh_local_maxcsns ( buf );
  277. /* Set the loading key */
  278. if ( anchorcsn ) {
  279. clcache_refresh_consumer_maxcsns ( buf );
  280. buf->buf_load_flag = DB_MULTIPLE_KEY;
  281. csn_as_string ( anchorcsn, 0, (char*)buf->buf_key.data );
  282. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  283. "session start: anchorcsn=%s\n", (char*)buf->buf_key.data );
  284. }
  285. else if ( csn_get_time(buf->buf_current_csn) == 0 ) {
  286. /* time == 0 means this csn has never been set */
  287. rc = DB_NOTFOUND;
  288. }
  289. else if ( clcache_adjust_anchorcsn ( buf ) != 0 ) {
  290. rc = DB_NOTFOUND;
  291. }
  292. else {
  293. csn_as_string ( buf->buf_current_csn, 0, (char*)buf->buf_key.data );
  294. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  295. "load next: anchorcsn=%s\n", (char*)buf->buf_key.data );
  296. }
  297. if ( rc == 0 ) {
  298. buf->buf_state = CLC_STATE_READY;
  299. rc = clcache_load_buffer_bulk ( buf, flag );
  300. /* Reset some flag variables */
  301. if ( rc == 0 ) {
  302. int i;
  303. for ( i = 0; i < buf->buf_num_cscbs; i++ ) {
  304. buf->buf_cscbs[i]->state = CLC_STATE_READY;
  305. }
  306. }
  307. else if ( anchorcsn ) {
  308. /* Report error only when the missing is persistent */
  309. if ( buf->buf_missing_csn && csn_compare (buf->buf_missing_csn, anchorcsn) == 0 ) {
  310. if (!buf->buf_prev_missing_csn || csn_compare (buf->buf_prev_missing_csn, anchorcsn)) {
  311. slapi_log_error ( SLAPI_LOG_FATAL, buf->buf_agmt_name,
  312. "Can't locate CSN %s in the changelog (DB rc=%d). If replication stops, the consumer may need to be reinitialized.\n",
  313. (char*)buf->buf_key.data, rc );
  314. csn_dup_or_init_by_csn (&buf->buf_prev_missing_csn, anchorcsn);
  315. }
  316. }
  317. else {
  318. csn_dup_or_init_by_csn (&buf->buf_missing_csn, anchorcsn);
  319. }
  320. }
  321. }
  322. if ( rc != 0 ) {
  323. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  324. "clcache_load_buffer: rc=%d\n", rc );
  325. }
  326. return rc;
  327. }
  328. static int
  329. clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
  330. {
  331. DB_TXN *txn = NULL;
  332. DBC *cursor = NULL;
  333. int rc = 0;
  334. int tries = 0;
  335. #if 0 /* txn control seems not improving anything so turn it off */
  336. if ( *(_pool->pl_dbenv) ) {
  337. txn_begin( *(_pool->pl_dbenv), NULL, &txn, 0 );
  338. }
  339. #endif
  340. if (NULL == buf) {
  341. slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
  342. "NULL buf\n" );
  343. return rc;
  344. }
  345. if (NULL == buf->buf_busy_list) {
  346. slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
  347. "%s%sno buf_busy_list\n",
  348. buf->buf_agmt_name?buf->buf_agmt_name:"",
  349. buf->buf_agmt_name?": ":"" );
  350. return rc;
  351. }
  352. PR_Lock ( buf->buf_busy_list->bl_lock );
  353. retry:
  354. if ( 0 == ( rc = clcache_open_cursor ( txn, buf, &cursor )) ) {
  355. if ( flag == DB_NEXT ) {
  356. /* For bulk read, position the cursor before read the next block */
  357. rc = cursor->c_get ( cursor,
  358. & buf->buf_key,
  359. & buf->buf_data,
  360. DB_SET );
  361. }
  362. /*
  363. * Continue if the error is no-mem since we don't need to
  364. * load in the key record anyway with DB_SET.
  365. */
  366. if ( 0 == rc || DB_BUFFER_SMALL == rc )
  367. rc = clcache_cursor_get ( cursor, buf, flag );
  368. }
  369. /*
  370. * Don't keep a cursor open across the whole replication session.
  371. * That had caused noticeable DB resource contention.
  372. */
  373. if ( cursor ) {
  374. cursor->c_close ( cursor );
  375. cursor = NULL;
  376. }
  377. if ((rc == DB_LOCK_DEADLOCK) && (tries < MAX_TRIALS)) {
  378. PRIntervalTime interval;
  379. tries++;
  380. slapi_log_error ( SLAPI_LOG_TRACE, "clcache_load_buffer_bulk",
  381. "deadlock number [%d] - retrying\n", tries );
  382. /* back off */
  383. interval = PR_MillisecondsToInterval(slapi_rand() % 100);
  384. DS_Sleep(interval);
  385. goto retry;
  386. }
  387. if ((rc == DB_LOCK_DEADLOCK) && (tries >= MAX_TRIALS)) {
  388. slapi_log_error ( SLAPI_LOG_REPL, "clcache_load_buffer_bulk",
  389. "could not load buffer from changelog after %d tries\n", tries );
  390. }
  391. #if 0 /* txn control seems not improving anything so turn it off */
  392. if ( txn ) {
  393. txn->commit ( txn, DB_TXN_NOSYNC );
  394. }
  395. #endif
  396. PR_Unlock ( buf->buf_busy_list->bl_lock );
  397. buf->buf_record_ptr = NULL;
  398. if ( 0 == rc ) {
  399. DB_MULTIPLE_INIT ( buf->buf_record_ptr, &buf->buf_data );
  400. if ( NULL == buf->buf_record_ptr )
  401. rc = DB_NOTFOUND;
  402. else
  403. buf->buf_load_cnt++;
  404. }
  405. return rc;
  406. }
  407. /*
  408. * Gets the next change from the buffer.
  409. * *key : output - key of the next change, or NULL if no more change
  410. * *data: output - data of the next change, or NULL if no more change
  411. */
  412. int
  413. clcache_get_next_change ( CLC_Buffer *buf, void **key, size_t *keylen, void **data, size_t *datalen, CSN **csn )
  414. {
  415. int skip = 1;
  416. int rc = 0;
  417. do {
  418. *key = *data = NULL;
  419. *keylen = *datalen = 0;
  420. if ( buf->buf_record_ptr ) {
  421. DB_MULTIPLE_KEY_NEXT ( buf->buf_record_ptr, &buf->buf_data,
  422. *key, *keylen, *data, *datalen );
  423. }
  424. /*
  425. * We're done with the current buffer. Now load the next chunk.
  426. */
  427. if ( NULL == *key && CLC_STATE_READY == buf->buf_state ) {
  428. rc = clcache_load_buffer ( buf, NULL, DB_NEXT );
  429. if ( 0 == rc && buf->buf_record_ptr ) {
  430. DB_MULTIPLE_KEY_NEXT ( buf->buf_record_ptr, &buf->buf_data,
  431. *key, *keylen, *data, *datalen );
  432. }
  433. }
  434. /* Compare the new change to the local and remote RUVs */
  435. if ( NULL != *key ) {
  436. buf->buf_record_cnt++;
  437. csn_init_by_string ( buf->buf_current_csn, (char*)*key );
  438. skip = clcache_skip_change ( buf );
  439. if (skip) buf->buf_record_skipped++;
  440. }
  441. }
  442. while ( rc == 0 && *key && skip );
  443. if ( NULL == *key ) {
  444. *key = NULL;
  445. *csn = NULL;
  446. rc = DB_NOTFOUND;
  447. }
  448. else {
  449. *csn = buf->buf_current_csn;
  450. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  451. "load=%d rec=%d csn=%s\n",
  452. buf->buf_load_cnt, buf->buf_record_cnt, (char*)*key );
  453. }
  454. return rc;
  455. }
  456. static void
  457. clcache_refresh_consumer_maxcsns ( CLC_Buffer *buf )
  458. {
  459. int i;
  460. for ( i = 0; i < buf->buf_num_cscbs; i++ ) {
  461. csn_free(&buf->buf_cscbs[i]->consumer_maxcsn);
  462. ruv_get_largest_csn_for_replica (
  463. buf->buf_consumer_ruv,
  464. buf->buf_cscbs[i]->rid,
  465. &buf->buf_cscbs[i]->consumer_maxcsn );
  466. }
  467. }
  468. static int
  469. clcache_refresh_local_maxcsn ( const ruv_enum_data *rid_data, void *data )
  470. {
  471. struct clc_buffer *buf = (struct clc_buffer*) data;
  472. ReplicaId rid;
  473. int rc = 0;
  474. int i;
  475. rid = csn_get_replicaid ( rid_data->csn );
  476. /*
  477. * No need to create cscb for consumer's RID.
  478. * If RID==65535, the CSN is originated from a
  479. * legacy consumer. In this case the supplier
  480. * and the consumer may have the same RID.
  481. */
  482. if ( rid == buf->buf_consumer_rid && rid != MAX_REPLICA_ID )
  483. return rc;
  484. for ( i = 0; i < buf->buf_num_cscbs; i++ ) {
  485. if ( buf->buf_cscbs[i]->rid == rid )
  486. break;
  487. }
  488. if ( i >= buf->buf_num_cscbs ) {
  489. if( i + 1 > buf->buf_max_cscbs){
  490. buf->buf_cscbs = (struct csn_seq_ctrl_block **) slapi_ch_realloc((char *)buf->buf_cscbs,
  491. (i + 2) * sizeof(struct csn_seq_ctrl_block *));
  492. buf->buf_max_cscbs = i + 1;
  493. }
  494. buf->buf_cscbs[i] = clcache_new_cscb();
  495. if ( buf->buf_cscbs[i] == NULL ) {
  496. return -1;
  497. }
  498. buf->buf_cscbs[i]->rid = rid;
  499. buf->buf_num_cscbs++;
  500. }
  501. csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->local_maxcsn, rid_data->csn );
  502. if ( buf->buf_cscbs[i]->consumer_maxcsn &&
  503. csn_compare (buf->buf_cscbs[i]->consumer_maxcsn, rid_data->csn) >= 0 ) {
  504. /* No change need to be sent for this RID */
  505. buf->buf_cscbs[i]->state = CLC_STATE_UP_TO_DATE;
  506. }
  507. return rc;
  508. }
  509. static int
  510. clcache_refresh_local_maxcsns ( CLC_Buffer *buf )
  511. {
  512. int i;
  513. for ( i = 0; i < buf->buf_num_cscbs; i++ ) {
  514. csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->prev_local_maxcsn,
  515. buf->buf_cscbs[i]->local_maxcsn );
  516. }
  517. return ruv_enumerate_elements ( buf->buf_local_ruv, clcache_refresh_local_maxcsn, buf );
  518. }
  519. /*
  520. * Algorithm:
  521. *
  522. * 1. Snapshot local RUVs;
  523. * 2. Load buffer;
  524. * 3. Send to the consumer only those CSNs that are covered
  525. * by the RUVs snapshot taken in the first step;
  526. * All CSNs that are covered by the RUVs snapshot taken in the
  527. * first step are guaranteed in consecutive order for the respected
  528. * RIDs because of the the CSN pending list control;
  529. * A CSN that is not covered by the RUVs snapshot may be out of order
  530. * since it is possible that a smaller CSN might not have committed
  531. * yet by the time the buffer was loaded.
  532. * 4. Determine anchorcsn for each RID:
  533. *
  534. * Case| Local vs. Buffer | New Local | Next
  535. * | MaxCSN MaxCSN | MaxCSN | Anchor-CSN
  536. * ----+-------------------+-----------+----------------
  537. * 1 | Cl >= Cb | * | Cb
  538. * 2 | Cl < Cb | Cl | Cb
  539. * 3 | Cl < Cb | Cl2 | Cl
  540. *
  541. * 5. Determine anchorcsn for next load:
  542. * Anchor-CSN = min { all Next-Anchor-CSN, Buffer-MaxCSN }
  543. */
  544. static int
  545. clcache_adjust_anchorcsn ( CLC_Buffer *buf )
  546. {
  547. PRBool hasChange = PR_FALSE;
  548. struct csn_seq_ctrl_block *cscb;
  549. int i;
  550. if ( buf->buf_state == CLC_STATE_READY ) {
  551. for ( i = 0; i < buf->buf_num_cscbs; i++ ) {
  552. cscb = buf->buf_cscbs[i];
  553. if ( cscb->state == CLC_STATE_UP_TO_DATE )
  554. continue;
  555. /*
  556. * Case 3 unsafe ruv change: next buffer load should start
  557. * from where the maxcsn in the old ruv was. Since each
  558. * cscb has remembered the maxcsn sent to the consumer,
  559. * CSNs that may be loaded again could easily be skipped.
  560. */
  561. if ( cscb->prev_local_maxcsn &&
  562. csn_compare (cscb->prev_local_maxcsn, buf->buf_current_csn) < 0 &&
  563. csn_compare (cscb->local_maxcsn, cscb->prev_local_maxcsn) != 0 ) {
  564. hasChange = PR_TRUE;
  565. cscb->state = CLC_STATE_READY;
  566. csn_init_by_csn ( buf->buf_current_csn, cscb->prev_local_maxcsn );
  567. csn_as_string ( cscb->prev_local_maxcsn, 0, (char*)buf->buf_key.data );
  568. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  569. "adjust anchor csn upon %s\n",
  570. ( cscb->state == CLC_STATE_CSN_GT_RUV ? "out of sequence csn" : "unsafe ruv change") );
  571. continue;
  572. }
  573. /*
  574. * check if there are still changes to send for this RID
  575. * Assume we had compared the local maxcsn and the consumer
  576. * max csn before this function was called and hence the
  577. * cscb->state had been set accordingly.
  578. */
  579. if ( hasChange == PR_FALSE &&
  580. csn_compare (cscb->local_maxcsn, buf->buf_current_csn) > 0 ) {
  581. hasChange = PR_TRUE;
  582. }
  583. }
  584. }
  585. if ( !hasChange ) {
  586. buf->buf_state = CLC_STATE_DONE;
  587. }
  588. return buf->buf_state;
  589. }
  590. static int
  591. clcache_skip_change ( CLC_Buffer *buf )
  592. {
  593. struct csn_seq_ctrl_block *cscb = NULL;
  594. ReplicaId rid;
  595. int skip = 1;
  596. int i;
  597. char buf_cur_csn_str[CSN_STRSIZE];
  598. char oth_csn_str[CSN_STRSIZE];
  599. do {
  600. rid = csn_get_replicaid ( buf->buf_current_csn );
  601. /*
  602. * Skip CSN that is originated from the consumer,
  603. * unless the CSN is newer than the maxcsn.
  604. * If RID==65535, the CSN is originated from a
  605. * legacy consumer. In this case the supplier
  606. * and the consumer may have the same RID.
  607. */
  608. if (rid == buf->buf_consumer_rid && rid != MAX_REPLICA_ID){
  609. CSN *cons_maxcsn = NULL;
  610. ruv_get_max_csn(buf->buf_consumer_ruv, &cons_maxcsn);
  611. if ( csn_compare ( buf->buf_current_csn, cons_maxcsn) > 0 ) {
  612. /*
  613. * The consumer must have been "restored" and needs this newer update.
  614. */
  615. skip = 0;
  616. } else if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
  617. csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
  618. csn_as_string(cons_maxcsn, 0, oth_csn_str);
  619. slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
  620. "Skipping update because the changelog buffer current csn [%s] is "
  621. "less than or equal to the consumer max csn [%s]\n",
  622. buf_cur_csn_str, oth_csn_str);
  623. buf->buf_skipped_csn_gt_cons_maxcsn++;
  624. }
  625. csn_free(&cons_maxcsn);
  626. break;
  627. }
  628. /* Skip helper entry (ENTRY_COUNT, PURGE_RUV and so on) */
  629. if ( cl5HelperEntry ( NULL, buf->buf_current_csn ) == PR_TRUE ) {
  630. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  631. "Skip helper entry type=%ld\n", csn_get_time( buf->buf_current_csn ));
  632. break;
  633. }
  634. /* Find csn sequence control block for the current rid */
  635. for (i = 0; i < buf->buf_num_cscbs && buf->buf_cscbs[i]->rid != rid; i++);
  636. /* Skip CSN whose RID is unknown to the local RUV snapshot */
  637. if ( i >= buf->buf_num_cscbs ) {
  638. if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
  639. csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
  640. slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
  641. "Skipping update because the changelog buffer current csn [%s] rid "
  642. "[%d] is not in the list of changelog csn buffers (length %d)\n",
  643. buf_cur_csn_str, rid, buf->buf_num_cscbs);
  644. }
  645. buf->buf_skipped_new_rid++;
  646. break;
  647. }
  648. cscb = buf->buf_cscbs[i];
  649. /* Skip if the consumer is already up-to-date for the RID */
  650. if ( cscb->state == CLC_STATE_UP_TO_DATE ) {
  651. buf->buf_skipped_up_to_date++;
  652. break;
  653. }
  654. /* Skip CSN whose preceedents are not covered by local RUV snapshot */
  655. if ( cscb->state == CLC_STATE_CSN_GT_RUV ) {
  656. buf->buf_skipped_csn_gt_ruv++;
  657. break;
  658. }
  659. /* Skip CSNs already covered by consumer RUV */
  660. if ( cscb->consumer_maxcsn &&
  661. csn_compare ( buf->buf_current_csn, cscb->consumer_maxcsn ) <= 0 ) {
  662. buf->buf_skipped_csn_covered++;
  663. break;
  664. }
  665. /* Send CSNs that are covered by the local RUV snapshot */
  666. if ( csn_compare ( buf->buf_current_csn, cscb->local_maxcsn ) <= 0 ) {
  667. skip = 0;
  668. csn_dup_or_init_by_csn ( &cscb->consumer_maxcsn, buf->buf_current_csn );
  669. break;
  670. }
  671. /*
  672. * Promote the local maxcsn to its next neighbor
  673. * to keep the current session going. Skip if we
  674. * are not sure if current_csn is the neighbor.
  675. */
  676. if ( csn_time_difference(buf->buf_current_csn, cscb->local_maxcsn) == 0 &&
  677. (csn_get_seqnum(buf->buf_current_csn) ==
  678. csn_get_seqnum(cscb->local_maxcsn) + 1) )
  679. {
  680. csn_init_by_csn ( cscb->local_maxcsn, buf->buf_current_csn );
  681. if(cscb->consumer_maxcsn){
  682. csn_init_by_csn ( cscb->consumer_maxcsn, buf->buf_current_csn );
  683. }
  684. skip = 0;
  685. break;
  686. }
  687. /* Skip CSNs not covered by local RUV snapshot */
  688. cscb->state = CLC_STATE_CSN_GT_RUV;
  689. buf->buf_skipped_csn_gt_ruv++;
  690. } while (0);
  691. #ifdef DEBUG
  692. if (skip && cscb) {
  693. char consumer[24] = {'\0'};
  694. char local[24] = {'\0'};
  695. char current[24] = {'\0'};
  696. if ( cscb->consumer_maxcsn )
  697. csn_as_string ( cscb->consumer_maxcsn, PR_FALSE, consumer );
  698. if ( cscb->local_maxcsn )
  699. csn_as_string ( cscb->local_maxcsn, PR_FALSE, local );
  700. csn_as_string ( buf->buf_current_csn, PR_FALSE, current );
  701. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  702. "Skip %s consumer=%s local=%s\n", current, consumer, local );
  703. }
  704. #endif
  705. return skip;
  706. }
  707. static struct csn_seq_ctrl_block *
  708. clcache_new_cscb ()
  709. {
  710. struct csn_seq_ctrl_block *cscb;
  711. cscb = (struct csn_seq_ctrl_block *) slapi_ch_calloc ( 1, sizeof (struct csn_seq_ctrl_block) );
  712. if (cscb == NULL) {
  713. slapi_log_error ( SLAPI_LOG_FATAL, NULL, "clcache: malloc failure\n" );
  714. }
  715. return cscb;
  716. }
  717. static void
  718. clcache_free_cscb ( struct csn_seq_ctrl_block ** cscb )
  719. {
  720. csn_free ( & (*cscb)->consumer_maxcsn );
  721. csn_free ( & (*cscb)->local_maxcsn );
  722. csn_free ( & (*cscb)->prev_local_maxcsn );
  723. slapi_ch_free ( (void **) cscb );
  724. }
  725. /*
  726. * Allocate and initialize a new buffer
  727. * It is called when there is a request for a buffer while
  728. * buffer free list is empty.
  729. */
  730. static CLC_Buffer *
  731. clcache_new_buffer ( ReplicaId consumer_rid )
  732. {
  733. CLC_Buffer *buf = NULL;
  734. int welldone = 0;
  735. do {
  736. buf = (CLC_Buffer*) slapi_ch_calloc (1, sizeof(CLC_Buffer));
  737. if ( NULL == buf )
  738. break;
  739. buf->buf_key.flags = DB_DBT_USERMEM;
  740. buf->buf_key.ulen = CSN_STRSIZE + 1;
  741. buf->buf_key.size = CSN_STRSIZE;
  742. buf->buf_key.data = slapi_ch_calloc( 1, buf->buf_key.ulen );
  743. if ( NULL == buf->buf_key.data )
  744. break;
  745. buf->buf_data.flags = DB_DBT_USERMEM;
  746. buf->buf_data.ulen = _pool->pl_buffer_default_pages * DEFAULT_CLC_BUFFER_PAGE_SIZE;
  747. buf->buf_data.data = slapi_ch_malloc( buf->buf_data.ulen );
  748. if ( NULL == buf->buf_data.data )
  749. break;
  750. if ( NULL == ( buf->buf_current_csn = csn_new()) )
  751. break;
  752. buf->buf_state = CLC_STATE_READY;
  753. buf->buf_agmt_name = get_thread_private_agmtname();
  754. buf->buf_consumer_rid = consumer_rid;
  755. buf->buf_num_cscbs = 0;
  756. buf->buf_max_cscbs = MAX_NUM_OF_MASTERS;
  757. buf->buf_cscbs = (struct csn_seq_ctrl_block **) slapi_ch_calloc(MAX_NUM_OF_MASTERS + 1,
  758. sizeof(struct csn_seq_ctrl_block *));
  759. welldone = 1;
  760. } while (0);
  761. if ( !welldone ) {
  762. clcache_delete_buffer ( &buf );
  763. }
  764. return buf;
  765. }
  766. /*
  767. * Deallocates a buffer.
  768. * It is called when a buffer is returned to the buffer pool
  769. * and the pool size is over the limit.
  770. */
  771. static void
  772. clcache_delete_buffer ( CLC_Buffer **buf )
  773. {
  774. if ( buf && *buf ) {
  775. slapi_ch_free (&( (*buf)->buf_key.data ));
  776. slapi_ch_free (&( (*buf)->buf_data.data ));
  777. csn_free (&( (*buf)->buf_current_csn ));
  778. csn_free (&( (*buf)->buf_missing_csn ));
  779. csn_free (&( (*buf)->buf_prev_missing_csn ));
  780. slapi_ch_free ( (void **) buf );
  781. }
  782. }
  783. static CLC_Busy_List *
  784. clcache_new_busy_list ()
  785. {
  786. CLC_Busy_List *bl;
  787. int welldone = 0;
  788. do {
  789. if ( NULL == (bl = ( CLC_Busy_List* ) slapi_ch_calloc (1, sizeof(CLC_Busy_List)) ))
  790. break;
  791. if ( NULL == (bl->bl_lock = PR_NewLock ()) )
  792. break;
  793. /*
  794. if ( NULL == (bl->bl_max_csn = csn_new ()) )
  795. break;
  796. */
  797. welldone = 1;
  798. }
  799. while (0);
  800. if ( !welldone ) {
  801. clcache_delete_busy_list ( &bl );
  802. }
  803. return bl;
  804. }
  805. static void
  806. clcache_delete_busy_list ( CLC_Busy_List **bl )
  807. {
  808. if ( bl && *bl ) {
  809. CLC_Buffer *buf = NULL;
  810. if ( (*bl)->bl_lock ) {
  811. PR_Lock ( (*bl)->bl_lock );
  812. }
  813. buf = (*bl)->bl_buffers;
  814. while (buf) {
  815. CLC_Buffer *next = buf->buf_next;
  816. clcache_delete_buffer(&buf);
  817. buf = next;
  818. }
  819. (*bl)->bl_buffers = NULL;
  820. (*bl)->bl_db = NULL;
  821. if ( (*bl)->bl_lock ) {
  822. PR_Unlock ( (*bl)->bl_lock );
  823. PR_DestroyLock ( (*bl)->bl_lock );
  824. (*bl)->bl_lock = NULL;
  825. }
  826. /* csn_free (&( (*bl)->bl_max_csn )); */
  827. slapi_ch_free ( (void **) bl );
  828. }
  829. }
  830. static int
  831. clcache_enqueue_busy_list ( DB *db, CLC_Buffer *buf )
  832. {
  833. CLC_Busy_List *bl;
  834. int rc = 0;
  835. slapi_rwlock_rdlock ( _pool->pl_lock );
  836. for ( bl = _pool->pl_busy_lists; bl && bl->bl_db != db; bl = bl->bl_next );
  837. slapi_rwlock_unlock ( _pool->pl_lock );
  838. if ( NULL == bl ) {
  839. if ( NULL == ( bl = clcache_new_busy_list ()) ) {
  840. rc = CL5_MEMORY_ERROR;
  841. }
  842. else {
  843. slapi_rwlock_wrlock ( _pool->pl_lock );
  844. bl->bl_db = db;
  845. bl->bl_next = _pool->pl_busy_lists;
  846. _pool->pl_busy_lists = bl;
  847. slapi_rwlock_unlock ( _pool->pl_lock );
  848. }
  849. }
  850. if ( NULL != bl ) {
  851. PR_Lock ( bl->bl_lock );
  852. buf->buf_busy_list = bl;
  853. buf->buf_next = bl->bl_buffers;
  854. bl->bl_buffers = buf;
  855. PR_Unlock ( bl->bl_lock );
  856. }
  857. return rc;
  858. }
  859. static int
  860. clcache_open_cursor ( DB_TXN *txn, CLC_Buffer *buf, DBC **cursor )
  861. {
  862. int rc;
  863. rc = buf->buf_busy_list->bl_db->cursor ( buf->buf_busy_list->bl_db, txn, cursor, 0 );
  864. if ( rc != 0 ) {
  865. slapi_log_error ( SLAPI_LOG_FATAL, get_thread_private_agmtname(),
  866. "clcache: failed to open cursor; db error - %d %s\n",
  867. rc, db_strerror(rc));
  868. }
  869. return rc;
  870. }
  871. static int
  872. clcache_cursor_get ( DBC *cursor, CLC_Buffer *buf, int flag )
  873. {
  874. int rc;
  875. rc = cursor->c_get ( cursor,
  876. & buf->buf_key,
  877. & buf->buf_data,
  878. buf->buf_load_flag | flag );
  879. if ( DB_BUFFER_SMALL == rc ) {
  880. /*
  881. * The record takes more space than the current size of the
  882. * buffer. Fortunately, buf->buf_data.size has been set by
  883. * c_get() to the actual data size needed. So we can
  884. * reallocate the data buffer and try to read again.
  885. */
  886. buf->buf_data.ulen = ( buf->buf_data.size / DEFAULT_CLC_BUFFER_PAGE_SIZE + 1 ) * DEFAULT_CLC_BUFFER_PAGE_SIZE;
  887. buf->buf_data.data = slapi_ch_realloc ( buf->buf_data.data, buf->buf_data.ulen );
  888. if ( buf->buf_data.data != NULL ) {
  889. rc = cursor->c_get ( cursor,
  890. &( buf->buf_key ),
  891. &( buf->buf_data ),
  892. buf->buf_load_flag | flag );
  893. slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name,
  894. "clcache: (%d | %d) buf key len %d reallocated and retry returns %d\n", buf->buf_load_flag, flag, buf->buf_key.size, rc );
  895. }
  896. }
  897. switch ( rc ) {
  898. case EINVAL:
  899. slapi_log_error ( SLAPI_LOG_FATAL, buf->buf_agmt_name,
  900. "clcache_cursor_get: invalid parameter\n" );
  901. break;
  902. case DB_BUFFER_SMALL:
  903. slapi_log_error ( SLAPI_LOG_FATAL, buf->buf_agmt_name,
  904. "clcache_cursor_get: can't allocate %u bytes\n", buf->buf_data.ulen );
  905. break;
  906. default:
  907. break;
  908. }
  909. return rc;
  910. }
  911. static void
  912. csn_dup_or_init_by_csn ( CSN **csn1, CSN *csn2 )
  913. {
  914. if ( *csn1 == NULL )
  915. *csn1 = csn_new();
  916. csn_init_by_csn ( *csn1, csn2 );
  917. }
  918. void
  919. clcache_destroy()
  920. {
  921. if (_pool) {
  922. CLC_Busy_List *bl = NULL;
  923. if (_pool->pl_lock) {
  924. slapi_rwlock_wrlock (_pool->pl_lock);
  925. }
  926. bl = _pool->pl_busy_lists;
  927. while (bl) {
  928. CLC_Busy_List *next = bl->bl_next;
  929. clcache_delete_busy_list(&bl);
  930. bl = next;
  931. }
  932. _pool->pl_busy_lists = NULL;
  933. _pool->pl_dbenv = NULL;
  934. if (_pool->pl_lock) {
  935. slapi_rwlock_unlock(_pool->pl_lock);
  936. slapi_destroy_rwlock(_pool->pl_lock);
  937. _pool->pl_lock = NULL;
  938. }
  939. slapi_ch_free ( (void **) &_pool );
  940. }
  941. }