Jelajahi Sumber

Ticket 564 - Is ldbm_txn_ruv_modify_context still required

Bug description

	Ticket 47358 introduces backend optimizations using a configuration switch.
	By default all optimisations are disabled.
	One optimisation is related to changelog and DB RUV being written back to disk in the same txn with the same value.
	This is a consequence of bug fix 543633. Although the write back of the DB RUV brings a little overhead
	it is not strictly required. In fact since 633168, changelog RUV is in sync with the data store. So
	changelog RUV and DB RUV have the same values.

Fix Description:

	This fix enables by default the ticket 47358 optimization BACKEND_OPT_NO_RUV_UPDATE that does
	not compute/update the database RUV.
	Two consequences of this are:
		1. In case of disorderly shutdown (crash) and after recovery, we need to rebuild the DB RUV
		2. Provide a mechanism to monitor replication status, as the DB RUV would lag behind
		   the current update status
	1. is achieved by using the changelog RUV (in sync with the data store) to rebuild the database RUV.
	2. was done with ticket https://fedorahosted.org/389/ticket/47350

https://fedorahosted.org/389/ticket/564

Reviewed by: Rich, Ludwig (thanks to you both !)

Platforms tested: fedora 17

Flag Day: no

Doc impact: no
Thierry bordaz (tbordaz) 12 tahun lalu
induk
melakukan
7f5268f6f4

+ 44 - 29
ldap/servers/plugins/replication/repl5_replica.c

@@ -1494,41 +1494,56 @@ int replica_check_for_data_reload (Replica *r, void *arg)
 			 * sessions.
 			 */
 
-            rc = ruv_compare_ruv(upper_bound_ruv, "changelog max RUV", r_ruv, "database RUV", 0, SLAPI_LOG_FATAL);
-            if (RUV_COMP_IS_FATAL(rc))
-            {
-                /* create a temporary replica object to conform to the interface */
-                r_obj = object_new (r, NULL);
-
-                /* We can't use existing changelog - remove existing file */
+            if (slapi_disordely_shutdown(PR_FALSE)) {
                 slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: "
-                    "Warning: data for replica %s does not match the data in the changelog. "
-                    "Recreating the changelog file. "
-                    "This could affect replication with replica's consumers in which case the "
-                    "consumers should be reinitialized.\n",
+                    "Warning: disordely shutdown for replica %s. Check if DB RUV needs to be updated\n",
                     slapi_sdn_get_dn(r->repl_root));
+                
+                if (ruv_covers_ruv(upper_bound_ruv, r_ruv) && !ruv_covers_ruv(r_ruv, upper_bound_ruv)) {
+                    /*
+                     * The Changelog RUV is ahead of the RUV in the DB.
+                     * RUV DB was likely not flushed on disk.
+                     */
+
+                    ruv_force_csn_update_from_ruv(upper_bound_ruv, r_ruv, 
+                            "Force update of database RUV (from CL RUV) -> ", SLAPI_LOG_FATAL);
+                    replica_set_ruv_dirty(r);
+                }
+                
+            } else {
 
-                rc = cl5DeleteDBSync (r_obj);
+                rc = ruv_compare_ruv(upper_bound_ruv, "changelog max RUV", r_ruv, "database RUV", 0, SLAPI_LOG_FATAL);
+                if (RUV_COMP_IS_FATAL(rc)) {
+                    /* create a temporary replica object to conform to the interface */
+                    r_obj = object_new(r, NULL);
 
-                object_release (r_obj);
+                    /* We can't use existing changelog - remove existing file */
+                    slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: "
+                            "Warning: data for replica %s does not match the data in the changelog. "
+                            "Recreating the changelog file. "
+                            "This could affect replication with replica's consumers in which case the "
+                            "consumers should be reinitialized.\n",
+                            slapi_sdn_get_dn(r->repl_root));
 
-                if (rc == CL5_SUCCESS)
-                {
-                    /* log changes to mark starting point for replication */
-                    rc = replica_log_ruv_elements (r);
+                    rc = cl5DeleteDBSync(r_obj);
+
+                    object_release(r_obj);
+
+                    if (rc == CL5_SUCCESS) {
+                        /* log changes to mark starting point for replication */
+                        rc = replica_log_ruv_elements(r);
+                    }
+                } else if (rc) {
+                    slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: "
+                            "Warning: for replica %s there were some differences between the changelog max RUV and the "
+                            "database RUV.  If there are obsolete elements in the database RUV, you "
+                            "should remove them using the CLEANALLRUV task.  If they are not obsolete, "
+                            "you should check their status to see why there are no changes from those "
+                            "servers in the changelog.\n",
+                            slapi_sdn_get_dn(r->repl_root));
+                    rc = 0;
                 }
-            }
-            else if (rc)
-            {
-                slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_check_for_data_reload: "
-                    "Warning: for replica %s there were some differences between the changelog max RUV and the "
-                    "database RUV.  If there are obsolete elements in the database RUV, you "
-                    "should remove them using the CLEANALLRUV task.  If they are not obsolete, "
-                    "you should check their status to see why there are no changes from those "
-                    "servers in the changelog.\n",
-                    slapi_sdn_get_dn(r->repl_root));
-                rc = 0;
-            }
+            } // slapi_disordely_shutdown
 
             object_release (ruv_obj);
         }

+ 31 - 0
ldap/servers/plugins/replication/repl5_ruv.c

@@ -2255,6 +2255,37 @@ ruv_is_newer (Object *sruvobj, Object *cruvobj)
 	return is_newer;
 }
 
+/*
+ * Called after a disorderly shutdown, when the database RUV (tgt_ruv) is behind the changelog RUV (src_ruv).
+ * For every src_ruv element whose CSN tgt_ruv does not cover, forces that CSN into tgt_ruv and logs it at logLevel, prefixed by msg.
+ */
+void
+ruv_force_csn_update_from_ruv(RUV *src_ruv, RUV *tgt_ruv, char *msg, int logLevel) {
+    RUVElement *replica = NULL;
+    char csnStr [CSN_STRSIZE];
+    int cookie;
+
+    slapi_rwlock_rdlock(src_ruv->lock); /* read-lock src_ruv while walking its elements */
+    
+    for (replica = dl_get_first(src_ruv->elements, &cookie);
+            NULL != replica;
+            replica = dl_get_next(src_ruv->elements, &cookie)) {
+        /* 
+         * If the DB RUV (tgt_ruv) does not yet cover this CL RUV (src_ruv)
+         * element's CSN, push that CSN into the DB RUV and log it.
+         */
+        if (!ruv_covers_csn(tgt_ruv, replica->csn)) {
+            ruv_force_csn_update(tgt_ruv, replica->csn);
+            csn_as_string(replica->csn, PR_FALSE, csnStr);
+            slapi_log_error(logLevel, repl_plugin_name, "%s %s\n",
+                    msg, csnStr);
+        }
+    }
+
+    slapi_rwlock_unlock(src_ruv->lock);
+    
+}
+
 void
 ruv_force_csn_update (RUV *ruv, CSN *csn)
 {

+ 1 - 0
ldap/servers/plugins/replication/repl5_ruv.h

@@ -144,6 +144,7 @@ int ruv_local_contains_supplier(RUV *ruv, ReplicaId rid);
 PRBool ruv_has_csns(const RUV *ruv);
 PRBool ruv_has_both_csns(const RUV *ruv);
 PRBool ruv_is_newer (Object *sruv, Object *cruv);
+void ruv_force_csn_update_from_ruv(RUV *src_ruv, RUV *tgt_ruv, char *msg, int logLevel);
 void ruv_force_csn_update (RUV *ruv, CSN *csn);
 void ruv_insert_dummy_min_csn (RUV *ruv);
 int ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const char *ruv2name, int strict, int loglevel);

+ 1 - 0
ldap/servers/slapd/back-ldbm/dblayer.c

@@ -1643,6 +1643,7 @@ dblayer_start(struct ldbminfo *li, int dbmode)
                 LDAPDebug(LDAP_DEBUG_ANY, "Detected Disorderly Shutdown last "
                           "time Directory Server was running, recovering "
                           "database.\n", 0, 0, 0);
+                slapi_disordely_shutdown(PR_TRUE);
             }
         }
         switch  (dbmode&DBLAYER_RESTORE_MASK) {

+ 1 - 1
ldap/servers/slapd/back-ldbm/ldbm_config.c

@@ -1436,7 +1436,7 @@ static config_info ldbm_config[] = {
     {CONFIG_PAGEDLOOKTHROUGHLIMIT, CONFIG_TYPE_INT, "0", &ldbm_config_pagedlookthroughlimit_get, &ldbm_config_pagedlookthroughlimit_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
     {CONFIG_PAGEDIDLISTSCANLIMIT, CONFIG_TYPE_INT, "0", &ldbm_config_pagedallidsthreshold_get, &ldbm_config_pagedallidsthreshold_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
     {CONFIG_RANGELOOKTHROUGHLIMIT, CONFIG_TYPE_INT, "5000", &ldbm_config_rangelookthroughlimit_get, &ldbm_config_rangelookthroughlimit_set, CONFIG_FLAG_ALWAYS_SHOW|CONFIG_FLAG_ALLOW_RUNNING_CHANGE},
-    {CONFIG_BACKEND_OPT_LEVEL, CONFIG_TYPE_INT, "0", &ldbm_config_backend_opt_level_get, &ldbm_config_backend_opt_level_set, CONFIG_FLAG_ALWAYS_SHOW},
+    {CONFIG_BACKEND_OPT_LEVEL, CONFIG_TYPE_INT, "1", &ldbm_config_backend_opt_level_get, &ldbm_config_backend_opt_level_set, CONFIG_FLAG_ALWAYS_SHOW},
     {NULL, 0, NULL, NULL, NULL, 0}
 };
 

+ 11 - 0
ldap/servers/slapd/plugin.c

@@ -3223,3 +3223,14 @@ slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb){
 
 	ptd_set_special_data(&(config->plgc_bind_subtrees), PLGC_DATA_BIND_ROOT);
 }
+
+PRBool
+slapi_disordely_shutdown(PRBool set) /* set true: record a disorderly shutdown; set false: query only */
+{
+    static PRBool is_disordely_shutdown = PR_FALSE; /* latched flag: this function sets it but never clears it */
+    
+    if (set) {
+        is_disordely_shutdown = PR_TRUE;
+    }
+    return (is_disordely_shutdown); /* current value of the flag, after any update above */
+}

+ 2 - 0
ldap/servers/slapd/slapi-plugin.h

@@ -7362,6 +7362,8 @@ uint64_t slapi_str_to_u64(const char *s);
 
 void slapi_set_plugin_open_rootdn_bind(Slapi_PBlock *pb);
 
+PRBool slapi_disordely_shutdown(PRBool set);
+
 /* 
  * Public entry extension getter/setter functions
  *