瀏覽代碼

Ticket 47871 - 389-ds-base-1.3.2.21-1.fc20 crashed over the weekend

Bug Description:
	Retro-changelog trimming uses a slapi_search_internal_callback_pb to check
	if the oldest record needs to be trim.
	The callback function handle_getchangerecord_search stores the entry
	in the callback structure.
	Later it evaluates the entry to check its changetime.
	This is not valid, as the entry is only valid inside the callback.

Fix Description:
	The callback function 'handle_getchangetime_search' evaluate
	the changetime and returns it in the callback struct.

https://fedorahosted.org/389/ticket/47871

Reviewed by: Noriko Hosoi (Thanks Noriko !)

Platforms tested: F17/F20

Flag Day: no

Doc impact: no
Thierry bordaz (tbordaz) 11 年之前
父節點
當前提交
495151b730

+ 306 - 0
dirsrvtests/tickets/ticket47871_test.py

@@ -0,0 +1,306 @@
+'''
+Created on Nov 7, 2013
+
+@author: tbordaz
+'''
+import os
+import sys
+import time
+import ldap
+import logging
+import socket
+import time
+import logging
+import pytest
+import re
+from lib389 import DirSrv, Entry, tools
+from lib389.tools import DirSrvTools
+from lib389._constants import *
+from lib389.properties import *
+from constants import *
+
+logging.getLogger(__name__).setLevel(logging.DEBUG)
+log = logging.getLogger(__name__)
+
+installation_prefix = None
+
+TEST_REPL_DN = "cn=test_repl, %s" % SUFFIX
+ENTRY_DN = "cn=test_entry, %s" % SUFFIX
+
+OTHER_NAME = 'other_entry'
+MAX_OTHERS = 10
+
+ATTRIBUTES = [ 'street', 'countryName', 'description', 'postalAddress', 'postalCode', 'title', 'l', 'roomNumber' ]
+
+class TopologyMasterConsumer(object):
+    def __init__(self, master, consumer):
+        master.open()
+        self.master = master
+        
+        consumer.open()
+        self.consumer = consumer
+
+    def __repr__(self):
+            return "Master[%s] -> Consumer[%s" % (self.master, self.consumer)
+
+
[email protected](scope="module")
+def topology(request):
+    '''
+        This fixture is used to create a replicated topology for the 'module'.
+        The replicated topology is MASTER -> Consumer.
+        At the beginning, It may exists a master instance and/or a consumer instance.
+        It may also exists a backup for the master and/or the consumer.
+    
+        Principle:
+            If master instance exists:
+                restart it
+            If consumer instance exists:
+                restart it
+            If backup of master AND backup of consumer exists:
+                create or rebind to consumer
+                create or rebind to master
+
+                restore master   from backup
+                restore consumer from backup
+            else:
+                Cleanup everything
+                    remove instances
+                    remove backups
+                Create instances
+                Initialize replication
+                Create backups
+    '''
+    global installation_prefix
+
+    if installation_prefix:
+        args_instance[SER_DEPLOYED_DIR] = installation_prefix
+        
+    master   = DirSrv(verbose=False)
+    consumer = DirSrv(verbose=False)
+    
+    # Args for the master instance
+    args_instance[SER_HOST] = HOST_MASTER
+    args_instance[SER_PORT] = PORT_MASTER
+    args_instance[SER_SERVERID_PROP] = SERVERID_MASTER
+    args_master = args_instance.copy()
+    master.allocate(args_master)
+    
+    # Args for the consumer instance
+    args_instance[SER_HOST] = HOST_CONSUMER
+    args_instance[SER_PORT] = PORT_CONSUMER
+    args_instance[SER_SERVERID_PROP] = SERVERID_CONSUMER
+    args_consumer = args_instance.copy()
+    consumer.allocate(args_consumer)
+
+    
+    # Get the status of the backups
+    backup_master   = master.checkBackupFS()
+    backup_consumer = consumer.checkBackupFS()
+    
+    # Get the status of the instance and restart it if it exists
+    instance_master   = master.exists()
+    if instance_master:
+        master.stop(timeout=10)
+        master.start(timeout=10)
+        
+    instance_consumer = consumer.exists()
+    if instance_consumer:
+        consumer.stop(timeout=10)
+        consumer.start(timeout=10)
+    
+    if backup_master and backup_consumer:
+        # The backups exist, assuming they are correct 
+        # we just re-init the instances with them
+        if not instance_master:
+            master.create()
+            # Used to retrieve configuration information (dbdir, confdir...)
+            master.open()
+        
+        if not instance_consumer:
+            consumer.create()
+            # Used to retrieve configuration information (dbdir, confdir...)
+            consumer.open()
+        
+        # restore master from backup
+        master.stop(timeout=10)
+        master.restoreFS(backup_master)
+        master.start(timeout=10)
+        
+        # restore consumer from backup
+        consumer.stop(timeout=10)
+        consumer.restoreFS(backup_consumer)
+        consumer.start(timeout=10)
+    else:
+        # We should be here only in two conditions
+        #      - This is the first time a test involve master-consumer
+        #        so we need to create everything
+        #      - Something weird happened (instance/backup destroyed)
+        #        so we discard everything and recreate all
+        
+        # Remove all the backups. So even if we have a specific backup file
+        # (e.g backup_master) we clear all backups that an instance my have created
+        if backup_master:
+            master.clearBackupFS()
+        if backup_consumer:
+            consumer.clearBackupFS()
+        
+        # Remove all the instances
+        if instance_master:
+            master.delete()
+        if instance_consumer:
+            consumer.delete()
+                        
+        # Create the instances
+        master.create()
+        master.open()
+        consumer.create()
+        consumer.open()
+    
+        # 
+        # Now prepare the Master-Consumer topology
+        #
+        # First Enable replication
+        master.replica.enableReplication(suffix=SUFFIX, role=REPLICAROLE_MASTER, replicaId=REPLICAID_MASTER)
+        consumer.replica.enableReplication(suffix=SUFFIX, role=REPLICAROLE_CONSUMER)
+        
+        # Initialize the supplier->consumer
+        
+        properties = {RA_NAME:      r'meTo_$host:$port',
+                      RA_BINDDN:    defaultProperties[REPLICATION_BIND_DN],
+                      RA_BINDPW:    defaultProperties[REPLICATION_BIND_PW],
+                      RA_METHOD:    defaultProperties[REPLICATION_BIND_METHOD],
+                      RA_TRANSPORT_PROT: defaultProperties[REPLICATION_TRANSPORT]}
+        repl_agreement = master.agreement.create(suffix=SUFFIX, host=consumer.host, port=consumer.port, properties=properties)
+    
+        if not repl_agreement:
+            log.fatal("Fail to create a replica agreement")
+            sys.exit(1)
+            
+        log.debug("%s created" % repl_agreement)
+        master.agreement.init(SUFFIX, HOST_CONSUMER, PORT_CONSUMER)
+        master.waitForReplInit(repl_agreement)
+        
+        # Check replication is working fine
+        master.add_s(Entry((TEST_REPL_DN, {
+                                                'objectclass': "top person".split(),
+                                                'sn': 'test_repl',
+                                                'cn': 'test_repl'})))
+        loop = 0
+        while loop <= 10:
+            try:
+                ent = consumer.getEntry(TEST_REPL_DN, ldap.SCOPE_BASE, "(objectclass=*)")
+                break
+            except ldap.NO_SUCH_OBJECT:
+                time.sleep(1)
+                loop += 1
+                
+        # Time to create the backups
+        master.stop(timeout=10)
+        master.backupfile = master.backupFS()
+        master.start(timeout=10)
+        
+        consumer.stop(timeout=10)
+        consumer.backupfile = consumer.backupFS()
+        consumer.start(timeout=10)
+
+    # clear the tmp directory
+    master.clearTmpDir(__file__)
+
+    # 
+    # Here we have two instances master and consumer
+    # with replication working. Either coming from a backup recovery
+    # or from a fresh (re)init
+    # Time to return the topology
+    return TopologyMasterConsumer(master, consumer)
+
+
+def test_ticket47871_init(topology):
+    """ 
+        Initialize the test environment
+    """
+    topology.master.plugins.enable(name=PLUGIN_RETRO_CHANGELOG)
+    mod = [(ldap.MOD_REPLACE, 'nsslapd-changelogmaxage', "10s"), # 10 second triming
+           (ldap.MOD_REPLACE, 'nsslapd-changelog-trim-interval', "5s")] 
+    topology.master.modify_s("cn=%s,%s" % (PLUGIN_RETRO_CHANGELOG, DN_PLUGIN), mod)
+    #topology.master.plugins.enable(name=PLUGIN_MEMBER_OF)
+    #topology.master.plugins.enable(name=PLUGIN_REFER_INTEGRITY)
+    topology.master.stop(timeout=10)
+    topology.master.start(timeout=10)
+    
+    topology.master.log.info("test_ticket47871_init topology %r" % (topology))
+    # the test case will check if a warning message is logged in the 
+    # error log of the supplier
+    topology.master.errorlog_file = open(topology.master.errlog, "r")
+    
+    
+def test_ticket47871_1(topology):
+    '''
+    ADD entries and check they are all in the retrocl
+    '''
+    # add dummy entries
+    for cpt in range(MAX_OTHERS):
+        name = "%s%d" % (OTHER_NAME, cpt)
+        topology.master.add_s(Entry(("cn=%s,%s" % (name, SUFFIX), {
+                                            'objectclass': "top person".split(),
+                                            'sn': name,
+                                            'cn': name})))
+        
+    topology.master.log.info("test_ticket47871_init: %d entries ADDed %s[0..%d]" % (MAX_OTHERS, OTHER_NAME, MAX_OTHERS-1))
+    
+    # Check the number of entries in the retro changelog
+    time.sleep(1)
+    ents = topology.master.search_s(RETROCL_SUFFIX, ldap.SCOPE_ONELEVEL, "(objectclass=*)")
+    assert len(ents) == MAX_OTHERS
+    topology.master.log.info("Added entries are")
+    for ent in ents:
+        topology.master.log.info("%s" % ent.dn)
+
+def test_ticket47871_2(topology):
+    '''
+    Wait until there is just a last entries
+    '''
+    MAX_TRIES = 10
+    TRY_NO = 1
+    while TRY_NO <= MAX_TRIES:
+        time.sleep(6) # at least 1 trimming occurred
+        ents = topology.master.search_s(RETROCL_SUFFIX, ldap.SCOPE_ONELEVEL, "(objectclass=*)")
+        assert len(ents) <= MAX_OTHERS
+        topology.master.log.info("\nTry no %d it remains %d entries" % (TRY_NO, len(ents)))
+        for ent in ents:
+            topology.master.log.info("%s" % ent.dn)
+        if len(ents) > 1:
+            TRY_NO += 1
+        else:
+            break
+    assert TRY_NO <= MAX_TRIES
+    assert len(ents) <= 1
+        
+    
+
+def test_ticket47871_final(topology):
+    topology.master.stop(timeout=10)
+    topology.consumer.stop(timeout=10)
+
+def run_isolated():
+    '''
+        run_isolated is used to run these test cases independently of a test scheduler (xunit, py.test..)
+        To run isolated without py.test, you need to 
+            - edit this file and comment '@pytest.fixture' line before 'topology' function.
+            - set the installation prefix
+            - run this program
+    '''
+    global installation_prefix
+    installation_prefix =  None
+        
+    topo = topology(True)
+    test_ticket47871_init(topo)
+    test_ticket47871_1(topo)
+    test_ticket47871_2(topo)
+
+    test_ticket47871_final(topo)
+
+
+if __name__ == '__main__':
+    run_isolated()
+

+ 1 - 1
ldap/servers/plugins/retrocl/retrocl.h

@@ -62,7 +62,7 @@ typedef unsigned long changeNumber;
 typedef struct _cnum_result_t {
     int		crt_nentries;	/* number of entries returned from search */
     int		crt_err;	/* err returned from backend */
-    Slapi_Entry	*crt_entry;	/* The entry returned from the backend */
+    time_t	crt_time;	/* The changetime of the entry retrieved from the backend */
 } cnum_result_t;
 
 typedef struct _cnumRet {

+ 86 - 77
ldap/servers/plugins/retrocl/retrocl_trim.c

@@ -131,51 +131,68 @@ delete_changerecord( changeNumber cnum )
 }
 
 /*
- * Function: handle_getchangerecord_result
+ * Function: handle_getchangetime_result
  * Arguments: op - pointer to Operation struct for this operation
  *            err - error code returned from search
  * Returns: nothing
- * Description: result handler for get_changerecord().  Sets the crt_err
+ * Description: result handler for get_changetime().  Sets the crt_err
  *              field of the cnum_result_t struct to the error returned
  *              from the backend.
  */
 static void
-handle_getchangerecord_result( int err, void *callback_data )
+handle_getchangetime_result( int err, void *callback_data )
 {
     cnum_result_t *crt = callback_data;
 
     if ( crt == NULL ) {
 	slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME,
-		"handle_getchangerecord_result: callback_data NULL\n" );
+		"handle_getchangetime_result: callback_data NULL\n" );
     } else {
 	crt->crt_err = err;
     }
 }
 
 /*
- * Function: handle_getchangerecord_search
+ * Function: handle_getchangetime_search
  * Arguments: op - pointer to Operation struct for this operation
  *            e - entry returned by backend
  * Returns: 0 in all cases
- * Description: Search result operation handler for get_changerecord().
+ * Description: Search result operation handler for get_changetime().
  *              Sets fields in the cnum_result_t struct pointed to by
  *              op->o_handler_data.
  */
 static int
-handle_getchangerecord_search( Slapi_Entry *e, void *callback_data)
+handle_getchangetime_search( Slapi_Entry *e, void *callback_data)
 {
     cnum_result_t *crt = callback_data;
-
-    if ( crt == NULL ) {
-	slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME,
-		"handle_getchangerecord_search: op->o_handler_data NULL\n" );
-    } else if ( crt->crt_nentries > 0 ) {
-	/* only return the first entry, I guess */
-	slapi_log_error( SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME,
-		"handle_getchangerecord_search: multiple entries returned\n" );
+    int rc;
+    Slapi_Attr	*attr;
+
+    if (crt == NULL) {
+        slapi_log_error(SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME,
+                "handle_getchangetime_search: op->o_handler_data NULL\n");
+    } else if (crt->crt_nentries > 0) {
+        /* only return the first entry, I guess */
+        slapi_log_error(SLAPI_LOG_FATAL, RETROCL_PLUGIN_NAME,
+                "handle_getchangetime_search: multiple entries returned\n");
     } else {
-	crt->crt_nentries++;
-	crt->crt_entry = e;
+        crt->crt_nentries++;
+        crt->crt_time = 0;
+
+        if (NULL != e) {
+            Slapi_Value *sval = NULL;
+            const struct berval *val = NULL;
+            rc = slapi_entry_attr_find(e, attr_changetime, &attr);
+            /* Bug 624442: Logic checking for lack of timestamp was
+               reversed. */
+            if (0 != rc || slapi_attr_first_value(attr, &sval) == -1 ||
+                    (val = slapi_value_get_berval(sval)) == NULL ||
+                    NULL == val->bv_val) {
+                crt->crt_time = 0;
+            } else {
+                crt->crt_time = parse_localTime(val->bv_val);
+            }
+        }
     }
 
     return 0;
@@ -183,47 +200,52 @@ handle_getchangerecord_search( Slapi_Entry *e, void *callback_data)
 
 
 /*
- * Function: get_changerecord
+ * Function: get_changetime
  * Arguments: cnum - number of change record to retrieve
- * Returns: Pointer to an entry structure.  The caller must free the entry.
- *          If "err" is non-NULL, an error code is returned in the memory
- *          location it points to.
- * Description: Retrieve the change record entry whose number is "cnum".
+ * Returns: Taking the attr_changetime of the 'cnum' entry,
+ * it converts it into time_t (parse_localTime) and returns this time value.
+ * It returns 0 in the following cases:
+ *  - changerecord entry has not attr_changetime
+ *  - attr_changetime attribute has no value
+ *  - attr_changetime attribute value is empty
+ * 
+ * Description: Retrieve attr_changetime ("changetime") from a changerecord whose number is "cnum".
  */
-static Slapi_Entry *get_changerecord( changeNumber cnum, int *err )
+static time_t get_changetime( changeNumber cnum, int *err ) 
 {
-    cnum_result_t	crt, *crtp = &crt;
-    char		fstr[ 16 + CNUMSTR_LEN + 2 ];
+    cnum_result_t crt, *crtp = &crt;
+    char fstr[ 16 + CNUMSTR_LEN + 2 ];
     Slapi_PBlock *pb;
 
-    if ( cnum == 0UL ) {
-	if ( err != NULL ) {
-	    *err = LDAP_PARAM_ERROR;
-	}
-	return NULL;
+    if (cnum == 0UL) {
+        if (err != NULL) {
+            *err = LDAP_PARAM_ERROR;
+        }
+        return 0;
     }
-    crtp->crt_nentries = crtp->crt_err = 0; crtp->crt_entry = NULL;
-    PR_snprintf( fstr, sizeof(fstr), "%s=%ld", attr_changenumber, cnum );
-    
-    pb = slapi_pblock_new ();
-    slapi_search_internal_set_pb (pb, RETROCL_CHANGELOG_DN, 
-				  LDAP_SCOPE_SUBTREE, fstr,
-				  (char **)get_cleattrs(),  /* cast const */
-				  0 /* attrsonly */,
-				  NULL /* controls */, NULL /* uniqueid */,
-				  g_plg_identity[PLUGIN_RETROCL], 
-				  0 /* actions */);
-
-    slapi_search_internal_callback_pb (pb, crtp, 
-				       handle_getchangerecord_result, 
-				       handle_getchangerecord_search, NULL );
-    if ( err != NULL ) {
-	*err = crtp->crt_err;
+    crtp->crt_nentries = crtp->crt_err = 0;
+    crtp->crt_time = 0;
+    PR_snprintf(fstr, sizeof (fstr), "%s=%ld", attr_changenumber, cnum);
+
+    pb = slapi_pblock_new();
+    slapi_search_internal_set_pb(pb, RETROCL_CHANGELOG_DN,
+            LDAP_SCOPE_SUBTREE, fstr,
+            (char **) get_cleattrs(), /* cast const */
+            0 /* attrsonly */,
+            NULL /* controls */, NULL /* uniqueid */,
+            g_plg_identity[PLUGIN_RETROCL],
+            0 /* actions */);
+
+    slapi_search_internal_callback_pb(pb, crtp,
+            handle_getchangetime_result,
+            handle_getchangetime_search, NULL);
+    if (err != NULL) {
+        *err = crtp->crt_err;
     }
 
-    slapi_pblock_destroy (pb);
+    slapi_pblock_destroy(pb);
 
-    return( crtp->crt_entry );
+    return ( crtp->crt_time);
 }
 
 /*
@@ -241,7 +263,6 @@ static int trim_changelog(void)
     int			rc = 0, ldrc, done;
     time_t		now;
     changeNumber	first_in_log = 0, last_in_log = 0;
-    Slapi_Entry		*e = NULL;
     int			num_deleted = 0;
     int me,lt;
     
@@ -264,7 +285,6 @@ static int trim_changelog(void)
 
 	while ( !done && retrocl_trimming == 1 ) {
 	    int		did_delete;
-	    Slapi_Attr	*attr;
 
 	    did_delete = 0;
 	    first_in_log = retrocl_get_first_changenumber();
@@ -282,32 +302,21 @@ static int trim_changelog(void)
 		break;
 	    }
 	    if ( me > 0L ) {
-	        e = get_changerecord( first_in_log, &ldrc );
-		if ( NULL != e ) {
-		    Slapi_Value *sval = NULL;
-		    const struct berval *val = NULL;
-		    rc = slapi_entry_attr_find( e, attr_changetime, &attr );
-		    /* Bug 624442: Logic checking for lack of timestamp was
-		       reversed. */
-		    if ( 0 != rc  || slapi_attr_first_value( attr,&sval ) == -1 ||
-			    (val = slapi_value_get_berval ( sval )) == NULL ||
-				NULL == val->bv_val ) {
-			/* What to do if there's no timestamp? Just delete it. */
-		      retrocl_set_first_changenumber( first_in_log + 1 );
-		      ldrc = delete_changerecord( first_in_log );
-		      num_deleted++;
-		      did_delete = 1;
-		    } else {
-			time_t change_time = parse_localTime( val->bv_val );
-			if ( change_time + me < now ) {
-			    retrocl_set_first_changenumber( first_in_log + 1 );
-			    ldrc = delete_changerecord( first_in_log );
-			    num_deleted++;
-			    did_delete = 1;
-			}
-		    /* slapi_entry_free( e ); */ /* XXXggood should we be freeing this? */
-		    }
-		}
+            time_t change_time = get_changetime(first_in_log, &ldrc);
+            if (change_time) {
+                if ((change_time + me) < now) {
+                    retrocl_set_first_changenumber(first_in_log + 1);
+                    ldrc = delete_changerecord(first_in_log);
+                    num_deleted++;
+                    did_delete = 1;
+                }
+            } else {
+                /* What to do if there's no timestamp? Just delete it. */
+                retrocl_set_first_changenumber(first_in_log + 1);
+                ldrc = delete_changerecord(first_in_log);
+                num_deleted++;
+                did_delete = 1;
+             }   
 	    }
 	    if ( !did_delete ) {
 		done = 1;