Explorar o código

Ticket 47756 - Improve import logging and abort processing

Description:  When importing a heavily nested LDIF file, the numsobordinates
              and ancestorID creation can take a very long time.  The current
              logging doesn't state that it started working on this areas, and
              it can appear that the import is hung, when in fact is it working.

              Also, there are a few places, when an error occurs(e.g. out of diskspace)
              where the the import will hang, and aborting the import doesn't work,
              and shutting the server down will also hang, and then it has to be killed.

              Added the missing "abort" checks in some loops, and also improved
              the import logging(progress stats, etc) for numbsubordinates and
              ancestorid creation.

https://fedorahosted.org/389/ticket/47756

Revewed by: nhosoi(Thanks!)
Mark Reynolds %!s(int64=11) %!d(string=hai) anos
pai
achega
67ba61b0f3

+ 74 - 22
ldap/servers/slapd/back-ldbm/ancestorid.c

@@ -42,6 +42,7 @@
 
 
 #include "back-ldbm.h"
+#include "import.h"
 
 static char *sourcefile = LDBM_ANCESTORID_STR;
 
@@ -70,10 +71,10 @@ static void id2idl_hash_destroy(id2idl_hash *ht);
 static int ldbm_parentid(backend *be, DB_TXN *txn, ID id, ID *ppid);
 static int check_cache(id2idl_hash *ht);
 static IDList *idl_union_allids(backend *be, struct attrinfo *ai, IDList *a, IDList *b);
-static int ldbm_ancestorid_default_create_index(backend *be);
-static int ldbm_ancestorid_new_idl_create_index(backend *be);
+static int ldbm_ancestorid_default_create_index(backend *be, ImportJob *job);
+static int ldbm_ancestorid_new_idl_create_index(backend *be, ImportJob *job);
 
-static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl)
+static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl, ImportJob *job)
 {
     int ret = 0;
     DB *db    = NULL;
@@ -83,6 +84,8 @@ static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl)
     struct attrinfo *ai = NULL;
     IDList *nodes = NULL;
     ID id;
+    int started_progress_logging = 0;
+    int key_count = 0;
 
     /* Open the parentid index */
     ainfo_get( be, LDBM_PARENTID_STR, &ai );
@@ -100,7 +103,7 @@ static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl)
         ldbm_nasty(sourcefile,13020,ret);
         goto out;
     }
-
+    import_log_notice(job, "Gathering ancestorid non-leaf IDs...");
     /* For each key which is an equality key */
     do {
         ret = dbc->c_get(dbc,&key,&data,DB_NEXT_NODUP);
@@ -108,8 +111,21 @@ static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl)
             id = (ID) strtoul((char*)key.data+1, NULL, 10);
             idl_insert(&nodes, id);
         }
-    } while (ret == 0);
+        key_count++;
+        if(job && !(key_count % PROGRESS_INTERVAL)){
+            import_log_notice(job, "Gathering ancestorid non-leaf IDs: processed %d%% (ID count %d)",
+                    (key_count * 100 / job->numsubordinates), key_count);
+            started_progress_logging = 1;
+        }
+    } while (ret == 0 && !(job->flags & FLAG_ABORT));
+
+    if(started_progress_logging){
+        /* finish what we started logging */
+        import_log_notice(job, "Gathering ancestorid non-leaf IDs: processed %d%% (ID count %d)",
+                (key_count * 100 / job->numsubordinates), key_count);
 
+    }
+    import_log_notice(job, "Finished gathering ancestorid non-leaf IDs.");
     /* Check for success */
     if (ret == DB_NOTFOUND) ret = 0;
     if (ret != 0) ldbm_nasty(sourcefile,13030,ret);
@@ -158,11 +174,11 @@ static int ldbm_get_nonleaf_ids(backend *be, DB_TXN *txn, IDList **idl)
  *   (guaranteed after a database import but not after a subtree move)
  *
  */
-int ldbm_ancestorid_create_index(backend *be)
+int ldbm_ancestorid_create_index(backend *be, ImportJob *job)
 {
 	return (idl_get_idl_new()) ?
-		ldbm_ancestorid_new_idl_create_index(be) :
-	    ldbm_ancestorid_default_create_index(be);
+		ldbm_ancestorid_new_idl_create_index(be, job) :
+	    ldbm_ancestorid_default_create_index(be, job);
 }
 
 /*
@@ -171,8 +187,9 @@ int ldbm_ancestorid_create_index(backend *be)
  * quite a bit slower than ldbm_ancestorid_new_idl_create_index()
  * when the new mode is used, particularly with large databases.
  */
-static int ldbm_ancestorid_default_create_index(backend *be)
+static int ldbm_ancestorid_default_create_index(backend *be, ImportJob *job)
 {
+    int key_count = 0;
     int ret = 0;
     DB *db_pid    = NULL;
     DB *db_aid    = NULL;
@@ -187,6 +204,7 @@ static int ldbm_ancestorid_default_create_index(backend *be)
     ID id, parentid;
     id2idl_hash *ht = NULL;
     id2idl *ididl;
+    int started_progress_logging = 0;
 
     /*
      * We need to iterate depth-first through the non-leaf nodes
@@ -199,10 +217,8 @@ static int ldbm_ancestorid_default_create_index(backend *be)
      * correct order.
      */
 
-    LDAPDebug(LDAP_DEBUG_TRACE, "Creating ancestorid index\n", 0,0,0);
-
     /* Get the non-leaf node IDs */
-    ret = ldbm_get_nonleaf_ids(be, txn, &nodes);
+    ret = ldbm_get_nonleaf_ids(be, txn, &nodes, job);
     if (ret != 0) return ret;
 
     /* Get the ancestorid index */
@@ -243,6 +259,7 @@ static int ldbm_ancestorid_default_create_index(backend *be)
     key.ulen = sizeof(keybuf);
     key.flags = DB_DBT_USERMEM;
 
+    import_log_notice(job, "Creating ancestorid index (old idl)...");
     /* Iterate from highest to lowest ID */
     nids = nodes->b_nids;
     do {
@@ -261,6 +278,20 @@ static int ldbm_ancestorid_default_create_index(backend *be)
             break;
         }
 
+        /* check if we need to abort */
+        if(job->flags & FLAG_ABORT){
+            import_log_notice(job, "ancestorid creation aborted.");
+            ret = -1;
+            break;
+        }
+
+        key_count++;
+        if(job && !(key_count % PROGRESS_INTERVAL)){
+        	import_log_notice(job, "Creating ancestorid index: processed %d%% (ID count %d)",
+        	        (key_count * 100 / job->numsubordinates), key_count);
+        	 started_progress_logging = 1;
+        }
+
         /* Insert into ancestorid for this node */
         if (id2idl_hash_lookup(ht, &id, &ididl)) {
             descendants = idl_union_allids(be, ai_aid, ididl->idl, children);
@@ -314,11 +345,6 @@ static int ldbm_ancestorid_default_create_index(backend *be)
     ret = check_cache(ht);
 
  out:
-    if (ret == 0) {
-        LDAPDebug(LDAP_DEBUG_TRACE, "Created ancestorid index\n", 0,0,0);
-    } else {
-        LDAPDebug(LDAP_DEBUG_ANY, "Failed to create ancestorid index\n", 0,0,0);
-    }
 
     /* Destroy the cache */
     id2idl_hash_destroy(ht);
@@ -338,6 +364,12 @@ static int ldbm_ancestorid_default_create_index(backend *be)
 
     /* Enable the index */
     if (ret == 0) {
+        if(started_progress_logging){
+            /* finish what we started logging */
+            import_log_notice(job, "Creating ancestorid index: processed %d%% (ID count %d)",
+                    (key_count * 100 / job->numsubordinates), key_count);
+        }
+        import_log_notice(job, "Created ancestorid index (old idl).");
         ai_aid->ai_indexmask &= ~INDEX_OFFLINE;
     }
 
@@ -351,8 +383,9 @@ static int ldbm_ancestorid_default_create_index(backend *be)
  * ldbm_ancestorid_default_create_index(), particularly on
  * large databases.  Cf. bug 469800.
  */
-static int ldbm_ancestorid_new_idl_create_index(backend *be)
+static int ldbm_ancestorid_new_idl_create_index(backend *be, ImportJob *job)
 {
+    int key_count = 0;
     int ret = 0;
     DB *db_pid    = NULL;
     DB *db_aid    = NULL;
@@ -365,6 +398,7 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
     IDList *children = NULL;
     NIDS nids;
     ID id, parentid;
+    int started_progress_logging = 0;
 
     /*
      * We need to iterate depth-first through the non-leaf nodes
@@ -377,8 +411,6 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
      * correct order.
      */
 
-    LDAPDebug(LDAP_DEBUG_TRACE, "Creating ancestorid index\n", 0,0,0);
-
 	/* Bail now if we did not get here honestly. */
 	if (!idl_get_idl_new()) {
 		LDAPDebug(LDAP_DEBUG_ANY, "Cannot create ancestorid index.  " 
@@ -387,7 +419,7 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
 	}
 
     /* Get the non-leaf node IDs */
-    ret = ldbm_get_nonleaf_ids(be, txn, &nodes);
+    ret = ldbm_get_nonleaf_ids(be, txn, &nodes, job);
     if (ret != 0) return ret;
 
     /* Get the ancestorid index */
@@ -425,6 +457,7 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
     key.ulen = sizeof(keybuf);
     key.flags = DB_DBT_USERMEM;
 
+    import_log_notice(job, "Creating ancestorid index (new idl)...");
     /* Iterate from highest to lowest ID */
     nids = nodes->b_nids;
     do {
@@ -443,6 +476,20 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
             break;
         }
 
+        /* check if we need to abort */
+        if(job->flags & FLAG_ABORT){
+            import_log_notice(job, "ancestorid creation aborted.");
+            ret = -1;
+            break;
+        }
+
+        key_count++;
+        if(job && !(key_count % PROGRESS_INTERVAL)){
+            import_log_notice(job, "Creating ancestorid index: progress %d%% (ID count %d)",
+                    (key_count * 100 / job->numsubordinates), key_count);
+            started_progress_logging = 1;
+        }
+
 		/* Instead of maintaining a full accounting of IDs in a hashtable
 		 * as is done with ldbm_ancestorid_default_create_index(), perform
 		 * incremental updates straight to the DB with idl_new_store_block()
@@ -498,7 +545,12 @@ static int ldbm_ancestorid_new_idl_create_index(backend *be)
 
  out:
     if (ret == 0) {
-        LDAPDebug(LDAP_DEBUG_TRACE, "Created ancestorid index\n", 0,0,0);
+        if(started_progress_logging){
+            /* finish what we started logging */
+            import_log_notice(job, "Creating ancestorid index: processed %d%% (ID count %d)",
+                    (key_count * 100 / job->numsubordinates), key_count);
+        }
+        import_log_notice(job, "Created ancestorid index (new idl).");
     } else {
         LDAPDebug(LDAP_DEBUG_ANY, "Failed to create ancestorid index\n", 0,0,0);
     }

+ 5 - 4
ldap/servers/slapd/back-ldbm/import-threads.c

@@ -741,7 +741,7 @@ import_producer(void *param)
         job->fifo.item[idx].esize = newesize;
 
         /* Add the entry size to total fifo size */
-        job->fifo.c_bsize += ep->ep_entry? job->fifo.item[idx].esize : 0;
+        job->fifo.c_bsize += ep->ep_entry ? job->fifo.item[idx].esize : 0;
 
         /* Update the job to show our progress */
         job->lead_ID = id;
@@ -2201,16 +2201,17 @@ import_wait_for_space_in_fifo(ImportJob *job, size_t new_esize)
     sleeptime = PR_MillisecondsToInterval(import_sleep_time);
 
     /* Now check if fifo has enough space for the new entry */
-    while ((job->fifo.c_bsize + new_esize) > job->fifo.bsize) {
+    while ((job->fifo.c_bsize + new_esize) > job->fifo.bsize && !(job->flags & FLAG_ABORT)) {
         for ( i = 0, slot_found = 0 ; i < job->fifo.size ; i++ ) {
             temp_ep = job->fifo.item[i].entry;
             if (temp_ep) {
                 if (temp_ep->ep_refcnt == 0 && temp_ep->ep_id <= job->ready_EID) {
                     job->fifo.item[i].entry = NULL;
-                    if (job->fifo.c_bsize > job->fifo.item[i].esize)
+                    if (job->fifo.c_bsize > job->fifo.item[i].esize){
                         job->fifo.c_bsize -= job->fifo.item[i].esize;
-                    else
+                    } else {
                         job->fifo.c_bsize = 0;
+                    }
                     backentry_free(&temp_ep);
                     slot_found = 1;
                 }

+ 5 - 4
ldap/servers/slapd/back-ldbm/import.c

@@ -1378,9 +1378,10 @@ int import_main_offline(void *arg)
     import_log_notice(job, "Indexing complete.  Post-processing...");
     /* Now do the numsubordinates attribute */
     /* [610066] reindexed db cannot be used in the following backup/restore */
+    import_log_notice(job, "Generating numsubordinates (this may take several minutes to complete)...");
     if ( (!(job->flags & FLAG_REINDEXING) || (job->flags & FLAG_DN2RDN)) &&
-         (ret = update_subordinatecounts(be, job->mothers, job->encrypt, NULL))
-         != 0 ) {
+         (ret = update_subordinatecounts(be, job, NULL)) != 0 )
+    {
         import_log_notice(job, "Failed to update numsubordinates attributes");
         goto error;
     }
@@ -1390,10 +1391,10 @@ int import_main_offline(void *arg)
         /* And the ancestorid index */
         /* Creating ancestorid from the scratch; delete the index file first. */
         struct attrinfo *ai = NULL;
+
         ainfo_get(be, "ancestorid", &ai);
         dblayer_erase_index_file(be, ai, 0);
- 
-        if ((ret = ldbm_ancestorid_create_index(be)) != 0) {
+        if ((ret = ldbm_ancestorid_create_index(be, job)) != 0) {
             import_log_notice(job, "Failed to create ancestorid index");
             goto error;
         }

+ 8 - 0
ldap/servers/slapd/back-ldbm/import.h

@@ -162,6 +162,7 @@ typedef struct {
     int encrypt;
     Slapi_Value *usn_value;     /* entryusn for import */
     FILE *upgradefd;            /* used for the upgrade */
+    int numsubordinates;
 } ImportJob;
 
 #define FLAG_INDEX_ATTRS	0x01	/* should we index the attributes? */
@@ -211,6 +212,9 @@ struct _import_worker_info {
 #define ABORTED    0x8
 #define QUIT       0x10  /* quit intentionally.
                           * introduced to distinguish from ABORTED, FINISHED */
+
+#define PROGRESS_INTERVAL 100000 /* number of IDs processed that triggers an update */
+
 #define CORESTATE  0xff
 #define DN_NORM    0x100 /* do dn normalization in upgrade */
 #define DN_NORM_SP 0x200 /* do dn normalization for multi spaces in upgrade */
@@ -247,6 +251,7 @@ void reset_progress( void );
 void report_progress( int count, int done );
 int add_op_attrs(Slapi_PBlock *pb, struct ldbminfo *li, struct backentry *ep,
 		 int *status);
+int update_subordinatecounts(backend *be, ImportJob *job, DB_TXN *txn);
 
 /* import-threads.c */
 void import_producer(void *param);
@@ -254,3 +259,6 @@ void index_producer(void *param);
 void upgradedn_producer(void *param);
 void import_foreman(void *param);
 void import_worker(void *param);
+
+/* ancestorid.c */
+int ldbm_ancestorid_create_index(backend *be, ImportJob *job);

+ 26 - 3
ldap/servers/slapd/back-ldbm/ldif2ldbm.c

@@ -505,9 +505,12 @@ static int import_subcount_trawl(backend *be,
  * Returns: Nothing
  * 
  */
-int update_subordinatecounts(backend *be, import_subcount_stuff *mothers,
-                             int isencrypted, DB_TXN *txn)
+int update_subordinatecounts(backend *be, ImportJob *job, DB_TXN *txn)
 {
+    import_subcount_stuff *mothers = job->mothers;
+    int isencrypted = job->encrypt;
+    int started_progress_logging = 0;
+    int key_count = 0;
     int ret = 0;
     DB *db    = NULL;
     DBC *dbc  = NULL; 
@@ -524,7 +527,6 @@ int update_subordinatecounts(backend *be, import_subcount_stuff *mothers,
         ldbm_nasty(sourcefile,67,ret);
         return(ret);
     }
-
     /* Get a cursor so we can walk through the parentid */
     ret = db->cursor(db,txn,&dbc,0);
     if (ret != 0 ) {
@@ -557,6 +559,21 @@ int update_subordinatecounts(backend *be, import_subcount_stuff *mothers,
             }
             break;
         }
+        /* check if we need to abort */
+        if(job->flags & FLAG_ABORT){
+            import_log_notice(job, "numsubordinate generation aborted.");
+            break;
+        }
+        /*
+         * Do an update count
+         */
+        key_count++;
+        if(!(key_count % PROGRESS_INTERVAL)){
+            import_log_notice(job, "numsubordinate generation: processed %d entries...",
+                    key_count);
+            started_progress_logging = 1;
+        }
+
         if (*(char*)key.data == EQ_PREFIX) {
             char *idptr = NULL;
     
@@ -606,6 +623,12 @@ int update_subordinatecounts(backend *be, import_subcount_stuff *mothers,
             key.data = NULL;
         }
     }
+    if(started_progress_logging){
+        /* Finish what we started... */
+        import_log_notice(job, "numsubordinate generation: processed %d entries.",
+                key_count);
+        job->numsubordinates = key_count;
+    }
 
     ret = dbc->c_close(dbc);
     if (0 != ret) {

+ 1 - 2
ldap/servers/slapd/back-ldbm/proto-back-ldbm.h

@@ -44,6 +44,7 @@
 #ifndef _PROTO_BACK_LDBM
 #define _PROTO_BACK_LDBM
 
+
 /*
  * attr.c
  */
@@ -343,7 +344,6 @@ int import_subcount_mother_init(import_subcount_stuff *mothers,ID parent_id, siz
 int import_subcount_mother_count(import_subcount_stuff *mothers,ID parent_id);
 void import_subcount_stuff_init(import_subcount_stuff *stuff);
 void import_subcount_stuff_term(import_subcount_stuff *stuff);
-int update_subordinatecounts(backend *be,import_subcount_stuff *mothers, int isencrypted, DB_TXN *txn);
 void import_configure_index_buffer_size(size_t size);
 size_t import_get_index_buffer_size();
 int ldbm_back_fetch_incl_excl(Slapi_PBlock *pb, char ***include,
@@ -665,7 +665,6 @@ void replace_ldbm_config_value(char *conftype, char *val, struct ldbminfo *li);
 /*
  * ancestorid.c
  */
-int ldbm_ancestorid_create_index(backend *be);
 int ldbm_ancestorid_index_entry(backend *be, struct backentry *e, int flags, back_txn *txn);
 int ldbm_ancestorid_read(backend *be, back_txn *txn, ID id, IDList **idl);
 int ldbm_ancestorid_read_ext(backend *be, back_txn *txn, ID id, IDList **idl, int allidslimit);