Date: Tue, 08 Aug 2000 00:18:48 +0200 From: Lorenzo Campedelli To: htdig3-dev Subject: [htdig3-dev] PATCH : Reduce memory usage. --- htdig-3.2.0b2/htmerge/db.cc.orig Wed Apr 12 00:53:21 2000 +++ htdig-3.2.0b2/htmerge/db.cc Tue Jun 20 23:08:47 2000 @@ -18,6 +18,55 @@ #include "htmerge.h" #include "good_strtok.h" +class CallbackData : public Object /* State shared with OverrideCallback/DeleteCallback, which receive it as Object & d. */ +{ +public: + CallbackData(HtWordList * w, Dictionary * d, int o) + { word_db = w; dup_ids = d; docIDOffset = o; } + + HtWordList * word_db; /* word list the callbacks call Override()/Delete() on */ + Dictionary * dup_ids; /* doc IDs looked up with Exists() by the callbacks */ + int docIDOffset; /* added to each DocID by OverrideCallback; not read by DeleteCallback */ +}; + + /* Walk callback: for each word whose DocID is NOT in dup_ids, adds docIDOffset to its DocID and calls word_db->Override() with it. Always returns OK. */ +int +OverrideCallback(WordList * wl, /* wl is not used in the body */ + WordDBCursor &, + const WordReference * w, + Object & d) +{ + CallbackData & data = ((CallbackData &)d); + HtWordReference * ht_wr = (HtWordReference *)w; /* NOTE(review): this cast drops const from w, and the DocID is rewritten in place below on the walked object -- confirm the walker tolerates that */ + String docIDKey; + + docIDKey << ht_wr->DocID(); /* key used for the dup_ids lookup */ + if (!((data.dup_ids)->Exists(docIDKey))) + { + ht_wr->DocID(ht_wr->DocID() + data.docIDOffset); + (data.word_db)->Override(*ht_wr); + } + + return OK; +} + /* Walk callback: calls word_db->Delete() for each word whose DocID IS in dup_ids. Always returns OK. */ +int +DeleteCallback(WordList * wl, /* wl is not used in the body */ + WordDBCursor &, + const WordReference * w, + Object & d) +{ + CallbackData & data = ((CallbackData &)d); + HtWordReference * ht_wr = (HtWordReference *)w; + String docIDKey; + + docIDKey << ht_wr->DocID(); /* key used for the dup_ids lookup */ + if ((data.dup_ids)->Exists(docIDKey)) + (data.word_db)->Delete(*ht_wr); /* NOTE(review): the call site in this patch walks wordDB with word_db == &wordDB, so this deletes from the list being walked -- confirm that is safe */ + + return OK; +} + //***************************************************************************** // void mergeDB() // @@ -143,8 +192,6 @@ // OK, after merging the doc DBs, we do the same for the words HtWordList mergeWordDB(config), wordDB(config); - List *words; - String docIDKey; if (wordDB.Open(config["word_db"], O_RDWR) < 0) { @@ -160,31 +207,18 @@ // Start the merging by going through all the URLs that are in // the database to be merged - - words = mergeWordDB.WordRefs(); - words->Start_Get(); - HtWordReference *word; - while ((word = (HtWordReference *) words->Get_Next())) { - docIDKey = word->DocID(); - if (merge_dup_ids.Exists(docIDKey)) - continue; - - word->DocID(word->DocID() + docIDOffset); - 
wordDB.Override(*word); + CallbackData data(&wordDB, &merge_dup_ids, docIDOffset); /* target is wordDB; words whose DocID is in merge_dup_ids are skipped */ + WordSearchDescription description(OverrideCallback, (Object *)&data); + mergeWordDB.Walk(description); /* replaces the removed loop over the List returned by mergeWordDB.WordRefs() */ } - delete words; - words = wordDB.WordRefs(); - words->Start_Get(); - while ((word = (HtWordReference *) words->Get_Next())) { - docIDKey = word->DocID(); - if (db_dup_ids.Exists(docIDKey)) - wordDB.Delete(*word); + CallbackData data(&wordDB, &db_dup_ids, 0); /* the offset argument (0) is not read by DeleteCallback */ + WordSearchDescription description(DeleteCallback, (Object *)&data); + wordDB.Walk(description); /* replaces the removed loop over the List returned by wordDB.WordRefs() */ } - delete words; // Cleanup--just close the two word databases mergeWordDB.Close();