From: Gilles Detillieux To: htdig@htdig.org Subject: [htdig] Patch This patch turns the maximum word length into a run-time option, rather than compile-time. --- htdig-3.1.2.bak/include/htconfig.h.in Wed Apr 21 21:47:58 1999 +++ htdig-3.1.2/include/htconfig.h.in Wed Aug 4 10:43:33 1999 @@ -5,7 +5,6 @@ #define _config_h_ #define VERSION 1 -#define MAX_WORD_LENGTH 12 /* Define if on AIX 3. System headers sometimes define this. --- htdig-3.1.2.bak/htcommon/WordReference.h Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htcommon/WordReference.h Wed Aug 4 10:44:12 1999 @@ -25,7 +25,7 @@ public: WordReference() {} ~WordReference() {} - char Word[MAX_WORD_LENGTH + 1]; + String Word; int WordCount; int Weight; int Location; --- htdig-3.1.2.bak/htcommon/WordList.cc Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htcommon/WordList.cc Wed Aug 4 12:22:31 1999 @@ -46,11 +46,12 @@ void WordList::Word(char *word, int loca if (weight_factor == 0.0) // Why should we add words with no weight? return; String shortword = word; + static int maximum_word_length = config.Value("maximum_word_length", 12); shortword.lowercase(); word = shortword.get(); - if (shortword.length() > MAX_WORD_LENGTH) - word[MAX_WORD_LENGTH] = '\0'; + if (shortword.length() > maximum_word_length) + word[maximum_word_length] = '\0'; if (!valid_word(word)) return; @@ -80,7 +81,7 @@ void WordList::Word(char *word, int loca wordRef->DocumentID = docID; wordRef->Weight = int((1000 - location) * weight_factor); wordRef->Anchor = anchor_number; - strcpy(wordRef->Word, word); + wordRef->Word = word; words->Add(word, wordRef); } } @@ -145,7 +146,7 @@ void WordList::Flush() while ((wordRef = (WordReference *) words->Get_NextElement())) { - fprintf(fl, "%s",wordRef->Word); + fprintf(fl, "%s",wordRef->Word.get()); fprintf(fl, "\ti:%d\tl:%d\tw:%d", wordRef->DocumentID, wordRef->Location, @@ -220,15 +221,16 @@ void WordList::BadWordFile(char *filenam char buffer[1000]; char *word; String new_word; - int minimum_word_length = 
config.Value("minimum_word_length", 3); + static int minimum_word_length = config.Value("minimum_word_length", 3); + static int maximum_word_length = config.Value("maximum_word_length", 12); while (fl && fgets(buffer, sizeof(buffer), fl)) { word = strtok(buffer, "\r\n \t"); if (word && *word) { - if (strlen(word) > MAX_WORD_LENGTH) - word[MAX_WORD_LENGTH] = '\0'; + if (strlen(word) > maximum_word_length) + word[maximum_word_length] = '\0'; new_word = word; // We need to clean it up before we add it new_word.lowercase(); // Just in case someone enters an odd one HtStripPunctuation(new_word); --- htdig-3.1.2.bak/htcommon/DocumentRef.cc Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htcommon/DocumentRef.cc Wed Aug 4 10:45:30 1999 @@ -571,8 +571,7 @@ void DocumentRef::AddDescription(char *d static double description_factor = config.Double("description_factor"); static int max_descriptions = config.Value("max_descriptions", 5); - // Not restricted to this size, just used as a hint. - String word(MAX_WORD_LENGTH); + String word; while (*p) { --- htdig-3.1.2.bak/htcommon/defaults.cc Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htcommon/defaults.cc Wed Aug 4 10:47:44 1999 @@ -89,6 +89,7 @@ ConfigDefaults defaults[] = {"max_prefix_matches", "1000"}, {"max_stars", "4"}, {"maximum_pages", "10"}, + {"maximum_word_length", "12"}, {"metaphone_db", "${database_base}.metaphone.db"}, {"meta_description_factor", "50"}, {"method_names", "and All or Any boolean Boolean"}, --- htdig-3.1.2.bak/htsearch/parser.cc Wed Apr 21 21:47:58 1999 +++ htdig-3.1.2/htsearch/parser.cc Wed Aug 4 10:50:41 1999 @@ -202,6 +202,7 @@ Parser::setError(char *expected) void Parser::perform_push() { + static int maximum_word_length = config.Value("maximum_word_length", 12); String temp = current->word.get(); String data; char *p; @@ -220,8 +221,8 @@ Parser::perform_push() } temp.lowercase(); p = temp.get(); - if (temp.length() > MAX_WORD_LENGTH) - p[MAX_WORD_LENGTH] = '\0'; + if (temp.length() > maximum_word_length) + 
p[maximum_word_length] = '\0'; if (dbf->Get(p, data) == OK) { p = data.get(); --- htdig-3.1.2.bak/htdoc/attrs.html Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htdoc/attrs.html Wed Aug 4 10:58:59 1999 @@ -3124,6 +3124,51 @@
+ + maximum_word_length +
+
+
+
+ type: +
+
+ number +
+
+ used by: +
+
+ htdig and + htsearch +
+
+ default: +
+
+ 12 +
+
+ description: +
+
+ This sets the maximum length of words that will be + indexed. Words longer than this value will be silently + truncated when put into the index, or when searched for in the + index.
+
+ example: +
+
+ maximum_word_length: 15 +
+
+
+
+
+
+
meta_description_factor
--- htdig-3.1.2.bak/htdoc/cf_byname.html Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htdoc/cf_byname.html Wed Aug 4 10:59:30 1999 @@ -96,6 +96,7 @@ * max_prefix_matches
* max_stars
* maximum_pages
+ * maximum_word_length
* meta_description_factor
* metaphone_db
* method_names
--- htdig-3.1.2.bak/htdoc/cf_byprog.html Wed Apr 21 21:47:57 1999 +++ htdig-3.1.2/htdoc/cf_byprog.html Wed Aug 4 11:00:31 1999 @@ -54,6 +54,7 @@ * max_head_length
* max_hop_count
* max_meta_description_length
+ * maximum_word_length
* meta_description_factor
* minimum_word_length
* modification_time_is_now
@@ -132,6 +133,7 @@ * max_prefix_matches
* max_stars
* maximum_pages
+ * maximum_word_length
* method_names
* minimum_prefix_length
* minimum_word_length