From grdetil@scrc.umanitoba.ca Thu Jul 26 13:55:19 2001 Date: Thu, 26 Jul 2001 15:31:55 -0500 (CDT) From: Gilles Detillieux To: "ht://Dig mailing list" Subject: [htdig] PATCH: new boolean_keywords patch for 3.1.5 Hi, folks. Here's an updated version of the bool_keywords.1 patch for 3.1.5. This one adds two new attributes, boolean_keywords and boolean_syntax_errors, so you can fully internationalize the htsearch user interface. I've already committed this code to the 3.1.6 CVS tree, so it'll be in the next snapshot, but I'm posting it here for two reasons: 1) to encourage more testing of these changes (or the upcoming snapshot which has all sorts of other goodies), and 2) to ask for someone to port these changes to 3.2.0b4. As usual, apply with "patch -p0 < this-message". Comments are welcome, but after tomorrow, I won't be checking my e-mail for 3 weeks. --- htcommon/defaults.cc.orig Thu Feb 24 20:29:10 2000 +++ htcommon/defaults.cc Thu Jul 26 12:40:37 2001 @@ -36,6 +36,8 @@ ConfigDefaults defaults[] = {"bad_extensions", ".wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi"}, {"bad_querystr", ""}, {"bad_word_list", "${common_dir}/bad_words"}, + {"boolean_keywords", "and or not"}, + {"boolean_syntax_errors", "Expected 'a search word' 'at the end' 'instead of' 'end of expression'"}, {"build_select_lists", ""}, {"case_sensitive", "true"}, {"common_url_parts", "http:// http://www. ftp:// ftp://ftp. 
/pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"}, --- htsearch/htsearch.cc.orig Thu Feb 24 20:29:11 2000 +++ htsearch/htsearch.cc Wed Jul 25 23:18:10 2001 @@ -46,6 +46,7 @@ void usage(); int debug = 0; int minimum_word_length = 3; +StringList boolean_keywords; //***************************************************************************** @@ -201,6 +202,14 @@ main(int ac, char **av) reportError(form("Invalid url_part_aliases or common_url_parts: %s", url_part_errors.get())); + // Load boolean_keywords from configuration + // they should be placed in this order: + // 0 1 2 + // and or not + boolean_keywords.Create(config["boolean_keywords"], "| \t\r\n\001"); + if (boolean_keywords.Count() != 3) + reportError("boolean_keywords attribute is not correctly specified"); + Parser *parser = new Parser(); // @@ -305,11 +314,11 @@ createLogicalWords(List &searchWords, St if (!ww->isHidden) { if (strcmp(ww->word, "&") == 0 && wasHidden == 0) - logicalWords << " and "; + logicalWords << " " << boolean_keywords[0] << " "; else if (strcmp(ww->word, "|") == 0 && wasHidden == 0) - logicalWords << " or "; + logicalWords << " " << boolean_keywords[1] << " "; else if (strcmp(ww->word, "!") == 0 && wasHidden == 0) - logicalWords << " not "; + logicalWords << " " << boolean_keywords[2] << " "; else if (wasHidden == 0) { logicalWords << ww->word; @@ -409,15 +418,15 @@ setupWords(char *allWords, List &searchW pos--; word.lowercase(); - if (boolean && mystrcasecmp(word.get(), "and") == 0) + if (boolean && mystrcasecmp(word.get(), boolean_keywords[0]) == 0) { tempWords.Add(new WeightWord("&", -1.0)); } - else if (boolean && mystrcasecmp(word.get(), "or") == 0) + else if (boolean && mystrcasecmp(word.get(), boolean_keywords[1]) == 0) { tempWords.Add(new WeightWord("|", -1.0)); } - else if (boolean && mystrcasecmp(word.get(), "not") == 0) + else if (boolean && mystrcasecmp(word.get(), boolean_keywords[2]) == 0) { tempWords.Add(new WeightWord("!", -1.0)); } --- 
htsearch/parser.cc.orig Wed Sep 1 15:14:08 1999 +++ htsearch/parser.cc Thu Jul 26 13:27:06 2001 @@ -9,10 +9,14 @@ static char RCSid[] = "$Id: parser.cc,v #endif #include "parser.h" +#include "QuotedStringList.h" #define WORD 1000 #define DONE 1001 +extern StringList boolean_keywords; +QuotedStringList boolean_syntax_errors; + //***************************************************************************** Parser::Parser() @@ -33,6 +37,14 @@ Parser::Parser() int Parser::checkSyntax(List *tokenList) { + void reportError(char *); + // Load boolean_syntax_errors from configuration + // they should be placed in this order: + // 0 1 2 3 4 + // Expected "a search word" "at the end" "instead of" "end of expression" + boolean_syntax_errors.Create(config["boolean_syntax_errors"], "| \t\r\n\001"); + if (boolean_syntax_errors.Count() != 5) + reportError("boolean_syntax_errors attribute is not correctly specified"); tokens = tokenList; valid = 1; fullexpr(0); @@ -48,7 +60,7 @@ Parser::fullexpr(int output) expr(output); if (valid && lookahead != DONE) { - setError("end of expression"); + setError(boolean_syntax_errors[4]); } } @@ -100,7 +112,10 @@ Parser::expr(int output) } if (valid && lookahead == WORD) { - setError("'AND' or 'OR'"); + String expected = "'"; + expected << boolean_keywords[0] << "' " << boolean_keywords[1] << " '" + << boolean_keywords[1] << "'"; + setError(expected.get()); } } @@ -152,7 +167,7 @@ Parser::factor(int output) } else { - setError("a search word"); + setError(boolean_syntax_errors[1]); } } @@ -177,20 +192,26 @@ Parser::setError(char *expected) { valid = 0; error = 0; - error << "Expected " << expected; + error << boolean_syntax_errors[0] << ' ' << expected; if (lookahead == DONE || !current) { - error << " at the end"; + error << ' ' << boolean_syntax_errors[2]; } else { - error << " instead of '" << current->word.get(); - error << '\''; + error << ' ' << boolean_syntax_errors[3] << " '" + << current->word.get() << "'"; switch (lookahead) { - case 
'&': error << " or 'AND'"; break; - case '|': error << " or 'OR'"; break; - case '!': error << " or 'NOT'"; break; + case '&': error << ' ' << boolean_keywords[1] << " '" + << boolean_keywords[0] << "'"; + break; + case '|': error << ' ' << boolean_keywords[1] << " '" + << boolean_keywords[1] << "'"; + break; + case '!': error << ' ' << boolean_keywords[1] << " '" + << boolean_keywords[2] << "'"; + break; } } } @@ -407,9 +428,11 @@ Parser::parse(List *tokenList, ResultLis ResultList *result = (ResultList *) stack.pop(); if (!result) // Ouch! { + if (!valid) + return; valid = 0; error = 0; - error << "Expected to have something to parse!"; + error << boolean_syntax_errors[0] << ' ' << boolean_syntax_errors[1]; return; } List *elements = result->elements(); --- htdoc/attrs.html.orig Thu Feb 24 20:29:10 2000 +++ htdoc/attrs.html Thu Jul 26 14:08:55 2001 @@ -520,6 +520,104 @@
+ boolean_keywords +
+
+
+
+ type: +
+
+ string list +
+
+ used by: +
+
+ htsearch +
+
+ default: +
+
+ and or not +
+
+ description: +
+
+ These 3 strings are the keywords used in + constructing the LOGICAL_WORDS template variable, + and in parsing the words input + parameter when the method parameter + or match_method attribute + is set to boolean.
+
+ example: +
+
+ boolean_keywords: et ou non +
+
+
+
+
+
+
+ boolean_syntax_errors +
+
+
+
+ type: +
+
+ quoted string list +
+
+ used by: +
+
+ htsearch +
+
+ default: +
+
+ Expected 'a search word' 'at the end' 'instead of' 'end of expression' +
+
+ description: +
+
+ These 5 strings are used to construct various syntax + error messages for errors encountered in parsing + the words input parameter, when the + method parameter or + match_method attribute + is set to boolean. They are used in conjunction with + the words in the + boolean_keywords + attribute, and comprise all the English-specific + parts of these error messages. The order in which + the strings are put together may not be ideal, or + even grammatically correct, for all languages, but they + can be used to make fairly intelligible messages in + many languages.
+
+ example: +
+
+ boolean_syntax_errors: Attendait "un mot" "à la fin" "au lieu de" "fin d'expression" +
+
+
+
+
+
+
build_select_lists
--- htdoc/cf_byname.html.orig Thu Feb 24 20:29:10 2000 +++ htdoc/cf_byname.html Thu Jul 26 14:11:07 2001 @@ -26,6 +26,8 @@ * bad_querystr
* bad_word_list
* bin_dir
+ * boolean_keywords
+ * boolean_syntax_errors
* build_select_lists

C
--- htdoc/cf_byprog.html.orig Thu Feb 24 20:29:10 2000 +++ htdoc/cf_byprog.html Thu Jul 26 14:11:12 2001 @@ -119,6 +119,8 @@ * add_anchors_to_excerpt
* allow_in_form
* backlink_factor
+ * boolean_keywords
+ * boolean_syntax_errors
* build_select_lists
* common_url_parts
* database_base
-- Gilles R. Detillieux E-mail: Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil Dept. Physiology, U. of Manitoba Phone: (204)789-3766 Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930 _______________________________________________ htdig-general mailing list To unsubscribe, send a message to with a subject of unsubscribe FAQ: http://htdig.sourceforge.net/FAQ.html