From grdetil@scrc.umanitoba.ca Tue Aug 24 12:42:22 1999
Date: Tue, 24 Aug 1999 13:46:53 -0500 (CDT)
From: Gilles Detillieux
To: htdig@htdig.org
Subject: [htdig] patch to boost performance of HtWordType functions

This patch should help speed up the performance of the HtWordType
class's inline functions. (That should help offset the performance
penalty of the compound word handling patch I hope to work on next.)

--- ../htdig-3.1.2.bak/htlib/HtWordType.h Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.h Tue Aug 24 13:28:31 1999
@@ -15,8 +15,8 @@
 // Inline friend-functions are used together with an all-statics
 // class (name that pattern!) to spare the user from having
 // to manage the valid_punctuation and extra_word_characters
-// attributes, while in theory still having the runtime
-// performance of strchr() + isalnum().
+// attributes, while in theory still having better runtime
+// performance than strchr() + isalnum().
 //
 class HtWordType
@@ -40,6 +40,7 @@ private:
 char *extra_word_characters; // Likewise.
 char *other_chars_in_word; // Attribute "valid_punctuation" plus
 // "extra_word_characters".
+ char chrtypes[256]; // quick lookup table for types
 } statics;
 // These methods are not supposed to be implemented (or accessed).
@@ -48,19 +49,25 @@ private:
 void operator=(const HtWordType &);
 };
+// Bits to set in chrtypes[]:
+#define HtWt_Alpha 0x01
+#define HtWt_Digit 0x02
+#define HtWt_Extra 0x04
+#define HtWt_ValidPunct 0x08
+
 // One for characters that when put together are a word
 // (including punctuation).
 inline int HtIsWordChar(int c)
 {
- return isalnum(c) || (c && strchr(HtWordType::statics.other_chars_in_word, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] & (HtWt_Alpha|HtWt_Digit|HtWt_Extra|HtWt_ValidPunct)) != 0;
 }
 // Similar, but no punctuation characters.
inline int HtIsStrictWordChar(int c)
 {
- return isalnum(c) || (c && strchr(HtWordType::statics.extra_word_characters, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] & (HtWt_Alpha|HtWt_Digit|HtWt_Extra)) != 0;
 }
 // Let caller get rid of getting and holding a configuration parameter.
--- ../htdig-3.1.2.bak/htlib/HtWordType.cc Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.cc Tue Aug 24 13:28:52 1999
@@ -23,4 +23,17 @@ HtWordType::Initialize(Configuration &co
 HtWordType::statics.extra_word_characters = extra_word_chars;
 HtWordType::statics.valid_punctuation = valid_punct;
 HtWordType::statics.other_chars_in_word = punct_and_extra;
+ HtWordType::statics.chrtypes[0] = 0;
+ for (int i = 1; i < 256; i++)
+ {
+ HtWordType::statics.chrtypes[i] = 0;
+ if (isalpha(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Alpha;
+ if (isdigit(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Digit;
+ if (strchr(extra_word_chars, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Extra;
+ if (strchr(valid_punct, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_ValidPunct;
+ }
 }

-- 
Gilles R. Detillieux E-mail: <grdetil@scrc.umanitoba.ca>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig@htdig.org containing the single word unsubscribe in
the SUBJECT of the message.