diff -cprN ../htdig-3.1.4-with-url_seed_score/htcommon/defaults.cc ./htcommon/defaults.cc *** ../htdig-3.1.4-with-url_seed_score/htcommon/defaults.cc Sun Jan 30 13:44:57 2000 --- ./htcommon/defaults.cc Sun Jan 30 13:51:35 2000 *************** ConfigDefaults defaults[] = *** 122,127 **** --- 122,128 ---- {"search_algorithm", "exact:1"}, {"search_results_footer", "${common_dir}/footer.html"}, {"search_results_header", "${common_dir}/header.html"}, + {"search_results_order", ""}, {"search_results_wrapper", ""}, {"server_aliases", ""}, {"server_wait_time", "0"}, diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/attrs.html ./htdoc/attrs.html *** ../htdig-3.1.4-with-url_seed_score/htdoc/attrs.html Sun Jan 30 13:44:57 2000 --- ./htdoc/attrs.html Sun Jan 30 12:43:13 2000 *************** *** 5256,5261 **** --- 5256,5317 ----
+ + search_results_order +
+
+
+
+ type: +
+
+ string list +
+
+ used by: +
+
+ htsearch +
+
+ default: +
+
+ <empty> +
+
+ description: +
+
+ This specifies a list of patterns for URLs in + search results. Results will be displayed in the + order of the patterns they match, with the search + algorithm's ranking as the secondary order within + each pattern. A URL is assigned to the first pattern + it matches. Results that do not match any of the + specified patterns can be placed by using * as the + pattern. If no * is specified, one will be implicitly + placed at the end of the list.
+ See also url_seed_score. +
+
+ example: +
+
+ + + + +
+ search_results_order: /docs/|faq.html * + /maillist/ /testresults/ +
+
+
+
+
+
+
+
search_results_wrapper
*************** *** 6864,6870 **** point constants.
More straightforward is to think of the format as "newscore = oldscore*N+M", ! but with the "newscore = oldscore" part left out.
example: --- 6920,6928 ---- point constants.
More straightforward is to think of the format as "newscore = oldscore*N+M", ! but with the "newscore = oldscore" part left out.
! See also ! search_results_order.
example: diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byname.html ./htdoc/cf_byname.html *** ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byname.html Sun Jan 30 13:44:57 2000 --- ./htdoc/cf_byname.html Sun Jan 30 12:43:13 2000 *************** *** 142,147 **** --- 142,148 ---- * search_algorithm
* search_results_footer
* search_results_header
+ * search_results_order
* search_results_wrapper
* server_aliases
* server_max_docs
diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byprog.html ./htdoc/cf_byprog.html *** ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byprog.html Sun Jan 30 13:44:57 2000 --- ./htdoc/cf_byprog.html Sun Jan 30 12:43:13 2000 *************** *** 159,164 **** --- 159,165 ---- * search_algorithm
* search_results_footer
* search_results_header
+ * search_results_order
* search_results_wrapper
* sort
* sort_names
diff -cprN ../htdig-3.1.4-with-url_seed_score/htlib/List.cc ./htlib/List.cc *** ../htdig-3.1.4-with-url_seed_score/htlib/List.cc Fri Apr 16 20:47:40 1999 --- ./htlib/List.cc Sun Jan 30 12:43:13 2000 *************** List &List::operator=(List &list) *** 425,427 **** --- 425,461 ---- } + //********************************************************************* + // void AppendList(List &list) + // Move contents of other list to the end of this list, and empty the + // other list. + // + void List::AppendList(List &list) + { + // Never mind an empty list or ourselves. + if (list.number == 0 || &list == this) + return; + + // Correct our pointers in head and tail. + if (tail) + { + // Link in other list. + tail->next = list.head; + list.head->prev = tail; + + // Update members for added contents. + number += list.number; + tail = list.tail; + } + else + { + head = list.head; + tail = list.tail; + number = list.number; + } + + // Clear others members to be an empty list. + list.head = list.tail = list.current = 0; + list.current_index = -1; + list.number = 0; + } diff -cprN ../htdig-3.1.4-with-url_seed_score/htlib/List.h ./htlib/List.h *** ../htdig-3.1.4-with-url_seed_score/htlib/List.h Mon Feb 3 18:11:04 1997 --- ./htlib/List.h Sun Jan 30 12:43:13 2000 *************** public: *** 112,117 **** --- 112,120 ---- List &operator= (List *list) {return *this = *list;} List &operator= (List &list); + // Move one list to the end of another, emptying the other list. 
+ void AppendList (List &list); + protected: // // Pointers into the list diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/Display.cc ./htsearch/Display.cc *** ../htdig-3.1.4-with-url_seed_score/htsearch/Display.cc Sun Jan 30 13:44:57 2000 --- ./htsearch/Display.cc Sun Jan 30 13:10:27 2000 *************** static char RCSid[] = "$Id: Display.cc,v *** 25,30 **** --- 25,31 ---- #include "HtURLCodec.h" #include "HtWordType.h" #include "HtURLSeedScore.h" + #include "SplitMatches.h" //***************************************************************************** // *************** Display::buildMatchList() *** 938,944 **** char *id; String coded_url, url; ResultMatch *thisMatch; ! List *matches = new List(); double backlink_factor = config.Double("backlink_factor"); double date_factor = config.Double("date_factor"); SortType typ = sortType(); --- 939,945 ---- char *id; String coded_url, url; ResultMatch *thisMatch; ! SplitMatches matches(config); double backlink_factor = config.Double("backlink_factor"); double date_factor = config.Double("date_factor"); SortType typ = sortType(); *************** Display::buildMatchList() *** 1029,1048 **** // // Append this match to our list of matches. // ! matches->Add(thisMatch); ! if (matches->Count() == 1 || maxScore < score) maxScore = score; ! if (matches->Count() == 1 || minScore > score) minScore = score; } // ! // The matches need to be ordered by relevance level. ! // Sort it. // ! sort(matches); ! return matches; } //***************************************************************************** --- 1030,1054 ---- // // Append this match to our list of matches. // ! matches.Add(thisMatch, url.get()); ! ! if (maxScore < score) maxScore = score; ! if (minScore > score) minScore = score; } // ! // Each sub-area is then sorted by relevance level. // ! List *matches_part; // Outside of loop to keep for-scope warnings away. ! for (matches_part = matches.Get_First(); ! matches_part != 0; ! matches_part = matches.Get_Next()) ! 
sort(matches_part); ! // Then all sub-lists are concatenated and put in a new list. ! return matches.JoinedLists(); } //***************************************************************************** diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/Makefile.in ./htsearch/Makefile.in *** ../htdig-3.1.4-with-url_seed_score/htsearch/Makefile.in Fri Apr 16 20:47:50 1999 --- ./htsearch/Makefile.in Sun Jan 30 12:43:13 2000 *************** include $(top_builddir)/Makefile.config *** 9,15 **** OBJS= Display.o DocMatch.o ResultList.o ResultMatch.o \ Template.o TemplateList.o WeightWord.o htsearch.o \ ! parser.o FOBJS= $(top_builddir)/htfuzzy/libfuzzy.a TARGET= htsearch --- 9,15 ---- OBJS= Display.o DocMatch.o ResultList.o ResultMatch.o \ Template.o TemplateList.o WeightWord.o htsearch.o \ ! parser.o SplitMatches.o FOBJS= $(top_builddir)/htfuzzy/libfuzzy.a TARGET= htsearch diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.cc ./htsearch/SplitMatches.cc *** ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.cc Thu Jan 1 01:00:00 1970 --- ./htsearch/SplitMatches.cc Sun Jan 30 12:43:13 2000 *************** *** 0 **** --- 1,175 ---- + // + // SplitMatches.cc + // + // SplitMatches: + // Holds a list of lists with the matches, as specified in + // search_results_order. + // + // Part of the ht://Dig package + // Copyright (c) 2000 The ht://Dig Group + // For copyright details, see the file COPYING in your distribution + // or the GNU Public License version 2 or later + // + // + // $Id$ + + #include "StringList.h" + #include "StringMatch.h" + #include "SplitMatches.h" + #include + #include + + // This class is only used in private members of SplitMatches. + // The OO-right thing would be to nest this inside the private + // declaration of SplitMatches, but that would cause portability + // problems according to + // . + // + // It is used as a container for a key (String) and a list. 
+ // + class MatchArea : public Object + { + public: + // Construct from a string applicable to StringMatch. + MatchArea(const String &); + + ~MatchArea(); + + // Does this item match? + inline bool Match(char *s) + { return match.hasPattern() && match.FindFirst(s) != -1; } + + // Return the contained list. + List *MatchList() { return &myList; } + + private: + StringMatch match; + List myList; + + // These member functions are not supposed to be implemented, but + // mentioned here as private so the compiler will not generate them if + // someone puts in buggy code that would use them. + MatchArea(); + MatchArea(const MatchArea &); + void operator= (const MatchArea &); + }; + + MatchArea::MatchArea(const String &url_regex) + { + // We do not want to "install" the catch-the-rest pattern as a real + // pattern; it must always return false for the "Match" operator. + if (strcmp("*", url_regex.get()) != 0) + match.Pattern(url_regex.get()); + } + + MatchArea::~MatchArea() + { + } + + SplitMatches::SplitMatches(Configuration &config) + { + char *config_item = "search_results_order"; + + StringList sl(config[config_item], "\t \r\n"); + + mySubAreas = new List(); + myDefaultList = 0; + + // Parse each as in TemplateList::createFromString. + for (int i = 0; i < sl.Count(); i++) + { + String sub_area_pattern = sl[i]; + MatchArea *match_item = new MatchArea(sub_area_pattern); + mySubAreas->Add(match_item); + + // If this is the magic catch-rest sub-area-pattern, we want to + // use its list-pointer to store all URLs that do not match + // anything else. + // We will iterate over a list where one of the patterns is + // known to not match, but that's a small penalty for keeping + // the code simple. + if (strcmp("*", sub_area_pattern.get()) == 0) + myDefaultList = match_item->MatchList(); + } + + // If we did not have a catch-the-rest pattern, install one at the + // end of the list. 
+ if (myDefaultList == 0) + { + MatchArea *match_item = new MatchArea(String("*")); + mySubAreas->Add(match_item); + + myDefaultList = match_item->MatchList(); + } + } + + SplitMatches::~SplitMatches() + { + // myDefaultList is a pointer to one of the items in mySubAreas and + // must not be explicitly deleted here. + + delete mySubAreas; + } + + void + SplitMatches::Add(ResultMatch *match, char *url) + { + List *area_list = mySubAreas; + MatchArea *area_item; + + area_list->Start_Get(); + + // This is a linear search. If there's a problem with that, we + // can improve it. For now, a list with tens of areas seems lots, + // and break-even with a more clever search-scheme is probably in + // the hundreds. + while ((area_item = (MatchArea *) area_list->Get_Next())) + { + // Use the first match only. + if (area_item->Match(url)) + { + area_item->MatchList()->Add(match); + return; + } + } + + // We'll get here if no match was found, so we add to the + // catch-the-rest list. + myDefaultList->Add(match); + } + + // Just a simple iterator function. + List * + SplitMatches::Get_Next() + { + MatchArea *next_area = (MatchArea *) mySubAreas->Get_Next(); + List *next_area_list = 0; + + if (next_area != 0) + next_area_list = next_area->MatchList(); + + return next_area_list; + } + + // Rip out the sub-areas lists and concatenate them into one list. + List * + SplitMatches::JoinedLists() + { + + // We make a new list here, so we don't have to worry about + // mySubAreas being dangling or null. + List *all_areas = new List(); + List *sub_areas = mySubAreas; + MatchArea *area; + + sub_areas->Start_Get(); + + while (area = (MatchArea *) sub_areas->Get_Next()) + { + // "Destructively" move the contents of the list, + // leaving the original list empty. 
+ all_areas->AppendList(*(area->MatchList())); + } + + return all_areas; + } diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.h ./htsearch/SplitMatches.h *** ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.h Thu Jan 1 01:00:00 1970 --- ./htsearch/SplitMatches.h Sun Jan 30 12:43:13 2000 *************** *** 0 **** --- 1,53 ---- + // + // SplitMatches.h + // + // SplitMatches: Constructed from a Configuration, see doc + // for format of config item "search_results_order". + // Used to contain a number of ResultMatches, putting them in separate + // lists depending on the URL with method Add. + // Iterator methods Get_First and Get_Next return the sub-lists. + // Method JoinedLists returns a new list with all the sub-lists + // concatenated. + // + // $Id$ + // + // Part of the ht://Dig package + // Copyright (c) 2000 The ht://Dig Group + // For copyright details, see the file COPYING in your distribution + // or the GNU Public License version 2 or later + // + // + #ifndef _splitmatches_h + #define _splitmatches_h + + #include "Configuration.h" + #include "ResultMatch.h" + #include "List.h" + + class SplitMatches + { + public: + SplitMatches(Configuration &); + ~SplitMatches(); + + void Add(ResultMatch *, char *); + List *JoinedLists(); + List *Get_First() + { mySubAreas->Start_Get(); return Get_Next(); } + + List *Get_Next(); + + private: + // These member functions are not supposed to be implemented. + SplitMatches(); + SplitMatches(const SplitMatches &); + void operator= (const SplitMatches &); + + // (Lists of) Matches for each sub-area regex. + List *mySubAreas; + + // Matches for everything else. + List *myDefaultList; + }; + + #endif /* _splitmatches_h */