From: Scott Gifford <sgifford@suspectclass.com>
Subject: Updated Patch for supporting multiple noindex_start/noindex_end
To: htdig-dev@lists.sourceforge.net
Date: 08 Jul 2002 04:08:00 -0500

My multiple noindex_start/noindex_end patch has been updated.  The old
one had some typos that I didn't notice, which caused crashes on some
systems.

You can get the new version at:

    http://www.suspectclass.com/~sgifford/htdig/htdig-3.1.6-multiple-noindex.patch

More information about this patch is available at:

    http://www.suspectclass.com/~sgifford/htdig/htdig-3.1.6-multiple-noindex.README

For posterity, the old version is available at:

    http://www.suspectclass.com/~sgifford/htdig/htdig-3.1.6-multiple-noindex-0.1.patch

-----ScottG.


******************************


From: Scott Gifford <sgifford@suspectclass.com>
Subject: Patch for supporting multiple noindex_start/noindex_stop
To: htdig-dev@lists.sourceforge.net
Date: 30 Mar 2002 05:39:31 -0500

Here's a brief patch for 3.1.6 to support multiple noindex_start and
noindex_end directives in the config file.  It's a bit of a kludge,
but it solved the problem I was trying to solve.

It adds 10 new noindex_start directives, "noindex_start1" through
"noindex_start10".  It also adds 10 corresponding noindex_end
directives, "noindex_end1" through "noindex_end10".  The standard
noindex_start and noindex_end directives are still supported, and are
considered to be "noindex_start0" and "noindex_end0".  The
noindex_start* tags are scanned sequentially, so whichever one matches
first will be the one that is used.  Only the end tag for the start
tag that was found will be recognized.

I'm new to this list and somewhat new to htdig, and I hacked this
patch together in a little over an hour, so if there's something
really stupid about it, cut me some slack, tell me what it is, and
I'll fix it.  :-)

Patch is at:

    http://www.suspectclass.com/~sgifford/htdig/htdig-3.1.6-multiple-noindex.patch

I look forward to your comments,

----ScottG.


diff -ur htdig-3.1.6/htcommon/defaults.cc htdig-3.1.6-sg1/htcommon/defaults.cc
--- htdig-3.1.6/htcommon/defaults.cc	Thu Jan 31 18:47:17 2002
+++ htdig-3.1.6-sg1/htcommon/defaults.cc	Fri Mar 29 15:57:30 2002
@@ -125,6 +125,26 @@
     {"no_excerpt_show_top",             "false"},
     {"noindex_start",                   "<!--htdig_noindex-->"},
     {"noindex_end",                     "<!--/htdig_noindex-->"},
+    {"noindex_start1",                   ""},
+    {"noindex_end1",                     ""},
+    {"noindex_start2",                   ""},
+    {"noindex_end2",                     ""},
+    {"noindex_start3",                   ""},
+    {"noindex_end3",                     ""},
+    {"noindex_start4",                   ""},
+    {"noindex_end4",                     ""},
+    {"noindex_start5",                   ""},
+    {"noindex_end5",                     ""},
+    {"noindex_start6",                   ""},
+    {"noindex_end6",                     ""},
+    {"noindex_start7",                   ""},
+    {"noindex_end7",                     ""},
+    {"noindex_start8",                   ""},
+    {"noindex_end8",                     ""},
+    {"noindex_start9",                   ""},
+    {"noindex_end9",                     ""},
+    {"noindex_start10",                   ""},
+    {"noindex_end10",                     ""},
     {"no_next_page_text",		"[next]"},
     {"no_page_list_header",		""},
     {"no_page_number_text",		""},
diff -ur htdig-3.1.6/htdig/HTML.cc htdig-3.1.6-sg1/htdig/HTML.cc
--- htdig-3.1.6/htdig/HTML.cc	Thu Jan 31 18:47:17 2002
+++ htdig-3.1.6-sg1/htdig/HTML.cc	Fri Mar 29 16:07:55 2002
@@ -158,10 +158,31 @@
     unsigned char	*position = (unsigned char *) contents->get();
     unsigned char       *text = (unsigned char *) new char[contents->length()+1];
     unsigned char       *ptext = text;
-    static char         *skip_start = config["noindex_start"];
-    static char         *skip_end = config["noindex_end"];
-    int			skip_start_len = strlen(skip_start);
-    int			skip_end_len = strlen(skip_end);
+    static char         *skip_start[11];
+    static char         *skip_end[11];
+    int			skip_start_len[11];
+    int			skip_end_len[11];
+    int 		should_skip;
+    int			i;
+
+
+    skip_start[0] = config["noindex_start"]; skip_end[0] = config["noindex_end"];
+    skip_start[1] = config["noindex_start1"]; skip_end[1] = config["noindex_end1"];
+    skip_start[2] = config["noindex_start2"]; skip_end[2] = config["noindex_end2"];
+    skip_start[3] = config["noindex_start3"]; skip_end[3] = config["noindex_end3"];
+    skip_start[4] = config["noindex_start4"]; skip_end[4] = config["noindex_end4"];
+    skip_start[5] = config["noindex_start5"]; skip_end[5] = config["noindex_end5"];
+    skip_start[6] = config["noindex_start6"]; skip_end[6] = config["noindex_end6"];
+    skip_start[7] = config["noindex_start7"]; skip_end[7] = config["noindex_end7"];
+    skip_start[8] = config["noindex_start8"]; skip_end[8] = config["noindex_end8"];
+    skip_start[9] = config["noindex_start9"]; skip_end[9] = config["noindex_end9"];
+    skip_start[10] = config["noindex_start10"]; skip_end[10] = config["noindex_end10"];
+
+    for(i=0;i<11;i++)
+    {
+        skip_start_len[i] = strlen(skip_start[i]);
+        skip_end_len[i] = strlen(skip_end[i]);
+    }
 
     keywordsCount = 0;
     offset = 0;
@@ -178,21 +199,27 @@
 	
     while (*position)
     {
-
       //
       // Filter out section marked to be ignored for indexing. 
       // This can contain any HTML. 
       //
-      if (*skip_start &&
-	  mystrncasecmp((char *)position, skip_start, skip_start_len) == 0)
+      should_skip = 0;
+      for(i=0;i<11;i++)
+      {
+        if (skip_start_len[i] &&
+	  mystrncasecmp((char *)position, skip_start[i], skip_start_len[i]) == 0)
 	{
-	  q = (unsigned char*)mystrcasestr((char *)position, skip_end);
+	  q = (unsigned char*)mystrcasestr((char *)position, skip_end[i]);
 	  if (!q)
 	    *position = '\0';       // Rest of document will be skipped...
 	  else
-	    position = q + skip_end_len;
-	  continue;
+	    position = q + skip_end_len[i];
+          should_skip = 1;
+	  break;
 	}
+      }
+      if (should_skip)
+        continue;
 
       if (strncmp((char *)position, "<!", 2) == 0)
 	{

