
From grdetil@scrc.umanitoba.ca Mon May 15 13:04:41 2000
Date: Mon, 15 May 2000 14:12:00 -0500 (CDT)
From: Gilles Detillieux <grdetil@scrc.umanitoba.ca>
To: Joe.Sanderson@ecora.com
Cc: ht3bugs@htdig.org, htdig3-bugs@htdig.org, htdig@htdig.org
Subject: [htdig] Re: 3.2.0b2 - problem with either no stars, or infinite loop writing out (PR#846)

According to Joe.Sanderson@ecora.com:
> I've downloaded the 3.2.0b2 beta, and built on Linux.  I have run into a
> problem involving the stars in the output html produced by htsearch.
> 
> For all searches, the output html contains a number of star_blank
> references for all matches - I never get the star.gif references, no
> matter how good the match is.
> 
> Another problem that may be related is that for some searches (have not
> yet seen a pattern of what search string causes this) htsearch goes into
> an infinite loop outputting <img src = "/htdig/star_blank.gif" alt = "
> "> to the output file.

Yes, Terry Luedtke reported this problem and posted a patch for it, to
htdig3-dev@htdig.org, back on May 3rd.  It didn't seem to make it into
Joe's patch archive, so I'll repost it here for those who missed it.
The patch fixes a few bugs in the score calculation which cause the
problems in star generation.

Date: Wed, 03 May 2000 17:56:50 -0400
From: "Terry Luedtke" <LuedtkT@mail.nlm.nih.gov>
To: <grdetil@scrc.umanitoba.ca>
Cc: <htdig3-dev@htdig.org>
Subject: Re: [htdig3-dev] Too many stars

Gilles Detillieux <grdetil@scrc.umanitoba.ca> 03-May-00 17:24:00 >>>
>According to Terry Luedtke:
>> I'm not sure how to extract a patch, so here are the diffs for each file.
>
> diff -rup dir1 dir2     or    diff -rc3p dir1 dir2
>
> Add an "N" to the options if you created new files that you want included
> in the patch.

Hmm, that doesn't work with Solaris's diff, so I built GNU diff.  Here are the changes for scoring.

Terry Luedtke
National Library of Medicine


diff -rup htdig-3.2.0b2.orig/htcommon/DocumentRef.h htdig-3.2.0b2/htcommon/DocumentRef.h
--- htdig-3.2.0b2.orig/htcommon/DocumentRef.h	Tue Apr 11 18:53:19 2000
+++ htdig-3.2.0b2/htcommon/DocumentRef.h	Wed May  3 15:13:39 2000
@@ -64,7 +64,7 @@ class DocumentRef : public Object
     ReferenceState	DocState()			{return docState;}
     int			DocSize()			{return docSize;}
     List		*DocAnchors()			{return &docAnchors;}
-    int			DocScore()			{return docScore;}
+    double     		DocScore()			{return docScore;}
     int                 DocSig()                        {return docSig;}
     int			DocAnchor()			{return docAnchor;}
     int			DocHopCount()			{return docHopCount;}
@@ -89,7 +89,7 @@ class DocumentRef : public Object
     void                DocSig(int s)                   {docSig = s;}
     void		DocAnchors(List &l)		{docAnchors = l;}
     void		AddAnchor(const char *a);
-    void		DocScore(int s)			{docScore = s;}
+    void		DocScore(double s)		{docScore = s;}
     void		DocAnchor(int a)		{docAnchor = a;}
     void		DocHopCount(int h)		{docHopCount = h;}
     void		DocEmail(const char *e)		{docEmail = e;}
@@ -156,7 +156,7 @@ class DocumentRef : public Object
     //
     
     // This is the current score of this document.
-    int			docScore;
+    double			docScore;
     // This is the nearest anchor for the search word.
     int			docAnchor;
 
diff -rup htdig-3.2.0b2.orig/htsearch/ResultMatch.cc htdig-3.2.0b2/htsearch/ResultMatch.cc
--- htdig-3.2.0b2.orig/htsearch/ResultMatch.cc	Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/ResultMatch.cc	Wed May  3 15:32:28 2000
@@ -79,8 +79,17 @@ ScoreMatch::compare(const void *a1, cons
 {
     ResultMatch	*m1 = *((ResultMatch **) a1);
     ResultMatch	*m2 = *((ResultMatch **) a2);
+    double score1 = m1->getScore();
+    double score2 = m2->getScore();
 
-    return m2->getScore() - m1->getScore();
+    if(score1 == score2)
+       return 0;
+    else if(score1 < score2)
+       return 1;
+    else
+       return -1;
+
+    //    return m2->getScore() - m1->getScore();
 }
 
 ResultMatch::CmpFun

diff -rup htdig-3.2.0b2.orig/htsearch/ResultMatch.h htdig-3.2.0b2/htsearch/ResultMatch.h
--- htdig-3.2.0b2.orig/htsearch/ResultMatch.h	Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/ResultMatch.h	Wed May  3 15:08:53 2000
@@ -38,10 +38,10 @@ public:
 	//
 	void			setAnchor(int a)	{anchor = a;}
 	void			setID(int i)		{id = i;}
-	void			setScore(float s)	{score = s;}
+	void			setScore(double s)	{score = s;}
 	
 	int				getAnchor()	{return anchor;}
-	int				getScore()	{return (int) score;}
+	double				getScore()	{return score;}
 	int			getID()			{return id;}
 
         // Multiple database support
@@ -74,7 +74,7 @@ private:
 	    SortByID
 	};
 
-	float			score;
+	double			score;
 	int				anchor;
 	int				id;
         Collection              *collection;


diff -rup htdig-3.2.0b2.orig/htsearch/Display.cc htdig-3.2.0b2/htsearch/Display.cc
--- htdig-3.2.0b2.orig/htsearch/Display.cc	Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/Display.cc	Wed May  3 17:15:05 2000
@@ -293,7 +293,7 @@ Display::displayMatch(ResultMatch *match
 	vars.Remove("ANCHOR");
       }
     
-    vars.Add("SCORE", new String(form("%d", ref->DocScore())));
+    vars.Add("SCORE", new String(form("%f", ref->DocScore())));
     vars.Add("CURRENT", new String(form("%d", current)));
     char	*title = ref->DocTitle();
     if (!title || !*title)
@@ -860,9 +860,12 @@ Display::generateStars(DocumentRef *ref,
     const String blank = config["star_blank"];
     double	score;
 
+ 
+
     if (maxScore != 0)
     {
 	score = (ref->DocScore() - minScore) / (maxScore - minScore);
+        if(debug) cerr << "generateStars: doc, min, max " << ref->DocScore() << ", " << minScore << ", " << maxScore <<endl;
     }
     else
     {
@@ -871,6 +874,8 @@ Display::generateStars(DocumentRef *ref,
     }
     int		nStars = int(score * (maxStars - 1) + 0.5) + 1;
 
+    if(debug) cerr << "generateStars: nStars " << nStars << " of " << maxStars <<endl;
+
     if (right)
     {
 	for (i = 0; i < maxStars - nStars; i++)
@@ -1121,19 +1126,20 @@ Display::buildMatchList()
 	// We want older docs to have smaller values and the
 	// ultimate values to be a reasonable size (max about 100)
 
-	if (date_factor != 0.0 || backlink_factor != 0.0)
+	if (date_factor != 0.0)
 	{
 	    score += date_factor * 
 	      ((thisRef->DocTime() * 1000 / (double)time(0)) - 900);
+        }
   
+	if (backlink_factor != 0.0)
+	{
 	    int links = thisRef->DocLinks();
 	    if (links == 0)
 	      links = 1; // It's a hack, but it helps...
   
 	    score += backlink_factor
 	      * (thisRef->DocBackLinks() / (double)links);
-	    if (score <= 1.0)
-	      score = 1.0;
 	}
 
 	thisMatch->setTime(thisRef->DocTime());   
@@ -1144,18 +1150,28 @@ Display::buildMatchList()
 	// Get rid of it to free the memory!
 	delete thisRef;
 
-	thisMatch->setScore(1.0 + log(score));
+	score = log(1.0 + score);
+	thisMatch->setScore(score);
 	thisMatch->setAnchor(dm->anchor);
 		
 	//
 	// Append this match to our list of matches.
 	//
  	matches.Add(thisMatch, url.get());
+
+        if (debug)
+        {
+	  cerr << "score " << score << "(" << thisMatch->getScore() << "), maxScore " << maxScore <<", minScore " << minScore << endl;
+        }
  
  	if (maxScore < score)
- 	    maxScore = score;
+	  {if(debug) cerr << "Set maxScore = score" <<endl;
+           maxScore = score;
+          }
  	if (minScore > score)
+	  {if(debug) cerr << "Set minScore = score" <<endl;
  	    minScore = score;
+          }
     }
   }
 


-- 
Gilles R. Detillieux              E-mail: <grdetil@scrc.umanitoba.ca>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig-unsubscribe@htdig.org
You will receive a message to confirm this.
