diff -urd htdig-3.1.6/Makefile.config.in htdig-3.1.6-ssl/Makefile.config.in --- htdig-3.1.6/Makefile.config.in Fri Feb 1 00:47:14 2002 +++ htdig-3.1.6-ssl/Makefile.config.in Mon Oct 21 17:30:40 2002 @@ -23,14 +23,15 @@ PDF_PARSER= @PDF_PARSER@ SENDMAIL= @SENDMAIL@ -DEFINES= -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" +DEFINES= @DEFS@ -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" -DHAVE_SSL LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib +LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib -L$(OPENSSL)/lib INCS= -I$(top_srcdir)/htlib -I$(top_srcdir)/htcommon \ - -I../db/dist -I../include + -I../db/dist -I../include -I$(OPENSSL)/include HTLIBS= ../htcommon/libcommon.a \ ../htlib/libht.a \ ../db/dist/libdb.a -LIBS= $(HTLIBS) @LIBS@ +LIBS= $(HTLIBS) @LIBS@ -lssl -lcrypto DIST= @PACKAGE@-@VERSION@ DISTDIR= $(top_srcdir)/../$(DIST) diff -urd htdig-3.1.6/htcommon/DocumentDB.cc htdig-3.1.6-ssl/htcommon/DocumentDB.cc --- htdig-3.1.6/htcommon/DocumentDB.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htcommon/DocumentDB.cc Mon Oct 21 04:40:54 2002 @@ -231,7 +231,11 @@ while ((key = dbf->Get_Next())) { dbf->Get(key, data); +#ifndef HAVE_SSL if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0) +#else /* HAVE_SSL */ + if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 || strncmp(HtURLCodec::instance()->decode(key), "https:", 6) == 0) +#endif /* HAVE_SSL */ { ref = new DocumentRef; ref->Deserialize(data); @@ -419,7 +423,11 @@ while ((coded_key = dbf->Get_Next())) { String key = HtURLCodec::instance()->decode(coded_key); +#ifndef HAVE_SSL if (mystrncasecmp(key, "http:", 5) == 0) +#else /* HAVE_SSL */ + if (mystrncasecmp(key, "http:", 5) == 0 || mystrncasecmp(key, "https:", 6) == 0) +#endif /* HAVE_SSL */ { DocumentRef *ref = (*this)[key]; if (ref) diff -urd htdig-3.1.6/htcommon/defaults.cc htdig-3.1.6-ssl/htcommon/defaults.cc --- htdig-3.1.6/htcommon/defaults.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htcommon/defaults.cc
Mon Oct 21 04:40:54 2002 @@ -43,7 +43,11 @@ {"boolean_syntax_errors", "Expected 'a search word' 'at the end' 'instead of' 'end of expression'"}, {"build_select_lists", ""}, {"case_sensitive", "true"}, +#ifndef HAVE_SSL {"common_url_parts", "http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"}, +#else /* HAVE_SSL */ + {"common_url_parts", "https:// https://www. http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:"}, +#endif /* HAVE_SSL */ {"create_image_list", "false"}, {"create_url_list", "false"}, {"compression_level", "0"}, diff -urd htdig-3.1.6/htdig/Document.cc htdig-3.1.6-ssl/htdig/Document.cc --- htdig-3.1.6/htdig/Document.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/Document.cc Mon Oct 21 17:37:56 2002 @@ -474,7 +474,10 @@ return Document_no_host; } } - + +#ifdef HAVE_SSL + c.assign_ssl(strcmp(url->service(), "https") == 0); +#endif /* HAVE_SSL */ if (c.connect(1) == NOTOK) { if (debug) @@ -484,6 +487,10 @@ { cout << "(Via proxy " << proxy->host() << ':' << proxy->port() << ')' << endl; } +#ifdef HAVE_SSL + if (strcmp(url->service(), "https") == 0) + cout << c.ssl_error_message() << endl; +#endif /* HAVE_SSL */ } c.close(); return Document_no_server; diff -urd htdig-3.1.6/htdig/Images.cc htdig-3.1.6-ssl/htdig/Images.cc --- htdig-3.1.6/htdig/Images.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/Images.cc Mon Oct 21 04:40:54 2002 @@ -61,7 +61,11 @@ { String u = url; URL Url(url); +#ifndef HAVE_SSL if (strcmp(Url.service(), "http") != 0) +#else /* HAVE_SSL */ + if (strcmp(Url.service(), "http") != 0 && strcmp(Url.service(), "https") != 0) +#endif /* HAVE_SSL */ return 0; u.lowercase(); @@ -81,6 +85,9 @@ return 0; if (c.assign_server(Url.host()) == NOTOK) return 0; +#ifdef HAVE_SSL + c.assign_ssl(strcmp(Url.service(), "https") == 0); +#endif /* HAVE_SSL */ if (c.connect(1) == NOTOK) { diff -urd htdig-3.1.6/htdig/Retriever.cc 
htdig-3.1.6-ssl/htdig/Retriever.cc --- htdig-3.1.6/htdig/Retriever.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/Retriever.cc Tue Oct 22 06:44:31 2002 @@ -137,10 +137,14 @@ cout << "\t" << from << ":" << (int) log << ":" << url; if (!server) { - String robotsURL = "http://"; - robotsURL << u.host() << "/robots.txt"; + String robotsURL = u.service(); + robotsURL << "://" << u.host() << "/robots.txt"; StringList *localRobotsFiles = GetLocal(robotsURL.get()); +#ifndef HAVE_SSL server = new Server(u.host(), u.port(), localRobotsFiles); +#else /* HAVE_SSL */ + server = new Server(u.host(), u.port(), strcmp(u.service(), "https") == 0, localRobotsFiles); +#endif /* HAVE_SSL */ servers.Add(u.signature(), server); delete localRobotsFiles; } @@ -679,10 +683,18 @@ // Currently, we only deal with HTTP URLs. Gopher and ftp will // come later... ***FIX*** // +#ifndef HAVE_SSL if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0) +#else /* HAVE_SSL */ + if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "https://", 8) != 0)) +#endif /* HAVE_SSL */ { if (debug > 2) +#ifndef HAVE_SSL cout << endl <<" Rejected: Not an http or relative link!"; +#else /* HAVE_SSL */ + cout << endl <<" Rejected: Not an http, https or relative link!"; +#endif /* HAVE_SSL */ return FALSE; } @@ -1365,10 +1377,14 @@ // // Hadn't seen this server, yet. Register it // - String robotsURL = "http://"; - robotsURL << url.host() << "/robots.txt"; + String robotsURL = url.service(); + robotsURL << "://" << url.host() << "/robots.txt"; StringList *localRobotsFile = GetLocal(robotsURL.get()); +#ifndef HAVE_SSL server = new Server(url.host(), url.port(), localRobotsFile); +#else /* HAVE_SSL */ + server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile); +#endif /* HAVE_SSL */ servers.Add(url.signature(), server); delete localRobotsFile; } @@ -1509,10 +1525,14 @@ // // Hadn't seen this server, yet. 
Register it // - String robotsURL = "http://"; - robotsURL << url.host() << "/robots.txt"; + String robotsURL = url.service(); + robotsURL << "://" << url.host() << "/robots.txt"; StringList *localRobotsFile = GetLocal(robotsURL.get()); +#ifndef HAVE_SSL server = new Server(url.host(), url.port(), localRobotsFile); +#else /* HAVE_SSL */ + server = new Server(url.host(), url.port(), strcmp(url.service(), "https") == 0, localRobotsFile); +#endif /* HAVE_SSL */ servers.Add(url.signature(), server); delete localRobotsFile; } diff -urd htdig-3.1.6/htdig/Server.cc htdig-3.1.6-ssl/htdig/Server.cc --- htdig-3.1.6/htdig/Server.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/Server.cc Mon Oct 21 19:57:02 2002 @@ -20,9 +20,17 @@ //***************************************************************************** -// Server::Server(char *host, int port, StringList *local_robots_files) +//#ifndef HAVE_SSL +// Server::Server(char *host, int port, StringList *local_robots_files) +//#else /* HAVE_SSL */ +// Server::Server(char *host, int port, int ssl, StringList *local_robots_files) +//#endif /* HAVE_SSL */ // -Server::Server(char *host, int port, StringList *local_robots_files) +#ifndef HAVE_SSL +Server::Server(char *host, int port, StringList *local_robots_files) +#else /* HAVE_SSL */ +Server::Server(char *host, int port, int ssl, StringList *local_robots_files) +#endif /* HAVE_SSL */ { if (debug > 0) cout << endl << "New server: " << host << ", " << port << endl; @@ -41,6 +49,9 @@ // Attempt to get a robots.txt file from the specified server // String url = "http://"; +#ifdef HAVE_SSL + if (ssl) url = "https://"; +#endif /* HAVE_SSL */ url << host << ':' << port << "/robots.txt"; Document doc(url, 0); diff -urd htdig-3.1.6/htdig/Server.h htdig-3.1.6-ssl/htdig/Server.h --- htdig-3.1.6/htdig/Server.h Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/Server.h Mon Oct 21 19:57:02 2002 @@ -26,7 +26,11 @@ // // Construction/Destruction // - Server(char *host, int port, StringList 
*local_robots_files = NULL); +#ifndef HAVE_SSL + Server(char *host, int port, StringList *local_robots_files = NULL); +#else /* HAVE_SSL */ + Server(char *host, int port, int ssl, StringList *local_robots_files = NULL); +#endif /* HAVE_SSL */ ~Server(); // diff -urd htdig-3.1.6/htdig/htdig.cc htdig-3.1.6-ssl/htdig/htdig.cc --- htdig-3.1.6/htdig/htdig.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htdig/htdig.cc Mon Oct 21 05:35:33 2002 @@ -386,7 +386,11 @@ { cout << "usage: htdig [-v][-i][-c configfile][-t][-h hopcount][-s] \\\n"; cout << " [-u username:password][-a][-m minimalfile][file]\n"; - cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "This program is part of ht://Dig " << VERSION; +#ifdef HAVE_SSL + cout << "/SSL"; +#endif /* HAVE_SSL */ + cout << "\n\n"; cout << "Options:\n"; cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; diff -urd htdig-3.1.6/htlib/Connection.cc htdig-3.1.6-ssl/htlib/Connection.cc --- htdig-3.1.6/htlib/Connection.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htlib/Connection.cc Tue Oct 22 06:44:31 2002 @@ -42,6 +42,10 @@ int rresvport(int *); } +#ifdef HAVE_SSL +SSL_CTX *Connection::ctx = NULL; +SSL_METHOD *Connection::meth = NULL; +#endif /* HAVE_SSL */ List all_connections; Connection::Connection() @@ -54,7 +58,47 @@ timeout_value = 0; retry_value = 1; wait_time = 5; // wait 5 seconds after a failed connection +#ifdef HAVE_SSL + ssl = NULL; + m_ssl_on = 0; +#endif /* HAVE_SSL */ +} + +#ifdef HAVE_SSL +void Connection::initSSL() +{ + static int init = 0; + + if (! 
init) + { + init = 1; + + SSL_library_init(); + SSL_load_error_strings(); +#ifdef SSL_NEED_RANDFILE + char buffer[200]; + RAND_load_file(RAND_file_name(buffer, sizeof buffer), -1); +#endif /* SSL_NEED_RANDFILE */ +#ifdef SSL_NEEDS_EGD_SOCKET + if (getenv("EGD_SOCKET") != NULL) + { + RAND_egd(getenv("EGD_SOCKET")); + } +#endif /* SSL_NEEDS_EGD_SOCKET */ + meth = SSLv23_client_method(); + } + + if (ctx == NULL) + { + ctx = SSL_CTX_new(meth); + if (ctx == NULL) + { + cout << ssl_error_message() << endl; + exit(1); + } + } } +#endif /* HAVE_SSL */ //************************************************************************* @@ -79,6 +117,10 @@ timeout_value = 0; retry_value = 1; wait_time = 5; +#ifdef HAVE_SSL + ssl = NULL; + m_ssl_on = 0; +#endif /* HAVE_SSL */ } @@ -91,6 +133,13 @@ this->close(); delete peer; delete server_name; +#ifdef HAVE_SSL + if (ctx != NULL) + { + SSL_CTX_free(ctx); + ctx = NULL; + } +#endif } @@ -164,7 +213,15 @@ connected = 0; if (sock >= 0) { +#ifdef HAVE_SSL + if (ssl != NULL) /* ssl is still NULL when connect() failed before SSL_new() */ + SSL_shutdown(ssl); +#endif /* HAVE_SSL */ int ret = ::close(sock); +#ifdef HAVE_SSL + SSL_free(ssl); + ssl = NULL; +#endif /* HAVE_SSL */ sock = -1; return ret; } @@ -245,6 +302,27 @@ static void handler_timeout(int) { } + +#ifdef HAVE_SSL +//***************************************************************************** +// int Connection::assign_ssl(int ssl_on) +// +int Connection::assign_ssl(int ssl_on) +{ + m_ssl_on = ssl_on; + return OK; +} + +//***************************************************************************** +// char* Connection::ssl_error_message(void) +// +char* Connection::ssl_error_message(void) +{ + return ERR_error_string(ERR_get_error(), NULL); +} +#endif /* HAVE_SSL */ + + //***************************************************************************** // int Connection::connect(int allow_EINTR) // @@ -253,6 +331,11 @@ int status; int retries = retry_value; +#ifdef HAVE_SSL + if (m_ssl_on) + initSSL(); +#endif + while (retries--) { // @@ -278,8
+361,29 @@ if (status == 0 || errno == EALREADY || errno == EISCONN) { +#ifndef HAVE_SSL connected = 1; return OK; +#else /* HAVE_SSL */ + if (m_ssl_on) + { + ssl = SSL_new(ctx); + if (ssl != NULL) + { + SSL_set_fd(ssl, sock); + if (SSL_connect(ssl) > 0) + { + connected = 1; + return OK; + } + } + } + else + { + connected = 1; + return OK; + } +#endif /* HAVE_SSL */ } // @@ -441,7 +545,16 @@ } if (!need_io_stop) + { +#ifndef HAVE_SSL count = ::read(sock, buffer, maxlength); +#else /* HAVE_SSL */ + if (ssl != NULL) + count = SSL_read(ssl, buffer, maxlength); + else + count = ::read(sock, buffer, maxlength); +#endif /* HAVE_SSL */ + } else count = -1; // Input timed out } @@ -461,7 +574,14 @@ do { +#ifndef HAVE_SSL count = ::write(sock, buffer, maxlength); +#else /* HAVE_SSL */ + if (ssl != NULL) + count = SSL_write(ssl, buffer, maxlength); + else + count = ::write(sock, buffer, maxlength); +#endif /* HAVE_SSL */ } while (count < 0 && errno == EINTR && !need_io_stop); need_io_stop = 0; diff -urd htdig-3.1.6/htlib/Connection.h htdig-3.1.6-ssl/htlib/Connection.h --- htdig-3.1.6/htlib/Connection.h Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htlib/Connection.h Mon Oct 21 05:57:32 2002 @@ -44,6 +44,16 @@ #include #include +#ifdef HAVE_SSL +#include +#include +#include +#include +#include +#include +#include +#endif /* HAVE_SSL */ + class String; class Connection : public io @@ -73,6 +83,13 @@ int assign_server(unsigned int addr = INADDR_ANY); char *get_server() {return server_name;} +#ifdef HAVE_SSL + // SSL stuff + void initSSL(); + int assign_ssl(int ssl_on); + char *ssl_error_message(void); +#endif /* HAVE_SSL */ + // Connection establishment int connect(int allow_EINTR = 0); Connection *accept(int priv = 0); @@ -99,6 +116,12 @@ private: int sock; +#ifdef HAVE_SSL + int m_ssl_on; + SSL *ssl; + static SSL_CTX *ctx; + static SSL_METHOD *meth; +#endif /* HAVE_SSL */ struct sockaddr_in server; int connected; char *peer; diff -urd htdig-3.1.6/htlib/URL.cc 
htdig-3.1.6-ssl/htlib/URL.cc --- htdig-3.1.6/htlib/URL.cc Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htlib/URL.cc Tue Oct 22 06:44:31 2002 @@ -131,9 +131,21 @@ while (isalpha(*p)) p++; int hasService = (*p == ':'); +#ifndef HAVE_SSL if ((hasService && ((strncmp(ref, "http://", 7) == 0) || (strncmp(ref, "http:", 5) != 0))) || strncmp(ref, "//", 2) == 0) +#else /* HAVE_SSL */ + + if ((hasService && + ( (strncmp(ref, "http://", 7) == 0) || + (strncmp(ref, "https://", 8) == 0) || + ((strncmp(ref, "http:", 5) != 0) && /* some other service entirely; "http:x" and "https:x" stay relative */ + (strncmp(ref, "https:", 6) != 0)) + ) + ) + || (strncmp(ref, "//", 2) == 0)) +#endif /* HAVE_SSL */ { // // No need to look at the parent url since this is a complete url... @@ -218,7 +230,14 @@ _url << ":"; if (_host.length()) _url << "//" << _host; + // Add on the port if it's not one of the standard ones +#ifndef HAVE_SSL if (_port != 80 && strcmp(_service, "http") == 0) +#else /* HAVE_SSL */ + if ((strcmp(_service, "https") == 0 && _port != 443)|| + (strcmp(_service, "http" ) == 0 && _port != 80) + ) +#endif /* HAVE_SSL */ _url << ':' << _port; _url << _path; } @@ -292,13 +311,29 @@ if (p) _port = atoi(p); if (!p || _port <= 0) +#ifndef HAVE_SSL _port = 80; +#else /* HAVE_SSL */ + { + if( strcmp( _service, "https" ) == 0 ) + _port = 443; + else + _port = 80; + } +#endif /* HAVE_SSL */ } else { _host = strtok(p, "/"); _host.chop(" \t"); +#ifndef HAVE_SSL _port = 80; +#else /* HAVE_SSL */ + if( strcmp( _service, "https" ) == 0 ) + _port = 443; + else + _port = 80; +#endif /* HAVE_SSL */ } // @@ -317,7 +352,12 @@ // _url = _service; _url << "://" << _host; +#ifndef HAVE_SSL if (_port != 80) +#else /* HAVE_SSL */ + if ((strcmp(_service, "https") == 0 && _port != 443)|| + (strcmp(_service, "http" ) == 0 && _port != 80)) +#endif /* HAVE_SSL */ _url << ':' << _port; _url << _path; } @@ -446,7 +486,12 @@ _path.lowercase(); _url = _service; _url << "://" << _host; +#ifndef HAVE_SSL if (_port != 80) +#else /* HAVE_SSL */ + if ((strcmp(_service, "https") == 0 && _port != 443)|| + (strcmp(_service, "http" ) == 0 && _port != 80)) +#endif /* HAVE_SSL */ _url << ':' << _port; _url << _path; } @@ -496,7 +541,12 @@ if (_service.length() == 0 || _normal) return; +#ifndef HAVE_SSL if (strcmp(_service, "http") != 0) +#else /* HAVE_SSL */ + if (strcmp(_service, "http") != 0 && + strcmp(_service, "https") != 0) +#endif /* HAVE_SSL */ return; removeIndex(_path); @@ -553,7 +603,12 @@ _url << ":"; if (_host.length()) _url << "//" << _host; +#ifndef HAVE_SSL if (_port != 80 && strcmp(_service, "http") == 0) +#else /* HAVE_SSL */ + if ((strcmp(_service, "https") == 0 && 
_port != 443)|| + (strcmp(_service, "http" ) == 0 && _port != 80)) +#endif /* HAVE_SSL */ _url << ':' << _port; _url << _path; } @@ -496,7 +541,12 @@ if (_service.length() == 0 || _normal) return; +#ifndef HAVE_SSL if (strcmp(_service, "http") != 0) +#else /* HAVE_SSL */ + if (strcmp(_service, "http") != 0 && + strcmp(_service, "https") != 0) +#endif /* HAVE_SSL */ return; removeIndex(_path); @@ -553,7 +603,12 @@ _url << ":"; if (_host.length()) _url << "//" << _host; +#ifndef HAVE_SSL if (_port != 80 && strcmp(_service, "http") == 0) +#else /* HAVE_SSL */ + if ((strcmp(_service, "https") == 0 && _port != 443)|| + (strcmp(_service, "http" ) == 0 && _port != 80)) +#endif /* HAVE_SSL */ _url << ':' << _port; _url << _path; _normal = 1; diff -urd htdig-3.1.6/htlib/URL.h htdig-3.1.6-ssl/htlib/URL.h --- htdig-3.1.6/htlib/URL.h Fri Feb 1 00:47:17 2002 +++ htdig-3.1.6-ssl/htlib/URL.h Mon Oct 21 19:57:02 2002 @@ -59,18 +59,18 @@ void parse(char *url); - char *host() {return _host;} + char *host() {return _host.get();} void host(char *h) {_host = h;} int port() {return _port;} void port(int p) {_port = p;} - char *service() {return _service;} + char *service() {return _service.get();} void service(char *s) {_service = s;} - char *path() {return _path;} + char *path() {return _path.get();} void path(char *p); int hopcount() {return _hopcount;} void hopcount(int h) {_hopcount = h;} - char *get() {return _url;} + char *get() {return _url.get();} void dump(); void normalize(); void rewrite();