+2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * wget.texi: Add description for --https-only.
+
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
* wget.texi (Download Options): Fix misspelling.
choose the correct protocol version. Fortunately, such servers are
quite rare.
+@item --https-only
+When in recursive mode, only HTTPS links are followed.
+
@cindex SSL certificate, check
@item --no-check-certificate
Don't check the server certificate against the available certificate
+2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * main.c: Add new option --https-only.
+ * options.h: Likewise.
+ * recur.c (download_child_p): Add check for HTTPS.
+
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (ssl_init): Prevent CA files from being loaded twice
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
{ "httppassword", &opt.http_passwd, cmd_string },
{ "httpproxy", &opt.http_proxy, cmd_string },
+#ifdef HAVE_SSL
+ { "httpsonly", &opt.https_only, cmd_boolean },
+#endif
{ "httpsproxy", &opt.https_proxy, cmd_string },
{ "httpuser", &opt.http_user, cmd_string },
{ "ignorecase", &opt.ignore_case, cmd_boolean },
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
{ "http-password", 0, OPT_VALUE, "httppassword", -1 },
{ "http-user", 0, OPT_VALUE, "httpuser", -1 },
+ { IF_SSL ("https-only"), 0, OPT_BOOLEAN, "httpsonly", -1 },
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
N_("\
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
SSLv3, and TLSv1.\n"),
+ N_("\
+ --https-only only follow secure HTTPS links\n"),
N_("\
--no-check-certificate don't validate the server's certificate.\n"),
N_("\
char *ca_directory; /* CA directory (hash files) */
char *ca_cert; /* CA certificate file to use */
-
char *random_file; /* file with random data to seed the PRNG */
char *egd_file; /* file name of the egd daemon socket */
+ bool https_only; /* whether to follow HTTPS only */
#endif /* HAVE_SSL */
bool cookies; /* whether cookies are used. */
}
/* Several things to check for:
- 1. if scheme is not http, and we don't load it
- 2. check for relative links (if relative_only is set)
- 3. check for domain
- 4. check for no-parent
- 5. check for excludes && includes
- 6. check for suffix
- 7. check for same host (if spanhost is unset), with possible
+ 1. if scheme is not https and https_only requested
+ 2. if scheme is not http, and we don't load it
+ 3. check for relative links (if relative_only is set)
+ 4. check for domain
+ 5. check for no-parent
+ 6. check for excludes && includes
+ 7. check for suffix
+ 8. check for same host (if spanhost is unset), with possible
gethostbyname baggage
- 8. check for robots.txt
+ 9. check for robots.txt
Addendum: If the URL is FTP, and it is to be loaded, only the
domain and suffix settings are "stronger".
More time- and memory- consuming tests should be put later on
the list. */
+ if (opt.https_only && u->scheme != SCHEME_HTTPS)
+ {
+ DEBUGP (("Not following non-HTTPS links.\n"));
+ goto out;
+ }
+
/* Determine whether URL under consideration has a HTTP-like scheme. */
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
+2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
+ * run-px (tests): Likewise.
+ * Test--httpsonly-r.px: New file.
+
2013-03-12 Darshit Shah <darnir@gmail.com>
* Makefile.am (EXTRA_DIST): Add Test--post-file.px.
Test--spider-r--no-content-disposition.px \
Test--spider-r--no-content-disposition-trivial.px \
Test--spider-r.px \
+ Test--httpsonly-r.px \
run-px certs
check_PROGRAMS = unit-tests
--- /dev/null
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use HTTPTest;
+
+
+###############################################################################
+
+my $mainpage = <<EOF; # content of '/index.html'; its only link is a plain http:// URL
+<html>
+<head>
+ <title>Main Page</title>
+</head>
+<body>
+ <p>
+ Some text and a link to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
+ </p>
+</body>
+</html>
+EOF
+
+my $secondpage = <<EOF; # content of '/secondpage.html', the target of the http:// link
+<html>
+<head>
+ <title>Second Page</title>
+</head>
+<body>
+ <p>
+ Anything.
+ </p>
+</body>
+</html>
+EOF
+
+# URL table handed to HTTPTest as its input; each path maps to a hash with
+# the keys code, msg, headers and content.
+my %urls = (
+ '/index.html' => {
+ code => "200",
+ msg => "Dontcare",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $mainpage,
+ },
+ '/secondpage.html' => {
+ code => "200",
+ msg => "Dontcare",
+ headers => {
+ "Content-type" => "text/html",
+ },
+ content => $secondpage,
+ }
+);
+
+my $cmdline = $WgetTest::WGETPATH . " --https-only -r -nH http://localhost:{{port}}/"; # recursive fetch with --https-only, starting from an http:// URL
+
+my $expected_error_code = 0; # expected wget exit status
+
+my %expected_downloaded_files = ( # only index.html is listed; secondpage.html is reachable solely through the http:// link
+ 'index.html' => {
+ content => $mainpage,
+ },
+);
+
+###############################################################################
+
+# Build the test from the URL table, the wget command line, the expected
+# exit status and the expected set of downloaded files.
+my $the_test = HTTPTest->new (name => "Test--httpsonly-r",
+                              input => \%urls,
+                              cmdline => $cmdline,
+                              errcode => $expected_error_code,
+                              output => \%expected_downloaded_files);
+
+# The script's exit status is whatever run() returns.
+exit $the_test->run();
+
+# vim: et ts=4 sw=4
+
'Test--spider-r--no-content-disposition.px',
'Test--spider-r--no-content-disposition-trivial.px',
'Test--spider-r.px',
+ 'Test--httpsonly-r.px',
);
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {