mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
Set proper User-agent header (Python-webchecker/<version>).
When -x is combined with -q, still do the checking, but don't print the errors in this phase -- they are reported by report_errors().
This commit is contained in:
parent
2739cd74b3
commit
c59a5d449f
1 changed file with 21 additions and 14 deletions
|
@ -73,8 +73,7 @@ hyperlinks. It does honor the <BASE> tag.
|
||||||
- Checking external links is not done by default; use -x to enable
|
- Checking external links is not done by default; use -x to enable
|
||||||
this feature. This is done because checking external links usually
|
this feature. This is done because checking external links usually
|
||||||
takes a lot of time. When enabled, this check is executed during the
|
takes a lot of time. When enabled, this check is executed during the
|
||||||
report generation phase (so -x is ignored when -q is specified). Even
|
report generation phase (even when the report is silent).
|
||||||
when -x is enabled, only ``http:'' URLs are checked.
|
|
||||||
|
|
||||||
|
|
||||||
Usage: webchecker.py [option] ... [rooturl] ...
|
Usage: webchecker.py [option] ... [rooturl] ...
|
||||||
|
@ -96,7 +95,7 @@ rooturl -- URL to start checking
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "0.1"
|
__version__ = "0.2"
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
@ -283,26 +282,29 @@ class Checker:
|
||||||
print "Report (%d to do, %d done, %d external, %d bad)" % (
|
print "Report (%d to do, %d done, %d external, %d bad)" % (
|
||||||
len(self.todo), len(self.done),
|
len(self.todo), len(self.done),
|
||||||
len(self.ext), len(self.bad))
|
len(self.ext), len(self.bad))
|
||||||
if verbose > 0:
|
if verbose > 0 or checkext:
|
||||||
self.report_extrefs(checkext)
|
self.report_extrefs(checkext)
|
||||||
# Report errors last because the output may get truncated
|
# Report errors last because the output may get truncated
|
||||||
self.report_errors()
|
self.report_errors()
|
||||||
|
|
||||||
def report_extrefs(self, checkext=0):
|
def report_extrefs(self, checkext=0):
|
||||||
if not self.ext:
|
if not self.ext:
|
||||||
print
|
if verbose > 0:
|
||||||
print "No external URLs"
|
print
|
||||||
|
print "No external URLs"
|
||||||
return
|
return
|
||||||
print
|
if verbose > 0:
|
||||||
if checkext:
|
print
|
||||||
print "External URLs (checking validity):"
|
if checkext:
|
||||||
else:
|
print "External URLs (checking validity):"
|
||||||
print "External URLs (not checked):"
|
else:
|
||||||
print
|
print "External URLs (not checked):"
|
||||||
|
print
|
||||||
urls = self.ext.keys()
|
urls = self.ext.keys()
|
||||||
urls.sort()
|
urls.sort()
|
||||||
for url in urls:
|
for url in urls:
|
||||||
show("HREF ", url, " from", self.ext[url])
|
if verbose > 0:
|
||||||
|
show("HREF ", url, " from", self.ext[url])
|
||||||
if not checkext:
|
if not checkext:
|
||||||
continue
|
continue
|
||||||
if url[:7] == 'mailto:':
|
if url[:7] == 'mailto:':
|
||||||
|
@ -315,7 +317,7 @@ class Checker:
|
||||||
if verbose > 3: print "OK"
|
if verbose > 3: print "OK"
|
||||||
except IOError, msg:
|
except IOError, msg:
|
||||||
msg = sanitize(msg)
|
msg = sanitize(msg)
|
||||||
print "Error", msg
|
if verbose > 0: print "Error", msg
|
||||||
self.bad[url] = msg
|
self.bad[url] = msg
|
||||||
|
|
||||||
def report_errors(self):
|
def report_errors(self):
|
||||||
|
@ -488,6 +490,11 @@ class MyURLopener(urllib.FancyURLopener):
|
||||||
|
|
||||||
http_error_default = urllib.URLopener.http_error_default
|
http_error_default = urllib.URLopener.http_error_default
|
||||||
|
|
||||||
|
def __init__(*args):
|
||||||
|
self = args[0]
|
||||||
|
apply(urllib.FancyURLopener.__init__, args)
|
||||||
|
self.addheaders = [('User-agent', 'Python-webchecker/%s' % __version__)]
|
||||||
|
|
||||||
def open_file(self, url):
|
def open_file(self, url):
|
||||||
path = urllib.url2pathname(urllib.unquote(url))
|
path = urllib.url2pathname(urllib.unquote(url))
|
||||||
if path[-1] != os.sep:
|
if path[-1] != os.sep:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue