mirror of
https://github.com/python/cpython.git
synced 2025-11-11 14:44:57 +00:00
Some refinements of the external-link checking code: insert the errors
in the 'bad' dictionary (sanitize them so they are picklable; the sanitizing code is now a subroutine); don't check mailto: URLs; omit the colon in the Error message.
This commit is contained in:
parent
de66268588
commit
2739cd74b3
1 changed file with 22 additions and 9 deletions
|
|
@ -305,13 +305,18 @@ class Checker:
|
||||||
show("HREF ", url, " from", self.ext[url])
|
show("HREF ", url, " from", self.ext[url])
|
||||||
if not checkext:
|
if not checkext:
|
||||||
continue
|
continue
|
||||||
|
if url[:7] == 'mailto:':
|
||||||
|
if verbose > 2: print "Not checking", url
|
||||||
|
continue
|
||||||
if verbose > 2: print "Checking", url, "..."
|
if verbose > 2: print "Checking", url, "..."
|
||||||
try:
|
try:
|
||||||
f = self.urlopener.open(url)
|
f = self.urlopener.open(url)
|
||||||
f.close()
|
f.close()
|
||||||
if verbose > 3: print "OK"
|
if verbose > 3: print "OK"
|
||||||
except IOError, msg:
|
except IOError, msg:
|
||||||
print "Error:", msg
|
msg = sanitize(msg)
|
||||||
|
print "Error", msg
|
||||||
|
self.bad[url] = msg
|
||||||
|
|
||||||
def report_errors(self):
|
def report_errors(self):
|
||||||
if not self.bad:
|
if not self.bad:
|
||||||
|
|
@ -327,7 +332,10 @@ class Checker:
|
||||||
try:
|
try:
|
||||||
origins = self.done[url]
|
origins = self.done[url]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
try:
|
||||||
origins = self.todo[url]
|
origins = self.todo[url]
|
||||||
|
except KeyError:
|
||||||
|
origins = self.ext[url]
|
||||||
for source, rawlink in origins:
|
for source, rawlink in origins:
|
||||||
triple = url, rawlink, self.bad[url]
|
triple = url, rawlink, self.bad[url]
|
||||||
try:
|
try:
|
||||||
|
|
@ -406,13 +414,7 @@ class Checker:
|
||||||
try:
|
try:
|
||||||
f = self.urlopener.open(url)
|
f = self.urlopener.open(url)
|
||||||
except IOError, msg:
|
except IOError, msg:
|
||||||
if (type(msg) == TupleType and
|
msg = sanitize(msg)
|
||||||
len(msg) >= 4 and
|
|
||||||
msg[0] == 'http error' and
|
|
||||||
type(msg[3]) == InstanceType):
|
|
||||||
# Remove the Message instance -- it may contain
|
|
||||||
# a file object which prevents pickling.
|
|
||||||
msg = msg[:3] + msg[4:]
|
|
||||||
if verbose > 0:
|
if verbose > 0:
|
||||||
print "Error ", msg
|
print "Error ", msg
|
||||||
if verbose > 0:
|
if verbose > 0:
|
||||||
|
|
@ -549,5 +551,16 @@ def show(p1, link, p2, origins):
|
||||||
print
|
print
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(msg):
    """Return msg in a form that is safe to pickle.

    When msg is a tuple of at least four items whose first item is the
    string 'http error' and whose fourth item is a class instance, the
    fourth item is dropped and the shortened tuple is returned.  Any
    other value is returned unchanged.
    """
    # Anything that is not a tuple with at least four items is passed through.
    if not (type(msg) == TupleType and len(msg) >= 4):
        return msg
    # Only 'http error' tuples carrying an instance in slot 3 need cleaning.
    if not (msg[0] == 'http error' and type(msg[3]) == InstanceType):
        return msg
    # Remove the Message instance -- it may contain
    # a file object which prevents pickling.
    head = msg[:3]
    tail = msg[4:]
    return head + tail
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue