Added the 7 new top level domains, and reworded the nameorgs output.

Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries' not `country'. Also added no-case-fold for the string "U.S." since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)
2025-12-10 19:10:59 +00:00 · 2002-06-07 15:48:52 +00:00 · 2002-06-07 15:48:52 +00:00 · aef8371acb
commit aef8371acb
parent 9e9d4f8ed8
1 changed files with 109 additions and 112 deletions
--- a/Tools/world/world
+++ b/Tools/world/world
@ -3,7 +3,7 @@
 """world -- Print mappings between country names and DNS country codes.
 Contact: Barry Warsaw
-Email:   bwarsaw@python.org
+Email:   barry@python.org
 Version: %(__version__)s
 This script will take a list of Internet addresses and print out where in the
@ -14,9 +14,9 @@ code found in the address.  Addresses can be in any of the following forms:
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address
-If no match is found, the address is interpreted as a regular expression [*]
+If no match is found, the address is interpreted as a regular expression and a
-and a reverse lookup is attempted.  This script will search the country names
+reverse lookup is attempted.  This script will search the country names and
-and print a list of matching entries.  You can force reverse mappings with the
+print a list of matching entries.  You can force reverse mappings with the
 `-r' flag (see below).
 For example:
@ -34,10 +34,6 @@ For example:
        tz: Tanzania, United Republic of
        gb: United Kingdom
 [*] Note that regular expressions must conform to Python 1.5's re.py module
 syntax.  The comparison is done with the search() method.
 Country codes are maintained by the RIPE Network Coordination Centre,
 in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
 authoritative source of country code mappings is:
@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization.  This makes it appropriate for cutting and
-        pasting back into this file.
+        pasting back into this file.  Output is always to standard out.
    --reverse
    -r
@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
    -h
    --help
        Print this message.
 """
 __version__ = '$Revision$'
 import sys
 import getopt
-try:
+import re
    import re
 except ImportError:
    print sys.argv[0], 'requires Python 1.5'
    sys.exit(1)
 PROGRAM = sys.argv[0]
@ -114,11 +105,7 @@ def resolve(rawaddr):
        return rawaddr
    addr = parts[-1]
    if nameorgs.has_key(addr):
-        if nameorgs[addr][0].lower() in 'aeiou':
+        print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
            ana = 'an'
        else:
            ana = 'a'
 	print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
        return None
    elif countries.has_key(addr):
        print rawaddr, 'originated from', countries[addr]
@ -161,7 +148,7 @@ def parse(file, normalize):
    scanning = 0
    if normalize:
-	print 'country = {'
+        print 'countries = {'
    while 1:
        line = fp.readline()
@ -186,16 +173,17 @@ def parse(file, normalize):
                    # XXX special cases
                    if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
                        words[i] = w.lower()
-		    elif w == 'THE' and i != 1:
+                    elif w == 'THE' and i <> 1:
                        words[i] = w.lower()
                    elif len(w) > 3 and w[1] == "'":
                        words[i] = w[0:3].upper() + w[3:].lower()
-		    elif w == '(U.S.)':
+                    elif w in ('(U.S.)', 'U.S.'):
                        pass
-		    elif w[0] == '(' and w != '(local':
+                    elif w[0] == '(' and w <> '(local':
                        words[i] = '(' + w[1:].capitalize()
-		    elif w.find('-') != -1:
+                    elif w.find('-') <> -1:
-			words[i] = '-'.join([s.capitalize() for s in w.split('-')])
+                        words[i] = '-'.join(
                            [s.capitalize() for s in w.split('-')])
                    else:
                        words[i] = w.capitalize()
                code = code.lower()
@ -234,7 +222,7 @@ def main():
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
-	elif opt in ('-o', '--output'):
+        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1
@ -267,14 +255,23 @@ def main():
 # The mappings
 nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
-    "int": "international",
+    "pro": "professionals",
    # This isn't in the same class as those above, but is included here
    # because `uk' is the common practice country code for the United Kingdom.
    # AFAICT, the official `gb' code is routinely ignored!
@ -525,7 +522,7 @@ countries = {
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
-    "vi": "Virgin Islands, U.s.",
+    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",