mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 00:08:32 +00:00 
			
		
		
		
	 49fd7fa443
			
		
	
	
		49fd7fa443
		
	
	
	
	
		
			
			number of tests, all because of the codecs/_multibytecodecs issue described here (it's not a Py3K issue, just something Py3K discovers): http://mail.python.org/pipermail/python-dev/2006-April/064051.html Hye-Shik Chang promised to look for a fix, so no need to fix it here. The tests that are expected to break are: test_codecencodings_cn test_codecencodings_hk test_codecencodings_jp test_codecencodings_kr test_codecencodings_tw test_codecs test_multibytecodec This merge fixes an actual test failure (test_weakref) in this branch, though, so I believe merging is the right thing to do anyway.
		
			
				
	
	
		
			131 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			131 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| """Show file statistics by extension."""
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| 
 | |
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of ``counter name -> int``.
    Row labels are normalized file extensions (e.g. ``".py"``) or one of the
    pseudo-extensions ``"<none>"`` (file without extension), ``"<dir>"``,
    ``"<lnk>"`` and ``"<???>"`` (argument that is neither file nor directory).
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are analysed, and
        anything else is reported on stderr and counted as ``unknown``.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        Entries are visited in sorted order.  Symbolic links are counted but
        never followed; CVS temp files (``.#*``) and Emacs backups (``*~``)
        are skipped.  An unlistable directory is reported on stderr and
        counted as ``unlistable``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for a link first so that links to directories are not
            # descended into.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        Always counts ``files``; on success also ``bytes`` and then either
        ``binary`` (the content contains a NUL byte) or ``lines`` and
        ``words`` (plus ``empty`` for a zero-length file).  A file that
        cannot be opened or read is reported on stderr and counted as
        ``unopenable``.
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            # The context manager closes the handle even if read() fails.
            with open(filename, "rb") as f:
                data = f.read()
        except (IOError, OSError) as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        # The file is opened in binary mode, so compare against a bytes
        # literal; a str literal would raise TypeError on Python 3.
        if b"\0" in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating either on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics to stdout as an aligned table.

        One row per extension in sorted order, followed by a ``TOTAL`` row,
        with the column header printed both above and below the data.  Cells
        are right-aligned and separated by a single space.  The method only
        reads ``self.stats``, so it may be called repeatedly.
        """
        exts = sorted(self.stats)
        # The column set is the union of every counter seen in any row.
        columns = set()
        for ext in exts:
            columns.update(self.stats[ext])
        cols = sorted(columns)

        minwidth = 6
        # Include the "TOTAL" label so its row cannot overflow the first
        # column (this also keeps max() from failing when there are no rows).
        colwidth = {"ext": max(len(label) for label in exts + ["TOTAL"])}
        totals = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            widths.extend([minwidth, len(col), len(str(total))])
            colwidth[col] = max(widths)

        def formatrow(label, row):
            cells = ["%*s" % (colwidth["ext"], label)]
            for col in cols:
                cells.append("%*s" % (colwidth[col], row.get(col, "")))
            return " ".join(cells)

        header = formatrow("ext", dict((col, col) for col in cols))
        lines = [header]
        for ext in exts:
            lines.append(formatrow(ext, self.stats[ext]))
        lines.append(formatrow("TOTAL", totals))
        lines.append(header)  # Another header at the bottom
        sys.stdout.write("\n".join(lines) + "\n")
 | |
| 
 | |
def main():
    """Report statistics for the paths named on the command line.

    With no command-line arguments, the current directory is scanned.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
 |