mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
Fix a delimiter detection problem in the Sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I understood better what the code was supposed to do. Would someone who understands the idea behind _guess_delimiter() (see its docstring) check whether my fallback choice is genuinely better than before, or whether it's just serendipity that I picked the proper delimiter?
This commit is contained in:
parent
0174dddc65
commit
39b29be8a6
2 changed files with 17 additions and 3 deletions
13
Lib/csv.py
13
Lib/csv.py
|
@ -152,10 +152,13 @@ class Sniffer:
|
||||||
|
|
||||||
quotechar, delimiter, skipinitialspace = \
|
quotechar, delimiter, skipinitialspace = \
|
||||||
self._guess_quote_and_delimiter(sample, delimiters)
|
self._guess_quote_and_delimiter(sample, delimiters)
|
||||||
if delimiter is None:
|
if not delimiter:
|
||||||
delimiter, skipinitialspace = self._guess_delimiter(sample,
|
delimiter, skipinitialspace = self._guess_delimiter(sample,
|
||||||
delimiters)
|
delimiters)
|
||||||
|
|
||||||
|
if not delimiter:
|
||||||
|
raise Error, "Could not determine delimiter"
|
||||||
|
|
||||||
class dialect(Dialect):
|
class dialect(Dialect):
|
||||||
_name = "sniffed"
|
_name = "sniffed"
|
||||||
lineterminator = '\r\n'
|
lineterminator = '\r\n'
|
||||||
|
@ -329,8 +332,12 @@ class Sniffer:
|
||||||
data[0].count("%c " % d))
|
data[0].count("%c " % d))
|
||||||
return (d, skipinitialspace)
|
return (d, skipinitialspace)
|
||||||
|
|
||||||
# finally, just return the first damn character in the list
|
# nothing else indicates a preference, pick the character that
|
||||||
delim = delims.keys()[0]
|
# dominates(?)
|
||||||
|
items = [(v,k) for (k,v) in delims.items()]
|
||||||
|
items.sort()
|
||||||
|
delim = items[-1][1]
|
||||||
|
|
||||||
skipinitialspace = (data[0].count(delim) ==
|
skipinitialspace = (data[0].count(delim) ==
|
||||||
data[0].count("%c " % delim))
|
data[0].count("%c " % delim))
|
||||||
return (delim, skipinitialspace)
|
return (delim, skipinitialspace)
|
||||||
|
|
|
@ -852,6 +852,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
||||||
'''
|
'''
|
||||||
|
|
||||||
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
|
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
|
||||||
|
sample6 = "a|b|c\r\nd|e|f\r\n"
|
||||||
|
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
|
||||||
|
|
||||||
def test_has_header(self):
|
def test_has_header(self):
|
||||||
sniffer = csv.Sniffer()
|
sniffer = csv.Sniffer()
|
||||||
|
@ -882,6 +884,11 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
|
||||||
self.assertEqual(dialect.delimiter, ";")
|
self.assertEqual(dialect.delimiter, ";")
|
||||||
dialect = sniffer.sniff(self.sample5)
|
dialect = sniffer.sniff(self.sample5)
|
||||||
self.assertEqual(dialect.delimiter, "\t")
|
self.assertEqual(dialect.delimiter, "\t")
|
||||||
|
dialect = sniffer.sniff(self.sample6)
|
||||||
|
self.assertEqual(dialect.delimiter, "|")
|
||||||
|
dialect = sniffer.sniff(self.sample7)
|
||||||
|
self.assertEqual(dialect.delimiter, "|")
|
||||||
|
self.assertEqual(dialect.quotechar, "'")
|
||||||
|
|
||||||
if not hasattr(sys, "gettotalrefcount"):
|
if not hasattr(sys, "gettotalrefcount"):
|
||||||
if test_support.verbose: print "*** skipping leakage tests ***"
|
if test_support.verbose: print "*** skipping leakage tests ***"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue