mirror of
https://github.com/python/cpython.git
synced 2025-07-08 03:45:36 +00:00
Fix a delimiter detection problem in Sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I understand any better what the code is supposed to do. Would someone who understands the idea behind _guess_delimiter() (see its docstring) check whether my fallback choice is actually better than before, or whether it's just serendipity that I picked the proper delimiter?
This commit is contained in:
parent
0174dddc65
commit
39b29be8a6
2 changed files with 17 additions and 3 deletions
13
Lib/csv.py
13
Lib/csv.py
|
@ -152,10 +152,13 @@ class Sniffer:
|
|||
|
||||
quotechar, delimiter, skipinitialspace = \
|
||||
self._guess_quote_and_delimiter(sample, delimiters)
|
||||
if delimiter is None:
|
||||
if not delimiter:
|
||||
delimiter, skipinitialspace = self._guess_delimiter(sample,
|
||||
delimiters)
|
||||
|
||||
if not delimiter:
|
||||
raise Error, "Could not determine delimiter"
|
||||
|
||||
class dialect(Dialect):
|
||||
_name = "sniffed"
|
||||
lineterminator = '\r\n'
|
||||
|
@ -329,8 +332,12 @@ class Sniffer:
|
|||
data[0].count("%c " % d))
|
||||
return (d, skipinitialspace)
|
||||
|
||||
# finally, just return the first damn character in the list
|
||||
delim = delims.keys()[0]
|
||||
# nothing else indicates a preference, pick the character that
|
||||
# dominates(?)
|
||||
items = [(v,k) for (k,v) in delims.items()]
|
||||
items.sort()
|
||||
delim = items[-1][1]
|
||||
|
||||
skipinitialspace = (data[0].count(delim) ==
|
||||
data[0].count("%c " % delim))
|
||||
return (delim, skipinitialspace)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue