Fix a delimiter detection problem in sniffer. Sniffing "a|b|c\r\n" was

returning 'a' as the delimiter.  It now returns '|', but not because I
understood better what the code was supposed to do.  Would someone who
understands the idea behind _guess_delimiter() (see its docstring) look to
see whether my fallback choice is better than before or whether it's just
serendipity that I picked the proper delimiter?
This commit is contained in:
Skip Montanaro 2005-12-30 05:09:48 +00:00
parent 0174dddc65
commit 39b29be8a6
2 changed files with 17 additions and 3 deletions

View file

@ -152,10 +152,13 @@ class Sniffer:
quotechar, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
if delimiter is None:
if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters)
if not delimiter:
raise Error, "Could not determine delimiter"
class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
@ -329,8 +332,12 @@ class Sniffer:
data[0].count("%c " % d))
return (d, skipinitialspace)
# finally, just return the first damn character in the list
delim = delims.keys()[0]
# nothing else indicates a preference, pick the character that
# dominates(?)
items = [(v,k) for (k,v) in delims.items()]
items.sort()
delim = items[-1][1]
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)