Introduce location tracking in the tokenizer and parser (#710)

* Add locations * Add PartialEq * Add PartialEq * Add some tests * Fix rebase conflicts * Fix clippy Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
2025-08-30 18:57:21 +00:00 · 2022-12-05 11:47:42 -08:00 · 2022-12-05 11:47:42 -08:00 · 813f4a2eff
commit 813f4a2eff
parent 512a159f08
4 changed files with 404 additions and 174 deletions
--- a/.gitignore
+++ b/.gitignore
@ -13,3 +13,5 @@ Cargo.lock
 # IDEs
 .idea
 .vscode
+
+*.swp
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@ -52,7 +52,7 @@ pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {
    parser.expect_keyword(Keyword::ON)?;
    let token = parser.next_token();

-    let (object_type, object_name) = match token {
+    let (object_type, object_name) = match token.token {
        Token::Word(w) if w.keyword == Keyword::COLUMN => {
            let object_name = parser.parse_object_name()?;
            (CommentObject::Column, object_name)
--- a/src/parser.rs
+++ b/src/parser.rs
@ -105,7 +105,7 @@ impl fmt::Display for ParserError {
 impl std::error::Error for ParserError {}

 pub struct Parser<'a> {
-    tokens: Vec<Token>,
+    tokens: Vec<TokenWithLocation>,
    /// The index of the first unprocessed token in `self.tokens`
    index: usize,
    dialect: &'a dyn Dialect,
@ -113,7 +113,26 @@ pub struct Parser<'a> {

 impl<'a> Parser<'a> {
    /// Parse the specified tokens
+    /// To avoid breaking backwards compatibility, this function accepts
+    /// bare tokens.
    pub fn new(tokens: Vec<Token>, dialect: &'a dyn Dialect) -> Self {
+        Parser::new_without_locations(tokens, dialect)
+    }
+
+    pub fn new_without_locations(tokens: Vec<Token>, dialect: &'a dyn Dialect) -> Self {
+        // Put in dummy locations
+        let tokens_with_locations: Vec<TokenWithLocation> = tokens
+            .into_iter()
+            .map(|token| TokenWithLocation {
+                token,
+                location: Location { line: 0, column: 0 },
+            })
+            .collect();
+        Parser::new_with_locations(tokens_with_locations, dialect)
+    }
+
+    /// Parse the specified tokens
+    pub fn new_with_locations(tokens: Vec<TokenWithLocation>, dialect: &'a dyn Dialect) -> Self {
        Parser {
            tokens,
            index: 0,
@ -157,7 +176,8 @@ impl<'a> Parser<'a> {
            return statement;
        }

-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::KILL => Ok(self.parse_kill()?),
                Keyword::DESCRIBE => Ok(self.parse_explain(true)?),
@ -202,13 +222,13 @@ impl<'a> Parser<'a> {
                Keyword::EXECUTE => Ok(self.parse_execute()?),
                Keyword::PREPARE => Ok(self.parse_prepare()?),
                Keyword::MERGE => Ok(self.parse_merge()?),
-                _ => self.expected("an SQL statement", Token::Word(w)),
+                _ => self.expected("an SQL statement", next_token),
            },
            Token::LParen => {
                self.prev_token();
                Ok(Statement::Query(Box::new(self.parse_query()?)))
            }
-            unexpected => self.expected("an SQL statement", unexpected),
+            _ => self.expected("an SQL statement", next_token),
        }
    }

@ -314,18 +334,20 @@ impl<'a> Parser<'a> {
    pub fn parse_wildcard_expr(&mut self) -> Result<WildcardExpr, ParserError> {
        let index = self.index;

-        match self.next_token() {
-            Token::Word(w) if self.peek_token() == Token::Period => {
+        let next_token = self.next_token();
+        match next_token.token {
+            Token::Word(w) if self.peek_token().token == Token::Period => {
                let mut id_parts: Vec<Ident> = vec![w.to_ident()];

                while self.consume_token(&Token::Period) {
-                    match self.next_token() {
+                    let next_token = self.next_token();
+                    match next_token.token {
                        Token::Word(w) => id_parts.push(w.to_ident()),
                        Token::Mul => {
                            return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts)));
                        }
-                        unexpected => {
-                            return self.expected("an identifier or a '*' after '.'", unexpected);
+                        _ => {
+                            return self.expected("an identifier or a '*' after '.'", next_token);
                        }
                    }
                }
@ -385,7 +407,7 @@ impl<'a> Parser<'a> {
    pub fn get_next_interval_precedence(&self) -> Result<u8, ParserError> {
        let token = self.peek_token();

-        match token {
+        match token.token {
            Token::Word(w) if w.keyword == Keyword::AND => Ok(0),
            Token::Word(w) if w.keyword == Keyword::OR => Ok(0),
            Token::Word(w) if w.keyword == Keyword::XOR => Ok(0),
@ -450,7 +472,8 @@ impl<'a> Parser<'a> {
            }
        }));

-        let expr = match self.next_token() {
+        let next_token = self.next_token();
+        let expr = match next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
                    self.prev_token();
@ -510,15 +533,16 @@ impl<'a> Parser<'a> {
                }
                // Here `w` is a word, check if it's a part of a multi-part
                // identifier, a function call, or a simple identifier:
-                _ => match self.peek_token() {
+                _ => match self.peek_token().token {
                    Token::LParen | Token::Period => {
                        let mut id_parts: Vec<Ident> = vec![w.to_ident()];
                        while self.consume_token(&Token::Period) {
-                            match self.next_token() {
+                            let next_token = self.next_token();
+                            match next_token.token {
                                Token::Word(w) => id_parts.push(w.to_ident()),
-                                unexpected => {
+                                _ => {
                                    return self
-                                        .expected("an identifier or a '*' after '.'", unexpected);
+                                        .expected("an identifier or a '*' after '.'", next_token);
                                }
                            }
                        }
@ -598,7 +622,7 @@ impl<'a> Parser<'a> {
                    Ok(expr)
                } else {
                    let tok = self.next_token();
-                    let key = match tok {
+                    let key = match tok.token {
                        Token::Word(word) => word.to_ident(),
                        _ => return parser_err!(format!("Expected identifier, found: {}", tok)),
                    };
@ -612,7 +636,7 @@ impl<'a> Parser<'a> {
                self.prev_token();
                Ok(Expr::Value(self.parse_value()?))
            }
-            unexpected => self.expected("an expression:", unexpected),
+            _ => self.expected("an expression:", next_token),
        }?;

        if self.parse_keyword(Keyword::COLLATE) {
@ -684,14 +708,15 @@ impl<'a> Parser<'a> {
    }

    pub fn parse_window_frame_units(&mut self) -> Result<WindowFrameUnits, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::ROWS => Ok(WindowFrameUnits::Rows),
                Keyword::RANGE => Ok(WindowFrameUnits::Range),
                Keyword::GROUPS => Ok(WindowFrameUnits::Groups),
-                _ => self.expected("ROWS, RANGE, GROUPS", Token::Word(w))?,
+                _ => self.expected("ROWS, RANGE, GROUPS", next_token)?,
            },
-            unexpected => self.expected("ROWS, RANGE, GROUPS", unexpected),
+            _ => self.expected("ROWS, RANGE, GROUPS", next_token),
        }
    }

@ -720,7 +745,7 @@ impl<'a> Parser<'a> {
            let rows = if self.parse_keyword(Keyword::UNBOUNDED) {
                None
            } else {
-                Some(Box::new(match self.peek_token() {
+                Some(Box::new(match self.peek_token().token {
                    Token::SingleQuotedString(_) => self.parse_interval()?,
                    _ => self.parse_expr()?,
                }))
@ -980,7 +1005,7 @@ impl<'a> Parser<'a> {
    pub fn parse_trim_expr(&mut self) -> Result<Expr, ParserError> {
        self.expect_token(&Token::LParen)?;
        let mut trim_where = None;
-        if let Token::Word(word) = self.peek_token() {
+        if let Token::Word(word) = self.peek_token().token {
            if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING]
                .iter()
                .any(|d| word.keyword == *d)
@ -1009,21 +1034,22 @@ impl<'a> Parser<'a> {
    }

    pub fn parse_trim_where(&mut self) -> Result<TrimWhereField, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::BOTH => Ok(TrimWhereField::Both),
                Keyword::LEADING => Ok(TrimWhereField::Leading),
                Keyword::TRAILING => Ok(TrimWhereField::Trailing),
-                _ => self.expected("trim_where field", Token::Word(w))?,
+                _ => self.expected("trim_where field", next_token)?,
            },
-            unexpected => self.expected("trim_where field", unexpected),
+            _ => self.expected("trim_where field", next_token),
        }
    }

    /// Parses an array expression `[ex1, ex2, ..]`
    /// if `named` is `true`, came from an expression like  `ARRAY[ex1, ex2]`
    pub fn parse_array_expr(&mut self, named: bool) -> Result<Expr, ParserError> {
-        if self.peek_token() == Token::RBracket {
+        if self.peek_token().token == Token::RBracket {
            let _ = self.next_token();
            Ok(Expr::Array(Array {
                elem: vec![],
@ -1060,7 +1086,7 @@ impl<'a> Parser<'a> {
                Some(ListAggOnOverflow::Error)
            } else {
                self.expect_keyword(Keyword::TRUNCATE)?;
-                let filler = match self.peek_token() {
+                let filler = match self.peek_token().token {
                    Token::Word(w)
                        if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT =>
                    {
@ -1070,9 +1096,10 @@ impl<'a> Parser<'a> {
                    | Token::EscapedStringLiteral(_)
                    | Token::NationalStringLiteral(_)
                    | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
-                    unexpected => {
-                        self.expected("either filler, WITH, or WITHOUT in LISTAGG", unexpected)?
-                    }
+                    _ => self.expected(
+                        "either filler, WITH, or WITHOUT in LISTAGG",
+                        self.peek_token(),
+                    )?,
                };
                let with_count = self.parse_keyword(Keyword::WITH);
                if !with_count && !self.parse_keyword(Keyword::WITHOUT) {
@ -1158,7 +1185,8 @@ impl<'a> Parser<'a> {
    // EXTRACT supports a wider set of date/time fields than interval qualifiers,
    // so this function may need to be split in two.
    pub fn parse_date_time_field(&mut self) -> Result<DateTimeField, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::YEAR => Ok(DateTimeField::Year),
                Keyword::MONTH => Ok(DateTimeField::Month),
@ -1186,14 +1214,14 @@ impl<'a> Parser<'a> {
                Keyword::TIMEZONE => Ok(DateTimeField::Timezone),
                Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour),
                Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute),
-                _ => self.expected("date/time field", Token::Word(w))?,
+                _ => self.expected("date/time field", next_token),
            },
-            unexpected => self.expected("date/time field", unexpected),
+            _ => self.expected("date/time field", next_token),
        }
    }

    pub fn parse_not(&mut self) -> Result<Expr, ParserError> {
-        match self.peek_token() {
+        match self.peek_token().token {
            Token::Word(w) => match w.keyword {
                Keyword::EXISTS => {
                    let negated = true;
@ -1291,7 +1319,7 @@ impl<'a> Parser<'a> {
        //
        // Note that PostgreSQL allows omitting the qualifier, so we provide
        // this more general implementation.
-        let leading_field = match self.peek_token() {
+        let leading_field = match self.peek_token().token {
            Token::Word(kw)
                if [
                    Keyword::YEAR,
@ -1371,7 +1399,7 @@ impl<'a> Parser<'a> {

        let tok = self.next_token();

-        let regular_binary_operator = match &tok {
+        let regular_binary_operator = match &tok.token {
            Token::Spaceship => Some(BinaryOperator::Spaceship),
            Token::DoubleEq => Some(BinaryOperator::Eq),
            Token::Eq => Some(BinaryOperator::Eq),
@ -1451,7 +1479,7 @@ impl<'a> Parser<'a> {
                    right: Box::new(self.parse_subexpr(precedence)?),
                })
            }
-        } else if let Token::Word(w) = &tok {
+        } else if let Token::Word(w) = &tok.token {
            match w.keyword {
                Keyword::IS => {
                    if self.parse_keyword(Keyword::NULL) {
@ -1489,7 +1517,7 @@ impl<'a> Parser<'a> {
                    //     self.expect_keyword(Keyword::ZONE)?;
                    if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) {
                        let time_zone = self.next_token();
-                        match time_zone {
+                        match time_zone.token {
                            Token::SingleQuotedString(time_zone) => {
                                log::trace!("Peek token: {:?}", self.peek_token());
                                Ok(Expr::AtTimeZone {
@ -1497,9 +1525,9 @@ impl<'a> Parser<'a> {
                                    time_zone,
                                })
                            }
-                            tok => self.expected(
+                            _ => self.expected(
                                "Expected Token::SingleQuotedString after AT TIME ZONE",
-                                tok,
+                                time_zone,
                            ),
                        }
                    } else {
@ -1544,7 +1572,7 @@ impl<'a> Parser<'a> {
                    }
                }
                // Can only happen if `get_next_precedence` got out of sync with this function
-                _ => parser_err!(format!("No infix parser for token {:?}", tok)),
+                _ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
            }
        } else if Token::DoubleColon == tok {
            self.parse_pg_cast(expr)
@ -1571,7 +1599,7 @@ impl<'a> Parser<'a> {
            || Token::HashArrow == tok
            || Token::HashLongArrow == tok
        {
-            let operator = match tok {
+            let operator = match tok.token {
                Token::Arrow => JsonOperator::Arrow,
                Token::LongArrow => JsonOperator::LongArrow,
                Token::HashArrow => JsonOperator::HashArrow,
@ -1585,7 +1613,7 @@ impl<'a> Parser<'a> {
            })
        } else {
            // Can only happen if `get_next_precedence` got out of sync with this function
-            parser_err!(format!("No infix parser for token {:?}", tok))
+            parser_err!(format!("No infix parser for token {:?}", tok.token))
        }
    }

@ -1713,13 +1741,13 @@ impl<'a> Parser<'a> {
        let token_1 = self.peek_nth_token(1);
        let token_2 = self.peek_nth_token(2);
        debug!("0: {token_0} 1: {token_1} 2: {token_2}");
-        match token {
+        match token.token {
            Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC),
            Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC),
            Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC),

            Token::Word(w) if w.keyword == Keyword::AT => {
-                match (self.peek_nth_token(1), self.peek_nth_token(2)) {
+                match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) {
                    (Token::Word(w), Token::Word(w2))
                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
                    {
@ -1729,7 +1757,7 @@ impl<'a> Parser<'a> {
                }
            }

-            Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1) {
+            Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token {
                // The precedence of NOT varies depending on keyword that
                // follows it. If it is followed by IN, BETWEEN, or LIKE,
                // it takes on the precedence of those tokens. Otherwise it
@ -1780,20 +1808,26 @@ impl<'a> Parser<'a> {

    /// Return the first non-whitespace token that has not yet been processed
    /// (or None if reached end-of-file)
-    pub fn peek_token(&self) -> Token {
+    pub fn peek_token(&self) -> TokenWithLocation {
        self.peek_nth_token(0)
    }

    /// Return nth non-whitespace token that has not yet been processed
-    pub fn peek_nth_token(&self, mut n: usize) -> Token {
+    pub fn peek_nth_token(&self, mut n: usize) -> TokenWithLocation {
        let mut index = self.index;
        loop {
            index += 1;
            match self.tokens.get(index - 1) {
-                Some(Token::Whitespace(_)) => continue,
+                Some(TokenWithLocation {
+                    token: Token::Whitespace(_),
+                    location: _,
+                }) => continue,
                non_whitespace => {
                    if n == 0 {
-                        return non_whitespace.cloned().unwrap_or(Token::EOF);
+                        return non_whitespace.cloned().unwrap_or(TokenWithLocation {
+                            token: Token::EOF,
+                            location: Location { line: 0, column: 0 },
+                        });
                    }
                    n -= 1;
                }
@ -1804,18 +1838,25 @@ impl<'a> Parser<'a> {
    /// Return the first non-whitespace token that has not yet been processed
    /// (or None if reached end-of-file) and mark it as processed. OK to call
    /// repeatedly after reaching EOF.
-    pub fn next_token(&mut self) -> Token {
+    pub fn next_token(&mut self) -> TokenWithLocation {
        loop {
            self.index += 1;
            match self.tokens.get(self.index - 1) {
-                Some(Token::Whitespace(_)) => continue,
-                token => return token.cloned().unwrap_or(Token::EOF),
+                Some(TokenWithLocation {
+                    token: Token::Whitespace(_),
+                    location: _,
+                }) => continue,
+                token => {
+                    return token
+                        .cloned()
+                        .unwrap_or_else(|| TokenWithLocation::wrap(Token::EOF))
+                }
            }
        }
    }

    /// Return the first unprocessed token, possibly whitespace.
-    pub fn next_token_no_skip(&mut self) -> Option<&Token> {
+    pub fn next_token_no_skip(&mut self) -> Option<&TokenWithLocation> {
        self.index += 1;
        self.tokens.get(self.index - 1)
    }
@ -1827,7 +1868,11 @@ impl<'a> Parser<'a> {
        loop {
            assert!(self.index > 0);
            self.index -= 1;
-            if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) {
+            if let Some(TokenWithLocation {
+                token: Token::Whitespace(_),
+                location: _,
+            }) = self.tokens.get(self.index)
+            {
                continue;
            }
            return;
@ -1835,14 +1880,14 @@ impl<'a> Parser<'a> {
    }

    /// Report unexpected token
-    pub fn expected<T>(&self, expected: &str, found: Token) -> Result<T, ParserError> {
+    pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
        parser_err!(format!("Expected {}, found: {}", expected, found))
    }

    /// Look for an expected keyword and consume it if it exists
    #[must_use]
    pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
-        match self.peek_token() {
+        match self.peek_token().token {
            Token::Word(w) if expected == w.keyword => {
                self.next_token();
                true
@ -1869,7 +1914,7 @@ impl<'a> Parser<'a> {
    /// Look for one of the given keywords and return the one that matches.
    #[must_use]
    pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option<Keyword> {
-        match self.peek_token() {
+        match self.peek_token().token {
            Token::Word(w) => {
                keywords
                    .iter()
@ -1945,7 +1990,7 @@ impl<'a> Parser<'a> {
                // BigQuery allows trailing commas.
                // e.g. `SELECT 1, 2, FROM t`
                // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas
-                match self.peek_token() {
+                match self.peek_token().token {
                    Token::Word(kw)
                        if keywords::RESERVED_FOR_COLUMN_ALIAS
                            .iter()
@ -2057,14 +2102,14 @@ impl<'a> Parser<'a> {
        let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None);
        if self.parse_keyword(Keyword::TABLE) {
            let table_name = self.parse_object_name()?;
-            if self.peek_token() != Token::EOF {
-                if let Token::Word(word) = self.peek_token() {
+            if self.peek_token().token != Token::EOF {
+                if let Token::Word(word) = self.peek_token().token {
                    if word.keyword == Keyword::OPTIONS {
                        options = self.parse_options(Keyword::OPTIONS)?
                    }
                };

-                if self.peek_token() != Token::EOF {
+                if self.peek_token().token != Token::EOF {
                    let (a, q) = self.parse_as_query()?;
                    has_as = a;
                    query = Some(q);
@ -2091,7 +2136,7 @@ impl<'a> Parser<'a> {
            if self.parse_keyword(Keyword::TABLE) {
                let table_name = self.parse_object_name()?;
                if self.peek_token() != Token::EOF {
-                    if let Token::Word(word) = self.peek_token() {
+                    if let Token::Word(word) = self.peek_token().token {
                        if word.keyword == Keyword::OPTIONS {
                            options = self.parse_options(Keyword::OPTIONS)?
                        }
@ -2130,7 +2175,7 @@ impl<'a> Parser<'a> {

    /// Parse 'AS' before as query,such as `WITH XXX AS SELECT XXX` oer `CACHE TABLE AS SELECT XXX`
    pub fn parse_as_query(&mut self) -> Result<(bool, Query), ParserError> {
-        match self.peek_token() {
+        match self.peek_token().token {
            Token::Word(word) => match word.keyword {
                Keyword::AS => {
                    self.next_token();
@ -2148,7 +2193,7 @@ impl<'a> Parser<'a> {
        if has_table {
            let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
            let table_name = self.parse_object_name()?;
-            if self.peek_token() == Token::EOF {
+            if self.peek_token().token == Token::EOF {
                Ok(Statement::UNCache {
                    table_name,
                    if_exists,
@ -2248,7 +2293,7 @@ impl<'a> Parser<'a> {
            Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))),
            _ => self.expected(
                "JAR, FILE or ARCHIVE, got {:?}",
-                Token::make_keyword(format!("{:?}", keyword).as_str()),
+                TokenWithLocation::wrap(Token::make_keyword(format!("{:?}", keyword).as_str())),
            ),
        }
    }
@ -2410,7 +2455,8 @@ impl<'a> Parser<'a> {
    }

    pub fn parse_file_format(&mut self) -> Result<FileFormat, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::AVRO => Ok(FileFormat::AVRO),
                Keyword::JSONFILE => Ok(FileFormat::JSONFILE),
@ -2419,21 +2465,22 @@ impl<'a> Parser<'a> {
                Keyword::RCFILE => Ok(FileFormat::RCFILE),
                Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE),
                Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE),
-                _ => self.expected("fileformat", Token::Word(w)),
+                _ => self.expected("fileformat", next_token),
            },
-            unexpected => self.expected("fileformat", unexpected),
+            _ => self.expected("fileformat", next_token),
        }
    }

    pub fn parse_analyze_format(&mut self) -> Result<AnalyzeFormat, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match &next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::TEXT => Ok(AnalyzeFormat::TEXT),
                Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ),
                Keyword::JSON => Ok(AnalyzeFormat::JSON),
-                _ => self.expected("fileformat", Token::Word(w)),
+                _ => self.expected("fileformat", next_token),
            },
-            unexpected => self.expected("fileformat", unexpected),
+            _ => self.expected("fileformat", next_token),
        }
    }

@ -2949,10 +2996,11 @@ impl<'a> Parser<'a> {

        // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs
        let on_cluster = if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) {
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::SingleQuotedString(s) => Some(s),
                Token::Word(s) => Some(s.to_string()),
-                unexpected => self.expected("identifier or cluster literal", unexpected)?,
+                _ => self.expected("identifier or cluster literal", next_token)?,
            }
        } else {
            None
@ -2990,9 +3038,10 @@ impl<'a> Parser<'a> {

        let engine = if self.parse_keyword(Keyword::ENGINE) {
            self.expect_token(&Token::Eq)?;
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::Word(w) => Some(w.value),
-                unexpected => self.expected("identifier", unexpected)?,
+                _ => self.expected("identifier", next_token)?,
            }
        } else {
            None
@ -3000,9 +3049,10 @@ impl<'a> Parser<'a> {

        let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) {
            self.expect_token(&Token::Eq)?;
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::Word(w) => Some(w.value),
-                unexpected => self.expected("identifier", unexpected)?,
+                _ => self.expected("identifier", next_token)?,
            }
        } else {
            None
@ -3010,9 +3060,10 @@ impl<'a> Parser<'a> {

        let collation = if self.parse_keywords(&[Keyword::COLLATE]) {
            self.expect_token(&Token::Eq)?;
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::Word(w) => Some(w.value),
-                unexpected => self.expected("identifier", unexpected)?,
+                _ => self.expected("identifier", next_token)?,
            }
        } else {
            None
@ -3068,7 +3119,7 @@ impl<'a> Parser<'a> {
        loop {
            if let Some(constraint) = self.parse_optional_table_constraint()? {
                constraints.push(constraint);
-            } else if let Token::Word(_) = self.peek_token() {
+            } else if let Token::Word(_) = self.peek_token().token {
                columns.push(self.parse_column_def()?);
            } else {
                return self.expected("column name or constraint definition", self.peek_token());
@ -3125,9 +3176,10 @@ impl<'a> Parser<'a> {
        } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) {
            Ok(Some(ColumnOption::NotNull))
        } else if self.parse_keywords(&[Keyword::COMMENT]) {
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))),
-                unexpected => self.expected("string", unexpected),
+                _ => self.expected("string", next_token),
            }
        } else if self.parse_keyword(Keyword::NULL) {
            Ok(Some(ColumnOption::Null))
@ -3218,7 +3270,9 @@ impl<'a> Parser<'a> {
        } else {
            None
        };
-        match self.next_token() {
+
+        let next_token = self.next_token();
+        match next_token.token {
            Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => {
                let is_primary = w.keyword == Keyword::PRIMARY;
                if is_primary {
@ -3271,7 +3325,7 @@ impl<'a> Parser<'a> {
            {
                let display_as_key = w.keyword == Keyword::KEY;

-                let name = match self.peek_token() {
+                let name = match self.peek_token().token {
                    Token::Word(word) if word.keyword == Keyword::USING => None,
                    _ => self.maybe_parse(|parser| parser.parse_identifier()),
                };
@ -3297,7 +3351,10 @@ impl<'a> Parser<'a> {
                if let Some(name) = name {
                    return self.expected(
                        "FULLTEXT or SPATIAL option without constraint name",
-                        Token::make_keyword(&name.to_string()),
+                        TokenWithLocation {
+                            token: Token::make_keyword(&name.to_string()),
+                            location: next_token.location,
+                        },
                    );
                }

@ -3322,9 +3379,9 @@ impl<'a> Parser<'a> {
                    columns,
                }))
            }
-            unexpected => {
+            _ => {
                if name.is_some() {
-                    self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected)
+                    self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", next_token)
                } else {
                    self.prev_token();
                    Ok(None)
@ -3712,7 +3769,7 @@ impl<'a> Parser<'a> {
    pub fn parse_tab_value(&mut self) -> Vec<Option<String>> {
        let mut values = vec![];
        let mut content = String::from("");
-        while let Some(t) = self.next_token_no_skip() {
+        while let Some(t) = self.next_token_no_skip().map(|t| &t.token) {
            match t {
                Token::Whitespace(Whitespace::Tab) => {
                    values.push(Some(content.to_string()));
@ -3726,7 +3783,7 @@ impl<'a> Parser<'a> {
                    if self.consume_token(&Token::Period) {
                        return values;
                    }
-                    if let Token::Word(w) = self.next_token() {
+                    if let Token::Word(w) = self.next_token().token {
                        if w.value == "N" {
                            values.push(None);
                        }
@ -3742,7 +3799,9 @@ impl<'a> Parser<'a> {

    /// Parse a literal value (numbers, strings, date/time, booleans)
    pub fn parse_value(&mut self) -> Result<Value, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        let location = next_token.location;
+        match next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::TRUE => Ok(Value::Boolean(true)),
                Keyword::FALSE => Ok(Value::Boolean(false)),
@ -3750,13 +3809,25 @@ impl<'a> Parser<'a> {
                Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style {
                    Some('"') => Ok(Value::DoubleQuotedString(w.value)),
                    Some('\'') => Ok(Value::SingleQuotedString(w.value)),
-                    _ => self.expected("A value?", Token::Word(w))?,
+                    _ => self.expected(
+                        "A value?",
+                        TokenWithLocation {
+                            token: Token::Word(w),
+                            location,
+                        },
+                    )?,
                },
                // Case when Snowflake Semi-structured data like key:value
                Keyword::NoKeyword | Keyword::LOCATION if dialect_of!(self is SnowflakeDialect | GenericDialect) => {
                    Ok(Value::UnQuotedString(w.value))
                }
-                _ => self.expected("a concrete value", Token::Word(w)),
+                _ => self.expected(
+                    "a concrete value",
+                    TokenWithLocation {
+                        token: Token::Word(w),
+                        location,
+                    },
+                ),
            },
            // The call to n.parse() returns a bigdecimal when the
            // bigdecimal feature is enabled, and is otherwise a no-op
@ -3776,7 +3847,13 @@ impl<'a> Parser<'a> {
                let placeholder = tok.to_string() + &ident.value;
                Ok(Value::Placeholder(placeholder))
            }
-            unexpected => self.expected("a value", unexpected),
+            unexpected => self.expected(
+                "a value",
+                TokenWithLocation {
+                    token: unexpected,
+                    location,
+                },
+            ),
        }
    }

@ -3793,30 +3870,33 @@ impl<'a> Parser<'a> {

    /// Parse an unsigned literal integer/long
    pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match next_token.token {
            Token::Number(s, _) => s.parse::<u64>().map_err(|e| {
                ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e))
            }),
-            unexpected => self.expected("literal int", unexpected),
+            _ => self.expected("literal int", next_token),
        }
    }

    /// Parse a literal string
    pub fn parse_literal_string(&mut self) -> Result<String, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match next_token.token {
            Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value),
            Token::SingleQuotedString(s) => Ok(s),
            Token::DoubleQuotedString(s) => Ok(s),
            Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
                Ok(s)
            }
-            unexpected => self.expected("literal string", unexpected),
+            _ => self.expected("literal string", next_token),
        }
    }

    /// Parse a map key string
    pub fn parse_map_key(&mut self) -> Result<Expr, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match next_token.token {
            Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => {
                if self.peek_token() == Token::LParen {
                    return self.parse_function(ObjectName(vec![Ident::new(value)]));
@ -3828,13 +3908,14 @@ impl<'a> Parser<'a> {
            Token::Number(s, _) => Ok(Expr::Value(Value::Number(s, false))),
            #[cfg(feature = "bigdecimal")]
            Token::Number(s, _) => Ok(Expr::Value(Value::Number(s.parse().unwrap(), false))),
-            unexpected => self.expected("literal string, number or function", unexpected),
+            _ => self.expected("literal string, number or function", next_token),
        }
    }

    /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
    pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
-        let mut data = match self.next_token() {
+        let next_token = self.next_token();
+        let mut data = match next_token.token {
            Token::Word(w) => match w.keyword {
                Keyword::BOOLEAN => Ok(DataType::Boolean),
                Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)),
@ -4003,7 +4084,7 @@ impl<'a> Parser<'a> {
                    }
                }
            },
-            unexpected => self.expected("a data type name", unexpected),
+            _ => self.expected("a data type name", next_token),
        }?;

        // Parse array data types. Note: this is postgresql-specific and different from
@ -4019,14 +4100,16 @@ impl<'a> Parser<'a> {
        self.expect_token(&Token::LParen)?;
        let mut values = Vec::new();
        loop {
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::SingleQuotedString(value) => values.push(value),
-                unexpected => self.expected("a string", unexpected)?,
+                _ => self.expected("a string", next_token)?,
            }
-            match self.next_token() {
+            let next_token = self.next_token();
+            match next_token.token {
                Token::Comma => (),
                Token::RParen => break,
-                unexpected => self.expected(", or }", unexpected)?,
+                _ => self.expected(", or }", next_token)?,
            }
        }
        Ok(values)
@ -4040,7 +4123,8 @@ impl<'a> Parser<'a> {
        reserved_kwds: &[Keyword],
    ) -> Result<Option<Ident>, ParserError> {
        let after_as = self.parse_keyword(Keyword::AS);
-        match self.next_token() {
+        let next_token = self.next_token();
+        match next_token.token {
            // Accept any identifier after `AS` (though many dialects have restrictions on
            // keywords that may appear here). If there's no `AS`: don't parse keywords,
            // which may start a construct allowed in this position, to be parsed as aliases.
@ -4064,9 +4148,9 @@ impl<'a> Parser<'a> {
            Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
            // Support for MySql dialect double qouted string, `AS "HOUR"` for example
            Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
-            not_an_ident => {
+            _ => {
                if after_as {
-                    return self.expected("an identifier after AS", not_an_ident);
+                    return self.expected("an identifier after AS", next_token);
                }
                self.prev_token();
                Ok(None) // no alias found
@ -4108,7 +4192,7 @@ impl<'a> Parser<'a> {
    pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
        let mut idents = vec![];
        loop {
-            match self.peek_token() {
+            match self.peek_token().token {
                Token::Word(w) => {
                    idents.push(w.to_ident());
                }
@ -4122,11 +4206,12 @@ impl<'a> Parser<'a> {

    /// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
    pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
-        match self.next_token() {
+        let next_token = self.next_token();
+        match next_token.token {
            Token::Word(w) => Ok(w.to_ident()),
            Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
            Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
-            unexpected => self.expected("identifier", unexpected),
+            _ => self.expected("identifier", next_token),
        }
    }

@ -4231,7 +4316,8 @@ impl<'a> Parser<'a> {
        if self.consume_token(&Token::LParen) {
            let mut modifiers = Vec::new();
            loop {
-                match self.next_token() {
+                let next_token = self.next_token();
+                match next_token.token {
                    Token::Word(w) => modifiers.push(w.to_string()),
                    Token::Number(n, _) => modifiers.push(n),
                    Token::SingleQuotedString(s) => modifiers.push(s),
@ -4242,7 +4328,7 @@ impl<'a> Parser<'a> {
                    Token::RParen => {
                        break;
                    }
-                    unexpected => self.expected("type modifiers", unexpected)?,
+                    _ => self.expected("type modifiers", next_token)?,
                }
            }

@ -4485,7 +4571,7 @@ impl<'a> Parser<'a> {

        loop {
            // The query can be optionally followed by a set operator:
-            let op = self.parse_set_operator(&self.peek_token());
+            let op = self.parse_set_operator(&self.peek_token().token);
            let next_precedence = match op {
                // UNION and EXCEPT have the same binding power and evaluate left-to-right
                Some(SetOperator::Union) | Some(SetOperator::Except) => 10,
@ -4683,7 +4769,7 @@ impl<'a> Parser<'a> {
        let table_name;
        let schema_name;
        if token2 == Token::Period {
-            match token1 {
+            match token1.token {
                Token::Word(w) => {
                    schema_name = w.value;
                }
@ -4691,7 +4777,7 @@ impl<'a> Parser<'a> {
                    return self.expected("Schema name", token1);
                }
            }
-            match token3 {
+            match token3.token {
                Token::Word(w) => {
                    table_name = w.value;
                }
@ -4704,7 +4790,7 @@ impl<'a> Parser<'a> {
                schema_name: Some(schema_name),
            })
        } else {
-            match token1 {
+            match token1.token {
                Token::Word(w) => {
                    table_name = w.value;
                }
@ -4986,7 +5072,7 @@ impl<'a> Parser<'a> {
                }
            } else {
                let natural = self.parse_keyword(Keyword::NATURAL);
-                let peek_keyword = if let Token::Word(w) = self.peek_token() {
+                let peek_keyword = if let Token::Word(w) = self.peek_token().token {
                    w.keyword
                } else {
                    Keyword::NoKeyword
@ -6704,4 +6790,31 @@ mod tests {
            r#"UPDATE test SET name = $1, value = $2, where = $3, create = $4, is_default = $5, classification = $6, sort = $7 WHERE id = $8"#
        );
    }
+
+    #[test]
+    fn test_tokenizer_error_loc() {
+        let sql = "foo '";
+        let ast = Parser::parse_sql(&GenericDialect, sql);
+        assert_eq!(
+            ast,
+            Err(ParserError::TokenizerError(
+                "Unterminated string literal at Line: 1, Column 5".to_string()
+            ))
+        );
+    }
+
+    #[test]
+    fn test_parser_error_loc() {
+        // TODO: Once we thread token locations through the parser, we should update this
+        // test to assert the locations of the referenced token
+        let sql = "SELECT this is a syntax error";
+        let ast = Parser::parse_sql(&GenericDialect, sql);
+        assert_eq!(
+            ast,
+            Err(ParserError::ParserError(
+                "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
+                    .to_string()
+            ))
+        );
+    }
 }
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -300,6 +300,53 @@ impl fmt::Display for Whitespace {
    }
 }

+/// Location in input string
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub struct Location {
+    /// Line number, starting from 1
+    pub line: u64,
+    /// Line column, starting from 1
+    pub column: u64,
+}
+
+/// A [Token] with [Location] attached to it
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub struct TokenWithLocation {
+    pub token: Token,
+    pub location: Location,
+}
+
+impl TokenWithLocation {
+    pub fn new(token: Token, line: u64, column: u64) -> TokenWithLocation {
+        TokenWithLocation {
+            token,
+            location: Location { line, column },
+        }
+    }
+
+    pub fn wrap(token: Token) -> TokenWithLocation {
+        TokenWithLocation::new(token, 0, 0)
+    }
+}
+
+impl PartialEq<Token> for TokenWithLocation {
+    fn eq(&self, other: &Token) -> bool {
+        &self.token == other
+    }
+}
+
+impl PartialEq<TokenWithLocation> for Token {
+    fn eq(&self, other: &TokenWithLocation) -> bool {
+        self == &other.token
+    }
+}
+
+impl fmt::Display for TokenWithLocation {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.token.fmt(f)
+    }
+}
+
 /// Tokenizer error
 #[derive(Debug, PartialEq, Eq)]
 pub struct TokenizerError {
@ -321,58 +368,88 @@ impl fmt::Display for TokenizerError {
 #[cfg(feature = "std")]
 impl std::error::Error for TokenizerError {}

+struct State<'a> {
+    peekable: Peekable<Chars<'a>>,
+    pub line: u64,
+    pub col: u64,
+}
+
+impl<'a> State<'a> {
+    pub fn next(&mut self) -> Option<char> {
+        match self.peekable.next() {
+            None => None,
+            Some(s) => {
+                if s == '\n' {
+                    self.line += 1;
+                    self.col = 1;
+                } else {
+                    self.col += 1;
+                }
+                Some(s)
+            }
+        }
+    }
+
+    pub fn peek(&mut self) -> Option<&char> {
+        self.peekable.peek()
+    }
+
+    pub fn location(&self) -> Location {
+        Location {
+            line: self.line,
+            column: self.col,
+        }
+    }
+}
+
 /// SQL Tokenizer
 pub struct Tokenizer<'a> {
    dialect: &'a dyn Dialect,
    query: &'a str,
-    line: u64,
-    col: u64,
 }

 impl<'a> Tokenizer<'a> {
    /// Create a new SQL tokenizer for the specified SQL statement
    pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
-        Self {
-            dialect,
-            query,
-            line: 1,
-            col: 1,
-        }
+        Self { dialect, query }
    }

    /// Tokenize the statement and produce a vector of tokens
    pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
-        let mut peekable = self.query.chars().peekable();
+        let twl = self.tokenize_with_location()?;

        let mut tokens: Vec<Token> = vec![];
+        tokens.reserve(twl.len());
+        for token_with_location in twl {
+            tokens.push(token_with_location.token);
+        }
+        Ok(tokens)
+    }

-        while let Some(token) = self.next_token(&mut peekable)? {
-            match &token {
-                Token::Whitespace(Whitespace::Newline) => {
-                    self.line += 1;
-                    self.col = 1;
-                }
+    /// Tokenize the statement and produce a vector of tokens with location information
+    pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
+        let mut state = State {
+            peekable: self.query.chars().peekable(),
+            line: 1,
+            col: 1,
+        };

-                Token::Whitespace(Whitespace::Tab) => self.col += 4,
-                Token::Word(w) => {
-                    self.col += w.value.chars().count() as u64;
-                    if w.quote_style.is_some() {
-                        self.col += 2
-                    }
-                }
-                Token::Number(s, _) => self.col += s.chars().count() as u64,
-                Token::SingleQuotedString(s) => self.col += s.chars().count() as u64,
-                Token::Placeholder(s) => self.col += s.chars().count() as u64,
-                _ => self.col += 1,
-            }
+        let mut tokens: Vec<TokenWithLocation> = vec![];

-            tokens.push(token);
+        let mut location = state.location();
+        while let Some(token) = self.next_token(&mut state)? {
+            tokens.push(TokenWithLocation {
+                token,
+                location: location.clone(),
+            });
+
+            location = state.location();
        }
        Ok(tokens)
    }

    /// Get the next token or return None
-    fn next_token(&self, chars: &mut Peekable<Chars<'_>>) -> Result<Option<Token>, TokenizerError> {
+    fn next_token(&self, chars: &mut State) -> Result<Option<Token>, TokenizerError> {
        //println!("next_token: {:?}", chars.peek());
        match chars.peek() {
            Some(&ch) => match ch {
@ -405,10 +482,12 @@ impl<'a> Tokenizer<'a> {
                }
                // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
                x @ 'e' | x @ 'E' => {
+                    let starting_loc = chars.location();
                    chars.next(); // consume, to check the next char
                    match chars.peek() {
                        Some('\'') => {
-                            let s = self.tokenize_escaped_single_quoted_string(chars)?;
+                            let s =
+                                self.tokenize_escaped_single_quoted_string(starting_loc, chars)?;
                            Ok(Some(Token::EscapedStringLiteral(s)))
                        }
                        _ => {
@ -441,7 +520,12 @@ impl<'a> Tokenizer<'a> {
                    let s = self.tokenize_word(ch, chars);

                    if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
-                        let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| {
+                        let mut inner_state = State {
+                            peekable: s.chars().peekable(),
+                            line: 0,
+                            col: 0,
+                        };
+                        let mut s = peeking_take_while(&mut inner_state, |ch| {
                            matches!(ch, '0'..='9' | '.')
                        });
                        let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
@ -469,8 +553,9 @@ impl<'a> Tokenizer<'a> {
                    if self.dialect.is_delimited_identifier_start(ch)
                        && self
                            .dialect
-                            .is_proper_identifier_inside_quotes(chars.clone()) =>
+                            .is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
                {
+                    let error_loc = chars.location();
                    chars.next(); // consume the opening quote
                    let quote_end = Word::matching_end_quote(quote_start);
                    let (s, last_char) = parse_quoted_ident(chars, quote_end);
@ -478,10 +563,10 @@ impl<'a> Tokenizer<'a> {
                    if last_char == Some(quote_end) {
                        Ok(Some(Token::make_word(&s, Some(quote_start))))
                    } else {
-                        self.tokenizer_error(format!(
-                            "Expected close delimiter '{}' before EOF.",
-                            quote_end
-                        ))
+                        self.tokenizer_error(
+                            error_loc,
+                            format!("Expected close delimiter '{}' before EOF.", quote_end),
+                        )
                    }
                }
                // numbers and period
@ -698,16 +783,20 @@ impl<'a> Tokenizer<'a> {
        }
    }

-    fn tokenizer_error<R>(&self, message: impl Into<String>) -> Result<R, TokenizerError> {
+    fn tokenizer_error<R>(
+        &self,
+        loc: Location,
+        message: impl Into<String>,
+    ) -> Result<R, TokenizerError> {
        Err(TokenizerError {
            message: message.into(),
-            col: self.col,
-            line: self.line,
+            col: loc.column,
+            line: loc.line,
        })
    }

    // Consume characters until newline
-    fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
+    fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
        if let Some(ch) = chars.next() {
            assert_eq!(ch, '\n');
@ -717,7 +806,7 @@ impl<'a> Tokenizer<'a> {
    }

    /// Tokenize an identifier or keyword, after the first char is already consumed.
-    fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
+    fn tokenize_word(&self, first_char: char, chars: &mut State) -> String {
        let mut s = first_char.to_string();
        s.push_str(&peeking_take_while(chars, |ch| {
            self.dialect.is_identifier_part(ch)
@ -728,9 +817,13 @@ impl<'a> Tokenizer<'a> {
    /// Read a single quoted string, starting with the opening quote.
    fn tokenize_escaped_single_quoted_string(
        &self,
-        chars: &mut Peekable<Chars<'_>>,
+        starting_loc: Location,
+        chars: &mut State,
    ) -> Result<String, TokenizerError> {
        let mut s = String::new();
+
+        // This case is a bit tricky
+
        chars.next(); // consume the opening quote

        // slash escaping
@ -782,16 +875,18 @@ impl<'a> Tokenizer<'a> {
                }
            }
        }
-        self.tokenizer_error("Unterminated encoded string literal")
+        self.tokenizer_error(starting_loc, "Unterminated encoded string literal")
    }

    /// Read a single quoted string, starting with the opening quote.
    fn tokenize_quoted_string(
        &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut State,
        quote_style: char,
    ) -> Result<String, TokenizerError> {
        let mut s = String::new();
+        let error_loc = chars.location();
+
        chars.next(); // consume the opening quote

        // slash escaping is specific to MySQL dialect
@ -824,12 +919,12 @@ impl<'a> Tokenizer<'a> {
                }
            }
        }
-        self.tokenizer_error("Unterminated string literal")
+        self.tokenizer_error(error_loc, "Unterminated string literal")
    }

    fn tokenize_multiline_comment(
        &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut State,
    ) -> Result<Option<Token>, TokenizerError> {
        let mut s = String::new();
        let mut nested = 1;
@ -850,7 +945,12 @@ impl<'a> Tokenizer<'a> {
                    s.push(ch);
                    last_ch = ch;
                }
-                None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"),
+                None => {
+                    break self.tokenizer_error(
+                        chars.location(),
+                        "Unexpected EOF while in a multi-line comment",
+                    )
+                }
            }
        }
    }
@ -858,7 +958,7 @@ impl<'a> Tokenizer<'a> {
    #[allow(clippy::unnecessary_wraps)]
    fn consume_and_return(
        &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut State,
        t: Token,
    ) -> Result<Option<Token>, TokenizerError> {
        chars.next();
@ -869,10 +969,7 @@ impl<'a> Tokenizer<'a> {
 /// Read from `chars` until `predicate` returns `false` or EOF is hit.
 /// Return the characters read as String, and keep the first non-matching
 /// char available as `chars.next()`.
-fn peeking_take_while(
-    chars: &mut Peekable<Chars<'_>>,
-    mut predicate: impl FnMut(char) -> bool,
-) -> String {
+fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool) -> String {
    let mut s = String::new();
    while let Some(&ch) = chars.peek() {
        if predicate(ch) {
@ -885,7 +982,7 @@ fn peeking_take_while(
    s
 }

-fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
+fn parse_quoted_ident(chars: &mut State, quote_end: char) -> (String, Option<char>) {
    let mut last_char = None;
    let mut s = String::new();
    while let Some(ch) = chars.next() {
@ -1518,7 +1615,25 @@ mod tests {
        compare(expected, tokens);
    }

-    fn compare(expected: Vec<Token>, actual: Vec<Token>) {
+    #[test]
+    fn tokenize_with_location() {
+        let sql = "SELECT a,\n b";
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize_with_location().unwrap();
+        let expected = vec![
+            TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7),
+            TokenWithLocation::new(Token::make_word("a", None), 1, 8),
+            TokenWithLocation::new(Token::Comma, 1, 9),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 1, 10),
+            TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 2, 1),
+            TokenWithLocation::new(Token::make_word("b", None), 2, 2),
+        ];
+        compare(expected, tokens);
+    }
+
+    fn compare<T: PartialEq + std::fmt::Debug>(expected: Vec<T>, actual: Vec<T>) {
        //println!("------------------------------");
        //println!("tokens   = {:?}", actual);
        //println!("expected = {:?}", expected);