mirror of
https://github.com/apache/datafusion-sqlparser-rs.git
synced 2025-07-07 17:04:59 +00:00
replace with code from datafusion
This commit is contained in:
parent
a86bd30515
commit
0c23392adb
14 changed files with 1762 additions and 595 deletions
18
Cargo.toml
18
Cargo.toml
|
@ -1,6 +1,22 @@
|
||||||
[package]
|
[package]
|
||||||
name = "datafusion-sql"
|
name = "sqlparser"
|
||||||
|
description = "ANSI SQL parser"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
authors = ["Andy Grove <andygrove73@gmail.com>"]
|
authors = ["Andy Grove <andygrove73@gmail.com>"]
|
||||||
|
homepage = "https://github.com/andygrove/sqlparser-rs"
|
||||||
|
documentation = "https://github.com/andygrove/sqlparser-rs"
|
||||||
|
keywords = [ "sql", "lexer", "parser" ]
|
||||||
|
repository = "https://github.com/andygrove/sqlparser-rs"
|
||||||
|
license = "Apache-2.0"
|
||||||
|
include = [
|
||||||
|
"src/**/*.rs",
|
||||||
|
"Cargo.toml",
|
||||||
|
]
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "sqlparser"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fnv = "1.0.3"
|
||||||
|
lazy_static = "1.0"
|
201
LICENSE
201
LICENSE
|
@ -1,201 +0,0 @@
|
||||||
Apache License
|
|
||||||
Version 2.0, January 2004
|
|
||||||
http://www.apache.org/licenses/
|
|
||||||
|
|
||||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
||||||
|
|
||||||
1. Definitions.
|
|
||||||
|
|
||||||
"License" shall mean the terms and conditions for use, reproduction,
|
|
||||||
and distribution as defined by Sections 1 through 9 of this document.
|
|
||||||
|
|
||||||
"Licensor" shall mean the copyright owner or entity authorized by
|
|
||||||
the copyright owner that is granting the License.
|
|
||||||
|
|
||||||
"Legal Entity" shall mean the union of the acting entity and all
|
|
||||||
other entities that control, are controlled by, or are under common
|
|
||||||
control with that entity. For the purposes of this definition,
|
|
||||||
"control" means (i) the power, direct or indirect, to cause the
|
|
||||||
direction or management of such entity, whether by contract or
|
|
||||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
||||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
||||||
|
|
||||||
"You" (or "Your") shall mean an individual or Legal Entity
|
|
||||||
exercising permissions granted by this License.
|
|
||||||
|
|
||||||
"Source" form shall mean the preferred form for making modifications,
|
|
||||||
including but not limited to software source code, documentation
|
|
||||||
source, and configuration files.
|
|
||||||
|
|
||||||
"Object" form shall mean any form resulting from mechanical
|
|
||||||
transformation or translation of a Source form, including but
|
|
||||||
not limited to compiled object code, generated documentation,
|
|
||||||
and conversions to other media types.
|
|
||||||
|
|
||||||
"Work" shall mean the work of authorship, whether in Source or
|
|
||||||
Object form, made available under the License, as indicated by a
|
|
||||||
copyright notice that is included in or attached to the work
|
|
||||||
(an example is provided in the Appendix below).
|
|
||||||
|
|
||||||
"Derivative Works" shall mean any work, whether in Source or Object
|
|
||||||
form, that is based on (or derived from) the Work and for which the
|
|
||||||
editorial revisions, annotations, elaborations, or other modifications
|
|
||||||
represent, as a whole, an original work of authorship. For the purposes
|
|
||||||
of this License, Derivative Works shall not include works that remain
|
|
||||||
separable from, or merely link (or bind by name) to the interfaces of,
|
|
||||||
the Work and Derivative Works thereof.
|
|
||||||
|
|
||||||
"Contribution" shall mean any work of authorship, including
|
|
||||||
the original version of the Work and any modifications or additions
|
|
||||||
to that Work or Derivative Works thereof, that is intentionally
|
|
||||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
||||||
or by an individual or Legal Entity authorized to submit on behalf of
|
|
||||||
the copyright owner. For the purposes of this definition, "submitted"
|
|
||||||
means any form of electronic, verbal, or written communication sent
|
|
||||||
to the Licensor or its representatives, including but not limited to
|
|
||||||
communication on electronic mailing lists, source code control systems,
|
|
||||||
and issue tracking systems that are managed by, or on behalf of, the
|
|
||||||
Licensor for the purpose of discussing and improving the Work, but
|
|
||||||
excluding communication that is conspicuously marked or otherwise
|
|
||||||
designated in writing by the copyright owner as "Not a Contribution."
|
|
||||||
|
|
||||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
||||||
on behalf of whom a Contribution has been received by Licensor and
|
|
||||||
subsequently incorporated within the Work.
|
|
||||||
|
|
||||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
||||||
this License, each Contributor hereby grants to You a perpetual,
|
|
||||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
||||||
copyright license to reproduce, prepare Derivative Works of,
|
|
||||||
publicly display, publicly perform, sublicense, and distribute the
|
|
||||||
Work and such Derivative Works in Source or Object form.
|
|
||||||
|
|
||||||
3. Grant of Patent License. Subject to the terms and conditions of
|
|
||||||
this License, each Contributor hereby grants to You a perpetual,
|
|
||||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
||||||
(except as stated in this section) patent license to make, have made,
|
|
||||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
||||||
where such license applies only to those patent claims licensable
|
|
||||||
by such Contributor that are necessarily infringed by their
|
|
||||||
Contribution(s) alone or by combination of their Contribution(s)
|
|
||||||
with the Work to which such Contribution(s) was submitted. If You
|
|
||||||
institute patent litigation against any entity (including a
|
|
||||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
||||||
or a Contribution incorporated within the Work constitutes direct
|
|
||||||
or contributory patent infringement, then any patent licenses
|
|
||||||
granted to You under this License for that Work shall terminate
|
|
||||||
as of the date such litigation is filed.
|
|
||||||
|
|
||||||
4. Redistribution. You may reproduce and distribute copies of the
|
|
||||||
Work or Derivative Works thereof in any medium, with or without
|
|
||||||
modifications, and in Source or Object form, provided that You
|
|
||||||
meet the following conditions:
|
|
||||||
|
|
||||||
(a) You must give any other recipients of the Work or
|
|
||||||
Derivative Works a copy of this License; and
|
|
||||||
|
|
||||||
(b) You must cause any modified files to carry prominent notices
|
|
||||||
stating that You changed the files; and
|
|
||||||
|
|
||||||
(c) You must retain, in the Source form of any Derivative Works
|
|
||||||
that You distribute, all copyright, patent, trademark, and
|
|
||||||
attribution notices from the Source form of the Work,
|
|
||||||
excluding those notices that do not pertain to any part of
|
|
||||||
the Derivative Works; and
|
|
||||||
|
|
||||||
(d) If the Work includes a "NOTICE" text file as part of its
|
|
||||||
distribution, then any Derivative Works that You distribute must
|
|
||||||
include a readable copy of the attribution notices contained
|
|
||||||
within such NOTICE file, excluding those notices that do not
|
|
||||||
pertain to any part of the Derivative Works, in at least one
|
|
||||||
of the following places: within a NOTICE text file distributed
|
|
||||||
as part of the Derivative Works; within the Source form or
|
|
||||||
documentation, if provided along with the Derivative Works; or,
|
|
||||||
within a display generated by the Derivative Works, if and
|
|
||||||
wherever such third-party notices normally appear. The contents
|
|
||||||
of the NOTICE file are for informational purposes only and
|
|
||||||
do not modify the License. You may add Your own attribution
|
|
||||||
notices within Derivative Works that You distribute, alongside
|
|
||||||
or as an addendum to the NOTICE text from the Work, provided
|
|
||||||
that such additional attribution notices cannot be construed
|
|
||||||
as modifying the License.
|
|
||||||
|
|
||||||
You may add Your own copyright statement to Your modifications and
|
|
||||||
may provide additional or different license terms and conditions
|
|
||||||
for use, reproduction, or distribution of Your modifications, or
|
|
||||||
for any such Derivative Works as a whole, provided Your use,
|
|
||||||
reproduction, and distribution of the Work otherwise complies with
|
|
||||||
the conditions stated in this License.
|
|
||||||
|
|
||||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
||||||
any Contribution intentionally submitted for inclusion in the Work
|
|
||||||
by You to the Licensor shall be under the terms and conditions of
|
|
||||||
this License, without any additional terms or conditions.
|
|
||||||
Notwithstanding the above, nothing herein shall supersede or modify
|
|
||||||
the terms of any separate license agreement you may have executed
|
|
||||||
with Licensor regarding such Contributions.
|
|
||||||
|
|
||||||
6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
names, trademarks, service marks, or product names of the Licensor,
|
|
||||||
except as required for reasonable and customary use in describing the
|
|
||||||
origin of the Work and reproducing the content of the NOTICE file.
|
|
||||||
|
|
||||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
||||||
agreed to in writing, Licensor provides the Work (and each
|
|
||||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
||||||
implied, including, without limitation, any warranties or conditions
|
|
||||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
||||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
||||||
appropriateness of using or redistributing the Work and assume any
|
|
||||||
risks associated with Your exercise of permissions under this License.
|
|
||||||
|
|
||||||
8. Limitation of Liability. In no event and under no legal theory,
|
|
||||||
whether in tort (including negligence), contract, or otherwise,
|
|
||||||
unless required by applicable law (such as deliberate and grossly
|
|
||||||
negligent acts) or agreed to in writing, shall any Contributor be
|
|
||||||
liable to You for damages, including any direct, indirect, special,
|
|
||||||
incidental, or consequential damages of any character arising as a
|
|
||||||
result of this License or out of the use or inability to use the
|
|
||||||
Work (including but not limited to damages for loss of goodwill,
|
|
||||||
work stoppage, computer failure or malfunction, or any and all
|
|
||||||
other commercial damages or losses), even if such Contributor
|
|
||||||
has been advised of the possibility of such damages.
|
|
||||||
|
|
||||||
9. Accepting Warranty or Additional Liability. While redistributing
|
|
||||||
the Work or Derivative Works thereof, You may choose to offer,
|
|
||||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
||||||
or other liability obligations and/or rights consistent with this
|
|
||||||
License. However, in accepting such obligations, You may act only
|
|
||||||
on Your own behalf and on Your sole responsibility, not on behalf
|
|
||||||
of any other Contributor, and only if You agree to indemnify,
|
|
||||||
defend, and hold each Contributor harmless for any liability
|
|
||||||
incurred by, or claims asserted against, such Contributor by reason
|
|
||||||
of your accepting any such warranty or additional liability.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
APPENDIX: How to apply the Apache License to your work.
|
|
||||||
|
|
||||||
To apply the Apache License to your work, attach the following
|
|
||||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
||||||
replaced with your own identifying information. (Don't include
|
|
||||||
the brackets!) The text should be enclosed in the appropriate
|
|
||||||
comment syntax for the file format. We also recommend that a
|
|
||||||
file or class name and description of purpose be included on the
|
|
||||||
same "printed page" as the copyright notice for easier
|
|
||||||
identification within third-party archives.
|
|
||||||
|
|
||||||
Copyright [yyyy] [name of copyright owner]
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
201
LICENSE.TXT
Normal file
201
LICENSE.TXT
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
13
README.md
13
README.md
|
@ -1,12 +1,3 @@
|
||||||
# datafusion-sql
|
# SQL Parser
|
||||||
|
|
||||||
This is a work-in-progress to develop a new version of the DataFusion SQL Parser.
|
|
||||||
|
|
||||||
Goals for this version:
|
|
||||||
|
|
||||||
- Support for custom SQL dialects, so other projects can implement their own parsers easily
|
|
||||||
- Good error reporting (e.g. show line / column numbers and descriptive messages)
|
|
||||||
- Zero-copy of tokens when parsing
|
|
||||||
- Concise code
|
|
||||||
- Full support for ANSI SQL:2011 standard
|
|
||||||
|
|
||||||
|
TBD
|
|
@ -1,20 +0,0 @@
|
||||||
use std::sync::{Arc, Mutex};
|
|
||||||
|
|
||||||
extern crate datafusion_sql;
|
|
||||||
|
|
||||||
use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
|
|
||||||
use datafusion_sql::ansi::parser::ANSISQLParser;
|
|
||||||
use datafusion_sql::tokenizer::*;
|
|
||||||
use datafusion_sql::parser::*;
|
|
||||||
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
|
|
||||||
let sql = "SELECT 1 + 1";
|
|
||||||
|
|
||||||
// Create parsers
|
|
||||||
match ANSISQLParser::parse(sql).unwrap() {
|
|
||||||
Some(ast) => println!("{:?}", ast),
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
|
|
||||||
pub mod tokenizer;
|
|
||||||
pub mod parser;
|
|
|
@ -1,70 +0,0 @@
|
||||||
use std::cmp::PartialEq;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
//use std::rc::Rc;
|
|
||||||
//use std::sync::{Arc, Mutex};
|
|
||||||
|
|
||||||
use super::tokenizer::ANSISQLTokenizer;
|
|
||||||
use super::super::tokenizer::*;
|
|
||||||
use super::super::parser::*;
|
|
||||||
|
|
||||||
pub struct ANSISQLParser {
|
|
||||||
tokenizer: Box<SQLTokenizer>
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ANSISQLParser where {
|
|
||||||
|
|
||||||
pub fn parse(sql: &str) -> Result<Option<Box<SQLExpr>>, ParserError> {
|
|
||||||
let mut parser = ANSISQLParser { tokenizer: Box::new(ANSISQLTokenizer::new(sql)) };
|
|
||||||
parser.parse_expr()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SQLParser for ANSISQLParser {
|
|
||||||
|
|
||||||
fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
|
|
||||||
|
|
||||||
let precedence: usize = 0;
|
|
||||||
|
|
||||||
let mut e = self.parse_prefix()?;
|
|
||||||
|
|
||||||
match e {
|
|
||||||
Some(mut expr) => {
|
|
||||||
while let Some(token) = self.tokenizer.peek_token()? {
|
|
||||||
let next_precedence = self.tokenizer.precedence(&token);
|
|
||||||
|
|
||||||
if precedence >= next_precedence {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
expr = self.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Some(expr))
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> {
|
|
||||||
|
|
||||||
match self.tokenizer.next_token()? {
|
|
||||||
Some(SQLToken::Keyword(ref k)) => match k.to_uppercase().as_ref() {
|
|
||||||
"INSERT" => unimplemented!(),
|
|
||||||
"UPDATE" => unimplemented!(),
|
|
||||||
"DELETE" => unimplemented!(),
|
|
||||||
"SELECT" => unimplemented!(),
|
|
||||||
"CREATE" => unimplemented!(),
|
|
||||||
_ => unimplemented!()
|
|
||||||
},
|
|
||||||
_ => unimplemented!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_infix(&mut self, _left: &SQLExpr, _precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError> {
|
|
||||||
unimplemented!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,56 +0,0 @@
|
||||||
use std::cmp::PartialEq;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
use super::super::tokenizer::*;
|
|
||||||
|
|
||||||
pub struct ANSISQLTokenizer {
|
|
||||||
chars: CharSeq
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ANSISQLTokenizer {
|
|
||||||
pub fn new(sql: &str) -> Self {
|
|
||||||
ANSISQLTokenizer { chars: CharSeq::new(sql) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SQLTokenizer for ANSISQLTokenizer {
|
|
||||||
|
|
||||||
fn precedence(&self, _token: &SQLToken) -> usize {
|
|
||||||
unimplemented!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
|
|
||||||
unimplemented!()
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError> {
|
|
||||||
match self.chars.next() {
|
|
||||||
Some(ch) => match ch {
|
|
||||||
' ' | '\t' | '\n' => Ok(Some(SQLToken::Whitespace(ch))),
|
|
||||||
'0' ... '9' => {
|
|
||||||
let mut s = String::new();
|
|
||||||
s.push(ch);
|
|
||||||
while let Some(&ch) = self.chars.peek() {
|
|
||||||
match ch {
|
|
||||||
'0' ... '9' => {
|
|
||||||
self.chars.next(); // consume
|
|
||||||
s.push(ch);
|
|
||||||
},
|
|
||||||
_ => break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(Some(SQLToken::Literal(s)))
|
|
||||||
},
|
|
||||||
'+' => Ok(Some(SQLToken::Plus)),
|
|
||||||
'-' => Ok(Some(SQLToken::Minus)),
|
|
||||||
'*' => Ok(Some(SQLToken::Mult)),
|
|
||||||
'/' => Ok(Some(SQLToken::Divide)),
|
|
||||||
_ => Err(TokenizerError::UnexpectedChar(ch,Position::new(0, 0)))
|
|
||||||
},
|
|
||||||
None => Ok(None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
25
src/lib.rs
25
src/lib.rs
|
@ -1,3 +1,22 @@
|
||||||
pub mod ansi;
|
// Copyright 2018 Grove Enterprises LLC
|
||||||
pub mod tokenizer;
|
//
|
||||||
pub mod parser;
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
extern crate fnv;
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
|
||||||
|
pub mod sqlast;
|
||||||
|
pub mod sqlparser;
|
||||||
|
pub mod sqltokenizer;
|
||||||
|
|
106
src/parser.rs
106
src/parser.rs
|
@ -1,106 +0,0 @@
|
||||||
use std::cmp::PartialEq;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
use super::tokenizer::*;
|
|
||||||
|
|
||||||
// https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html
|
|
||||||
|
|
||||||
/// ANSI SQL:2011 Data Types
///
/// Grammar reference:
/// https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html
#[derive(Debug)]
pub enum SQLDataType {
    /// BOOLEAN
    Boolean,
    /// NUMERIC, DECIMAL, DEC
    Numeric { precision: usize, scale: Option<usize> },
    /// SMALLINT
    SmallInt,
    /// INT, INTEGER
    Int,
    /// BIGINT
    BigInt,
    /// Floating point: `FLOAT(precision)`
    Float(usize),
    /// REAL
    Real,
    /// Double: `DOUBLE PRECISION`
    Double,
    /// Fixed-length character. `CHAR, CHARACTER`
    Char(usize),
    /// Variable-length character: `VARCHAR, CHARACTER VARYING, CHAR VARYING`
    VarChar(usize),
    /// Character Large Object: `CHARACTER LARGE OBJECT, CHAR LARGE OBJECT, CLOB`
    Clob(usize),
    /// Fixed-length character. `NCHAR, NATIONAL CHAR, NATIONAL CHARACTER`
    NChar(usize),
    /// Variable-length character: `NCHAR VARYING, NATIONAL CHARACTER VARYING, NATIONAL CHAR VARYING`
    NVarChar(usize),
    /// National Character Large Object: `NATIONAL CHARACTER LARGE OBJECT, NCHAR LARGE OBJECT, NCLOB`
    NClob(usize),
    /// Fixed-length binary
    Binary(usize),
    /// Variable-length binary
    VarBinary(usize),
    /// Binary large object
    Blob(usize),
    /// Date
    Date,
    /// Time: `TIME [(precision)] [WITH TIME ZONE | WITHOUT TIME ZONE]`
    Time { precision: usize, tz: bool },
    /// Time: `TIMESTAMP [(precision)] [WITH TIME ZONE | WITHOUT TIME ZONE]`
    Timestamp { precision: usize, tz: bool },
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Binary operators that can appear in a `SQLExpr::Binary` expression.
#[derive(Debug)]
pub enum SQLOperator {
    Plus,  // `+`
    Minus, // `-`
    Mult,  // `*`
    Div,   // `/`
    Eq,    // `=`
    Gt,    // `>`
    GtEq,  // `>=`
    Lt,    // `<`
    LtEq,  // `<=`
}
|
|
||||||
|
|
||||||
/// SQL Expressions
#[derive(Debug)]
pub enum SQLExpr {
    /// Identifier e.g. table name or column name
    Identifier(String),
    /// Literal value
    Literal(String),
    /// Binary expression e.g. `1 + 2` or `fname LIKE "A%"`
    Binary(Box<SQLExpr>, SQLOperator, Box<SQLExpr>),
    /// Function invocation with function name and list of argument expressions
    FunctionCall(String, Vec<SQLExpr>),
    // The statement variants below carry no payload yet; they are
    // placeholders for the corresponding SQL statements.
    Insert,
    Update,
    Delete,
    Select,
    CreateTable,
}
|
|
||||||
|
|
||||||
/// Errors produced while parsing a token stream.
#[derive(Debug)]
pub enum ParserError {
    /// The parser found `actual` at (line, col) where one of `expected` was required.
    WrongToken { expected: Vec<SQLToken>, actual: SQLToken, line: usize, col: usize },
    /// Free-form error message (also used to wrap tokenizer errors).
    Custom(String)
}
|
|
||||||
|
|
||||||
impl From<TokenizerError> for ParserError {
    /// Wrap a tokenizer error as a parser error so `?` can cross the
    /// tokenize/parse boundary.
    fn from(e: TokenizerError) -> Self {
        ParserError::Custom(format!("{:?}", e))
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Pratt-style expression parser interface: a top-level entry point plus
/// prefix/infix hooks that implementations provide.
pub trait SQLParser {
    /// Parse a complete expression; `Ok(None)` means no expression was found.
    fn parse_expr(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError>;
    /// parse the prefix and stop once an infix operator is reached
    fn parse_prefix(&mut self) -> Result<Option<Box<SQLExpr>>, ParserError> ;
    /// parse the next infix expression, returning None if the precedence has changed
    fn parse_infix(&mut self, left: &SQLExpr, precedence: usize) -> Result<Option<Box<SQLExpr>>, ParserError>;
}
|
|
||||||
|
|
122
src/sqlast.rs
Normal file
122
src/sqlast.rs
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
// Copyright 2018 Grove Enterprises LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//! SQL Abstract Syntax Tree (AST) types
|
||||||
|
|
||||||
|
/// Supported file types for `CREATE EXTERNAL TABLE`
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
    /// Comma-separated values
    CSV,
    /// Newline-delimited JSON
    NdJson,
    /// Apache Parquet columnar format
    Parquet,
}
|
||||||
|
|
||||||
|
/// SQL Abstract Syntax Tree (AST)
#[derive(Debug, Clone, PartialEq)]
pub enum ASTNode {
    /// Identifier e.g. table name or column name
    SQLIdentifier(String),
    /// The `*` projection
    SQLWildcard,
    /// Multi-part identifier e.g. `myschema.dbo.mytable`
    SQLCompoundIdentifier(Vec<String>),
    /// `expr IS NULL`
    SQLIsNull(Box<ASTNode>),
    /// `expr IS NOT NULL`
    SQLIsNotNull(Box<ASTNode>),
    /// Binary expression e.g. `1 + 2`
    SQLBinaryExpr {
        left: Box<ASTNode>,
        op: SQLOperator,
        right: Box<ASTNode>,
    },
    /// `CAST(expr AS data_type)`
    SQLCast {
        expr: Box<ASTNode>,
        data_type: SQLType,
    },
    /// Parenthesized expression
    SQLNested(Box<ASTNode>),
    /// Unary operator applied to an expression
    SQLUnary {
        operator: SQLOperator,
        rex: Box<ASTNode>,
    },
    /// Integer literal
    SQLLiteralLong(i64),
    /// Floating-point literal
    SQLLiteralDouble(f64),
    /// String literal
    SQLLiteralString(String),
    /// Function call with argument expressions
    SQLFunction {
        id: String,
        args: Vec<ASTNode>,
    },
    /// One `ORDER BY` term: expression plus sort direction (`asc == true` for ASC)
    SQLOrderBy {
        expr: Box<ASTNode>,
        asc: bool,
    },
    /// SELECT statement
    SQLSelect {
        projection: Vec<ASTNode>,
        relation: Option<Box<ASTNode>>,
        selection: Option<Box<ASTNode>>,
        order_by: Option<Vec<ASTNode>>,
        group_by: Option<Vec<ASTNode>>,
        having: Option<Box<ASTNode>>,
        limit: Option<Box<ASTNode>>,
    },
    /// CREATE EXTERNAL TABLE statement
    SQLCreateTable {
        /// Table name
        name: String,
        /// Optional schema
        columns: Vec<SQLColumnDef>,
        /// File type (CSV or Parquet)
        file_type: FileType,
        /// For CSV files, indicate whether the file has a header row or not
        header_row: bool,
        /// Path to file or directory containing files
        location: String,
    },
}
|
||||||
|
|
||||||
|
/// SQL column definition
#[derive(Debug, Clone, PartialEq)]
pub struct SQLColumnDef {
    /// Column name
    pub name: String,
    /// Column data type
    pub data_type: SQLType,
    /// Whether NULL values are permitted in this column
    pub allow_null: bool,
}
|
||||||
|
|
||||||
|
/// SQL datatypes for literals in SQL statements
#[derive(Debug, Clone, PartialEq)]
pub enum SQLType {
    Boolean,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Int8,
    Int16,
    Int32,
    Int64,
    Float32,
    // NOTE(review): nonstandard name — this is a 64-bit float (DOUBLE).
    Double64,
    /// UTF-8 string with a maximum length
    Utf8(usize),
}
|
||||||
|
|
||||||
|
/// SQL Operator
#[derive(Debug, PartialEq, Clone)]
pub enum SQLOperator {
    Plus,     // `+`
    Minus,    // `-`
    Multiply, // `*`
    Divide,   // `/`
    Modulus,  // `%`
    Gt,       // `>`
    Lt,       // `<`
    GtEq,     // `>=`
    LtEq,     // `<=`
    Eq,       // `=`
    NotEq,    // `!=` / `<>`
    And,      // `AND`
    Or,       // `OR`
}
|
971
src/sqlparser.rs
Normal file
971
src/sqlparser.rs
Normal file
|
@ -0,0 +1,971 @@
|
||||||
|
// Copyright 2018 Grove Enterprises LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//! SQL Parser
|
||||||
|
|
||||||
|
use super::sqlast::*;
|
||||||
|
use super::sqltokenizer::*;
|
||||||
|
|
||||||
|
/// Errors returned while turning SQL text into an AST.
#[derive(Debug, Clone)]
pub enum ParserError {
    /// Error raised while tokenizing the raw SQL text.
    TokenizerError(String),
    /// Error raised while parsing the token stream.
    ParserError(String),
}
|
||||||
|
|
||||||
|
// Shorthand to build an `Err(ParserError::ParserError(..))` from anything
// with a `to_string()` (both `&str` and `String` messages are used below).
macro_rules! parser_err {
    ($MSG:expr) => {
        Err(ParserError::ParserError($MSG.to_string()))
    };
}
|
||||||
|
|
||||||
|
impl From<TokenizerError> for ParserError {
    /// Wrap a tokenizer error so `?` can be used across the
    /// tokenize/parse boundary (see `parse_sql`).
    fn from(e: TokenizerError) -> Self {
        ParserError::TokenizerError(format!("{:?}", e))
    }
}
|
||||||
|
|
||||||
|
/// SQL Parser
pub struct Parser {
    /// Token stream produced by the tokenizer
    tokens: Vec<Token>,
    /// Index of the next token to consume
    index: usize,
}
|
||||||
|
|
||||||
|
impl Parser {
|
||||||
|
/// Parse the specified tokens
|
||||||
|
pub fn new(tokens: Vec<Token>) -> Self {
|
||||||
|
Parser {
|
||||||
|
tokens: tokens,
|
||||||
|
index: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
    ///
    /// Tokenizes `sql`, then parses the resulting token stream; tokenizer
    /// errors are converted to `ParserError` via the `From` impl above.
    pub fn parse_sql(sql: String) -> Result<ASTNode, ParserError> {
        let mut tokenizer = Tokenizer::new(&sql);
        let tokens = tokenizer.tokenize()?;
        let mut parser = Parser::new(tokens);
        parser.parse()
    }
|
||||||
|
|
||||||
|
    /// Parse a new expression, starting at the lowest precedence (0) so
    /// every operator binds tighter than the entry point.
    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
        self.parse_expr(0)
    }
|
||||||
|
|
||||||
|
    /// Parse tokens until the precedence changes
    ///
    /// Pratt-parser core: parse one prefix expression, then repeatedly fold
    /// infix operators into it while the upcoming operator binds tighter
    /// than `precedence`.
    fn parse_expr(&mut self, precedence: u8) -> Result<ASTNode, ParserError> {
        let mut expr = self.parse_prefix()?;
        loop {
            let next_precedence = self.get_next_precedence()?;
            // Stop when the next operator binds no tighter than our caller's
            // operator; the caller will pick it up instead.
            if precedence >= next_precedence {
                break;
            }

            if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? {
                expr = infix_expr;
            }
        }
        Ok(expr)
    }
|
||||||
|
|
||||||
|
/// Parse an expression prefix
|
||||||
|
fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
|
||||||
|
match self.next_token() {
|
||||||
|
Some(t) => {
|
||||||
|
match t {
|
||||||
|
Token::Keyword(k) => match k.to_uppercase().as_ref() {
|
||||||
|
"SELECT" => Ok(self.parse_select()?),
|
||||||
|
"CREATE" => Ok(self.parse_create()?),
|
||||||
|
_ => return parser_err!(format!("No prefix parser for keyword {}", k)),
|
||||||
|
},
|
||||||
|
Token::Mult => Ok(ASTNode::SQLWildcard),
|
||||||
|
Token::Identifier(id) => {
|
||||||
|
match self.peek_token() {
|
||||||
|
Some(Token::LParen) => {
|
||||||
|
self.next_token(); // skip lparen
|
||||||
|
match id.to_uppercase().as_ref() {
|
||||||
|
"CAST" => self.parse_cast_expression(),
|
||||||
|
_ => {
|
||||||
|
let args = self.parse_expr_list()?;
|
||||||
|
self.next_token(); // skip rparen
|
||||||
|
Ok(ASTNode::SQLFunction { id, args })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(Token::Period) => {
|
||||||
|
let mut id_parts: Vec<String> = vec![id];
|
||||||
|
while self.peek_token() == Some(Token::Period) {
|
||||||
|
self.consume_token(&Token::Period)?;
|
||||||
|
match self.next_token() {
|
||||||
|
Some(Token::Identifier(id)) => id_parts.push(id),
|
||||||
|
_ => {
|
||||||
|
return parser_err!(format!(
|
||||||
|
"Error parsing compound identifier"
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(ASTNode::SQLCompoundIdentifier(id_parts))
|
||||||
|
}
|
||||||
|
_ => Ok(ASTNode::SQLIdentifier(id)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() {
|
||||||
|
Ok(n) => Ok(ASTNode::SQLLiteralDouble(n)),
|
||||||
|
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||||
|
},
|
||||||
|
Token::Number(ref n) => match n.parse::<i64>() {
|
||||||
|
Ok(n) => Ok(ASTNode::SQLLiteralLong(n)),
|
||||||
|
Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)),
|
||||||
|
},
|
||||||
|
Token::String(ref s) => Ok(ASTNode::SQLLiteralString(s.to_string())),
|
||||||
|
_ => parser_err!(format!(
|
||||||
|
"Prefix parser expected a keyword but found {:?}",
|
||||||
|
t
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => parser_err!(format!("Prefix parser expected a keyword but hit EOF")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
    ///
    /// Called after `CAST(` has already been consumed; parses the inner
    /// expression, the `AS`, the target type, and the closing paren.
    fn parse_cast_expression(&mut self) -> Result<ASTNode, ParserError> {
        let expr = self.parse_expr(0)?;
        self.consume_token(&Token::Keyword("AS".to_string()))?;
        let data_type = self.parse_data_type()?;
        self.consume_token(&Token::RParen)?;
        Ok(ASTNode::SQLCast {
            expr: Box::new(expr),
            data_type,
        })
    }
|
||||||
|
|
||||||
|
    /// Parse an expression infix (typically an operator)
    ///
    /// `expr` is the already-parsed left-hand side; `precedence` is the
    /// binding power of the operator being consumed, passed to the
    /// recursive `parse_expr` call for the right-hand side.
    fn parse_infix(
        &mut self,
        expr: ASTNode,
        precedence: u8,
    ) -> Result<Option<ASTNode>, ParserError> {
        match self.next_token() {
            Some(tok) => match tok {
                // `IS [NOT] NULL` is the only postfix-like keyword form;
                // every other keyword (AND/OR) is a plain binary operator.
                Token::Keyword(ref k) => if k == "IS" {
                    if self.parse_keywords(vec!["NULL"]) {
                        Ok(Some(ASTNode::SQLIsNull(Box::new(expr))))
                    } else if self.parse_keywords(vec!["NOT", "NULL"]) {
                        Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr))))
                    } else {
                        parser_err!("Invalid tokens after IS")
                    }
                } else {
                    Ok(Some(ASTNode::SQLBinaryExpr {
                        left: Box::new(expr),
                        op: self.to_sql_operator(&tok)?,
                        right: Box::new(self.parse_expr(precedence)?),
                    }))
                },
                Token::Eq
                | Token::Neq
                | Token::Gt
                | Token::GtEq
                | Token::Lt
                | Token::LtEq
                | Token::Plus
                | Token::Minus
                | Token::Mult
                | Token::Mod
                | Token::Div => Ok(Some(ASTNode::SQLBinaryExpr {
                    left: Box::new(expr),
                    op: self.to_sql_operator(&tok)?,
                    right: Box::new(self.parse_expr(precedence)?),
                })),
                _ => parser_err!(format!("No infix parser for token {:?}", tok)),
            },
            // EOF: no infix expression to parse.
            None => Ok(None),
        }
    }
|
||||||
|
|
||||||
|
/// Convert a token operator to an AST operator
|
||||||
|
fn to_sql_operator(&self, tok: &Token) -> Result<SQLOperator, ParserError> {
|
||||||
|
match tok {
|
||||||
|
&Token::Eq => Ok(SQLOperator::Eq),
|
||||||
|
&Token::Neq => Ok(SQLOperator::NotEq),
|
||||||
|
&Token::Lt => Ok(SQLOperator::Lt),
|
||||||
|
&Token::LtEq => Ok(SQLOperator::LtEq),
|
||||||
|
&Token::Gt => Ok(SQLOperator::Gt),
|
||||||
|
&Token::GtEq => Ok(SQLOperator::GtEq),
|
||||||
|
&Token::Plus => Ok(SQLOperator::Plus),
|
||||||
|
&Token::Minus => Ok(SQLOperator::Minus),
|
||||||
|
&Token::Mult => Ok(SQLOperator::Multiply),
|
||||||
|
&Token::Div => Ok(SQLOperator::Divide),
|
||||||
|
&Token::Mod => Ok(SQLOperator::Modulus),
|
||||||
|
&Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And),
|
||||||
|
&Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or),
|
||||||
|
_ => parser_err!(format!("Unsupported SQL operator {:?}", tok)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the precedence of the next token
|
||||||
|
fn get_next_precedence(&self) -> Result<u8, ParserError> {
|
||||||
|
if self.index < self.tokens.len() {
|
||||||
|
self.get_precedence(&self.tokens[self.index])
|
||||||
|
} else {
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Get the precedence of a token
    ///
    /// Higher numbers bind tighter: OR < AND < IS < comparisons < +/- < */%.
    /// Non-operator tokens get 0, which terminates the parse_expr loop.
    fn get_precedence(&self, tok: &Token) -> Result<u8, ParserError> {
        match tok {
            &Token::Keyword(ref k) if k == "OR" => Ok(5),
            &Token::Keyword(ref k) if k == "AND" => Ok(10),
            &Token::Keyword(ref k) if k == "IS" => Ok(15),
            &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => {
                Ok(20)
            }
            &Token::Plus | &Token::Minus => Ok(30),
            &Token::Mult | &Token::Div | &Token::Mod => Ok(40),
            _ => Ok(0),
        }
    }
|
||||||
|
|
||||||
|
/// Peek at the next token
|
||||||
|
fn peek_token(&mut self) -> Option<Token> {
|
||||||
|
if self.index < self.tokens.len() {
|
||||||
|
Some(self.tokens[self.index].clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the next token and increment the token index
|
||||||
|
fn next_token(&mut self) -> Option<Token> {
|
||||||
|
if self.index < self.tokens.len() {
|
||||||
|
self.index = self.index + 1;
|
||||||
|
Some(self.tokens[self.index - 1].clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Return the most recently consumed token without moving the cursor.
    ///
    /// NOTE(review): despite the historical name, this does NOT decrement
    /// the index — it only peeks at `tokens[index - 1]`. Used by
    /// `consume_token` to report what was last seen in error messages.
    fn prev_token(&mut self) -> Option<Token> {
        if self.index > 0 {
            Some(self.tokens[self.index - 1].clone())
        } else {
            None
        }
    }
|
||||||
|
|
||||||
|
/// Look for an expected keyword and consume it if it exists
|
||||||
|
fn parse_keyword(&mut self, expected: &'static str) -> bool {
|
||||||
|
match self.peek_token() {
|
||||||
|
Some(Token::Keyword(k)) => {
|
||||||
|
if expected.eq_ignore_ascii_case(k.as_str()) {
|
||||||
|
self.next_token();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look for an expected sequence of keywords and consume them if they exist
|
||||||
|
fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool {
|
||||||
|
let index = self.index;
|
||||||
|
for keyword in keywords {
|
||||||
|
//println!("parse_keywords aborting .. expecting {}", keyword);
|
||||||
|
if !self.parse_keyword(&keyword) {
|
||||||
|
//println!("parse_keywords aborting .. did not find {}", keyword);
|
||||||
|
// reset index and return immediately
|
||||||
|
self.index = index;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
// fn parse_identifier(&mut self) -> Result<ASTNode::SQLIdentifier, Err> {
|
||||||
|
// let expr = self.parse_expr()?;
|
||||||
|
// match expr {
|
||||||
|
// Some(ASTNode::SQLIdentifier { .. }) => Ok(expr),
|
||||||
|
// _ => parser_err!(format!("Expected identifier but found {:?}", expr)))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
    /// Consume the next token if it matches the expected token, otherwise return an error
    ///
    /// Returns `Ok(true)` when consumed, `Ok(false)` on a non-matching
    /// token, and `Err` only at end of input (the error message reports
    /// the last consumed token via `prev_token`).
    fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
        match self.peek_token() {
            Some(ref t) => if *t == *expected {
                self.next_token();
                Ok(true)
            } else {
                Ok(false)
            },
            _ => parser_err!(format!(
                "expected token {:?} but was {:?}",
                expected,
                self.prev_token()
            )),
        }
    }
|
||||||
|
|
||||||
|
    /// Parse a SQL CREATE statement
    ///
    /// Only `CREATE EXTERNAL TABLE name (cols) STORED AS fmt LOCATION 'path'`
    /// is supported. The column list is optional; `STORED AS` and
    /// `LOCATION` are mandatory.
    fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
        if self.parse_keywords(vec!["EXTERNAL", "TABLE"]) {
            match self.next_token() {
                Some(Token::Identifier(id)) => {
                    // parse optional column list (schema)
                    let mut columns = vec![];
                    if self.consume_token(&Token::LParen)? {
                        loop {
                            if let Some(Token::Identifier(column_name)) = self.next_token() {
                                if let Ok(data_type) = self.parse_data_type() {
                                    // Nullability: explicit NOT NULL forbids nulls;
                                    // plain NULL or nothing at all allows them.
                                    let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) {
                                        false
                                    } else if self.parse_keyword("NULL") {
                                        true
                                    } else {
                                        true
                                    };

                                    // Comma continues the list; RParen ends it.
                                    match self.peek_token() {
                                        Some(Token::Comma) => {
                                            self.next_token();
                                            columns.push(SQLColumnDef {
                                                name: column_name,
                                                data_type: data_type,
                                                allow_null,
                                            });
                                        }
                                        Some(Token::RParen) => {
                                            self.next_token();
                                            columns.push(SQLColumnDef {
                                                name: column_name,
                                                data_type: data_type,
                                                allow_null,
                                            });
                                            break;
                                        }
                                        _ => {
                                            return parser_err!(
                                                "Expected ',' or ')' after column definition"
                                            );
                                        }
                                    }
                                } else {
                                    return parser_err!(
                                        "Error parsing data type in column definition"
                                    );
                                }
                            } else {
                                return parser_err!("Error parsing column name");
                            }
                        }
                    }

                    // `STORED AS` determines the file type; for CSV an
                    // optional WITH/WITHOUT HEADER ROW clause follows
                    // (headers default to true).
                    let mut headers = true;
                    let file_type: FileType = if self.parse_keywords(vec!["STORED", "AS", "CSV"]) {
                        if self.parse_keywords(vec!["WITH", "HEADER", "ROW"]) {
                            headers = true;
                        } else if self.parse_keywords(vec!["WITHOUT", "HEADER", "ROW"]) {
                            headers = false;
                        }
                        FileType::CSV
                    } else if self.parse_keywords(vec!["STORED", "AS", "NDJSON"]) {
                        FileType::NdJson
                    } else if self.parse_keywords(vec!["STORED", "AS", "PARQUET"]) {
                        FileType::Parquet
                    } else {
                        return parser_err!(format!(
                            "Expected 'STORED AS' clause, found {:?}",
                            self.peek_token()
                        ));
                    };

                    // Mandatory LOCATION clause with a string literal path.
                    let location: String = if self.parse_keywords(vec!["LOCATION"]) {
                        self.parse_literal_string()?
                    } else {
                        return parser_err!("Missing 'LOCATION' clause");
                    };

                    Ok(ASTNode::SQLCreateTable {
                        name: id,
                        columns,
                        file_type,
                        header_row: headers,
                        location,
                    })
                }
                _ => parser_err!(format!(
                    "Unexpected token after CREATE EXTERNAL TABLE: {:?}",
                    self.peek_token()
                )),
            }
        } else {
            parser_err!(format!(
                "Unexpected token after CREATE: {:?}",
                self.peek_token()
            ))
        }
    }
|
||||||
|
|
||||||
|
/// Parse a literal integer/long
|
||||||
|
fn parse_literal_int(&mut self) -> Result<i64, ParserError> {
|
||||||
|
match self.next_token() {
|
||||||
|
Some(Token::Number(s)) => s.parse::<i64>().map_err(|e| {
|
||||||
|
ParserError::ParserError(format!("Could not parse '{}' as i64: {}", s, e))
|
||||||
|
}),
|
||||||
|
other => parser_err!(format!("Expected literal int, found {:?}", other)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a literal string
|
||||||
|
fn parse_literal_string(&mut self) -> Result<String, ParserError> {
|
||||||
|
match self.next_token() {
|
||||||
|
Some(Token::String(ref s)) => Ok(s.clone()),
|
||||||
|
other => parser_err!(format!("Expected literal string, found {:?}", other)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
    ///
    /// Accepts both SQL names (INT, DOUBLE, VARCHAR) and Arrow-style names
    /// (INT32, FLOAT64, UTF8). String types take an optional `(length)`;
    /// without one the length defaults to 100.
    fn parse_data_type(&mut self) -> Result<SQLType, ParserError> {
        match self.next_token() {
            Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
                "BOOLEAN" => Ok(SQLType::Boolean),
                "UINT8" => Ok(SQLType::UInt8),
                "UINT16" => Ok(SQLType::UInt16),
                "UINT32" => Ok(SQLType::UInt32),
                "UINT64" => Ok(SQLType::UInt64),
                "INT8" => Ok(SQLType::Int8),
                "INT16" => Ok(SQLType::Int16),
                "INT32" | "INT" | "INTEGER" => Ok(SQLType::Int32),
                "INT64" | "LONG" => Ok(SQLType::Int64),
                "FLOAT32" | "FLOAT" => Ok(SQLType::Float32),
                "FLOAT64" | "DOUBLE" => Ok(SQLType::Double64),
                "UTF8" | "VARCHAR" | "STRING" => {
                    // optional length
                    if self.consume_token(&Token::LParen)? {
                        let n = self.parse_literal_int()?;
                        self.consume_token(&Token::RParen)?;
                        Ok(SQLType::Utf8(n as usize))
                    } else {
                        // no explicit length — default to 100
                        Ok(SQLType::Utf8(100 as usize))
                    }
                }
                _ => parser_err!(format!("Invalid data type '{:?}'", k)),
            },
            other => parser_err!(format!("Invalid data type: '{:?}'", other)),
        }
    }
|
||||||
|
|
||||||
|
    /// Parse a SELECT statement
    ///
    /// Clauses are parsed in fixed order: projection, FROM, WHERE,
    /// GROUP BY, HAVING, ORDER BY, LIMIT. Each clause except the
    /// projection is optional. Any trailing token is an error.
    fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
        let projection = self.parse_expr_list()?;

        let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") {
            //TODO: add support for JOIN
            Some(Box::new(self.parse_expr(0)?))
        } else {
            None
        };

        let selection = if self.parse_keyword("WHERE") {
            Some(Box::new(self.parse_expr(0)?))
        } else {
            None
        };

        let group_by = if self.parse_keywords(vec!["GROUP", "BY"]) {
            Some(self.parse_expr_list()?)
        } else {
            None
        };

        let having = if self.parse_keyword("HAVING") {
            Some(Box::new(self.parse_expr(0)?))
        } else {
            None
        };

        let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
            Some(self.parse_order_by_expr_list()?)
        } else {
            None
        };

        let limit = if self.parse_keyword("LIMIT") {
            self.parse_limit()?
        } else {
            None
        };

        // Anything left over after all clauses is a syntax error.
        if let Some(next_token) = self.peek_token() {
            parser_err!(format!(
                "Unexpected token at end of SELECT: {:?}",
                next_token
            ))
        } else {
            Ok(ASTNode::SQLSelect {
                projection,
                selection,
                relation,
                limit,
                order_by,
                group_by,
                having,
            })
        }
    }
|
||||||
|
|
||||||
|
/// Parse a comma-delimited list of SQL expressions
|
||||||
|
fn parse_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
|
||||||
|
let mut expr_list: Vec<ASTNode> = vec![];
|
||||||
|
loop {
|
||||||
|
expr_list.push(self.parse_expr(0)?);
|
||||||
|
if let Some(t) = self.peek_token() {
|
||||||
|
if t == Token::Comma {
|
||||||
|
self.next_token();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//EOF
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(expr_list)
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Parse a comma-delimited list of SQL ORDER BY expressions
    ///
    /// Each expression may be followed by an optional ASC/DESC modifier;
    /// the direction defaults to ascending when omitted.
    fn parse_order_by_expr_list(&mut self) -> Result<Vec<ASTNode>, ParserError> {
        let mut expr_list: Vec<ASTNode> = vec![];
        loop {
            let expr = self.parse_expr(0)?;

            // look for optional ASC / DESC specifier
            let asc = match self.peek_token() {
                Some(Token::Keyword(k)) => {
                    self.next_token(); // consume it
                    match k.to_uppercase().as_ref() {
                        "ASC" => true,
                        "DESC" => false,
                        _ => {
                            return parser_err!(format!(
                                "Invalid modifier for ORDER BY expression: {:?}",
                                k
                            ))
                        }
                    }
                }
                // comma / EOF after the expression: default to ascending
                Some(Token::Comma) => true,
                Some(other) => {
                    return parser_err!(format!("Unexpected token after ORDER BY expr: {:?}", other))
                }
                None => true,
            };

            expr_list.push(ASTNode::SQLOrderBy {
                expr: Box::new(expr),
                asc,
            });

            // A comma continues the list; anything else (or EOF) ends it.
            if let Some(t) = self.peek_token() {
                if t == Token::Comma {
                    self.next_token();
                } else {
                    break;
                }
            } else {
                // EOF
                break;
            }
        }
        Ok(expr_list)
    }
|
||||||
|
|
||||||
|
/// Parse a LIMIT clause
|
||||||
|
fn parse_limit(&mut self) -> Result<Option<Box<ASTNode>>, ParserError> {
|
||||||
|
if self.parse_keyword("ALL") {
|
||||||
|
Ok(None)
|
||||||
|
} else {
|
||||||
|
self.parse_literal_int()
|
||||||
|
.map(|n| Some(Box::new(ASTNode::SQLLiteralLong(n))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_simple_select() {
|
||||||
|
let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5");
|
||||||
|
let ast = parse_sql(&sql);
|
||||||
|
match ast {
|
||||||
|
ASTNode::SQLSelect {
|
||||||
|
projection, limit, ..
|
||||||
|
} => {
|
||||||
|
assert_eq!(3, projection.len());
|
||||||
|
assert_eq!(Some(Box::new(ASTNode::SQLLiteralLong(5))), limit);
|
||||||
|
}
|
||||||
|
_ => assert!(false),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_select_wildcard() {
|
||||||
|
let sql = String::from("SELECT * FROM customer");
|
||||||
|
let ast = parse_sql(&sql);
|
||||||
|
match ast {
|
||||||
|
ASTNode::SQLSelect { projection, .. } => {
|
||||||
|
assert_eq!(1, projection.len());
|
||||||
|
assert_eq!(ASTNode::SQLWildcard, projection[0]);
|
||||||
|
}
|
||||||
|
_ => assert!(false),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_select_count_wildcard() {
|
||||||
|
let sql = String::from("SELECT COUNT(*) FROM customer");
|
||||||
|
let ast = parse_sql(&sql);
|
||||||
|
match ast {
|
||||||
|
ASTNode::SQLSelect { projection, .. } => {
|
||||||
|
assert_eq!(1, projection.len());
|
||||||
|
assert_eq!(
|
||||||
|
ASTNode::SQLFunction {
|
||||||
|
id: "COUNT".to_string(),
|
||||||
|
args: vec![ASTNode::SQLWildcard],
|
||||||
|
},
|
||||||
|
projection[0]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
_ => assert!(false),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_select_string_predicate() {
|
||||||
|
let sql = String::from(
|
||||||
|
"SELECT id, fname, lname FROM customer \
|
||||||
|
WHERE salary != 'Not Provided' AND salary != ''",
|
||||||
|
);
|
||||||
|
let _ast = parse_sql(&sql);
|
||||||
|
//TODO: add assertions
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_projection_nested_type() {
|
||||||
|
let sql = String::from("SELECT customer.address.state FROM foo");
|
||||||
|
let _ast = parse_sql(&sql);
|
||||||
|
//TODO: add assertions
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn parse_compound_expr_1() {
    use self::ASTNode::*;
    use self::SQLOperator::*;
    // `*` binds tighter than `+`, so the product becomes the right operand.
    let ast = parse_sql("a + b * c");
    let expected = SQLBinaryExpr {
        left: Box::new(SQLIdentifier("a".to_string())),
        op: Plus,
        right: Box::new(SQLBinaryExpr {
            left: Box::new(SQLIdentifier("b".to_string())),
            op: Multiply,
            right: Box::new(SQLIdentifier("c".to_string())),
        }),
    };
    assert_eq!(expected, ast);
}
|
||||||
|
|
||||||
|
#[test]
fn parse_compound_expr_2() {
    use self::ASTNode::*;
    use self::SQLOperator::*;
    // With `*` first, the product becomes the left operand of the `+`.
    let ast = parse_sql("a * b + c");
    let expected = SQLBinaryExpr {
        left: Box::new(SQLBinaryExpr {
            left: Box::new(SQLIdentifier("a".to_string())),
            op: Multiply,
            right: Box::new(SQLIdentifier("b".to_string())),
        }),
        op: Plus,
        right: Box::new(SQLIdentifier("c".to_string())),
    };
    assert_eq!(expected, ast);
}
|
||||||
|
|
||||||
|
#[test]
fn parse_is_null() {
    use self::ASTNode::*;
    // `IS NULL` wraps its operand in a dedicated AST node.
    let ast = parse_sql("a IS NULL");
    assert_eq!(SQLIsNull(Box::new(SQLIdentifier("a".to_string()))), ast);
}
|
||||||
|
|
||||||
|
#[test]
fn parse_is_not_null() {
    use self::ASTNode::*;
    // `IS NOT NULL` is a distinct node, not a negation of SQLIsNull.
    let ast = parse_sql("a IS NOT NULL");
    assert_eq!(SQLIsNotNull(Box::new(SQLIdentifier("a".to_string()))), ast);
}
|
||||||
|
|
||||||
|
#[test]
fn parse_select_order_by() {
    // ORDER BY items keep their textual order and per-column direction.
    let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC";
    match parse_sql(sql) {
        ASTNode::SQLSelect { order_by, .. } => {
            assert_eq!(
                Some(vec![
                    ASTNode::SQLOrderBy {
                        expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())),
                        asc: true,
                    },
                    ASTNode::SQLOrderBy {
                        expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())),
                        asc: false,
                    },
                ]),
                order_by
            );
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLSelect, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_select_group_by() {
    // GROUP BY columns are preserved in declaration order.
    let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname";
    match parse_sql(sql) {
        ASTNode::SQLSelect { group_by, .. } => {
            assert_eq!(
                Some(vec![
                    ASTNode::SQLIdentifier("lname".to_string()),
                    ASTNode::SQLIdentifier("fname".to_string()),
                ]),
                group_by
            );
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLSelect, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_limit_accepts_all() {
    // `LIMIT ALL` is accepted and is equivalent to no limit at all.
    let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT ALL";
    match parse_sql(sql) {
        ASTNode::SQLSelect {
            projection, limit, ..
        } => {
            assert_eq!(3, projection.len());
            assert_eq!(None, limit);
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLSelect, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_cast() {
    // CAST(id AS DOUBLE) maps the SQL DOUBLE type to SQLType::Double64.
    let sql = "SELECT CAST(id AS DOUBLE) FROM customer";
    match parse_sql(sql) {
        ASTNode::SQLSelect { projection, .. } => {
            assert_eq!(1, projection.len());
            assert_eq!(
                ASTNode::SQLCast {
                    expr: Box::new(ASTNode::SQLIdentifier("id".to_string())),
                    data_type: SQLType::Double64
                },
                projection[0]
            );
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLSelect, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_create_external_table_csv_with_header_row() {
    let sql = "CREATE EXTERNAL TABLE uk_cities (\
               name VARCHAR(100) NOT NULL,\
               lat DOUBLE NULL,\
               lng DOUBLE NULL) \
               STORED AS CSV WITH HEADER ROW \
               LOCATION '/mnt/ssd/uk_cities.csv'";
    match parse_sql(sql) {
        ASTNode::SQLCreateTable {
            name,
            columns,
            file_type,
            header_row,
            location,
        } => {
            assert_eq!("uk_cities", name);
            assert_eq!(3, columns.len());
            assert_eq!(FileType::CSV, file_type);
            // `assert!` on the bool directly, not `assert_eq!(true, ..)`
            // (clippy: bool_assert_comparison).
            assert!(header_row);
            assert_eq!("/mnt/ssd/uk_cities.csv", location);

            // Column definitions: name, type, and nullability.
            let c_name = &columns[0];
            assert_eq!("name", c_name.name);
            assert_eq!(SQLType::Utf8(100), c_name.data_type);
            assert!(!c_name.allow_null);

            let c_lat = &columns[1];
            assert_eq!("lat", c_lat.name);
            assert_eq!(SQLType::Double64, c_lat.data_type);
            assert!(c_lat.allow_null);

            let c_lng = &columns[2];
            assert_eq!("lng", c_lng.name);
            assert_eq!(SQLType::Double64, c_lng.data_type);
            assert!(c_lng.allow_null);
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLCreateTable, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_create_external_table_csv_without_header_row() {
    let sql = "CREATE EXTERNAL TABLE uk_cities (\
               name VARCHAR(100) NOT NULL,\
               lat DOUBLE NOT NULL,\
               lng DOUBLE NOT NULL) \
               STORED AS CSV WITHOUT HEADER ROW \
               LOCATION '/mnt/ssd/uk_cities.csv'";
    match parse_sql(sql) {
        ASTNode::SQLCreateTable {
            name,
            columns,
            file_type,
            header_row,
            location,
        } => {
            assert_eq!("uk_cities", name);
            assert_eq!(3, columns.len());
            assert_eq!(FileType::CSV, file_type);
            // WITHOUT HEADER ROW must clear the header flag
            // (clippy: bool_assert_comparison, so assert on the bool itself).
            assert!(!header_row);
            assert_eq!("/mnt/ssd/uk_cities.csv", location);
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLCreateTable, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_create_external_table_parquet() {
    let sql = "CREATE EXTERNAL TABLE uk_cities \
               STORED AS PARQUET \
               LOCATION '/mnt/ssd/uk_cities.parquet'";
    match parse_sql(sql) {
        ASTNode::SQLCreateTable {
            name,
            columns,
            file_type,
            location,
            ..
        } => {
            assert_eq!("uk_cities", name);
            // No column list was given, so the parsed schema is empty.
            assert!(columns.is_empty());
            assert_eq!(FileType::Parquet, file_type);
            assert_eq!("/mnt/ssd/uk_cities.parquet", location);
        }
        // `panic!` replaces the clippy-flagged `assert!(false)`.
        other => panic!("expected SQLCreateTable, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_scalar_function_in_projection() {
    // A scalar function call in the projection becomes a SQLFunction node.
    match parse_sql("SELECT sqrt(id) FROM foo") {
        ASTNode::SQLSelect { projection, .. } => {
            assert_eq!(
                vec![ASTNode::SQLFunction {
                    id: String::from("sqrt"),
                    args: vec![ASTNode::SQLIdentifier(String::from("id"))],
                }],
                projection
            );
        }
        // `panic!` replaces the clippy-flagged `assert!(false)` else-branch.
        other => panic!("expected SQLSelect, got {:?}", other),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn parse_aggregate_with_group_by() {
    // Smoke test: aggregates combined with GROUP BY should parse cleanly.
    let _ast = parse_sql("SELECT a, COUNT(1), MIN(b), MAX(b) FROM foo GROUP BY a");
    //TODO: assertions
}
|
||||||
|
|
||||||
|
#[test]
fn parse_select_version() {
    // `@@version`-style session variables tokenize as a single identifier.
    match parse_sql("SELECT @@version") {
        ASTNode::SQLSelect { ref projection, .. } => {
            assert_eq!(
                projection[0],
                ASTNode::SQLIdentifier("@@version".to_string())
            );
        }
        _ => panic!(),
    }
}
|
||||||
|
|
||||||
|
fn parse_sql(sql: &str) -> ASTNode {
|
||||||
|
let mut tokenizer = Tokenizer::new(&sql);
|
||||||
|
let tokens = tokenizer.tokenize().unwrap();
|
||||||
|
let mut parser = Parser::new(tokens);
|
||||||
|
let ast = parser.parse().unwrap();
|
||||||
|
ast
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
427
src/sqltokenizer.rs
Normal file
427
src/sqltokenizer.rs
Normal file
|
@ -0,0 +1,427 @@
|
||||||
|
// Copyright 2018 Grove Enterprises LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//! SQL Tokenizer
|
||||||
|
|
||||||
|
use std::iter::Peekable;
|
||||||
|
use std::str::Chars;
|
||||||
|
|
||||||
|
use fnv::FnvHashSet;
|
||||||
|
|
||||||
|
/// SQL Token enumeration, produced by [`Tokenizer::tokenize`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// SQL identifier e.g. table or column name
    Identifier(String),
    /// SQL keyword e.g. Keyword("SELECT"); always stored in upper case
    Keyword(String),
    /// Numeric literal
    Number(String),
    /// String literal
    String(String),
    /// Comma
    Comma,
    /// Whitespace (space, tab, etc); filtered out before the token
    /// stream is returned to the caller
    Whitespace,
    /// Equality operator `=`
    Eq,
    /// Not Equals operator `!=` or `<>`
    Neq,
    /// Less Than operator `<`
    Lt,
    /// Greater Than operator `>`
    Gt,
    /// Less Than Or Equals operator `<=`
    LtEq,
    /// Greater Than Or Equals operator `>=`
    GtEq,
    /// Plus operator `+`
    Plus,
    /// Minus operator `-`
    Minus,
    /// Multiplication operator `*`
    Mult,
    /// Division operator `/`
    Div,
    /// Modulo Operator `%`
    Mod,
    /// Left parenthesis `(`
    LParen,
    /// Right parenthesis `)`
    RParen,
    /// Period (used for compound identifiers or projections into nested types)
    Period,
}
|
||||||
|
|
||||||
|
/// Tokenizer error
///
/// Carries a human-readable message describing why tokenizing failed.
#[derive(Debug)]
pub struct TokenizerError(String);
|
||||||
|
|
||||||
|
lazy_static! {
    /// Upper-cased reserved words. The tokenizer uses this set to decide
    /// whether a word becomes `Token::Keyword` or `Token::Identifier`.
    static ref KEYWORDS: FnvHashSet<&'static str> = [
        // SQL keywords
        "SELECT", "FROM", "WHERE", "LIMIT", "ORDER", "GROUP", "BY", "HAVING",
        "UNION", "ALL", "INSERT", "UPDATE", "DELETE", "IN", "IS", "NULL",
        "SET", "CREATE", "EXTERNAL", "TABLE", "ASC", "DESC", "AND", "OR",
        "NOT", "AS", "STORED", "CSV", "PARQUET", "LOCATION", "WITH",
        "WITHOUT", "HEADER", "ROW",
        // SQL types
        "STRING", "VARCHAR", "FLOAT", "DOUBLE", "INT", "INTEGER", "LONG",
        // Arrow native types
        "BOOLEAN", "UINT8", "UINT16", "UINT32", "UINT64", "INT8", "INT16",
        "INT32", "INT64", "FLOAT32", "FLOAT64", "UTF8",
    ]
    .iter()
    .cloned()
    .collect();
}
|
||||||
|
|
||||||
|
/// SQL Tokenizer
pub struct Tokenizer {
    /// Owned copy of the SQL statement text being tokenized.
    pub query: String,
}
|
||||||
|
|
||||||
|
impl Tokenizer {
|
||||||
|
/// Create a new SQL tokenizer for the specified SQL statement
|
||||||
|
pub fn new(query: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
query: query.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tokenize the statement and produce a vector of tokens
|
||||||
|
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
|
||||||
|
let mut peekable = self.query.chars().peekable();
|
||||||
|
|
||||||
|
let mut tokens: Vec<Token> = vec![];
|
||||||
|
|
||||||
|
while let Some(token) = self.next_token(&mut peekable)? {
|
||||||
|
tokens.push(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(tokens
|
||||||
|
.into_iter()
|
||||||
|
.filter(|t| match t {
|
||||||
|
Token::Whitespace => false,
|
||||||
|
_ => true,
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the next token or return None
|
||||||
|
fn next_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<Token>, TokenizerError> {
|
||||||
|
//println!("next_token: {:?}", chars.peek());
|
||||||
|
match chars.peek() {
|
||||||
|
Some(&ch) => match ch {
|
||||||
|
// whitespace
|
||||||
|
' ' | '\t' | '\n' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
Ok(Some(Token::Whitespace))
|
||||||
|
}
|
||||||
|
// identifier or keyword
|
||||||
|
'a'...'z' | 'A'...'Z' | '_' | '@' => {
|
||||||
|
let mut s = String::new();
|
||||||
|
while let Some(&ch) = chars.peek() {
|
||||||
|
match ch {
|
||||||
|
'a'...'z' | 'A'...'Z' | '_' | '0'...'9' | '@' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let upper_str = s.to_uppercase();
|
||||||
|
if KEYWORDS.contains(upper_str.as_str()) {
|
||||||
|
Ok(Some(Token::Keyword(upper_str)))
|
||||||
|
} else {
|
||||||
|
Ok(Some(Token::Identifier(s)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// string
|
||||||
|
'\'' => {
|
||||||
|
//TODO: handle escaped quotes in string
|
||||||
|
//TODO: handle EOF before terminating quote
|
||||||
|
let mut s = String::new();
|
||||||
|
chars.next(); // consume
|
||||||
|
while let Some(&ch) = chars.peek() {
|
||||||
|
match ch {
|
||||||
|
'\'' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
chars.next(); // consume
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Some(Token::String(s)))
|
||||||
|
}
|
||||||
|
// numbers
|
||||||
|
'0'...'9' => {
|
||||||
|
let mut s = String::new();
|
||||||
|
while let Some(&ch) = chars.peek() {
|
||||||
|
match ch {
|
||||||
|
'0'...'9' | '.' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
s.push(ch);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Some(Token::Number(s)))
|
||||||
|
}
|
||||||
|
// punctuation
|
||||||
|
',' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Comma))
|
||||||
|
}
|
||||||
|
'(' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::LParen))
|
||||||
|
}
|
||||||
|
')' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::RParen))
|
||||||
|
}
|
||||||
|
// operators
|
||||||
|
'+' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Plus))
|
||||||
|
}
|
||||||
|
'-' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Minus))
|
||||||
|
}
|
||||||
|
'*' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Mult))
|
||||||
|
}
|
||||||
|
'/' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Div))
|
||||||
|
}
|
||||||
|
'%' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Mod))
|
||||||
|
}
|
||||||
|
'=' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Eq))
|
||||||
|
}
|
||||||
|
'.' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Period))
|
||||||
|
}
|
||||||
|
'!' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
match chars.peek() {
|
||||||
|
Some(&ch) => match ch {
|
||||||
|
'=' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Neq))
|
||||||
|
}
|
||||||
|
_ => Err(TokenizerError(format!("TBD"))),
|
||||||
|
},
|
||||||
|
None => Err(TokenizerError(format!("TBD"))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'<' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
match chars.peek() {
|
||||||
|
Some(&ch) => match ch {
|
||||||
|
'=' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::LtEq))
|
||||||
|
}
|
||||||
|
'>' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::Neq))
|
||||||
|
}
|
||||||
|
_ => Ok(Some(Token::Lt)),
|
||||||
|
},
|
||||||
|
None => Ok(Some(Token::Lt)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'>' => {
|
||||||
|
chars.next(); // consume
|
||||||
|
match chars.peek() {
|
||||||
|
Some(&ch) => match ch {
|
||||||
|
'=' => {
|
||||||
|
chars.next();
|
||||||
|
Ok(Some(Token::GtEq))
|
||||||
|
}
|
||||||
|
_ => Ok(Some(Token::Gt)),
|
||||||
|
},
|
||||||
|
None => Ok(Some(Token::Gt)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => Err(TokenizerError(format!(
|
||||||
|
"unhandled char '{}' in tokenizer",
|
||||||
|
ch
|
||||||
|
))),
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenize_select_1() {
        compare(
            vec![
                Token::Keyword("SELECT".to_string()),
                Token::Number("1".to_string()),
            ],
            tokenize("SELECT 1"),
        );
    }

    #[test]
    fn tokenize_scalar_function() {
        compare(
            vec![
                Token::Keyword("SELECT".to_string()),
                Token::Identifier("sqrt".to_string()),
                Token::LParen,
                Token::Number("1".to_string()),
                Token::RParen,
            ],
            tokenize("SELECT sqrt(1)"),
        );
    }

    #[test]
    fn tokenize_simple_select() {
        compare(
            vec![
                Token::Keyword("SELECT".to_string()),
                Token::Mult,
                Token::Keyword("FROM".to_string()),
                Token::Identifier("customer".to_string()),
                Token::Keyword("WHERE".to_string()),
                Token::Identifier("id".to_string()),
                Token::Eq,
                Token::Number("1".to_string()),
                Token::Keyword("LIMIT".to_string()),
                Token::Number("5".to_string()),
            ],
            tokenize("SELECT * FROM customer WHERE id = 1 LIMIT 5"),
        );
    }

    #[test]
    fn tokenize_string_predicate() {
        compare(
            vec![
                Token::Keyword("SELECT".to_string()),
                Token::Mult,
                Token::Keyword("FROM".to_string()),
                Token::Identifier("customer".to_string()),
                Token::Keyword("WHERE".to_string()),
                Token::Identifier("salary".to_string()),
                Token::Neq,
                Token::String("Not Provided".to_string()),
            ],
            tokenize("SELECT * FROM customer WHERE salary != 'Not Provided'"),
        );
    }

    #[test]
    fn tokenize_is_null() {
        compare(
            vec![
                Token::Identifier("a".to_string()),
                Token::Keyword("IS".to_string()),
                Token::Keyword("NULL".to_string()),
            ],
            tokenize("a IS NULL"),
        );
    }

    /// Tokenize a statement, panicking on any tokenizer error.
    fn tokenize(sql: &str) -> Vec<Token> {
        Tokenizer::new(sql).tokenize().unwrap()
    }

    /// Assert that the actual token stream matches the expected one.
    fn compare(expected: Vec<Token>, actual: Vec<Token>) {
        assert_eq!(expected, actual);
    }
}
|
124
src/tokenizer.rs
124
src/tokenizer.rs
|
@ -1,124 +0,0 @@
|
||||||
use std::cmp::PartialEq;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
/// Simple holder for a sequence of characters that supports iteration and mark/reset methods
pub struct CharSeq {
    // The input characters, pre-collected for O(1) positional access.
    chars: Vec<char>,
    // Current read index into `chars`.
    i: usize,
    // Index saved by `mark()` and restored by `reset()`.
    m: usize
}
|
|
||||||
|
|
||||||
impl CharSeq {
|
|
||||||
|
|
||||||
/// Create a CharSeq from a string
|
|
||||||
pub fn new(sql: &str) -> Self {
|
|
||||||
CharSeq {
|
|
||||||
chars: sql.chars().collect(),
|
|
||||||
i: 0,
|
|
||||||
m: 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark the current index
|
|
||||||
pub fn mark(&mut self) {
|
|
||||||
self.m = self.i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reset the index
|
|
||||||
pub fn reset(&mut self) {
|
|
||||||
self.i = self.m;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek the next char
|
|
||||||
pub fn peek(&mut self) -> Option<&char> {
|
|
||||||
if self.i < self.chars.len() {
|
|
||||||
Some(&self.chars[self.i])
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the next char
|
|
||||||
pub fn next(&mut self) -> Option<char> {
|
|
||||||
if self.i < self.chars.len() {
|
|
||||||
self.i += 1;
|
|
||||||
Some(self.chars[self.i-1])
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A line/column location in the SQL text, carried by `TokenizerError`
/// variants for error reporting.
#[derive(Debug)]
pub struct Position {
    line: usize,
    col: usize
}
impl Position {
    /// Create a position from a line and column number.
    pub fn new(line: usize, col: usize) -> Self {
        Position { line, col }
    }
}
|
|
||||||
|
|
||||||
/// Errors the tokenizer can report.
#[derive(Debug)]
pub enum TokenizerError {
    /// An unexpected character, with its position in the input.
    UnexpectedChar(char,Position),
    /// The input ended before a complete token was read.
    UnexpectedEof(Position),
    /// A string literal was opened but never closed.
    UnterminatedStringLiteral(Position),
    /// Any other failure, described by a message.
    Custom(String)
}
|
|
||||||
|
|
||||||
/// SQL Tokens
#[derive(Debug,PartialEq)]
pub enum SQLToken {
    /// Whitespace character; filtered out by `tokenize`
    Whitespace(char),
    /// Reserved word
    Keyword(String),
    /// Table/column/function name
    Identifier(String),
    Literal(String), //TODO: need to model different types of literal
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Mult,
    /// `/`
    Divide,
    /// `=`
    Eq,
    /// `!`
    Not,
    /// `!=` or `<>`
    NotEq,
    /// `>`
    Gt,
    /// `>=`
    GtEq,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
}
|
|
||||||
|
|
||||||
/// Abstraction over a source of SQL tokens, allowing dialect-specific
/// tokenizer implementations.
pub trait SQLTokenizer {

    /// get the precedence of a token
    fn precedence(&self, token: &SQLToken) -> usize;

    /// Look at the next token without advancing past it.
    fn peek_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;

    /// Return the next token (by value) and advance the index.
    fn next_token(&mut self) -> Result<Option<SQLToken>, TokenizerError>;
}
|
|
||||||
|
|
||||||
|
|
||||||
pub fn tokenize(sql: &str, tokenizer: &mut SQLTokenizer) -> Result<Vec<SQLToken>, TokenizerError> {
|
|
||||||
|
|
||||||
let mut chars = CharSeq::new(sql);
|
|
||||||
|
|
||||||
let mut tokens : Vec<SQLToken> = vec![];
|
|
||||||
|
|
||||||
loop {
|
|
||||||
match tokenizer.next_token()? {
|
|
||||||
Some(SQLToken::Whitespace(_)) => { /* ignore */ },
|
|
||||||
Some(token) => {
|
|
||||||
println!("Token: {:?}", token);
|
|
||||||
tokens.push(token)
|
|
||||||
},
|
|
||||||
None => break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(tokens)
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue