mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
The usual
This commit is contained in:
parent
330c660f66
commit
0b23348aa9
15 changed files with 782 additions and 124 deletions
186
Lib/dos-8x3/reconver.py
Normal file
186
Lib/dos-8x3/reconver.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
#! /usr/bin/env python1.5
|
||||
|
||||
"""Convert old ("regex") regular expressions to new syntax ("re").
|
||||
|
||||
When imported as a module, there are two functions, with their own
|
||||
strings:
|
||||
|
||||
convert(s, syntax=None) -- convert a regex regular expression to re syntax
|
||||
|
||||
quote(s) -- return a quoted string literal
|
||||
|
||||
When used as a script, read a Python string literal (or any other
|
||||
expression evaluating to a string) from stdin, and write the
|
||||
translated expression to stdout as a string literal. Unless stdout is
|
||||
a tty, no trailing \n is written to stdout. This is done so that it
|
||||
can be used with Emacs C-U M-| (shell-command-on-region with argument
|
||||
which filters the region through the shell command).
|
||||
|
||||
No attempt has been made at coding for performance.
|
||||
|
||||
Translation table...
|
||||
|
||||
\( ( (unless RE_NO_BK_PARENS set)
|
||||
\) ) (unless RE_NO_BK_PARENS set)
|
||||
\| | (unless RE_NO_BK_VBAR set)
|
||||
\< \b (not quite the same, but alla...)
|
||||
\> \b (not quite the same, but alla...)
|
||||
\` \A
|
||||
\' \Z
|
||||
|
||||
Not translated...
|
||||
|
||||
.
|
||||
^
|
||||
$
|
||||
*
|
||||
+ (unless RE_BK_PLUS_QM set, then to \+)
|
||||
? (unless RE_BK_PLUS_QM set, then to \?)
|
||||
\
|
||||
\b
|
||||
\B
|
||||
\w
|
||||
\W
|
||||
\1 ... \9
|
||||
|
||||
Special cases...
|
||||
|
||||
Non-printable characters are always replaced by their 3-digit
|
||||
escape code (except \t, \n, \r, which use mnemonic escapes)
|
||||
|
||||
Newline is turned into | when RE_NEWLINE_OR is set
|
||||
|
||||
XXX To be done...
|
||||
|
||||
[...] (different treatment of backslashed items?)
|
||||
[^...] (different treatment of backslashed items?)
|
||||
^ $ * + ? (in some error contexts these are probably treated differently)
|
||||
\vDD \DD (in the regex docs but only works when RE_ANSI_HEX set)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import regex
|
||||
from regex_syntax import * # RE_*
|
||||
|
||||
# Default translation table
|
||||
mastertable = {
|
||||
r'\<': r'\b',
|
||||
r'\>': r'\b',
|
||||
r'\`': r'\A',
|
||||
r'\'': r'\Z',
|
||||
r'\(': '(',
|
||||
r'\)': ')',
|
||||
r'\|': '|',
|
||||
'(': r'\(',
|
||||
')': r'\)',
|
||||
'|': r'\|',
|
||||
'\t': r'\t',
|
||||
'\n': r'\n',
|
||||
'\r': r'\r',
|
||||
}
|
||||
|
||||
|
||||
def convert(s, syntax=None):
|
||||
"""Convert a regex regular expression to re syntax.
|
||||
|
||||
The first argument is the regular expression, as a string object,
|
||||
just like it would be passed to regex.compile(). (I.e., pass the
|
||||
actual string object -- string quotes must already have been
|
||||
removed and the standard escape processing has already been done,
|
||||
e.g. by eval().)
|
||||
|
||||
The optional second argument is the regex syntax variant to be
|
||||
used. This is an integer mask as passed to regex.set_syntax();
|
||||
the flag bits are defined in regex_syntax. When not specified, or
|
||||
when None is given, the current regex syntax mask (as retrieved by
|
||||
regex.get_syntax()) is used -- which is 0 by default.
|
||||
|
||||
The return value is a regular expression, as a string object that
|
||||
could be passed to re.compile(). (I.e., no string quotes have
|
||||
been added -- use quote() below, or repr().)
|
||||
|
||||
The conversion is not always guaranteed to be correct. More
|
||||
syntactical analysis should be performed to detect borderline
|
||||
cases and decide what to do with them. For example, 'x*?' is not
|
||||
translated correctly.
|
||||
|
||||
"""
|
||||
table = mastertable.copy()
|
||||
if syntax is None:
|
||||
syntax = regex.get_syntax()
|
||||
if syntax & RE_NO_BK_PARENS:
|
||||
del table[r'\('], table[r'\)']
|
||||
del table['('], table[')']
|
||||
if syntax & RE_NO_BK_VBAR:
|
||||
del table[r'\|']
|
||||
del table['|']
|
||||
if syntax & RE_BK_PLUS_QM:
|
||||
table['+'] = r'\+'
|
||||
table['?'] = r'\?'
|
||||
table[r'\+'] = '+'
|
||||
table[r'\?'] = '?'
|
||||
if syntax & RE_NEWLINE_OR:
|
||||
table['\n'] = '|'
|
||||
res = ""
|
||||
|
||||
i = 0
|
||||
end = len(s)
|
||||
while i < end:
|
||||
c = s[i]
|
||||
i = i+1
|
||||
if c == '\\':
|
||||
c = s[i]
|
||||
i = i+1
|
||||
key = '\\' + c
|
||||
key = table.get(key, key)
|
||||
res = res + key
|
||||
else:
|
||||
c = table.get(c, c)
|
||||
res = res + c
|
||||
return res
|
||||
|
||||
|
||||
def quote(s, quote=None):
|
||||
"""Convert a string object to a quoted string literal.
|
||||
|
||||
This is similar to repr() but will return a "raw" string (r'...'
|
||||
or r"...") when the string contains backslashes, instead of
|
||||
doubling all backslashes. The resulting string does *not* always
|
||||
evaluate to the same string as the original; however it will do
|
||||
just the right thing when passed into re.compile().
|
||||
|
||||
The optional second argument forces the string quote; it must be
|
||||
a single character which is a valid Python string quote.
|
||||
|
||||
"""
|
||||
if quote is None:
|
||||
q = "'"
|
||||
altq = "'"
|
||||
if q in s and altq not in s:
|
||||
q = altq
|
||||
else:
|
||||
assert quote in ('"', "'")
|
||||
q = quote
|
||||
res = q
|
||||
for c in s:
|
||||
if c == q: c = '\\' + c
|
||||
elif c < ' ' or c > '~': c = "\\%03o" % ord(c)
|
||||
res = res + c
|
||||
res = res + q
|
||||
if '\\' in res:
|
||||
res = 'r' + res
|
||||
return res
|
||||
|
||||
|
||||
def main():
|
||||
"""Main program -- called when run as a script."""
|
||||
import sys
|
||||
s = eval(sys.stdin.read())
|
||||
sys.stdout.write(quote(convert(s)))
|
||||
if sys.stdout.isatty():
|
||||
sys.stdout.write("\n")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Add table
Add a link
Reference in a new issue