Issue #21765: Add support for non-ascii identifiers to HyperParser

This commit is contained in:
Tal Einat 2014-07-16 16:33:36 +03:00
parent cfa8950aab
commit 9b7f9e6c5a
4 changed files with 212 additions and 43 deletions

View file

@ -30,6 +30,7 @@ class HyperParserTest(unittest.TestCase):
"z = ((r'asdf')+('a')))\n"
'[x for x in\n'
'for = False\n'
'cliché = "this is a string with unicode, what a cliché"'
)
@classmethod
@ -93,6 +94,8 @@ class HyperParserTest(unittest.TestCase):
self.assertTrue(p.is_in_string())
p = get('4.6')
self.assertTrue(p.is_in_string())
p = get('12.54')
self.assertTrue(p.is_in_string())
def test_is_in_code(self):
get = self.get_parser
@ -180,12 +183,91 @@ class HyperParserTest(unittest.TestCase):
p = get('10.0')
self.assertEqual(p.get_expression(), '')
p = get('10.6')
self.assertEqual(p.get_expression(), '')
p = get('10.11')
self.assertEqual(p.get_expression(), '')
p = get('11.3')
self.assertEqual(p.get_expression(), '')
p = get('11.11')
self.assertEqual(p.get_expression(), 'False')
p = get('12.6')
self.assertEqual(p.get_expression(), 'cliché')
def test_eat_identifier(self):
def is_valid_id(candidate):
result = HyperParser._eat_identifier(candidate, 0, len(candidate))
if result == len(candidate):
return True
elif result == 0:
return False
else:
err_msg = "Unexpected result: {} (expected 0 or {}".format(
result, len(candidate)
)
raise Exception(err_msg)
# invalid first character which is valid elsewhere in an identifier
self.assertFalse(is_valid_id('2notid'))
# ASCII-only valid identifiers
self.assertTrue(is_valid_id('valid_id'))
self.assertTrue(is_valid_id('_valid_id'))
self.assertTrue(is_valid_id('valid_id_'))
self.assertTrue(is_valid_id('_2valid_id'))
# keywords which should be "eaten"
self.assertTrue(is_valid_id('True'))
self.assertTrue(is_valid_id('False'))
self.assertTrue(is_valid_id('None'))
# keywords which should not be "eaten"
self.assertFalse(is_valid_id('for'))
self.assertFalse(is_valid_id('import'))
self.assertFalse(is_valid_id('return'))
# valid unicode identifiers
self.assertTrue(is_valid_id('cliche'))
self.assertTrue(is_valid_id('cliché'))
self.assertTrue(is_valid_id(''))
# invalid unicode identifiers
self.assertFalse(is_valid_id('2a'))
self.assertFalse(is_valid_id('٢a'))
self.assertFalse(is_valid_id(''))
# valid identifier after "punctuation"
self.assertEqual(HyperParser._eat_identifier('+ var', 0, 5), len('var'))
self.assertEqual(HyperParser._eat_identifier('+var', 0, 4), len('var'))
self.assertEqual(HyperParser._eat_identifier('.var', 0, 4), len('var'))
# invalid identifiers
self.assertFalse(is_valid_id('+'))
self.assertFalse(is_valid_id(' '))
self.assertFalse(is_valid_id(':'))
self.assertFalse(is_valid_id('?'))
self.assertFalse(is_valid_id('^'))
self.assertFalse(is_valid_id('\\'))
self.assertFalse(is_valid_id('"'))
self.assertFalse(is_valid_id('"a string"'))
def test_eat_identifier_various_lengths(self):
eat_id = HyperParser._eat_identifier
for length in range(1, 21):
self.assertEqual(eat_id('a' * length, 0, length), length)
self.assertEqual(eat_id('é' * length, 0, length), length)
self.assertEqual(eat_id('a' + '2' * (length - 1), 0, length), length)
self.assertEqual(eat_id('é' + '2' * (length - 1), 0, length), length)
self.assertEqual(eat_id('é' + 'a' * (length - 1), 0, length), length)
self.assertEqual(eat_id('é' * (length - 1) + 'a', 0, length), length)
self.assertEqual(eat_id('+' * length, 0, length), 0)
self.assertEqual(eat_id('2' + 'a' * (length - 1), 0, length), 0)
self.assertEqual(eat_id('2' + 'é' * (length - 1), 0, length), 0)
# Script entry point: when this module is executed directly, run its tests
# through unittest with verbose (level 2) reporting.
if __name__ == '__main__':
    unittest.main(verbosity=2)