Skip to content

Commit

Permalink
Change string/comment to non-greedy regex match
Browse files Browse the repository at this point in the history
  • Loading branch information
shubhamugare committed Oct 25, 2023
1 parent 2408e43 commit f2f2424
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
4 changes: 2 additions & 2 deletions llm_cfg/python_grammar.lark
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ _NL: ( /(\r?\n[\t ]*)+/ | COMMENT)+

// Earlier, stricter patterns (escape-aware, with lookarounds) kept commented out for reference.
// STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
// LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
// Greedy variants (the deleted side of this diff): `.*` runs to the LAST closing quote it can
// reach, so several separate literals can be consumed as a single token.
STRING : /[ubf]?r?(".*"|'.*')/i
LONG_STRING: /[ubf]?r?(""".*""")/is
// Non-greedy variants (the added side of this diff): `.*?` stops at the FIRST closing quote.
// NOTE(review): LONG_STRING here only matches the """ form; ''' is not covered by this pattern.
STRING : /[ubf]?r?(".*?"|'.*?')/i
LONG_STRING: /[ubf]?r?(""".*?""")/is

// Decimal integer: a lone 0, or a non-zero digit followed by zero or more digits.
DEC_NUMBER: /0|[1-9]\d*/i
// Hex integer: "0x" followed by zero or more hex digits (case-insensitive); ".2" sets the terminal priority.
HEX_NUMBER.2: /0x[\da-f]*/i
Expand Down
9 changes: 8 additions & 1 deletion llm_cfg/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ def test_parser12():
# This should not crash; an earlier version crashed on this input.
_, next_ac_terminals, _ = inc_parser.get_acceptable_next_terminals(partial_code)

def test_parser13():
    """Regression check for partial code that ends right after an opening triple quote.

    The input contains one complete function (with a closed docstring) followed
    by a second function header whose docstring has only just been opened with
    three double quotes. The test asserts that the third value returned by
    ``get_acceptable_next_terminals`` is a single double-quote character.
    """
    source_prefix = 'from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n\t""" Check if in given list of numbers, are any two numbers closer to each other than\n\tgiven threshold.\n\t>>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n\tFalse\n\t>>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n\tTrue\n\t"""\n\tfor i in range(len(numbers)):\n\t\tfor j in range(i + 1, len(numbers)):\n\t\t\tif abs(numbers[i] - numbers[j]) < threshold:\n\t\t\t\treturn True\n\treturn False\n\n\ndef has_close_elements_in_range(numbers: List[float], lower_bound: float, upper_bound: float) -> bool:\n\t"""'
    parser = IncrementalParser()
    # Per the original note: this misbehaved while the string patterns in the
    # grammar were greedy (`.*`) rather than non-greedy (`.*?`).
    outcome = parser.get_acceptable_next_terminals(source_prefix)
    _, _acceptable_terminals, trailing_text = outcome
    assert trailing_text == '"'

def test_incremental_parser():
inc_parser = IncrementalParser()
partial_code = 'from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n\t""" Check if in given list of numbers, are any two numbers closer to each other than\n\tgiven threshold.\n\t>>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n\tFalse\n\t>>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n\tTrue\n\t"""\n\tfor i in range(len(numbers) -1, -1, -1):\n\t\tfor j in range(i+1, len(numbers) -1, -1):\n\t\t\tif abs(numbers[i] - numbers[j] ) < threshold:\n\t\t\t\treturn True\n\treturn False\n\n\ndef has_close_elements_with_threshold(numbers: List[float] , threshold: float) -> bool:\n\t""'
Expand Down Expand Up @@ -239,6 +246,6 @@ def test_prefix_terminal_match():
assert not "RPAR" in inc_parser.get_prefix_terminals_match("(")


# Earlier test list (the deleted side of this diff): does not include test_parser13.
# It is immediately overwritten by the assignment below.
tests = [test_get_matching_terminals, test_parser1, test_parser2, test_parser3, test_parser4, test_parser5, test_parser6, test_parser7, test_parser8, test_parser9, test_parser10, test_parser11, test_parser12,test_incremental_parser, test_incremental_parser2, test_incremental_parser3, test_incremental_parser4, test_prefix_terminal_match]
# Current test list (the added side of this diff): same entries plus test_parser13 after test_parser12.
tests = [test_get_matching_terminals, test_parser1, test_parser2, test_parser3, test_parser4, test_parser5, test_parser6, test_parser7, test_parser8, test_parser9, test_parser10, test_parser11, test_parser12, test_parser13, test_incremental_parser, test_incremental_parser2, test_incremental_parser3, test_incremental_parser4, test_prefix_terminal_match]

# Hand the collected test functions to run_tests (defined outside this view).
run_tests(tests)

0 comments on commit f2f2424

Please sign in to comment.