From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001 From: Philip Hazel Date: Wed, 4 Sep 2024 16:18:35 +0100 Subject: [PATCH] Fix non-recognition of some octal escapes in substitute replacement strings --- src/pcre2_compile.c | 15 ++++++++------- src/pcre2_substitute.c | 4 ++-- testdata/testinput11 | 6 ++++++ testdata/testinput2 | 12 ++++++++++++ testdata/testinput5 | 3 +++ testdata/testinput9 | 8 ++++++++ testdata/testoutput11-16 | 8 ++++++++ testdata/testoutput11-32 | 8 ++++++++ testdata/testoutput2 | 16 ++++++++++++++++ testdata/testoutput5 | 4 ++++ testdata/testoutput9 | 10 ++++++++++ 11 files changed, 85 insertions(+), 9 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 8e6787a..a4064f8 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1528,8 +1528,8 @@ final code unit of the escape sequence. This function is also called from pcre2_substitute() to handle escape sequences in replacement strings. In this case, the cb argument is NULL, and in the case of escapes that have further processing, only sequences that define a data -character are recognised. The isclass argument is not relevant; the options -argument is the final value of the compiled pattern's options. +character are recognised. The options argument is the final value of the +compiled pattern's options. Arguments: ptrptr points to the input position pointer @@ -1538,7 +1538,7 @@ Arguments: errorcodeptr points to the errorcode variable (containing zero) options the current options bits xoptions the current extra options bits - isclass TRUE if inside a character class + isclassorsub TRUE if in a character class or called from pcre2_substitute() cb compile data block or NULL when called from pcre2_substitute() Returns: zero => a data character @@ -1549,7 +1549,7 @@ Returns: zero => a data character int PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, - int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, + int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub, compile_block *cb) { BOOL utf = (options & PCRE2_UTF) != 0; @@ -1660,7 +1660,8 @@ else if (cb == NULL) { - if (c != CHAR_c && c != CHAR_o && c != CHAR_x) + if (c < CHAR_0 || + (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x))) { *errorcodeptr = ERR3; return 0; @@ -1778,7 +1779,7 @@ else */ case CHAR_g: - if (isclass) break; + if (isclassorsub) break; if (ptr >= ptrend) { @@ -1854,7 +1855,7 @@ else case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - if (!isclass) + if (!isclassorsub) { oldptr = ptr; ptr--; /* Back to the digit */ diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c index 3ab94eb..069598c 100644 --- a/src/pcre2_substitute.c +++ b/src/pcre2_substitute.c @@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++) ptr += 1; /* Must point after \ */ erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, TRUE, NULL); ptr -= 1; /* Back to last code unit of escape */ if (errorcode != 0) { @@ -864,7 +864,7 @@ do ptr++; /* Point after \ */ rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, TRUE, NULL); if (errorcode != 0) goto BADESCAPE; switch(rc) diff --git a/testdata/testinput11 b/testdata/testinput11 index 2bc8a25..69aea35 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -371,4 +371,10 @@ /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 +/abc/substitute_extended,replace=>\777< + abc + +/abc/substitute_extended,replace=>\o{012345}< + abc + # End of testinput11 diff --git a/testdata/testinput2 b/testdata/testinput2 index bdd41ab..a8bd69d 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4670,6 +4670,18 @@ B)x/alt_verbnames,mark /abcd/g >abcd1234abcd5678<\=replace=wxyz,substitute_matched +/abc/substitute_extended,replace=>\045< + abc + +/abc/substitute_extended,replace=>\45< + abc + +/abc/substitute_extended,replace=>\o{45}< + abc + +/abc/substitute_extended,replace=>\845< + abc + /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I /((p(?'K/ diff --git a/testdata/testinput5 b/testdata/testinput5 index 7e04873..ff120e0 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -2531,4 +2531,7 @@ /(?\777< + abc + # End of testinput5 diff --git a/testdata/testinput9 b/testdata/testinput9 index 4eb228a..f2f5003 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -263,4 +263,12 @@ /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 +# Should cause an error +/abc/substitute_extended,replace=>\777< + abc + +# Should cause an error +/abc/substitute_extended,replace=>\o{012345}< + abc + # End of testinput9 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 8768785..5d1ccab 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -665,4 +665,12 @@ Subject length lower bound = 1 A\x{1b6}6666666 0: A\x{1b6}6666666 +/abc/substitute_extended,replace=>\777< + abc + 1: >\x{1ff}< + +/abc/substitute_extended,replace=>\o{012345}< + abc + 1: >\x{14e5}< + # End of testinput11 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 2c95f61..c1b8547 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -671,4 +671,12 @@ Subject length lower bound = 1 A\x{1b6}6666666 0: A\x{1b6}6666666 +/abc/substitute_extended,replace=>\777< + abc + 1: >\x{1ff}< + +/abc/substitute_extended,replace=>\o{012345}< + abc + 1: >\x{14e5}< + # End of testinput11 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index d59b26a..be0e1f6 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14913,6 +14913,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set >abcd1234abcd5678<\=replace=wxyz,substitute_matched 2: >wxyz1234wxyz5678< +/abc/substitute_extended,replace=>\045< + abc + 1: >%< + +/abc/substitute_extended,replace=>\45< + abc + 1: >%< + +/abc/substitute_extended,replace=>\o{45}< + abc + 1: >%< + +/abc/substitute_extended,replace=>\845< + abc + 1: >845< + /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I Capture group count = 2 Max back reference = 1 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index b79959b..8db45a6 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -5515,4 +5515,8 @@ No match (?\777< + abc + 1: >\x{1ff}< + # End of testinput5 diff --git a/testdata/testoutput9 b/testdata/testoutput9 index 1ec4317..17eb44b 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode A\x{1b6}6666666 +# Should cause an error +/abc/substitute_extended,replace=>\777< + abc +Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string + +# Should cause an error +/abc/substitute_extended,replace=>\o{012345}< + abc +Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string + # End of testinput9