1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
|
From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 4 Sep 2024 16:18:35 +0100
Subject: [PATCH] Fix non-recognition of some octal escapes in substitute replacement strings
---
src/pcre2_compile.c | 15 ++++++++-------
src/pcre2_substitute.c | 4 ++--
testdata/testinput11 | 6 ++++++
testdata/testinput2 | 12 ++++++++++++
testdata/testinput5 | 3 +++
testdata/testinput9 | 8 ++++++++
testdata/testoutput11-16 | 8 ++++++++
testdata/testoutput11-32 | 8 ++++++++
testdata/testoutput2 | 16 ++++++++++++++++
testdata/testoutput5 | 4 ++++
testdata/testoutput9 | 10 ++++++++++
11 files changed, 85 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 8e6787a..a4064f8 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1528,8 +1528,8 @@ final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
-character are recognised. The isclass argument is not relevant; the options
-argument is the final value of the compiled pattern's options.
+character are recognised. The options argument is the final value of the
+compiled pattern's options.
Arguments:
ptrptr points to the input position pointer
@@ -1538,7 +1538,7 @@ Arguments:
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
xoptions the current extra options bits
- isclass TRUE if inside a character class
+ isclassorsub TRUE if in a character class or called from pcre2_substitute()
cb compile data block or NULL when called from pcre2_substitute()
Returns: zero => a data character
@@ -1549,7 +1549,7 @@ Returns: zero => a data character
int
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
- int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass,
+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub,
compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
@@ -1660,7 +1660,8 @@ else
if (cb == NULL)
{
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
+ if (c < CHAR_0 ||
+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x)))
{
*errorcodeptr = ERR3;
return 0;
@@ -1778,7 +1779,7 @@ else
*/
case CHAR_g:
- if (isclass) break;
+ if (isclassorsub) break;
if (ptr >= ptrend)
{
@@ -1854,7 +1855,7 @@ else
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- if (!isclass)
+ if (!isclassorsub)
{
oldptr = ptr;
ptr--; /* Back to the digit */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 3ab94eb..069598c 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -864,7 +864,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
diff --git a/testdata/testinput11 b/testdata/testinput11
index 2bc8a25..69aea35 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -371,4 +371,10 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput11
diff --git a/testdata/testinput2 b/testdata/testinput2
index bdd41ab..a8bd69d 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4670,6 +4670,18 @@ B)x/alt_verbnames,mark
/abcd/g
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
+/abc/substitute_extended,replace=>\045<
+ abc
+
+/abc/substitute_extended,replace=>\45<
+ abc
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+
+/abc/substitute_extended,replace=>\845<
+ abc
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
/((p(?'K/
diff --git a/testdata/testinput5 b/testdata/testinput5
index 7e04873..ff120e0 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2531,4 +2531,7 @@
/(?<!(|l ))/utf
(?<!(|l ))
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+
# End of testinput5
diff --git a/testdata/testinput9 b/testdata/testinput9
index 4eb228a..f2f5003 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -263,4 +263,12 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput9
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 8768785..5d1ccab 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -665,4 +665,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 2c95f61..c1b8547 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -671,4 +671,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index d59b26a..be0e1f6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14913,6 +14913,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
2: >wxyz1234wxyz5678<
+/abc/substitute_extended,replace=>\045<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\45<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\845<
+ abc
+ 1: >845<
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capture group count = 2
Max back reference = 1
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b79959b..8db45a6 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -5515,4 +5515,8 @@ No match
(?<!(|l ))
No match
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
# End of testinput5
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 1ec4317..17eb44b 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string
+
# End of testinput9
|