1 |
From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001 |
2 |
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> |
3 |
Date: Tue, 27 May 2014 13:18:31 +0000 |
4 |
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug. |
5 |
MIME-Version: 1.0 |
6 |
Content-Type: text/plain; charset=UTF-8 |
7 |
Content-Transfer-Encoding: 8bit |
8 |
|
9 |
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15 |
10 |
Signed-off-by: Petr Písař <ppisar@redhat.com> |
11 |
|
12 |
Petr Pisar: Ported to 8.35. |
13 |
|
14 |
Signed-off-by: Petr Písař <ppisar@redhat.com> |
15 |
--- |
16 |
pcre_exec.c | 43 +++++++++++++++++++++++++++---------------- |
17 |
testdata/testinput1 | 9 +++++++++ |
18 |
testdata/testinput8 | 6 ++++++ |
19 |
testdata/testoutput1 | 12 ++++++++++++ |
20 |
testdata/testoutput8 | 8 ++++++++ |
21 |
5 files changed, 62 insertions(+), 16 deletions(-) |
22 |
|
23 |
diff --git a/pcre_exec.c b/pcre_exec.c |
24 |
index 5dec992..5a8dbad 100644 |
25 |
--- a/pcre_exec.c |
26 |
+++ b/pcre_exec.c |
27 |
@@ -1167,11 +1167,16 @@ for (;;) |
28 |
if (rrc == MATCH_KETRPOS) |
29 |
{ |
30 |
offset_top = md->end_offset_top; |
31 |
- eptr = md->end_match_ptr; |
32 |
ecode = md->start_code + code_offset; |
33 |
save_capture_last = md->capture_last; |
34 |
matched_once = TRUE; |
35 |
mstart = md->start_match_ptr; /* In case \K changed it */ |
36 |
+ if (eptr == md->end_match_ptr) /* Matched an empty string */ |
37 |
+ { |
38 |
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); |
39 |
+ break; |
40 |
+ } |
41 |
+ eptr = md->end_match_ptr; |
42 |
continue; |
43 |
} |
44 |
|
45 |
@@ -1241,10 +1246,15 @@ for (;;) |
46 |
if (rrc == MATCH_KETRPOS) |
47 |
{ |
48 |
offset_top = md->end_offset_top; |
49 |
- eptr = md->end_match_ptr; |
50 |
ecode = md->start_code + code_offset; |
51 |
matched_once = TRUE; |
52 |
mstart = md->start_match_ptr; /* In case \K reset it */ |
53 |
+ if (eptr == md->end_match_ptr) /* Matched an empty string */ |
54 |
+ { |
55 |
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT); |
56 |
+ break; |
57 |
+ } |
58 |
+ eptr = md->end_match_ptr; |
59 |
continue; |
60 |
} |
61 |
|
62 |
@@ -1894,7 +1904,7 @@ for (;;) |
63 |
case OP_KETRMAX: |
64 |
case OP_KETRPOS: |
65 |
prev = ecode - GET(ecode, 1); |
66 |
- |
67 |
+ |
68 |
/* If this was a group that remembered the subject start, in order to break |
69 |
infinite repeats of empty string matches, retrieve the subject start from |
70 |
the chain. Otherwise, set it NULL. */ |
71 |
@@ -1919,7 +1929,7 @@ for (;;) |
72 |
md->start_match_ptr = mstart; |
73 |
RRETURN(MATCH_MATCH); /* Sets md->mark */ |
74 |
} |
75 |
- |
76 |
+ |
77 |
/* For capturing groups we have to check the group number back at the start |
78 |
and if necessary complete handling an extraction by setting the offsets and |
79 |
bumping the high water mark. Whole-pattern recursion is coded as a recurse |
80 |
@@ -1979,6 +1989,19 @@ for (;;) |
81 |
} |
82 |
} |
83 |
|
84 |
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position, |
85 |
+ and return MATCH_KETRPOS. This makes it possible to do the repeats one |
86 |
+ at a time from the outer level, thus saving stack. This must precede the |
87 |
+ empty string test - in this case that test is done at the outer level. */ |
88 |
+ |
89 |
+ if (*ecode == OP_KETRPOS) |
90 |
+ { |
91 |
+ md->start_match_ptr = mstart; /* In case \K reset it */ |
92 |
+ md->end_match_ptr = eptr; |
93 |
+ md->end_offset_top = offset_top; |
94 |
+ RRETURN(MATCH_KETRPOS); |
95 |
+ } |
96 |
+ |
97 |
/* For an ordinary non-repeating ket, just continue at this level. This |
98 |
also happens for a repeating ket if no characters were matched in the |
99 |
group. This is the forcible breaking of infinite loops as implemented in |
100 |
@@ -2001,18 +2024,6 @@ for (;;) |
101 |
break; |
102 |
} |
103 |
|
104 |
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position, |
105 |
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one |
106 |
- at a time from the outer level, thus saving stack. */ |
107 |
- |
108 |
- if (*ecode == OP_KETRPOS) |
109 |
- { |
110 |
- md->start_match_ptr = mstart; /* In case \K reset it */ |
111 |
- md->end_match_ptr = eptr; |
112 |
- md->end_offset_top = offset_top; |
113 |
- RRETURN(MATCH_KETRPOS); |
114 |
- } |
115 |
- |
116 |
/* The normal repeating kets try the rest of the pattern or restart from |
117 |
the preceding bracket, in the appropriate order. In the second case, we can |
118 |
use tail recursion to avoid using another stack frame, unless we have an |
119 |
diff --git a/testdata/testinput1 b/testdata/testinput1 |
120 |
index f933692..ffb9455 100644 |
121 |
--- a/testdata/testinput1 |
122 |
+++ b/testdata/testinput1 |
123 |
@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz |
124 |
/[\Q]a\E]+/ |
125 |
aa]] |
126 |
|
127 |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' |
128 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
129 |
+ |
130 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' |
131 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
132 |
+ |
133 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' |
134 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
135 |
+ |
136 |
/-- End of testinput1 --/ |
137 |
diff --git a/testdata/testinput8 b/testdata/testinput8 |
138 |
index bb2747b..06334cd 100644 |
139 |
--- a/testdata/testinput8 |
140 |
+++ b/testdata/testinput8 |
141 |
@@ -4831,4 +4831,10 @@ |
142 |
/[ab]{2,}?/ |
143 |
aaaa |
144 |
|
145 |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' |
146 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
147 |
+ |
148 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' |
149 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
150 |
+ |
151 |
/-- End of testinput8 --/ |
152 |
diff --git a/testdata/testoutput1 b/testdata/testoutput1 |
153 |
index 3d9a328..b2ae430 100644 |
154 |
--- a/testdata/testoutput1 |
155 |
+++ b/testdata/testoutput1 |
156 |
@@ -9325,4 +9325,16 @@ No match |
157 |
aa]] |
158 |
0: aa]] |
159 |
|
160 |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' |
161 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
162 |
+ 0: NON QUOTED "QUOT""ED" AFTER |
163 |
+ |
164 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' |
165 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
166 |
+ 0: NON QUOTED "QUOT""ED" AFTER |
167 |
+ |
168 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++' |
169 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
170 |
+ 0: NON QUOTED "QUOT""ED" AFTER |
171 |
+ |
172 |
/-- End of testinput1 --/ |
173 |
diff --git a/testdata/testoutput8 b/testdata/testoutput8 |
174 |
index 3861ea4..95c4e4d 100644 |
175 |
--- a/testdata/testoutput8 |
176 |
+++ b/testdata/testoutput8 |
177 |
@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches |
178 |
1: aaa |
179 |
2: aa |
180 |
|
181 |
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' |
182 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
183 |
+ 0: NON QUOTED "QUOT""ED" AFTER |
184 |
+ |
185 |
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' |
186 |
+ NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED |
187 |
+ 0: NON QUOTED "QUOT""ED" AFTER |
188 |
+ |
189 |
/-- End of testinput8 --/ |
190 |
-- |
191 |
1.9.3 |
192 |
|