blob: acfb7ede5c500358910ee1ba7bb7e5cbba255ba7 [file] [log] [blame]
darin@apple.comee752e72007-11-11 18:56:13 +00001/* This is JavaScriptCore's variant of the PCRE library. While this library
2started out as a copy of PCRE, many of the features of PCRE have been
3removed. This library now supports only the regular expression features
4required by the JavaScript language specification, and has only the functions
5needed by JavaScriptCore and the rest of WebKit.
darind7737ab2005-09-09 00:51:07 +00006
darin@apple.comee752e72007-11-11 18:56:13 +00007 Originally written by Philip Hazel
darinae790da2007-10-17 05:38:39 +00008 Copyright (c) 1997-2006 University of Cambridge
darince72b7a2007-02-06 19:42:35 +00009 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10
darind7737ab2005-09-09 00:51:07 +000011-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
darin@apple.comee752e72007-11-11 18:56:13 +000040/* This module contains jsRegExpExecute(), the externally visible function
41that does pattern matching using an NFA algorithm, following the rules from
42the JavaScript specification. There are also some supporting functions. */
darinae790da2007-10-17 05:38:39 +000043
darind7737ab2005-09-09 00:51:07 +000044#include "pcre_internal.h"
45
darin@apple.comee752e72007-11-11 18:56:13 +000046#include <wtf/ASCIICType.h>
47#include <wtf/Vector.h>
48
49using namespace WTF;
50
51#ifdef __GNUC__
52#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
53//#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
54#endif
55
darinb847b442006-10-27 16:48:28 +000056/* Avoid warnings on Windows. */
57#undef min
58#undef max
darind7737ab2005-09-09 00:51:07 +000059
60/* Structure for building a chain of data that actually lives on the
61stack, for holding the values of the subject pointer at the start of each
62subpattern, so as to detect when an empty string has been matched by a
63subpattern - to break infinite loops. When NO_RECURSE is set, these blocks
64are on the heap, not on the stack. */
65
66typedef struct eptrblock {
67 struct eptrblock *epb_prev;
darinae790da2007-10-17 05:38:39 +000068 USPTR epb_saved_eptr;
darind7737ab2005-09-09 00:51:07 +000069} eptrblock;
70
darin@apple.comee752e72007-11-11 18:56:13 +000071/* Structure for remembering the local variables in a private frame */
72
73typedef struct matchframe {
74 /* Where to jump back to */
75#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
76 int where;
77#else
78 void *where;
79#endif
80
81 struct matchframe *prevframe;
82
83 /* Function arguments that may change */
84
85 const pcre_uchar *eptr;
86 const uschar *ecode;
87 int offset_top;
88 eptrblock *eptrb;
89
90 /* Function local variables */
91
92 const uschar *data;
93 const uschar *next;
94 const pcre_uchar *pp;
95 const uschar *prev;
96 const pcre_uchar *saved_eptr;
97
98 int repeat_othercase;
99
100 int ctype;
101 int fc;
102 int fi;
103 int length;
104 int max;
105 int number;
106 int offset;
107 int save_offset1, save_offset2, save_offset3;
108
109 eptrblock newptrb;
110} matchframe;
111
112/* Structure for passing "static" information around between the functions
113doing traditional NFA matching, so that they are thread-safe. */
114
115typedef struct match_data {
116 unsigned long int match_call_count; /* As it says */
117 int *offset_vector; /* Offset vector */
118 int offset_end; /* One past the end */
119 int offset_max; /* The maximum usable for return data */
120 const uschar *lcc; /* Points to lower casing table */
121 const uschar *ctypes; /* Points to table of type maps */
122 BOOL offset_overflow; /* Set if too many extractions */
123 USPTR start_subject; /* Start of the subject string */
124 USPTR end_subject; /* End of the subject string */
125 USPTR end_match_ptr; /* Subject position at end match */
126 int end_offset_top; /* Highwater mark at end of match */
127 BOOL multiline;
128 BOOL caseless;
129} match_data;
130
darin@apple.coma7c3b872007-11-04 05:22:44 +0000131#define match_isgroup TRUE /* Set if start of bracketed group */
darind7737ab2005-09-09 00:51:07 +0000132
133/* Non-error returns from the match() function. Error returns are externally
134defined PCRE_ERROR_xxx codes, which are all negative. */
135
136#define MATCH_MATCH 1
137#define MATCH_NOMATCH 0
138
/* Minimum and maximum repeat counts for the common repeats. In rep_max the
value 0 stands for "no upper limit".
NOTE(review): the six entries look like they correspond to *, *?, +, +?, ? and
?? in that order - confirm against the code that indexes these tables. */

static const char rep_min[] = {
    0, 0,   /* at least zero times */
    1, 1,   /* at least once */
    0, 0    /* at least zero times */
};

static const char rep_max[] = {
    0, 0,   /* no upper limit */
    0, 0,   /* no upper limit */
    1, 1    /* at most once */
};
143
144
145
146#ifdef DEBUG
147/*************************************************
148* Debugging function to print chars *
149*************************************************/
150
151/* Print a sequence of chars in printable format, stopping at the end of the
152subject if the requested.
153
154Arguments:
155 p points to characters
156 length number to print
157 is_subject TRUE if printing from within md->start_subject
158 md pointer to matching data block, if is_subject is TRUE
159
160Returns: nothing
161*/
162
163static void
164pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
165{
166int c;
167if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
168while (length-- > 0)
169 if (isprint(c = *(p++))) printf("%c", c);
darind7737ab2005-09-09 00:51:07 +0000170 else if (c < 256) printf("\\x%02x", c);
171 else printf("\\x{%x}", c);
darind7737ab2005-09-09 00:51:07 +0000172}
173#endif
174
175
176
177/*************************************************
178* Match a back-reference *
179*************************************************/
180
181/* If a back reference hasn't been set, the length that is passed is greater
182than the number of characters left in the string, so the match fails.
183
184Arguments:
185 offset index into the offset vector
186 eptr points into the subject
187 length length to be matched
188 md points to match data block
darind7737ab2005-09-09 00:51:07 +0000189
190Returns: TRUE if matched
191*/
192
193static BOOL
darin@apple.coma7c3b872007-11-04 05:22:44 +0000194match_ref(int offset, register USPTR eptr, int length, match_data *md)
darind7737ab2005-09-09 00:51:07 +0000195{
darinae790da2007-10-17 05:38:39 +0000196USPTR p = md->start_subject + md->offset_vector[offset];
darind7737ab2005-09-09 00:51:07 +0000197
198#ifdef DEBUG
199if (eptr >= md->end_subject)
200 printf("matching subject <null>");
201else
202 {
203 printf("matching subject ");
204 pchars(eptr, length, TRUE, md);
205 }
206printf(" against backref ");
207pchars(p, length, FALSE, md);
208printf("\n");
209#endif
210
211/* Always fail if not enough characters left */
212
213if (length > md->end_subject - eptr) return FALSE;
214
215/* Separate the caselesss case for speed */
216
darin@apple.coma7c3b872007-11-04 05:22:44 +0000217if (md->caseless)
darind7737ab2005-09-09 00:51:07 +0000218 {
219 while (length-- > 0)
darin@apple.coma7c3b872007-11-04 05:22:44 +0000220 {
221 pcre_uchar c = *p++;
222 int othercase = _pcre_ucp_othercase(c);
223 pcre_uchar d = *eptr++;
224 if (c != d && othercase != d) return FALSE;
225 }
darind7737ab2005-09-09 00:51:07 +0000226 }
227else
228 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
229
230return TRUE;
231}
232
233
234
235/***************************************************************************
236****************************************************************************
237 RECURSION IN THE match() FUNCTION
238
darin@apple.comee752e72007-11-11 18:56:13 +0000239The original match() function was highly recursive. The current version
240still has the remnants of the original in that recursive processing of the
241regular expression is triggered by invoking a macro named RMATCH. This is
242no longer really much like a recursive call to match() itself.
darind7737ab2005-09-09 00:51:07 +0000243****************************************************************************
244***************************************************************************/
245
darinae790da2007-10-17 05:38:39 +0000246/* These versions of the macros use the stack, as normal. There are debugging
247versions and production versions. */
darind7737ab2005-09-09 00:51:07 +0000248
#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION

/* Use GCC's computed goto extension: the resume point stored in a frame is
the address of the RRETURN_<num> label, and returning is a jump through it. */

/* For one test case this is more than 40% faster than the switch statement.
We could avoid the use of the num argument entirely by using local labels,
but using it for the GCC case as well as the non-GCC case allows us to share
a bit more code and notice if we use conflicting numbers. */

#define RMATCH_WHERE(num) &&RRETURN_##num
#define RRETURN_LABEL *frame->where

#else

/* Use numbered labels and switch statement at the bottom of the match
function: the resume point stored in a frame is just the number. */

#define RMATCH_WHERE(num) num
#define RRETURN_LABEL RRETURN_SWITCH

#endif
269
/* RMATCH(num, ra, rb, rc) performs a simulated recursive call inside match().

  num   unique number of this call site; it names the RRETURN_<num> label
        that the "call" comes back to
  ra    value for ecode in the new frame
  rb    value for eptrb in the new frame
  rc    stored in is_group_start for the new "call"

The new frame is the next element of the stackframes array while the current
frame lies inside that array and is not its last element; otherwise it is
allocated with new. eptr and offset_top are copied from the current frame.
Control then jumps to RECURSE. When the "call" returns to RRETURN_<num>, the
caller's frame is made current again and a frame that does not lie inside
stackframes is deleted.

The macro relies on these names from the enclosing function: frame, newframe,
stackframes, stackframesend, rdepth, is_group_start and the RECURSE label. */

#define RMATCH(num, ra, rb, rc)\
    {\
        newframe = (frame >= stackframes && frame + 1 < stackframesend)\
            ? frame + 1\
            : new matchframe;\
        newframe->prevframe = frame;\
        newframe->eptr = frame->eptr;\
        newframe->ecode = (ra);\
        newframe->offset_top = frame->offset_top;\
        newframe->eptrb = (rb);\
        newframe->where = RMATCH_WHERE(num);\
        is_group_start = (rc);\
        ++rdepth;\
        frame = newframe;\
        DPRINTF(("restarting from line %d\n", __LINE__));\
        goto RECURSE;\
RRETURN_##num:\
        newframe = frame;\
        frame = frame->prevframe;\
        if (newframe < stackframes || newframe >= stackframesend)\
            delete newframe;\
        --rdepth;\
        DPRINTF(("did a goto back to line %d\n", __LINE__));\
    }
darin@apple.comee752e72007-11-11 18:56:13 +0000295
/* Return from the current simulated call: jump to the resume point recorded
in the current frame. is_match is left as it is. */

#define RRETURN goto RRETURN_LABEL

/* Return from the current simulated call reporting that nothing matched. */

#define RRETURN_NO_MATCH \
    { \
        is_match = FALSE; \
        goto RRETURN_LABEL; \
    }

/* Abandon the whole match: store the error code in i and jump to the
RETURN_ERROR label of the enclosing function. */

#define RRETURN_ERROR(error) \
    { \
        i = (error); \
        goto RETURN_ERROR; \
    }
darind7737ab2005-09-09 00:51:07 +0000309
310/*************************************************
311* Match from current position *
312*************************************************/
313
314/* On entry ecode points to the first opcode, and eptr to the first character
315in the subject string, while eptrb holds the value of eptr at the start of the
316last bracketed group - used for breaking infinite loops matching zero-length
317strings. This function is called recursively in many circumstances. Whenever it
318returns a negative (error) response, the outer incarnation must also return the
319same response.
320
darind7737ab2005-09-09 00:51:07 +0000321Arguments:
322 eptr pointer in subject
323 ecode position in code
324 offset_top current top pointer
325 md pointer to "static" info for the match
darind7737ab2005-09-09 00:51:07 +0000326
327Returns: MATCH_MATCH if matched ) these values are >= 0
328 MATCH_NOMATCH if failed to match )
329 a negative PCRE_ERROR_xxx value if aborted by an error condition
darinae790da2007-10-17 05:38:39 +0000330 (e.g. stopped by repeated call or recursion limit)
darind7737ab2005-09-09 00:51:07 +0000331*/
332
darin@apple.comee752e72007-11-11 18:56:13 +0000333static int match(USPTR eptr, const uschar *ecode, int offset_top, match_data *md)
darind7737ab2005-09-09 00:51:07 +0000334{
darin@apple.comee752e72007-11-11 18:56:13 +0000335register int is_match = FALSE;
336register int i;
337register int c;
darind7737ab2005-09-09 00:51:07 +0000338
darin@apple.coma7c3b872007-11-04 05:22:44 +0000339unsigned rdepth = 0;
340
341BOOL cur_is_word;
342BOOL prev_is_word;
343BOOL is_group_start = TRUE;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000344int min;
345BOOL minimize = FALSE; /* Initialization not really needed, but some compilers think so. */
darind7737ab2005-09-09 00:51:07 +0000346
/* The value 16 here is large enough that most regular expressions don't require
any heap allocation of match frames (RMATCH falls back to "new matchframe" once
this array is used up), yet the amount of stack used for the array is modest
enough that we don't run out of stack. */
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000350matchframe stackframes[16];
351matchframe *stackframesend = stackframes + sizeof(stackframes) / sizeof(stackframes[0]);
darince72b7a2007-02-06 19:42:35 +0000352
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000353matchframe *frame = stackframes;
354matchframe *newframe;
darind7737ab2005-09-09 00:51:07 +0000355
darin@apple.comee752e72007-11-11 18:56:13 +0000356/* The opcode jump table. */
357#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
358#define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
359static void* opcode_jump_table[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
360#undef EMIT_JUMP_TABLE_ENTRY
361#endif
362
363/* One-time setup of the opcode jump table. */
364#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
365i = 255;
366while (!opcode_jump_table[i])
367 opcode_jump_table[i--] = &&CAPTURING_BRACKET;
368#endif
369
370#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
371frame->where = &&RETURN;
372#else
373frame->where = 0;
374#endif
darind7737ab2005-09-09 00:51:07 +0000375
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000376frame->eptr = eptr;
377frame->ecode = ecode;
378frame->offset_top = offset_top;
379frame->eptrb = NULL;
darind7737ab2005-09-09 00:51:07 +0000380
381/* This is where control jumps back to to effect "recursion" */
382
darin@apple.comee752e72007-11-11 18:56:13 +0000383RECURSE:
darind7737ab2005-09-09 00:51:07 +0000384
/* OK, now we can get on with the real code of the function. "Recursive calls"
are specified by the macro RMATCH and RRETURN is used to return from them.
Neither of them involves a real call to match(): RMATCH sets up a new
matchframe and jumps back to RECURSE, and RRETURN jumps to the resume point
recorded in the current frame. RMATCH therefore isn't like a function call;
it's quite a complicated macro and has to be used in one particular way. */
darind7737ab2005-09-09 00:51:07 +0000392
darinae790da2007-10-17 05:38:39 +0000393/* First check that we haven't called match() too many times, or that we
394haven't exceeded the recursive call limit. */
darince72b7a2007-02-06 19:42:35 +0000395
darin@apple.comee752e72007-11-11 18:56:13 +0000396if (md->match_call_count++ >= MATCH_LIMIT) RRETURN_ERROR(JSRegExpErrorMatchLimit);
397if (rdepth >= MATCH_LIMIT_RECURSION) RRETURN_ERROR(JSRegExpErrorRecursionLimit);
darind7737ab2005-09-09 00:51:07 +0000398
399/* At the start of a bracketed group, add the current subject pointer to the
400stack of such pointers, to be re-instated at the end of the group when we hit
401the closing ket. When match() is called in other circumstances, we don't add to
402this stack. */
403
darin@apple.coma7c3b872007-11-04 05:22:44 +0000404if (is_group_start)
darind7737ab2005-09-09 00:51:07 +0000405 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000406 frame->newptrb.epb_prev = frame->eptrb;
407 frame->newptrb.epb_saved_eptr = frame->eptr;
408 frame->eptrb = &frame->newptrb;
darind7737ab2005-09-09 00:51:07 +0000409 }
410
411/* Now start processing the operations. */
412
darin@apple.comee752e72007-11-11 18:56:13 +0000413#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
darind7737ab2005-09-09 00:51:07 +0000414for (;;)
darin@apple.comee752e72007-11-11 18:56:13 +0000415#endif
darind7737ab2005-09-09 00:51:07 +0000416 {
darin@apple.comee752e72007-11-11 18:56:13 +0000417
418#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
419 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
420 #define NEXT_OPCODE goto *opcode_jump_table[*frame->ecode]
421#else
422 #define BEGIN_OPCODE(opcode) case OP_##opcode
423 #define NEXT_OPCODE continue
424#endif
425
426#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
427 NEXT_OPCODE;
428#else
darin@apple.comaf5544c2007-11-04 08:28:22 +0000429 switch (*frame->ecode)
darin@apple.comee752e72007-11-11 18:56:13 +0000430#endif
darind7737ab2005-09-09 00:51:07 +0000431 {
darin@apple.comee752e72007-11-11 18:56:13 +0000432 /* Non-capturing bracket: optimized */
433
434 BEGIN_OPCODE(BRA):
darin@apple.comaf5544c2007-11-04 08:28:22 +0000435 NON_CAPTURING_BRACKET:
darind7737ab2005-09-09 00:51:07 +0000436 DPRINTF(("start bracket 0\n"));
437 do
438 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000439 RMATCH(2, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000440 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000441 frame->ecode += GET(frame->ecode, 1);
darind7737ab2005-09-09 00:51:07 +0000442 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000443 while (*frame->ecode == OP_ALT);
darind7737ab2005-09-09 00:51:07 +0000444 DPRINTF(("bracket 0 failed\n"));
darin@apple.comee752e72007-11-11 18:56:13 +0000445 RRETURN;
darind7737ab2005-09-09 00:51:07 +0000446
darin@apple.coma7c3b872007-11-04 05:22:44 +0000447 /* End of the pattern. */
darind7737ab2005-09-09 00:51:07 +0000448
darin@apple.comee752e72007-11-11 18:56:13 +0000449 BEGIN_OPCODE(END):
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000450 md->end_match_ptr = frame->eptr; /* Record where we ended */
451 md->end_offset_top = frame->offset_top; /* and how many extracts were taken */
darin@apple.comee752e72007-11-11 18:56:13 +0000452 is_match = TRUE;
453 RRETURN;
darind7737ab2005-09-09 00:51:07 +0000454
darind7737ab2005-09-09 00:51:07 +0000455 /* Assertion brackets. Check the alternative branches in turn - the
456 matching won't pass the KET for an assertion. If any one branch matches,
457 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
458 start of each branch to move the current point backwards, so the code at
459 this level is identical to the lookahead case. */
460
darin@apple.comee752e72007-11-11 18:56:13 +0000461 BEGIN_OPCODE(ASSERT):
darind7737ab2005-09-09 00:51:07 +0000462 do
463 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000464 RMATCH(6, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000465 if (is_match) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000466 frame->ecode += GET(frame->ecode, 1);
darind7737ab2005-09-09 00:51:07 +0000467 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000468 while (*frame->ecode == OP_ALT);
darin@apple.comee752e72007-11-11 18:56:13 +0000469 if (*frame->ecode == OP_KET) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +0000470
darind7737ab2005-09-09 00:51:07 +0000471 /* Continue from after the assertion, updating the offsets high water
472 mark, since extracts may have been taken during the assertion. */
473
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000474 do frame->ecode += GET(frame->ecode,1); while (*frame->ecode == OP_ALT);
475 frame->ecode += 1 + LINK_SIZE;
476 frame->offset_top = md->end_offset_top;
darin@apple.comee752e72007-11-11 18:56:13 +0000477 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000478
479 /* Negative assertion: all branches must fail to match */
480
darin@apple.comee752e72007-11-11 18:56:13 +0000481 BEGIN_OPCODE(ASSERT_NOT):
darind7737ab2005-09-09 00:51:07 +0000482 do
483 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000484 RMATCH(7, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000485 if (is_match) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000486 frame->ecode += GET(frame->ecode,1);
darind7737ab2005-09-09 00:51:07 +0000487 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000488 while (*frame->ecode == OP_ALT);
darind7737ab2005-09-09 00:51:07 +0000489
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000490 frame->ecode += 1 + LINK_SIZE;
darin@apple.comee752e72007-11-11 18:56:13 +0000491 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000492
darind7737ab2005-09-09 00:51:07 +0000493 /* "Once" brackets are like assertion brackets except that after a match,
494 the point in the subject string is not moved back. Thus there can never be
495 a move back into the brackets. Friedl calls these "atomic" subpatterns.
496 Check the alternative branches in turn - the matching won't pass the KET
497 for this kind of subpattern. If any one branch matches, we carry on as at
498 the end of a normal bracket, leaving the subject pointer. */
499
darin@apple.comee752e72007-11-11 18:56:13 +0000500 BEGIN_OPCODE(ONCE):
darind7737ab2005-09-09 00:51:07 +0000501 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000502 frame->prev = frame->ecode;
503 frame->saved_eptr = frame->eptr;
darind7737ab2005-09-09 00:51:07 +0000504
505 do
506 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000507 RMATCH(9, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000508 if (is_match) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000509 frame->ecode += GET(frame->ecode,1);
darind7737ab2005-09-09 00:51:07 +0000510 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000511 while (*frame->ecode == OP_ALT);
darind7737ab2005-09-09 00:51:07 +0000512
513 /* If hit the end of the group (which could be repeated), fail */
514
darin@apple.comee752e72007-11-11 18:56:13 +0000515 if (*frame->ecode != OP_ONCE && *frame->ecode != OP_ALT) RRETURN;
darind7737ab2005-09-09 00:51:07 +0000516
517 /* Continue as from after the assertion, updating the offsets high water
518 mark, since extracts may have been taken. */
519
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000520 do frame->ecode += GET(frame->ecode,1); while (*frame->ecode == OP_ALT);
darind7737ab2005-09-09 00:51:07 +0000521
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000522 frame->offset_top = md->end_offset_top;
523 frame->eptr = md->end_match_ptr;
darind7737ab2005-09-09 00:51:07 +0000524
525 /* For a non-repeating ket, just continue at this level. This also
526 happens for a repeating ket if no characters were matched in the group.
527 This is the forcible breaking of infinite loops as implemented in Perl
528 5.005. If there is an options reset, it will get obeyed in the normal
529 course of events. */
530
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000531 if (*frame->ecode == OP_KET || frame->eptr == frame->saved_eptr)
darind7737ab2005-09-09 00:51:07 +0000532 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000533 frame->ecode += 1+LINK_SIZE;
darin@apple.comee752e72007-11-11 18:56:13 +0000534 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000535 }
536
537 /* The repeating kets try the rest of the pattern or restart from the
538 preceding bracket, in the appropriate order. We need to reset any options
539 that changed within the bracket before re-running it, so check the next
540 opcode. */
541
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000542 if (*frame->ecode == OP_KETRMIN)
darind7737ab2005-09-09 00:51:07 +0000543 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000544 RMATCH(10, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000545 if (is_match) RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000546 RMATCH(11, frame->prev, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000547 if (is_match) RRETURN;
darind7737ab2005-09-09 00:51:07 +0000548 }
549 else /* OP_KETRMAX */
550 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000551 RMATCH(12, frame->prev, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000552 if (is_match) RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000553 RMATCH(13, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000554 if (is_match) RRETURN;
darind7737ab2005-09-09 00:51:07 +0000555 }
556 }
darin@apple.comee752e72007-11-11 18:56:13 +0000557 RRETURN;
darind7737ab2005-09-09 00:51:07 +0000558
559 /* An alternation is the end of a branch; scan along to find the end of the
560 bracketed group and go to there. */
561
darin@apple.comee752e72007-11-11 18:56:13 +0000562 BEGIN_OPCODE(ALT):
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000563 do frame->ecode += GET(frame->ecode,1); while (*frame->ecode == OP_ALT);
darin@apple.comee752e72007-11-11 18:56:13 +0000564 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000565
566 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
567 that it may occur zero times. It may repeat infinitely, or not at all -
568 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
569 repeat limits are compiled as a number of copies, with the optional ones
570 preceded by BRAZERO or BRAMINZERO. */
571
darin@apple.comee752e72007-11-11 18:56:13 +0000572 BEGIN_OPCODE(BRAZERO):
darind7737ab2005-09-09 00:51:07 +0000573 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000574 frame->next = frame->ecode+1;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000575 RMATCH(14, frame->next, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000576 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000577 do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
578 frame->ecode = frame->next + 1+LINK_SIZE;
darind7737ab2005-09-09 00:51:07 +0000579 }
darin@apple.comee752e72007-11-11 18:56:13 +0000580 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000581
darin@apple.comee752e72007-11-11 18:56:13 +0000582 BEGIN_OPCODE(BRAMINZERO):
darind7737ab2005-09-09 00:51:07 +0000583 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000584 frame->next = frame->ecode+1;
585 do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
darin@apple.comaf5544c2007-11-04 08:28:22 +0000586 RMATCH(15, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000587 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000588 frame->ecode++;
darind7737ab2005-09-09 00:51:07 +0000589 }
darin@apple.comee752e72007-11-11 18:56:13 +0000590 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000591
592 /* End of a group, repeated or non-repeating. If we are at the end of
593 an assertion "group", stop matching and return MATCH_MATCH, but record the
594 current high water mark for use by positive assertions. Do this also
595 for the "once" (not-backup up) groups. */
596
darin@apple.comee752e72007-11-11 18:56:13 +0000597 BEGIN_OPCODE(KET):
598 BEGIN_OPCODE(KETRMIN):
599 BEGIN_OPCODE(KETRMAX):
darind7737ab2005-09-09 00:51:07 +0000600 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000601 frame->prev = frame->ecode - GET(frame->ecode, 1);
602 frame->saved_eptr = frame->eptrb->epb_saved_eptr;
darind7737ab2005-09-09 00:51:07 +0000603
604 /* Back up the stack of bracket start pointers. */
605
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000606 frame->eptrb = frame->eptrb->epb_prev;
darind7737ab2005-09-09 00:51:07 +0000607
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000608 if (*frame->prev == OP_ASSERT || *frame->prev == OP_ASSERT_NOT || *frame->prev == OP_ONCE)
darind7737ab2005-09-09 00:51:07 +0000609 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000610 md->end_match_ptr = frame->eptr; /* For ONCE */
611 md->end_offset_top = frame->offset_top;
darin@apple.comee752e72007-11-11 18:56:13 +0000612 is_match = TRUE;
613 RRETURN;
darind7737ab2005-09-09 00:51:07 +0000614 }
615
616 /* In all other cases except a conditional group we have to check the
617 group number back at the start and if necessary complete handling an
618 extraction by setting the offsets and bumping the high water mark. */
619
darind7737ab2005-09-09 00:51:07 +0000620 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000621 frame->number = *frame->prev - OP_BRA;
darind7737ab2005-09-09 00:51:07 +0000622
623 /* For extended extraction brackets (large number), we have to fish out
624 the number from a dummy opcode at the start. */
625
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000626 if (frame->number > EXTRACT_BASIC_MAX) frame->number = GET2(frame->prev, 2+LINK_SIZE);
627 frame->offset = frame->number << 1;
darind7737ab2005-09-09 00:51:07 +0000628
629#ifdef DEBUG
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000630 printf("end bracket %d", frame->number);
darind7737ab2005-09-09 00:51:07 +0000631 printf("\n");
632#endif
633
634 /* Test for a numbered group. This includes groups called as a result
635 of recursion. Note that whole-pattern recursion is coded as a recurse
636 into group 0, so it won't be picked up here. Instead, we catch it when
637 the OP_END is reached. */
638
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000639 if (frame->number > 0)
darind7737ab2005-09-09 00:51:07 +0000640 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000641 if (frame->offset >= md->offset_max) md->offset_overflow = TRUE; else
darind7737ab2005-09-09 00:51:07 +0000642 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000643 md->offset_vector[frame->offset] =
644 md->offset_vector[md->offset_end - frame->number];
645 md->offset_vector[frame->offset+1] = frame->eptr - md->start_subject;
646 if (frame->offset_top <= frame->offset) frame->offset_top = frame->offset + 2;
darind7737ab2005-09-09 00:51:07 +0000647 }
darind7737ab2005-09-09 00:51:07 +0000648 }
649 }
650
darind7737ab2005-09-09 00:51:07 +0000651 /* For a non-repeating ket, just continue at this level. This also
652 happens for a repeating ket if no characters were matched in the group.
653 This is the forcible breaking of infinite loops as implemented in Perl
654 5.005. If there is an options reset, it will get obeyed in the normal
655 course of events. */
656
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000657 if (*frame->ecode == OP_KET || frame->eptr == frame->saved_eptr)
darind7737ab2005-09-09 00:51:07 +0000658 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000659 frame->ecode += 1 + LINK_SIZE;
darin@apple.comee752e72007-11-11 18:56:13 +0000660 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000661 }
662
663 /* The repeating kets try the rest of the pattern or restart from the
664 preceding bracket, in the appropriate order. */
665
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000666 if (*frame->ecode == OP_KETRMIN)
darind7737ab2005-09-09 00:51:07 +0000667 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000668 RMATCH(16, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000669 if (is_match) RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000670 RMATCH(17, frame->prev, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000671 if (is_match) RRETURN;
darind7737ab2005-09-09 00:51:07 +0000672 }
673 else /* OP_KETRMAX */
674 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000675 RMATCH(18, frame->prev, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +0000676 if (is_match) RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000677 RMATCH(19, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000678 if (is_match) RRETURN;
darind7737ab2005-09-09 00:51:07 +0000679 }
680 }
darin@apple.comee752e72007-11-11 18:56:13 +0000681 RRETURN;
darind7737ab2005-09-09 00:51:07 +0000682
683 /* Start of subject, or after internal newline if multiline */
684
darin@apple.comee752e72007-11-11 18:56:13 +0000685 BEGIN_OPCODE(CIRC):
darin@apple.coma7c3b872007-11-04 05:22:44 +0000686 if (md->multiline)
darind7737ab2005-09-09 00:51:07 +0000687 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000688 if (frame->eptr != md->start_subject && !IS_NEWLINE(frame->eptr[-1]))
darin@apple.comee752e72007-11-11 18:56:13 +0000689 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000690 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000691 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000692 }
darin@apple.comee752e72007-11-11 18:56:13 +0000693 if (frame->eptr != md->start_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000694 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000695 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000696
darind7737ab2005-09-09 00:51:07 +0000697 /* Assert end of subject or before an internal newline if multiline;
698 otherwise assert end of subject or before a terminating newline. */
699
darin@apple.comee752e72007-11-11 18:56:13 +0000700 BEGIN_OPCODE(DOLL):
darin@apple.coma7c3b872007-11-04 05:22:44 +0000701 if (md->multiline)
darind7737ab2005-09-09 00:51:07 +0000702 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000703 if (frame->eptr < md->end_subject)
darin@apple.comee752e72007-11-11 18:56:13 +0000704 { if (!IS_NEWLINE(*frame->eptr)) RRETURN_NO_MATCH; }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000705 frame->ecode++;
darind7737ab2005-09-09 00:51:07 +0000706 }
707 else
708 {
darin@apple.combb14d632007-11-04 07:54:56 +0000709 if (frame->eptr < md->end_subject - 1 ||
710 (frame->eptr == md->end_subject - 1 && !IS_NEWLINE(*frame->eptr)))
darin@apple.comee752e72007-11-11 18:56:13 +0000711 RRETURN_NO_MATCH;
darin@apple.combb14d632007-11-04 07:54:56 +0000712 frame->ecode++;
darind7737ab2005-09-09 00:51:07 +0000713 }
darin@apple.comee752e72007-11-11 18:56:13 +0000714 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000715
darind7737ab2005-09-09 00:51:07 +0000716 /* Word boundary assertions */
717
darin@apple.comee752e72007-11-11 18:56:13 +0000718 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
719 BEGIN_OPCODE(WORD_BOUNDARY):
darind7737ab2005-09-09 00:51:07 +0000720 {
darind7737ab2005-09-09 00:51:07 +0000721 /* Find out if the previous and current characters are "word" characters.
darin@apple.coma7c3b872007-11-04 05:22:44 +0000722 It takes a bit more work in UTF-8 mode. Characters >= 128 are assumed to
darind7737ab2005-09-09 00:51:07 +0000723 be "non-word" characters. */
724
darind7737ab2005-09-09 00:51:07 +0000725 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000726 if (frame->eptr == md->start_subject) prev_is_word = FALSE; else
darind7737ab2005-09-09 00:51:07 +0000727 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000728 const pcre_uchar *lastptr = frame->eptr - 1;
darind7737ab2005-09-09 00:51:07 +0000729 while(ISMIDCHAR(*lastptr)) lastptr--;
730 GETCHAR(c, lastptr);
darin@apple.coma7c3b872007-11-04 05:22:44 +0000731 prev_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
darind7737ab2005-09-09 00:51:07 +0000732 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000733 if (frame->eptr >= md->end_subject) cur_is_word = FALSE; else
darind7737ab2005-09-09 00:51:07 +0000734 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000735 GETCHAR(c, frame->eptr);
darin@apple.coma7c3b872007-11-04 05:22:44 +0000736 cur_is_word = c < 128 && (md->ctypes[c] & ctype_word) != 0;
darind7737ab2005-09-09 00:51:07 +0000737 }
738 }
darind7737ab2005-09-09 00:51:07 +0000739
740 /* Now see if the situation is what we want */
741
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000742 if ((*frame->ecode++ == OP_WORD_BOUNDARY)?
darind7737ab2005-09-09 00:51:07 +0000743 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
darin@apple.comee752e72007-11-11 18:56:13 +0000744 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +0000745 }
darin@apple.comee752e72007-11-11 18:56:13 +0000746 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000747
748 /* Match a single character type; inline for speed */
749
darin@apple.comee752e72007-11-11 18:56:13 +0000750 BEGIN_OPCODE(ANY):
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000751 if (frame->eptr < md->end_subject && IS_NEWLINE(*frame->eptr))
darin@apple.comee752e72007-11-11 18:56:13 +0000752 RRETURN_NO_MATCH;
753 if (frame->eptr++ >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000754 while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++;
755 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000756 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000757
darin@apple.comee752e72007-11-11 18:56:13 +0000758 BEGIN_OPCODE(NOT_DIGIT):
759 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000760 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000761 if (isASCIIDigit(c))
762 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000763 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000764 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000765
darin@apple.comee752e72007-11-11 18:56:13 +0000766 BEGIN_OPCODE(DIGIT):
767 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000768 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000769 if (!isASCIIDigit(c))
770 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000771 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000772 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000773
darin@apple.comee752e72007-11-11 18:56:13 +0000774 BEGIN_OPCODE(NOT_WHITESPACE):
775 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000776 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000777 if (c < 128 && (md->ctypes[c] & ctype_space) != 0)
778 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000779 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000780 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000781
darin@apple.comee752e72007-11-11 18:56:13 +0000782 BEGIN_OPCODE(WHITESPACE):
783 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000784 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000785 if (c >= 128 || (md->ctypes[c] & ctype_space) == 0)
786 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000787 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000788 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000789
darin@apple.comee752e72007-11-11 18:56:13 +0000790 BEGIN_OPCODE(NOT_WORDCHAR):
791 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000792 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000793 if (c < 128 && (md->ctypes[c] & ctype_word) != 0)
794 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000795 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000796 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000797
darin@apple.comee752e72007-11-11 18:56:13 +0000798 BEGIN_OPCODE(WORDCHAR):
799 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000800 GETCHARINCTEST(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +0000801 if (c >= 128 || (md->ctypes[c] & ctype_word) == 0)
802 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000803 frame->ecode++;
darin@apple.comee752e72007-11-11 18:56:13 +0000804 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000805
darind7737ab2005-09-09 00:51:07 +0000806 /* Match a back reference, possibly repeatedly. Look past the end of the
807 item to see if there is repeat information following. The code is similar
808 to that for character classes, but repeated for efficiency. Then obey
809 similar code to character type repeats - written out again for speed.
810 However, if the referenced string is the empty string, always treat
811 it as matched, any number of times (otherwise there could be infinite
812 loops). */
813
darin@apple.comee752e72007-11-11 18:56:13 +0000814 BEGIN_OPCODE(REF):
darind7737ab2005-09-09 00:51:07 +0000815 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000816 frame->offset = GET2(frame->ecode, 1) << 1; /* Doubled ref number */
817 frame->ecode += 3; /* Advance past item */
darind7737ab2005-09-09 00:51:07 +0000818
819 /* If the reference is unset, treat it as a reference to the empty string by
820 setting the length to zero (the original PCRE code instead made an unset
821 reference fail to match). A zero-length reference that is followed by a
822 repeat is handled below by continuing with the main loop. */
823
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000824 frame->length = (frame->offset >= frame->offset_top || md->offset_vector[frame->offset] < 0)?
darin@apple.coma7c3b872007-11-04 05:22:44 +0000825 0 :
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000826 md->offset_vector[frame->offset+1] - md->offset_vector[frame->offset];
darind7737ab2005-09-09 00:51:07 +0000827
828 /* Set up for repetition, or handle the non-repeated case */
829
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000830 switch (*frame->ecode)
darind7737ab2005-09-09 00:51:07 +0000831 {
832 case OP_CRSTAR:
833 case OP_CRMINSTAR:
834 case OP_CRPLUS:
835 case OP_CRMINPLUS:
836 case OP_CRQUERY:
837 case OP_CRMINQUERY:
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000838 c = *frame->ecode++ - OP_CRSTAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000839 minimize = (c & 1) != 0;
840 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000841 frame->max = rep_max[c]; /* zero for max => infinity */
842 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +0000843 break;
844
845 case OP_CRRANGE:
846 case OP_CRMINRANGE:
darin@apple.comaf5544c2007-11-04 08:28:22 +0000847 minimize = (*frame->ecode == OP_CRMINRANGE);
848 min = GET2(frame->ecode, 1);
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000849 frame->max = GET2(frame->ecode, 3);
850 if (frame->max == 0) frame->max = INT_MAX;
851 frame->ecode += 5;
darind7737ab2005-09-09 00:51:07 +0000852 break;
853
854 default: /* No repeat follows */
darin@apple.comee752e72007-11-11 18:56:13 +0000855 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000856 frame->eptr += frame->length;
darin@apple.comee752e72007-11-11 18:56:13 +0000857 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000858 }
859
860 /* If the length of the reference is zero, just continue with the
861 main loop. */
862
darin@apple.comee752e72007-11-11 18:56:13 +0000863 if (frame->length == 0)
864 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000865
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000866 /* First, ensure the minimum number of matches are present. */
darind7737ab2005-09-09 00:51:07 +0000867
darin@apple.comaf5544c2007-11-04 08:28:22 +0000868 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +0000869 {
darin@apple.comee752e72007-11-11 18:56:13 +0000870 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000871 frame->eptr += frame->length;
darind7737ab2005-09-09 00:51:07 +0000872 }
873
874 /* If min = max, continue at the same level without recursion.
875 They are not both allowed to be zero. */
876
darin@apple.comee752e72007-11-11 18:56:13 +0000877 if (min == frame->max)
878 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000879
880 /* If minimizing, keep trying and advancing the pointer */
881
darin@apple.comaf5544c2007-11-04 08:28:22 +0000882 if (minimize)
darind7737ab2005-09-09 00:51:07 +0000883 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000884 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +0000885 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000886 RMATCH(20, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000887 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000888 if (frame->fi >= frame->max || !match_ref(frame->offset, frame->eptr, frame->length, md))
darin@apple.comee752e72007-11-11 18:56:13 +0000889 RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000890 frame->eptr += frame->length;
darind7737ab2005-09-09 00:51:07 +0000891 }
892 /* Control never gets here */
893 }
894
895 /* If maximizing, find the longest string and work backwards */
896
897 else
898 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000899 frame->pp = frame->eptr;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000900 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +0000901 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000902 if (!match_ref(frame->offset, frame->eptr, frame->length, md)) break;
903 frame->eptr += frame->length;
darind7737ab2005-09-09 00:51:07 +0000904 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000905 while (frame->eptr >= frame->pp)
darind7737ab2005-09-09 00:51:07 +0000906 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000907 RMATCH(21, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000908 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000909 frame->eptr -= frame->length;
darind7737ab2005-09-09 00:51:07 +0000910 }
darin@apple.comee752e72007-11-11 18:56:13 +0000911 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +0000912 }
913 }
914 /* Control never gets here */
915
darind7737ab2005-09-09 00:51:07 +0000916 /* Match a bit-mapped character class, possibly repeatedly. This op code is
917 used when all the characters in the class have values in the range 0-255,
918 and either the matching is caseful, or the characters are in the range
919 0-127 when UTF-8 processing is enabled. The only difference between
920 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
921 encountered.
922
923 First, look past the end of the item to see if there is repeat information
924 following. Then obey similar code to character type repeats - written out
925 again for speed. */
926
darin@apple.comee752e72007-11-11 18:56:13 +0000927 BEGIN_OPCODE(NCLASS):
928 BEGIN_OPCODE(CLASS):
darind7737ab2005-09-09 00:51:07 +0000929 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000930 frame->data = frame->ecode + 1; /* Save for matching */
931 frame->ecode += 33; /* Advance past the item */
darind7737ab2005-09-09 00:51:07 +0000932
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000933 switch (*frame->ecode)
darind7737ab2005-09-09 00:51:07 +0000934 {
935 case OP_CRSTAR:
936 case OP_CRMINSTAR:
937 case OP_CRPLUS:
938 case OP_CRMINPLUS:
939 case OP_CRQUERY:
940 case OP_CRMINQUERY:
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000941 c = *frame->ecode++ - OP_CRSTAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +0000942 minimize = (c & 1) != 0;
943 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000944 frame->max = rep_max[c]; /* zero for max => infinity */
945 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +0000946 break;
947
948 case OP_CRRANGE:
949 case OP_CRMINRANGE:
darin@apple.comaf5544c2007-11-04 08:28:22 +0000950 minimize = (*frame->ecode == OP_CRMINRANGE);
951 min = GET2(frame->ecode, 1);
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000952 frame->max = GET2(frame->ecode, 3);
953 if (frame->max == 0) frame->max = INT_MAX;
954 frame->ecode += 5;
darind7737ab2005-09-09 00:51:07 +0000955 break;
956
957 default: /* No repeat follows */
darin@apple.comaf5544c2007-11-04 08:28:22 +0000958 min = frame->max = 1;
darind7737ab2005-09-09 00:51:07 +0000959 break;
960 }
961
962 /* First, ensure the minimum number of matches are present. */
963
darind7737ab2005-09-09 00:51:07 +0000964 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000965 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +0000966 {
darin@apple.comee752e72007-11-11 18:56:13 +0000967 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000968 GETCHARINC(c, frame->eptr);
darind7737ab2005-09-09 00:51:07 +0000969 if (c > 255)
970 {
darin@apple.comee752e72007-11-11 18:56:13 +0000971 if (frame->data[-1] == OP_CLASS) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +0000972 }
973 else
974 {
darin@apple.comee752e72007-11-11 18:56:13 +0000975 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +0000976 }
977 }
978 }
darind7737ab2005-09-09 00:51:07 +0000979
980 /* If max == min we can continue with the main loop without the
981 need to recurse. */
982
darin@apple.comee752e72007-11-11 18:56:13 +0000983 if (min == frame->max)
984 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +0000985
986 /* If minimizing, keep testing the rest of the expression and advancing
987 the pointer while it matches the class. */
darin@apple.comaf5544c2007-11-04 08:28:22 +0000988 if (minimize)
darind7737ab2005-09-09 00:51:07 +0000989 {
darind7737ab2005-09-09 00:51:07 +0000990 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000991 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +0000992 {
darin@apple.comaf5544c2007-11-04 08:28:22 +0000993 RMATCH(22, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +0000994 if (is_match) RRETURN;
995 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +0000996 GETCHARINC(c, frame->eptr);
darind7737ab2005-09-09 00:51:07 +0000997 if (c > 255)
998 {
darin@apple.comee752e72007-11-11 18:56:13 +0000999 if (frame->data[-1] == OP_CLASS) RRETURN;
darind7737ab2005-09-09 00:51:07 +00001000 }
1001 else
1002 {
darin@apple.comee752e72007-11-11 18:56:13 +00001003 if ((frame->data[c/8] & (1 << (c&7))) == 0) RRETURN;
darind7737ab2005-09-09 00:51:07 +00001004 }
1005 }
1006 }
darind7737ab2005-09-09 00:51:07 +00001007 /* Control never gets here */
1008 }
darind7737ab2005-09-09 00:51:07 +00001009 /* If maximizing, find the longest possible run, then work backwards. */
darind7737ab2005-09-09 00:51:07 +00001010 else
1011 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001012 frame->pp = frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001013
darin@apple.comaf5544c2007-11-04 08:28:22 +00001014 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001015 {
1016 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001017 if (frame->eptr >= md->end_subject) break;
1018 GETCHARLEN(c, frame->eptr, len);
darind7737ab2005-09-09 00:51:07 +00001019 if (c > 255)
1020 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001021 if (frame->data[-1] == OP_CLASS) break;
darind7737ab2005-09-09 00:51:07 +00001022 }
1023 else
1024 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001025 if ((frame->data[c/8] & (1 << (c&7))) == 0) break;
darind7737ab2005-09-09 00:51:07 +00001026 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001027 frame->eptr += len;
darind7737ab2005-09-09 00:51:07 +00001028 }
1029 for (;;)
1030 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001031 RMATCH(24, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001032 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001033 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
1034 BACKCHAR(frame->eptr);
darind7737ab2005-09-09 00:51:07 +00001035 }
darin@apple.comee752e72007-11-11 18:56:13 +00001036
1037 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001038 }
1039 }
1040 /* Control never gets here */
1041
darind7737ab2005-09-09 00:51:07 +00001042 /* Match an extended character class. This opcode is encountered only
1043 in UTF-8 mode, because that's the only time it is compiled. */
1044
darin@apple.comee752e72007-11-11 18:56:13 +00001045 BEGIN_OPCODE(XCLASS):
darind7737ab2005-09-09 00:51:07 +00001046 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001047 frame->data = frame->ecode + 1 + LINK_SIZE; /* Save for matching */
1048 frame->ecode += GET(frame->ecode, 1); /* Advance past the item */
darind7737ab2005-09-09 00:51:07 +00001049
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001050 switch (*frame->ecode)
darind7737ab2005-09-09 00:51:07 +00001051 {
1052 case OP_CRSTAR:
1053 case OP_CRMINSTAR:
1054 case OP_CRPLUS:
1055 case OP_CRMINPLUS:
1056 case OP_CRQUERY:
1057 case OP_CRMINQUERY:
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001058 c = *frame->ecode++ - OP_CRSTAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001059 minimize = (c & 1) != 0;
1060 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001061 frame->max = rep_max[c]; /* zero for max => infinity */
1062 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +00001063 break;
1064
1065 case OP_CRRANGE:
1066 case OP_CRMINRANGE:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001067 minimize = (*frame->ecode == OP_CRMINRANGE);
1068 min = GET2(frame->ecode, 1);
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001069 frame->max = GET2(frame->ecode, 3);
1070 if (frame->max == 0) frame->max = INT_MAX;
1071 frame->ecode += 5;
darind7737ab2005-09-09 00:51:07 +00001072 break;
1073
1074 default: /* No repeat follows */
darin@apple.comaf5544c2007-11-04 08:28:22 +00001075 min = frame->max = 1;
darind7737ab2005-09-09 00:51:07 +00001076 }
1077
1078 /* First, ensure the minimum number of matches are present. */
1079
darin@apple.comaf5544c2007-11-04 08:28:22 +00001080 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001081 {
darin@apple.comee752e72007-11-11 18:56:13 +00001082 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001083 GETCHARINC(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001084 if (!_pcre_xclass(c, frame->data)) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001085 }
1086
1087 /* If max == min we can continue with the main loop without the
1088 need to recurse. */
1089
darin@apple.comee752e72007-11-11 18:56:13 +00001090 if (min == frame->max)
1091 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001092
1093 /* If minimizing, keep testing the rest of the expression and advancing
1094 the pointer while it matches the class. */
1095
darin@apple.comaf5544c2007-11-04 08:28:22 +00001096 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001097 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001098 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001099 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001100 RMATCH(26, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001101 if (is_match) RRETURN;
1102 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001103 GETCHARINC(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001104 if (!_pcre_xclass(c, frame->data)) RRETURN;
darind7737ab2005-09-09 00:51:07 +00001105 }
1106 /* Control never gets here */
1107 }
1108
1109 /* If maximizing, find the longest possible run, then work backwards. */
1110
1111 else
1112 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001113 frame->pp = frame->eptr;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001114 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001115 {
1116 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001117 if (frame->eptr >= md->end_subject) break;
1118 GETCHARLEN(c, frame->eptr, len);
1119 if (!_pcre_xclass(c, frame->data)) break;
1120 frame->eptr += len;
darind7737ab2005-09-09 00:51:07 +00001121 }
1122 for(;;)
1123 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001124 RMATCH(27, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001125 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001126 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
1127 BACKCHAR(frame->eptr)
darind7737ab2005-09-09 00:51:07 +00001128 }
darin@apple.comee752e72007-11-11 18:56:13 +00001129 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001130 }
1131
1132 /* Control never gets here */
1133 }
darind7737ab2005-09-09 00:51:07 +00001134
1135 /* Match a single character, casefully */
1136
darin@apple.comee752e72007-11-11 18:56:13 +00001137 BEGIN_OPCODE(CHAR):
darind7737ab2005-09-09 00:51:07 +00001138 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001139 frame->length = 1;
1140 frame->ecode++;
1141 GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
eseidel67d65af2005-09-29 22:05:12 +00001142 {
darina8702f52006-01-13 09:32:51 +00001143 int dc;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001144 frame->ecode += frame->length;
1145 switch (md->end_subject - frame->eptr)
hyatt6c974dd2006-01-06 22:43:44 +00001146 {
1147 case 0:
darin@apple.comee752e72007-11-11 18:56:13 +00001148 RRETURN_NO_MATCH;
hyatt6c974dd2006-01-06 22:43:44 +00001149 case 1:
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001150 dc = *frame->eptr++;
hyatt6c974dd2006-01-06 22:43:44 +00001151 if (IS_LEADING_SURROGATE(dc))
darin@apple.comee752e72007-11-11 18:56:13 +00001152 RRETURN_NO_MATCH;
hyatt6c974dd2006-01-06 22:43:44 +00001153 break;
1154 default:
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001155 GETCHARINC(dc, frame->eptr);
hyatt6c974dd2006-01-06 22:43:44 +00001156 }
darin@apple.comee752e72007-11-11 18:56:13 +00001157 if (frame->fc != dc) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001158 }
darin@apple.comee752e72007-11-11 18:56:13 +00001159 }
1160 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001161
1162 /* Match a single character, caselessly */
1163
darin@apple.comee752e72007-11-11 18:56:13 +00001164 BEGIN_OPCODE(CHARNC):
darind7737ab2005-09-09 00:51:07 +00001165 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001166 frame->length = 1;
1167 frame->ecode++;
1168 GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
darind7737ab2005-09-09 00:51:07 +00001169
darin@apple.comee752e72007-11-11 18:56:13 +00001170 if (md->end_subject - frame->eptr == 0) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001171
darind7737ab2005-09-09 00:51:07 +00001172 {
1173 int dc;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001174 if (md->end_subject - frame->eptr == 1) {
1175 dc = *frame->eptr++;
eseidel67d65af2005-09-29 22:05:12 +00001176 if (IS_LEADING_SURROGATE(dc))
darin@apple.comee752e72007-11-11 18:56:13 +00001177 RRETURN_NO_MATCH;
eseidel67d65af2005-09-29 22:05:12 +00001178 } else
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001179 GETCHARINC(dc, frame->eptr);
1180 frame->ecode += frame->length;
darind7737ab2005-09-09 00:51:07 +00001181
1182 /* If we have Unicode property support, we can use it to test the other
darinae790da2007-10-17 05:38:39 +00001183 case of the character, if there is one. */
darind7737ab2005-09-09 00:51:07 +00001184
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001185 if (frame->fc != dc)
darind7737ab2005-09-09 00:51:07 +00001186 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001187 if (dc != _pcre_ucp_othercase(frame->fc))
darin@apple.comee752e72007-11-11 18:56:13 +00001188 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001189 }
1190 }
1191 }
darin@apple.comee752e72007-11-11 18:56:13 +00001192 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001193
darin@apple.comaf5544c2007-11-04 08:28:22 +00001194 /* Match a single ASCII character. */
1195
darin@apple.comee752e72007-11-11 18:56:13 +00001196 BEGIN_OPCODE(ASCII_CHAR):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001197 if (md->end_subject == frame->eptr)
darin@apple.comee752e72007-11-11 18:56:13 +00001198 RRETURN_NO_MATCH;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001199 if (*frame->eptr != frame->ecode[1])
darin@apple.comee752e72007-11-11 18:56:13 +00001200 RRETURN_NO_MATCH;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001201 ++frame->eptr;
1202 frame->ecode += 2;
darin@apple.comee752e72007-11-11 18:56:13 +00001203 NEXT_OPCODE;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001204
1205 /* Match one of two cases of an ASCII character. */
1206
darin@apple.comee752e72007-11-11 18:56:13 +00001207 BEGIN_OPCODE(ASCII_LETTER_NC):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001208 if (md->end_subject == frame->eptr)
darin@apple.comee752e72007-11-11 18:56:13 +00001209 RRETURN_NO_MATCH;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001210 if ((*frame->eptr | 0x20) != frame->ecode[1])
darin@apple.comee752e72007-11-11 18:56:13 +00001211 RRETURN_NO_MATCH;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001212 ++frame->eptr;
1213 frame->ecode += 2;
darin@apple.comee752e72007-11-11 18:56:13 +00001214 NEXT_OPCODE;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001215
darind7737ab2005-09-09 00:51:07 +00001216 /* Match a single character repeatedly; different opcodes share code. */
1217
darin@apple.comee752e72007-11-11 18:56:13 +00001218 BEGIN_OPCODE(EXACT):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001219 min = frame->max = GET2(frame->ecode, 1);
1220 minimize = FALSE;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001221 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001222 goto REPEATCHAR;
1223
darin@apple.comee752e72007-11-11 18:56:13 +00001224 BEGIN_OPCODE(UPTO):
1225 BEGIN_OPCODE(MINUPTO):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001226 min = 0;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001227 frame->max = GET2(frame->ecode, 1);
darin@apple.comaf5544c2007-11-04 08:28:22 +00001228 minimize = *frame->ecode == OP_MINUPTO;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001229 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001230 goto REPEATCHAR;
1231
darin@apple.comee752e72007-11-11 18:56:13 +00001232 BEGIN_OPCODE(STAR):
1233 BEGIN_OPCODE(MINSTAR):
1234 BEGIN_OPCODE(PLUS):
1235 BEGIN_OPCODE(MINPLUS):
1236 BEGIN_OPCODE(QUERY):
1237 BEGIN_OPCODE(MINQUERY):
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001238 c = *frame->ecode++ - OP_STAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001239 minimize = (c & 1) != 0;
1240 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001241 frame->max = rep_max[c]; /* zero for max => infinity */
1242 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +00001243
1244 /* Common code for all repeated single-character matches. We can give
1245 up quickly if there are fewer than the minimum number of characters left in
1246 the subject. */
1247
1248 REPEATCHAR:
hyatt6c974dd2006-01-06 22:43:44 +00001249
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001250 frame->length = 1;
1251 GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
darina8702f52006-01-13 09:32:51 +00001252 {
darin@apple.comee752e72007-11-11 18:56:13 +00001253 if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001254 frame->ecode += frame->length;
darind7737ab2005-09-09 00:51:07 +00001255
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001256 if (frame->fc <= 0xFFFF)
darind7737ab2005-09-09 00:51:07 +00001257 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001258 int othercase = md->caseless ? _pcre_ucp_othercase(frame->fc) : -1;
darind7737ab2005-09-09 00:51:07 +00001259
darin@apple.comaf5544c2007-11-04 08:28:22 +00001260 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001261 {
darin@apple.comee752e72007-11-11 18:56:13 +00001262 if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001263 ++frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001264 }
1265
darin@apple.comee752e72007-11-11 18:56:13 +00001266 if (min == frame->max)
1267 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001268
darin@apple.comaf5544c2007-11-04 08:28:22 +00001269 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001270 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001271 frame->repeat_othercase = othercase;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001272 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001273 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001274 RMATCH(28, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001275 if (is_match) RRETURN;
1276 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN;
1277 if (*frame->eptr != frame->fc && *frame->eptr != frame->repeat_othercase) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001278 ++frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001279 }
1280 /* Control never gets here */
1281 }
1282 else
1283 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001284 frame->pp = frame->eptr;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001285 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001286 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001287 if (frame->eptr >= md->end_subject) break;
1288 if (*frame->eptr != frame->fc && *frame->eptr != othercase) break;
1289 ++frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001290 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001291 while (frame->eptr >= frame->pp)
darind7737ab2005-09-09 00:51:07 +00001292 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001293 RMATCH(29, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001294 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001295 --frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001296 }
darin@apple.comee752e72007-11-11 18:56:13 +00001297 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001298 }
1299 /* Control never gets here */
1300 }
1301 else
1302 {
1303 /* No case on surrogate pairs, so no need to bother with "othercase". */
1304
darin@apple.comaf5544c2007-11-04 08:28:22 +00001305 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001306 {
1307 int nc;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001308 GETCHAR(nc, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001309 if (nc != frame->fc) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001310 frame->eptr += 2;
darind7737ab2005-09-09 00:51:07 +00001311 }
1312
darin@apple.comee752e72007-11-11 18:56:13 +00001313 if (min == frame->max)
1314 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001315
darin@apple.comaf5544c2007-11-04 08:28:22 +00001316 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001317 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001318 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001319 {
1320 int nc;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001321 RMATCH(30, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001322 if (is_match) RRETURN;
1323 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001324 GETCHAR(nc, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001325 if (*frame->eptr != frame->fc) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001326 frame->eptr += 2;
darind7737ab2005-09-09 00:51:07 +00001327 }
1328 /* Control never gets here */
1329 }
1330 else
1331 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001332 frame->pp = frame->eptr;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001333 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001334 {
1335 int nc;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001336 if (frame->eptr > md->end_subject - 2) break;
1337 GETCHAR(nc, frame->eptr);
1338 if (*frame->eptr != frame->fc) break;
1339 frame->eptr += 2;
darind7737ab2005-09-09 00:51:07 +00001340 }
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001341 while (frame->eptr >= frame->pp)
darind7737ab2005-09-09 00:51:07 +00001342 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001343 RMATCH(31, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001344 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001345 frame->eptr -= 2;
darind7737ab2005-09-09 00:51:07 +00001346 }
darin@apple.comee752e72007-11-11 18:56:13 +00001347 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001348 }
1349 /* Control never gets here */
1350 }
1351 /* Control never gets here */
darina8702f52006-01-13 09:32:51 +00001352 }
darind7737ab2005-09-09 00:51:07 +00001353
1354 /* Match a negated single one-byte character. The character we are
1355 checking can be multibyte. */
1356
darin@apple.comee752e72007-11-11 18:56:13 +00001357 BEGIN_OPCODE(NOT):
1358 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001359 frame->ecode++;
1360 GETCHARINCTEST(c, frame->eptr);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001361 if (md->caseless)
darind7737ab2005-09-09 00:51:07 +00001362 {
darin@apple.coma7c3b872007-11-04 05:22:44 +00001363 if (c < 128)
1364 c = md->lcc[c];
darin@apple.comee752e72007-11-11 18:56:13 +00001365 if (md->lcc[*frame->ecode++] == c) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001366 }
1367 else
1368 {
darin@apple.comee752e72007-11-11 18:56:13 +00001369 if (*frame->ecode++ == c) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001370 }
darin@apple.comee752e72007-11-11 18:56:13 +00001371 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001372
1373 /* Match a negated single one-byte character repeatedly. This is almost a
1374 repeat of the code for a repeated single character, but I haven't found a
1375 nice way of commoning these up that doesn't require a test of the
1376 positive/negative option for each character match. Maybe that wouldn't add
1377 very much to the time taken, but character matching *is* what this is all
1378 about... */
1379
darin@apple.comee752e72007-11-11 18:56:13 +00001380 BEGIN_OPCODE(NOTEXACT):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001381 min = frame->max = GET2(frame->ecode, 1);
1382 minimize = FALSE;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001383 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001384 goto REPEATNOTCHAR;
1385
darin@apple.comee752e72007-11-11 18:56:13 +00001386 BEGIN_OPCODE(NOTUPTO):
1387 BEGIN_OPCODE(NOTMINUPTO):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001388 min = 0;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001389 frame->max = GET2(frame->ecode, 1);
darin@apple.comaf5544c2007-11-04 08:28:22 +00001390 minimize = *frame->ecode == OP_NOTMINUPTO;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001391 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001392 goto REPEATNOTCHAR;
1393
darin@apple.comee752e72007-11-11 18:56:13 +00001394 BEGIN_OPCODE(NOTSTAR):
1395 BEGIN_OPCODE(NOTMINSTAR):
1396 BEGIN_OPCODE(NOTPLUS):
1397 BEGIN_OPCODE(NOTMINPLUS):
1398 BEGIN_OPCODE(NOTQUERY):
1399 BEGIN_OPCODE(NOTMINQUERY):
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001400 c = *frame->ecode++ - OP_NOTSTAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001401 minimize = (c & 1) != 0;
1402 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001403 frame->max = rep_max[c]; /* zero for max => infinity */
1404 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +00001405
1406 /* Common code for all repeated single-byte matches. We can give up quickly
1407 if there are fewer than the minimum number of bytes left in the
1408 subject. */
1409
1410 REPEATNOTCHAR:
darin@apple.comee752e72007-11-11 18:56:13 +00001411 if (min > md->end_subject - frame->eptr) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001412 frame->fc = *frame->ecode++;
darind7737ab2005-09-09 00:51:07 +00001413
1414 /* The code is duplicated for the caseless and caseful cases, for speed,
1415 since matching characters is likely to be quite common. First, ensure the
1416 minimum number of matches are present. If min = max, continue at the same
1417 level without recursing. Otherwise, if minimizing, keep trying the rest of
1418 the expression and advancing one matching character if failing, up to the
1419 maximum. Alternatively, if maximizing, find the maximum number of
1420 characters and work backwards. */
1421
darin@apple.comaf5544c2007-11-04 08:28:22 +00001422 DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, min, frame->max));
darind7737ab2005-09-09 00:51:07 +00001423
darin@apple.coma7c3b872007-11-04 05:22:44 +00001424 if (md->caseless)
darind7737ab2005-09-09 00:51:07 +00001425 {
darin@apple.comee752e72007-11-11 18:56:13 +00001426 if (frame->fc < 128)
1427 frame->fc = md->lcc[frame->fc];
darind7737ab2005-09-09 00:51:07 +00001428
darind7737ab2005-09-09 00:51:07 +00001429 {
1430 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001431 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001432 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001433 GETCHARINC(d, frame->eptr);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001434 if (d < 128) d = md->lcc[d];
darin@apple.comee752e72007-11-11 18:56:13 +00001435 if (frame->fc == d) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001436 }
1437 }
darind7737ab2005-09-09 00:51:07 +00001438
darin@apple.comee752e72007-11-11 18:56:13 +00001439 if (min == frame->max)
1440 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001441
darin@apple.comaf5544c2007-11-04 08:28:22 +00001442 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001443 {
darind7737ab2005-09-09 00:51:07 +00001444 {
1445 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001446 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001447 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001448 RMATCH(38, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001449 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001450 GETCHARINC(d, frame->eptr);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001451 if (d < 128) d = md->lcc[d];
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001452 if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d)
darin@apple.comee752e72007-11-11 18:56:13 +00001453 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001454 }
1455 }
darind7737ab2005-09-09 00:51:07 +00001456 /* Control never gets here */
1457 }
1458
1459 /* Maximize case */
1460
1461 else
1462 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001463 frame->pp = frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001464
darind7737ab2005-09-09 00:51:07 +00001465 {
1466 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001467 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001468 {
1469 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001470 if (frame->eptr >= md->end_subject) break;
1471 GETCHARLEN(d, frame->eptr, len);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001472 if (d < 128) d = md->lcc[d];
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001473 if (frame->fc == d) break;
1474 frame->eptr += len;
darind7737ab2005-09-09 00:51:07 +00001475 }
1476 for(;;)
1477 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001478 RMATCH(40, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001479 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001480 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
1481 BACKCHAR(frame->eptr);
darind7737ab2005-09-09 00:51:07 +00001482 }
1483 }
darind7737ab2005-09-09 00:51:07 +00001484
darin@apple.comee752e72007-11-11 18:56:13 +00001485 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001486 }
1487 /* Control never gets here */
1488 }
1489
1490 /* Caseful comparisons */
1491
1492 else
1493 {
darind7737ab2005-09-09 00:51:07 +00001494 {
1495 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001496 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001497 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001498 GETCHARINC(d, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001499 if (frame->fc == d) RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001500 }
1501 }
darind7737ab2005-09-09 00:51:07 +00001502
darin@apple.comee752e72007-11-11 18:56:13 +00001503 if (min == frame->max)
1504 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001505
darin@apple.comaf5544c2007-11-04 08:28:22 +00001506 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001507 {
darind7737ab2005-09-09 00:51:07 +00001508 {
1509 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001510 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001511 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001512 RMATCH(42, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001513 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001514 GETCHARINC(d, frame->eptr);
1515 if (frame->fi >= frame->max || frame->eptr >= md->end_subject || frame->fc == d)
darin@apple.comee752e72007-11-11 18:56:13 +00001516 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001517 }
1518 }
darind7737ab2005-09-09 00:51:07 +00001519 /* Control never gets here */
1520 }
1521
1522 /* Maximize case */
1523
1524 else
1525 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001526 frame->pp = frame->eptr;
darind7737ab2005-09-09 00:51:07 +00001527
darind7737ab2005-09-09 00:51:07 +00001528 {
1529 register int d;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001530 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001531 {
1532 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001533 if (frame->eptr >= md->end_subject) break;
1534 GETCHARLEN(d, frame->eptr, len);
1535 if (frame->fc == d) break;
1536 frame->eptr += len;
darind7737ab2005-09-09 00:51:07 +00001537 }
1538 for(;;)
1539 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001540 RMATCH(44, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001541 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001542 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
1543 BACKCHAR(frame->eptr);
darind7737ab2005-09-09 00:51:07 +00001544 }
1545 }
darind7737ab2005-09-09 00:51:07 +00001546
darin@apple.comee752e72007-11-11 18:56:13 +00001547 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001548 }
1549 }
1550 /* Control never gets here */
1551
1552 /* Match a single character type repeatedly; several different opcodes
1553 share code. This is very similar to the code for single characters, but we
1554 repeat it in the interests of efficiency. */
1555
darin@apple.comee752e72007-11-11 18:56:13 +00001556 BEGIN_OPCODE(TYPEEXACT):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001557 min = frame->max = GET2(frame->ecode, 1);
1558 minimize = TRUE;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001559 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001560 goto REPEATTYPE;
1561
darin@apple.comee752e72007-11-11 18:56:13 +00001562 BEGIN_OPCODE(TYPEUPTO):
1563 BEGIN_OPCODE(TYPEMINUPTO):
darin@apple.comaf5544c2007-11-04 08:28:22 +00001564 min = 0;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001565 frame->max = GET2(frame->ecode, 1);
darin@apple.comaf5544c2007-11-04 08:28:22 +00001566 minimize = *frame->ecode == OP_TYPEMINUPTO;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001567 frame->ecode += 3;
darind7737ab2005-09-09 00:51:07 +00001568 goto REPEATTYPE;
1569
darin@apple.comee752e72007-11-11 18:56:13 +00001570 BEGIN_OPCODE(TYPESTAR):
1571 BEGIN_OPCODE(TYPEMINSTAR):
1572 BEGIN_OPCODE(TYPEPLUS):
1573 BEGIN_OPCODE(TYPEMINPLUS):
1574 BEGIN_OPCODE(TYPEQUERY):
1575 BEGIN_OPCODE(TYPEMINQUERY):
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001576 c = *frame->ecode++ - OP_TYPESTAR;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001577 minimize = (c & 1) != 0;
1578 min = rep_min[c]; /* Pick up values from tables; */
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001579 frame->max = rep_max[c]; /* zero for max => infinity */
1580 if (frame->max == 0) frame->max = INT_MAX;
darind7737ab2005-09-09 00:51:07 +00001581
1582 /* Common code for all repeated single character type matches. Note that
1583 in UTF-8 mode, '.' matches a character of any length, but for the other
1584 character types, the valid characters are all one-byte long. */
1585
1586 REPEATTYPE:
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001587 frame->ctype = *frame->ecode++; /* Code for the character type */
darind7737ab2005-09-09 00:51:07 +00001588
darind7737ab2005-09-09 00:51:07 +00001589 /* First, ensure the minimum number of matches are present. Use inline
1590 code for maximizing the speed, and do the type test once at the start
1591 (i.e. keep it out of the loop). Also we can test that there are at least
1592 the minimum number of bytes before we start. This isn't as effective in
1593 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
1594 is tidier. Also separate the UCP code, which can be the same for both UTF-8
1595 and single-bytes. */
1596
darin@apple.comee752e72007-11-11 18:56:13 +00001597 if (min > md->end_subject - frame->eptr) RRETURN_NO_MATCH;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001598 if (min > 0)
darind7737ab2005-09-09 00:51:07 +00001599 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001600 switch(frame->ctype)
darind7737ab2005-09-09 00:51:07 +00001601 {
1602 case OP_ANY:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001603 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001604 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001605 if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr))
darin@apple.comee752e72007-11-11 18:56:13 +00001606 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001607 ++frame->eptr;
1608 while (frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr)) frame->eptr++;
darind7737ab2005-09-09 00:51:07 +00001609 }
1610 break;
1611
darind7737ab2005-09-09 00:51:07 +00001612 case OP_NOT_DIGIT:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001613 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001614 {
darin@apple.comee752e72007-11-11 18:56:13 +00001615 if (frame->eptr >= md->end_subject) RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001616 GETCHARINC(c, frame->eptr);
darin@apple.comee752e72007-11-11 18:56:13 +00001617 if (isASCIIDigit(c))
1618 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001619 }
1620 break;
1621
1622 case OP_DIGIT:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001623 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001624 {
darin@apple.comee752e72007-11-11 18:56:13 +00001625 if (frame->eptr >= md->end_subject || !isASCIIDigit(*frame->eptr++))
1626 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001627 /* No need to skip more bytes - we know it's a 1-byte character */
1628 }
1629 break;
1630
1631 case OP_NOT_WHITESPACE:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001632 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001633 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001634 if (frame->eptr >= md->end_subject ||
1635 (*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_space) != 0))
darin@apple.comee752e72007-11-11 18:56:13 +00001636 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001637 while (++frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr));
darind7737ab2005-09-09 00:51:07 +00001638 }
1639 break;
1640
1641 case OP_WHITESPACE:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001642 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001643 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001644 if (frame->eptr >= md->end_subject ||
1645 *frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_space) == 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001646 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001647 /* No need to skip more bytes - we know it's a 1-byte character */
1648 }
1649 break;
1650
1651 case OP_NOT_WORDCHAR:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001652 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001653 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001654 if (frame->eptr >= md->end_subject ||
1655 (*frame->eptr < 128 && (md->ctypes[*frame->eptr] & ctype_word) != 0))
darin@apple.comee752e72007-11-11 18:56:13 +00001656 RRETURN_NO_MATCH;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001657 while (++frame->eptr < md->end_subject && ISMIDCHAR(*frame->eptr));
darind7737ab2005-09-09 00:51:07 +00001658 }
1659 break;
1660
1661 case OP_WORDCHAR:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001662 for (i = 1; i <= min; i++)
darind7737ab2005-09-09 00:51:07 +00001663 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001664 if (frame->eptr >= md->end_subject ||
1665 *frame->eptr >= 128 || (md->ctypes[*frame->eptr++] & ctype_word) == 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001666 RRETURN_NO_MATCH;
darind7737ab2005-09-09 00:51:07 +00001667 /* No need to skip more bytes - we know it's a 1-byte character */
1668 }
1669 break;
1670
1671 default:
darin@apple.comee752e72007-11-11 18:56:13 +00001672 ASSERT_NOT_REACHED();
1673 RRETURN_ERROR(JSRegExpErrorInternal);
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001674 } /* End switch(frame->ctype) */
darind7737ab2005-09-09 00:51:07 +00001675 }
1676
1677 /* If min = max, continue at the same level without recursing */
1678
darin@apple.comee752e72007-11-11 18:56:13 +00001679 if (min == frame->max)
1680 NEXT_OPCODE;
darind7737ab2005-09-09 00:51:07 +00001681
1682 /* If minimizing, we have to test the rest of the pattern before each
darin@apple.comaf5544c2007-11-04 08:28:22 +00001683 subsequent match. */
darind7737ab2005-09-09 00:51:07 +00001684
darin@apple.comaf5544c2007-11-04 08:28:22 +00001685 if (minimize)
darind7737ab2005-09-09 00:51:07 +00001686 {
darind7737ab2005-09-09 00:51:07 +00001687 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001688 for (frame->fi = min;; frame->fi++)
darind7737ab2005-09-09 00:51:07 +00001689 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001690 RMATCH(48, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001691 if (is_match) RRETURN;
1692 if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN;
darind7737ab2005-09-09 00:51:07 +00001693
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001694 GETCHARINC(c, frame->eptr);
1695 switch(frame->ctype)
darind7737ab2005-09-09 00:51:07 +00001696 {
1697 case OP_ANY:
darin@apple.comee752e72007-11-11 18:56:13 +00001698 if (IS_NEWLINE(c)) RRETURN;
darind7737ab2005-09-09 00:51:07 +00001699 break;
1700
1701 case OP_NOT_DIGIT:
darin@apple.comee752e72007-11-11 18:56:13 +00001702 if (isASCIIDigit(c))
1703 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001704 break;
1705
1706 case OP_DIGIT:
darin@apple.comee752e72007-11-11 18:56:13 +00001707 if (!isASCIIDigit(c))
1708 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001709 break;
1710
1711 case OP_NOT_WHITESPACE:
darin@apple.coma7c3b872007-11-04 05:22:44 +00001712 if (c < 128 && (md->ctypes[c] & ctype_space) != 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001713 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001714 break;
1715
1716 case OP_WHITESPACE:
darin@apple.coma7c3b872007-11-04 05:22:44 +00001717 if (c >= 128 || (md->ctypes[c] & ctype_space) == 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001718 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001719 break;
1720
1721 case OP_NOT_WORDCHAR:
darin@apple.coma7c3b872007-11-04 05:22:44 +00001722 if (c < 128 && (md->ctypes[c] & ctype_word) != 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001723 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001724 break;
1725
1726 case OP_WORDCHAR:
darin@apple.coma7c3b872007-11-04 05:22:44 +00001727 if (c >= 128 || (md->ctypes[c] & ctype_word) == 0)
darin@apple.comee752e72007-11-11 18:56:13 +00001728 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001729 break;
1730
1731 default:
darin@apple.comee752e72007-11-11 18:56:13 +00001732 ASSERT_NOT_REACHED();
1733 RRETURN_ERROR(JSRegExpErrorInternal);
darind7737ab2005-09-09 00:51:07 +00001734 }
1735 }
1736 }
1737 /* Control never gets here */
1738 }
1739
1740 /* If maximizing it is worth using inline code for speed, doing the type
darin@apple.comaf5544c2007-11-04 08:28:22 +00001741 test once at the start (i.e. keep it out of the loop). */
darind7737ab2005-09-09 00:51:07 +00001742
1743 else
1744 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001745 frame->pp = frame->eptr; /* Remember where we started */
darind7737ab2005-09-09 00:51:07 +00001746
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001747 switch(frame->ctype)
darind7737ab2005-09-09 00:51:07 +00001748 {
1749 case OP_ANY:
1750
1751 /* Special code is required for UTF8, but when the maximum is unlimited
1752 we don't need it, so we repeat the non-UTF8 code. This is probably
1753 worth it, because .* is quite a common idiom. */
1754
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001755 if (frame->max < INT_MAX)
darind7737ab2005-09-09 00:51:07 +00001756 {
darind7737ab2005-09-09 00:51:07 +00001757 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001758 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001759 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001760 if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
1761 frame->eptr++;
1762 while (frame->eptr < md->end_subject && (*frame->eptr & 0xc0) == 0x80) frame->eptr++;
darind7737ab2005-09-09 00:51:07 +00001763 }
1764 }
darind7737ab2005-09-09 00:51:07 +00001765 }
1766
1767 /* Handle unlimited UTF-8 repeat */
1768
1769 else
1770 {
darind7737ab2005-09-09 00:51:07 +00001771 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001772 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001773 {
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001774 if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
1775 frame->eptr++;
darind7737ab2005-09-09 00:51:07 +00001776 }
1777 break;
1778 }
darind7737ab2005-09-09 00:51:07 +00001779 }
1780 break;
1781
darind7737ab2005-09-09 00:51:07 +00001782 case OP_NOT_DIGIT:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001783 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001784 {
1785 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001786 if (frame->eptr >= md->end_subject) break;
1787 GETCHARLEN(c, frame->eptr, len);
darin@apple.comee752e72007-11-11 18:56:13 +00001788 if (isASCIIDigit(c)) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001789 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001790 }
1791 break;
1792
1793 case OP_DIGIT:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001794 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001795 {
1796 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001797 if (frame->eptr >= md->end_subject) break;
1798 GETCHARLEN(c, frame->eptr, len);
darin@apple.comee752e72007-11-11 18:56:13 +00001799 if (!isASCIIDigit(c)) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001800 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001801 }
1802 break;
1803
1804 case OP_NOT_WHITESPACE:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001805 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001806 {
1807 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001808 if (frame->eptr >= md->end_subject) break;
1809 GETCHARLEN(c, frame->eptr, len);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001810 if (c < 128 && (md->ctypes[c] & ctype_space) != 0) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001811 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001812 }
1813 break;
1814
1815 case OP_WHITESPACE:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001816 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001817 {
1818 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001819 if (frame->eptr >= md->end_subject) break;
1820 GETCHARLEN(c, frame->eptr, len);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001821 if (c >= 128 ||(md->ctypes[c] & ctype_space) == 0) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001822 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001823 }
1824 break;
1825
1826 case OP_NOT_WORDCHAR:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001827 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001828 {
1829 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001830 if (frame->eptr >= md->end_subject) break;
1831 GETCHARLEN(c, frame->eptr, len);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001832 if (c < 128 && (md->ctypes[c] & ctype_word) != 0) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001833 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001834 }
1835 break;
1836
1837 case OP_WORDCHAR:
darin@apple.comaf5544c2007-11-04 08:28:22 +00001838 for (i = min; i < frame->max; i++)
darind7737ab2005-09-09 00:51:07 +00001839 {
1840 int len = 1;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001841 if (frame->eptr >= md->end_subject) break;
1842 GETCHARLEN(c, frame->eptr, len);
darin@apple.coma7c3b872007-11-04 05:22:44 +00001843 if (c >= 128 || (md->ctypes[c] & ctype_word) == 0) break;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001844 frame->eptr+= len;
darind7737ab2005-09-09 00:51:07 +00001845 }
1846 break;
1847
1848 default:
darin@apple.comee752e72007-11-11 18:56:13 +00001849 ASSERT_NOT_REACHED();
1850 RRETURN_ERROR(JSRegExpErrorInternal);
darind7737ab2005-09-09 00:51:07 +00001851 }
1852
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001853 /* frame->eptr is now past the end of the maximum run */
darind7737ab2005-09-09 00:51:07 +00001854
1855 for(;;)
1856 {
darin@apple.comaf5544c2007-11-04 08:28:22 +00001857 RMATCH(52, frame->ecode, frame->eptrb, 0);
darin@apple.comee752e72007-11-11 18:56:13 +00001858 if (is_match) RRETURN;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001859 if (frame->eptr-- == frame->pp) break; /* Stop if tried at original pos */
1860 BACKCHAR(frame->eptr);
darind7737ab2005-09-09 00:51:07 +00001861 }
darind7737ab2005-09-09 00:51:07 +00001862
1863 /* Get here if we can't make it match with any permitted repetitions */
1864
darin@apple.comee752e72007-11-11 18:56:13 +00001865 RRETURN;
darind7737ab2005-09-09 00:51:07 +00001866 }
1867 /* Control never gets here */
1868
darin@apple.comee752e72007-11-11 18:56:13 +00001869 BEGIN_OPCODE(BRANUMBER):
1870 BEGIN_OPCODE(CRMINPLUS):
1871 BEGIN_OPCODE(CRMINQUERY):
1872 BEGIN_OPCODE(CRMINRANGE):
1873 BEGIN_OPCODE(CRMINSTAR):
1874 BEGIN_OPCODE(CRPLUS):
1875 BEGIN_OPCODE(CRQUERY):
1876 BEGIN_OPCODE(CRRANGE):
1877 BEGIN_OPCODE(CRSTAR):
1878 ASSERT_NOT_REACHED();
1879 RRETURN_ERROR(JSRegExpErrorInternal);
1880
1881#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1882 CAPTURING_BRACKET:
1883#else
darin@apple.comaf5544c2007-11-04 08:28:22 +00001884 default:
darin@apple.comee752e72007-11-11 18:56:13 +00001885#endif
darin@apple.comaf5544c2007-11-04 08:28:22 +00001886 /* Opening capturing bracket. If there is space in the offset vector, save
1887 the current subject position in the working slot at the top of the vector. We
1888 mustn't change the current values of the data slot, because they may be set
1889 from a previous iteration of this group, and be referred to by a reference
1890 inside the group.
1891
1892 If the bracket fails to match, we need to restore this value and also the
1893 values of the final offsets, in case they were set by a previous iteration of
1894 the same bracket.
1895
1896 If there isn't enough space in the offset vector, treat this as if it were a
1897 non-capturing bracket. Don't worry about setting the flag for the error case
1898 here; that is handled in the code for KET. */
1899
darin@apple.comee752e72007-11-11 18:56:13 +00001900 ASSERT(*frame->ecode > OP_BRA);
1901
darin@apple.comaf5544c2007-11-04 08:28:22 +00001902 frame->number = *frame->ecode - OP_BRA;
1903
1904 /* For extended extraction brackets (large number), we have to fish out the
1905 number from a dummy opcode at the start. */
1906
1907 if (frame->number > EXTRACT_BASIC_MAX)
1908 frame->number = GET2(frame->ecode, 2+LINK_SIZE);
1909 frame->offset = frame->number << 1;
1910
1911#ifdef DEBUG
1912 printf("start bracket %d subject=", frame->number);
1913 pchars(frame->eptr, 16, TRUE, md);
1914 printf("\n");
1915#endif
1916
1917 if (frame->offset < md->offset_max)
1918 {
1919 frame->save_offset1 = md->offset_vector[frame->offset];
darin@apple.comee752e72007-11-11 18:56:13 +00001920 frame->save_offset2 = md->offset_vector[frame->offset + 1];
darin@apple.comaf5544c2007-11-04 08:28:22 +00001921 frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
1922
1923 DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
1924 md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
1925
1926 do
1927 {
1928 RMATCH(1, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
darin@apple.comee752e72007-11-11 18:56:13 +00001929 if (is_match) RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001930 frame->ecode += GET(frame->ecode, 1);
1931 }
1932 while (*frame->ecode == OP_ALT);
1933
1934 DPRINTF(("bracket %d failed\n", frame->number));
1935
1936 md->offset_vector[frame->offset] = frame->save_offset1;
darin@apple.comee752e72007-11-11 18:56:13 +00001937 md->offset_vector[frame->offset + 1] = frame->save_offset2;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001938 md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
1939
darin@apple.comee752e72007-11-11 18:56:13 +00001940 RRETURN;
darin@apple.comaf5544c2007-11-04 08:28:22 +00001941 }
1942
1943 /* Insufficient room for saving captured contents */
1944
1945 goto NON_CAPTURING_BRACKET;
darind7737ab2005-09-09 00:51:07 +00001946 }
1947
1948 /* Do not stick any code in here without much thought; it is assumed
1949 that "continue" in the code above comes out to here to repeat the main
1950 loop. */
1951
darin@apple.comee752e72007-11-11 18:56:13 +00001952 } /* End of main loop */
1953
darind7737ab2005-09-09 00:51:07 +00001954/* Control never reaches here */
darined76fb52007-02-06 21:55:25 +00001955
darin@apple.comee752e72007-11-11 18:56:13 +00001956#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
darined76fb52007-02-06 21:55:25 +00001957
1958RRETURN_SWITCH:
darin@apple.com7ecf0c32007-11-04 06:18:31 +00001959switch (frame->where)
darined76fb52007-02-06 21:55:25 +00001960 {
darin@apple.comee752e72007-11-11 18:56:13 +00001961 case 0: goto RETURN;
darined76fb52007-02-06 21:55:25 +00001962 case 1: goto RRETURN_1;
1963 case 2: goto RRETURN_2;
darined76fb52007-02-06 21:55:25 +00001964 case 6: goto RRETURN_6;
1965 case 7: goto RRETURN_7;
darined76fb52007-02-06 21:55:25 +00001966 case 9: goto RRETURN_9;
1967 case 10: goto RRETURN_10;
1968 case 11: goto RRETURN_11;
1969 case 12: goto RRETURN_12;
1970 case 13: goto RRETURN_13;
1971 case 14: goto RRETURN_14;
1972 case 15: goto RRETURN_15;
1973 case 16: goto RRETURN_16;
1974 case 17: goto RRETURN_17;
1975 case 18: goto RRETURN_18;
1976 case 19: goto RRETURN_19;
1977 case 20: goto RRETURN_20;
1978 case 21: goto RRETURN_21;
1979 case 22: goto RRETURN_22;
darined76fb52007-02-06 21:55:25 +00001980 case 24: goto RRETURN_24;
darined76fb52007-02-06 21:55:25 +00001981 case 26: goto RRETURN_26;
1982 case 27: goto RRETURN_27;
1983 case 28: goto RRETURN_28;
1984 case 29: goto RRETURN_29;
1985 case 30: goto RRETURN_30;
1986 case 31: goto RRETURN_31;
darined76fb52007-02-06 21:55:25 +00001987 case 38: goto RRETURN_38;
darined76fb52007-02-06 21:55:25 +00001988 case 40: goto RRETURN_40;
darined76fb52007-02-06 21:55:25 +00001989 case 42: goto RRETURN_42;
darined76fb52007-02-06 21:55:25 +00001990 case 44: goto RRETURN_44;
darined76fb52007-02-06 21:55:25 +00001991 case 48: goto RRETURN_48;
darined76fb52007-02-06 21:55:25 +00001992 case 52: goto RRETURN_52;
darined76fb52007-02-06 21:55:25 +00001993 }
1994
darined76fb52007-02-06 21:55:25 +00001995abort();
darin@apple.comee752e72007-11-11 18:56:13 +00001996RRETURN_ERROR(JSRegExpErrorInternal);
darined76fb52007-02-06 21:55:25 +00001997
1998#endif
darinae790da2007-10-17 05:38:39 +00001999
darin@apple.comee752e72007-11-11 18:56:13 +00002000RETURN:
2001 return is_match ? MATCH_MATCH : MATCH_NOMATCH;
2002
2003RETURN_ERROR:
2004 while (!(frame >= stackframes && frame < stackframesend)) {
2005 newframe = frame->prevframe;
2006 delete frame;
2007 frame = newframe;
2008 }
2009 return i;
darind7737ab2005-09-09 00:51:07 +00002010}
2011
2012
darind7737ab2005-09-09 00:51:07 +00002013/*************************************************
2014* Execute a Regular Expression *
2015*************************************************/
2016
2017/* This function applies a compiled re to a subject string and picks out
2018portions of the string if it matches. Two elements in the vector are set for
2019each substring: the offsets to the start and end of the substring.
2020
2021Arguments:
2022 argument_re points to the compiled expression
2023 extra_data points to extra data or is NULL
2024 subject points to the subject string
2025 length length of subject string (may contain binary zeros)
2026 start_offset where to start in the subject string
2027 options option bits
2028 offsets points to a vector of ints to be filled in with offsets
2029 offsetcount the number of elements in the vector
2030
2031Returns: > 0 => success; value is the number of elements filled in
2032 = 0 => success, but offsets is not big enough
2033 -1 => failed to match
2034 < -1 => some kind of unexpected problem
2035*/
2036
darin@apple.coma7c3b872007-11-04 05:22:44 +00002037int
2038jsRegExpExecute(const pcre *argument_re,
darin@apple.comee752e72007-11-11 18:56:13 +00002039 const UChar* subject, int length, int start_offset, int *offsets,
darind7737ab2005-09-09 00:51:07 +00002040 int offsetcount)
2041{
2042int rc, resetcount, ocount;
2043int first_byte = -1;
2044int req_byte = -1;
2045int req_byte2 = -1;
darind7737ab2005-09-09 00:51:07 +00002046BOOL using_temporary_offsets = FALSE;
darind7737ab2005-09-09 00:51:07 +00002047BOOL first_byte_caseless = FALSE;
darin@apple.coma7c3b872007-11-04 05:22:44 +00002048BOOL startline;
darind7737ab2005-09-09 00:51:07 +00002049BOOL req_byte_caseless = FALSE;
2050match_data match_block;
darinae790da2007-10-17 05:38:39 +00002051USPTR start_match = (USPTR)subject + start_offset;
2052USPTR end_subject;
2053USPTR req_byte_ptr = start_match - 1;
darin@apple.coma7c3b872007-11-04 05:22:44 +00002054const uschar *start_code;
darind7737ab2005-09-09 00:51:07 +00002055
darind7737ab2005-09-09 00:51:07 +00002056const real_pcre *external_re = (const real_pcre *)argument_re;
2057const real_pcre *re = external_re;
2058
2059/* Plausibility checks */
2060
darin@apple.coma7c3b872007-11-04 05:22:44 +00002061ASSERT(re);
2062ASSERT(subject);
2063ASSERT(offsetcount >= 0);
2064ASSERT(offsets || offsetcount == 0);
darind7737ab2005-09-09 00:51:07 +00002065
darind7737ab2005-09-09 00:51:07 +00002066/* Set up other data */
2067
darind7737ab2005-09-09 00:51:07 +00002068startline = (re->options & PCRE_STARTLINE) != 0;
darind7737ab2005-09-09 00:51:07 +00002069
2070/* The code starts after the real_pcre block and the capture name table. */
2071
darin@apple.com7ecf0c32007-11-04 06:18:31 +00002072start_code = (const uschar *)(external_re + 1);
darind7737ab2005-09-09 00:51:07 +00002073
darinae790da2007-10-17 05:38:39 +00002074match_block.start_subject = (USPTR)subject;
darind7737ab2005-09-09 00:51:07 +00002075match_block.end_subject = match_block.start_subject + length;
2076end_subject = match_block.end_subject;
2077
darin@apple.com7ecf0c32007-11-04 06:18:31 +00002078match_block.lcc = _pcre_default_tables + lcc_offset;
2079match_block.ctypes = _pcre_default_tables + ctypes_offset;
darind7737ab2005-09-09 00:51:07 +00002080
darin@apple.coma7c3b872007-11-04 05:22:44 +00002081match_block.multiline = (re->options & PCRE_MULTILINE) != 0;
2082match_block.caseless = (re->options & PCRE_CASELESS) != 0;
darind7737ab2005-09-09 00:51:07 +00002083
2084/* If the expression has got more back references than the offsets supplied can
2085hold, we get a temporary chunk of working store to use during the matching.
2086Otherwise, we can use the vector supplied, rounding down its size to a multiple
2087of 3. */
2088
2089ocount = offsetcount - (offsetcount % 3);
2090
2091if (re->top_backref > 0 && re->top_backref >= ocount/3)
2092 {
2093 ocount = re->top_backref * 3 + 3;
darin@apple.comee752e72007-11-11 18:56:13 +00002094 match_block.offset_vector = new int[ocount];
2095 if (match_block.offset_vector == NULL) return JSRegExpErrorNoMemory;
darind7737ab2005-09-09 00:51:07 +00002096 using_temporary_offsets = TRUE;
2097 DPRINTF(("Got memory to hold back references\n"));
2098 }
2099else match_block.offset_vector = offsets;
2100
2101match_block.offset_end = ocount;
2102match_block.offset_max = (2*ocount)/3;
2103match_block.offset_overflow = FALSE;
darind7737ab2005-09-09 00:51:07 +00002104
2105/* Compute the minimum number of offsets that we need to reset each time. Doing
2106this makes a huge difference to execution time when there aren't many brackets
2107in the pattern. */
2108
2109resetcount = 2 + re->top_bracket * 2;
2110if (resetcount > offsetcount) resetcount = ocount;
2111
2112/* Reset the working variable associated with each extraction. These should
2113never be used unless previously set, but they get saved and restored, and so we
2114initialize them to avoid reading uninitialized locations. */
2115
2116if (match_block.offset_vector != NULL)
2117 {
2118 register int *iptr = match_block.offset_vector + ocount;
2119 register int *iend = iptr - resetcount/2 + 1;
2120 while (--iptr >= iend) *iptr = -1;
2121 }
2122
2123/* Set up the first character to match, if available. The first_byte value is
2124never set for an anchored regular expression, but the anchoring may be forced
2125at run time, so we have to test for anchoring. The first char may be unset for
2126an unanchored pattern, of course. If there's no first char and the pattern was
2127studied, there may be a bitmap of possible first characters. */
2128
darind7737ab2005-09-09 00:51:07 +00002129 if ((re->options & PCRE_FIRSTSET) != 0)
2130 {
2131 first_byte = re->first_byte & 255;
2132 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
2133 first_byte = match_block.lcc[first_byte];
2134 }
darind7737ab2005-09-09 00:51:07 +00002135
2136/* For anchored or unanchored matches, there may be a "last known required
2137character" set. */
2138
2139if ((re->options & PCRE_REQCHSET) != 0)
2140 {
2141 req_byte = re->req_byte & 255;
2142 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
darin@apple.com7ecf0c32007-11-04 06:18:31 +00002143 req_byte2 = (_pcre_default_tables + fcc_offset)[req_byte]; /* case flipped */
darind7737ab2005-09-09 00:51:07 +00002144 }
2145
2146/* Loop for handling unanchored repeated matching attempts; for anchored regexs
2147the loop runs just once. */
2148
2149do
2150 {
darinae790da2007-10-17 05:38:39 +00002151 USPTR save_end_subject = end_subject;
darind7737ab2005-09-09 00:51:07 +00002152
2153 /* Reset the maximum number of extractions we might see. */
2154
2155 if (match_block.offset_vector != NULL)
2156 {
2157 register int *iptr = match_block.offset_vector;
2158 register int *iend = iptr + resetcount;
2159 while (iptr < iend) *iptr++ = -1;
2160 }
2161
2162 /* Advance to a unique first char if possible. If firstline is TRUE, the
2163 start of the match is constrained to the first line of a multiline string.
2164 Implement this by temporarily adjusting end_subject so that we stop scanning
2165 at a newline. If the match fails at the newline, later code breaks this loop.
2166 */
2167
darind7737ab2005-09-09 00:51:07 +00002168 /* Now test for a unique first byte */
2169
2170 if (first_byte >= 0)
2171 {
darince72b7a2007-02-06 19:42:35 +00002172 pcre_uchar first_char = first_byte;
darind7737ab2005-09-09 00:51:07 +00002173 if (first_byte_caseless)
2174 while (start_match < end_subject)
2175 {
2176 int sm = *start_match;
darind7737ab2005-09-09 00:51:07 +00002177 if (sm > 127)
2178 break;
darince72b7a2007-02-06 19:42:35 +00002179 if (match_block.lcc[sm] == first_char)
darind7737ab2005-09-09 00:51:07 +00002180 break;
2181 start_match++;
2182 }
2183 else
darince72b7a2007-02-06 19:42:35 +00002184 while (start_match < end_subject && *start_match != first_char)
darind7737ab2005-09-09 00:51:07 +00002185 start_match++;
2186 }
2187
2188 /* Or to just after \n for a multiline match if possible */
2189
2190 else if (startline)
2191 {
2192 if (start_match > match_block.start_subject + start_offset)
2193 {
darin@apple.com3fbfe082007-11-03 16:28:51 +00002194 while (start_match < end_subject && !IS_NEWLINE(start_match[-1]))
darind7737ab2005-09-09 00:51:07 +00002195 start_match++;
2196 }
2197 }
2198
darind7737ab2005-09-09 00:51:07 +00002199 /* Restore fudged end_subject */
2200
2201 end_subject = save_end_subject;
2202
2203#ifdef DEBUG /* Sigh. Some compilers never learn. */
2204 printf(">>>> Match against: ");
2205 pchars(start_match, end_subject - start_match, TRUE, &match_block);
2206 printf("\n");
2207#endif
2208
2209 /* If req_byte is set, we know that that character must appear in the subject
2210 for the match to succeed. If the first character is set, req_byte must be
2211 later in the subject; otherwise the test starts at the match point. This
2212 optimization can save a huge amount of backtracking in patterns with nested
2213 unlimited repeats that aren't going to match. Writing separate code for
2214 cased/caseless versions makes it go faster, as does using an autoincrement
2215 and backing off on a match.
2216
2217 HOWEVER: when the subject string is very, very long, searching to its end can
2218 take a long time, and give bad performance on quite ordinary patterns. This
2219 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2220 don't do this when the string is sufficiently long.
2221
2222 ALSO: this processing is disabled when partial matching is requested.
2223 */
2224
2225 if (req_byte >= 0 &&
darin@apple.coma7c3b872007-11-04 05:22:44 +00002226 end_subject - start_match < REQ_BYTE_MAX)
darind7737ab2005-09-09 00:51:07 +00002227 {
darinae790da2007-10-17 05:38:39 +00002228 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
darind7737ab2005-09-09 00:51:07 +00002229
2230 /* We don't need to repeat the search if we haven't yet reached the
2231 place we found it at last time. */
2232
2233 if (p > req_byte_ptr)
2234 {
2235 if (req_byte_caseless)
2236 {
2237 while (p < end_subject)
2238 {
2239 register int pp = *p++;
2240 if (pp == req_byte || pp == req_byte2) { p--; break; }
2241 }
2242 }
2243 else
2244 {
2245 while (p < end_subject)
2246 {
2247 if (*p++ == req_byte) { p--; break; }
2248 }
2249 }
2250
2251 /* If we can't find the required character, break the matching loop */
2252
2253 if (p >= end_subject) break;
2254
2255 /* If we have found the required character, save the point where we
2256 found it, so that we don't search again next time round the loop if
2257 the start hasn't passed this character yet. */
2258
2259 req_byte_ptr = p;
2260 }
2261 }
2262
2263 /* When a match occurs, substrings will be set for all internal extractions;
2264 we just need to set up the whole thing as substring 0 before returning. If
2265 there were too many extractions, set the return code to zero. In the case
2266 where we had to get some local store to hold offsets for backreferences, copy
2267 those back references that we can. In this case there need not be overflow
2268 if certain parts of the pattern were not used. */
2269
darind7737ab2005-09-09 00:51:07 +00002270 match_block.match_call_count = 0;
2271
darin@apple.coma7c3b872007-11-04 05:22:44 +00002272 rc = match(start_match, start_code, 2, &match_block);
darind7737ab2005-09-09 00:51:07 +00002273
2274 /* When the result is no match, if the subject's first character was a
2275 newline and the PCRE_FIRSTLINE option is set, break (which will return
2276 PCRE_ERROR_NOMATCH). The option requests that a match occur before the first
2277 newline in the subject. Otherwise, advance the pointer to the next character
2278 and continue - but the continuation will actually happen only when the
2279 pattern is not anchored. */
2280
2281 if (rc == MATCH_NOMATCH)
2282 {
darind7737ab2005-09-09 00:51:07 +00002283 start_match++;
darin496882e2006-07-15 15:30:03 +00002284 while(start_match < end_subject && ISMIDCHAR(*start_match))
darind7737ab2005-09-09 00:51:07 +00002285 start_match++;
darind7737ab2005-09-09 00:51:07 +00002286 continue;
2287 }
2288
2289 if (rc != MATCH_MATCH)
2290 {
2291 DPRINTF((">>>> error: returning %d\n", rc));
2292 return rc;
2293 }
2294
2295 /* We have a match! Copy the offset information from temporary store if
2296 necessary */
2297
2298 if (using_temporary_offsets)
2299 {
2300 if (offsetcount >= 4)
2301 {
2302 memcpy(offsets + 2, match_block.offset_vector + 2,
2303 (offsetcount - 2) * sizeof(int));
2304 DPRINTF(("Copied offsets from temporary memory\n"));
2305 }
2306 if (match_block.end_offset_top > offsetcount)
2307 match_block.offset_overflow = TRUE;
2308
2309 DPRINTF(("Freeing temporary memory\n"));
darin@apple.comee752e72007-11-11 18:56:13 +00002310 delete [] match_block.offset_vector;
darind7737ab2005-09-09 00:51:07 +00002311 }
2312
2313 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
2314
2315 if (offsetcount < 2) rc = 0; else
2316 {
darinae790da2007-10-17 05:38:39 +00002317 offsets[0] = start_match - match_block.start_subject;
2318 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
darind7737ab2005-09-09 00:51:07 +00002319 }
2320
2321 DPRINTF((">>>> returning %d\n", rc));
2322 return rc;
2323 }
2324
2325/* This "while" is the end of the "do" above */
2326
darin@apple.coma7c3b872007-11-04 05:22:44 +00002327while (start_match <= end_subject);
darind7737ab2005-09-09 00:51:07 +00002328
2329if (using_temporary_offsets)
2330 {
2331 DPRINTF(("Freeing temporary memory\n"));
darin@apple.comee752e72007-11-11 18:56:13 +00002332 delete [] match_block.offset_vector;
darind7737ab2005-09-09 00:51:07 +00002333 }
2334
darind7737ab2005-09-09 00:51:07 +00002335 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
darin@apple.comee752e72007-11-11 18:56:13 +00002336 return JSRegExpErrorNoMatch;
darind7737ab2005-09-09 00:51:07 +00002337}