11914 lines
356 KiB
C
11914 lines
356 KiB
C
|
/*************************************************
|
||
|
* Perl-Compatible Regular Expressions *
|
||
|
*************************************************/
|
||
|
|
||
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
||
|
and semantics are as close as possible to those of the Perl 5 language.
|
||
|
|
||
|
Written by Philip Hazel
|
||
|
Copyright (c) 1997-2013 University of Cambridge
|
||
|
|
||
|
The machine code generator part (this module) was written by Zoltan Herczeg
|
||
|
Copyright (c) 2010-2013
|
||
|
|
||
|
-----------------------------------------------------------------------------
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions are met:
|
||
|
|
||
|
* Redistributions of source code must retain the above copyright notice,
|
||
|
this list of conditions and the following disclaimer.
|
||
|
|
||
|
* Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in the
|
||
|
documentation and/or other materials provided with the distribution.
|
||
|
|
||
|
* Neither the name of the University of Cambridge nor the names of its
|
||
|
contributors may be used to endorse or promote products derived from
|
||
|
this software without specific prior written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
POSSIBILITY OF SUCH DAMAGE.
|
||
|
-----------------------------------------------------------------------------
|
||
|
*/
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "config.h"
|
||
|
#endif
|
||
|
|
||
|
#include "pcre_internal.h"
|
||
|
|
||
|
#if defined SUPPORT_JIT
|
||
|
|
||
|
/* All-in-one: Since we use the JIT compiler only from here,
|
||
|
we just include it. This way we don't need to touch the build
|
||
|
system files. */
|
||
|
|
||
|
#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
|
||
|
#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
|
||
|
#define SLJIT_CONFIG_AUTO 1
|
||
|
#define SLJIT_CONFIG_STATIC 1
|
||
|
#define SLJIT_VERBOSE 0
|
||
|
#define SLJIT_DEBUG 0
|
||
|
|
||
|
#include "sljit/sljitLir.c"
|
||
|
|
||
|
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
|
||
|
#error Unsupported architecture
|
||
|
#endif
|
||
|
|
||
|
/* Defines for debugging purposes. */
|
||
|
|
||
|
/* 1 - Use unoptimized capturing brackets.
|
||
|
2 - Enable capture_last_ptr (includes option 1). */
|
||
|
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
|
||
|
|
||
|
/* 1 - Always have a control head. */
|
||
|
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
|
||
|
|
||
|
/* Allocate memory for the regex stack on the real machine stack.
|
||
|
Fast, but limited size. */
|
||
|
#define MACHINE_STACK_SIZE 32768
|
||
|
|
||
|
/* Growth rate for stack allocated by the OS. Should be the multiply
|
||
|
of page size. */
|
||
|
#define STACK_GROWTH_RATE 8192
|
||
|
|
||
|
/* Enable to check that the allocation could destroy temporaries. */
|
||
|
#if defined SLJIT_DEBUG && SLJIT_DEBUG
|
||
|
#define DESTROY_REGISTERS 1
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
Short summary about the backtracking mechanism empolyed by the jit code generator:
|
||
|
|
||
|
The code generator follows the recursive nature of the PERL compatible regular
|
||
|
expressions. The basic blocks of regular expressions are condition checkers
|
||
|
whose execute different commands depending on the result of the condition check.
|
||
|
The relationship between the operators can be horizontal (concatenation) and
|
||
|
vertical (sub-expression) (See struct backtrack_common for more details).
|
||
|
|
||
|
'ab' - 'a' and 'b' regexps are concatenated
|
||
|
'a+' - 'a' is the sub-expression of the '+' operator
|
||
|
|
||
|
The condition checkers are boolean (true/false) checkers. Machine code is generated
|
||
|
for the checker itself and for the actions depending on the result of the checker.
|
||
|
The 'true' case is called as the matching path (expected path), and the other is called as
|
||
|
the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken
|
||
|
branches on the matching path.
|
||
|
|
||
|
Greedy star operator (*) :
|
||
|
Matching path: match happens.
|
||
|
Backtrack path: match failed.
|
||
|
Non-greedy star operator (*?) :
|
||
|
Matching path: no need to perform a match.
|
||
|
Backtrack path: match is required.
|
||
|
|
||
|
The following example shows how the code generated for a capturing bracket
|
||
|
with two alternatives. Let A, B, C, D are arbirary regular expressions, and
|
||
|
we have the following regular expression:
|
||
|
|
||
|
A(B|C)D
|
||
|
|
||
|
The generated code will be the following:
|
||
|
|
||
|
A matching path
|
||
|
'(' matching path (pushing arguments to the stack)
|
||
|
B matching path
|
||
|
')' matching path (pushing arguments to the stack)
|
||
|
D matching path
|
||
|
return with successful match
|
||
|
|
||
|
D backtrack path
|
||
|
')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
|
||
|
B backtrack path
|
||
|
C expected path
|
||
|
jump to D matching path
|
||
|
C backtrack path
|
||
|
A backtrack path
|
||
|
|
||
|
Notice, that the order of backtrack code paths are the opposite of the fast
|
||
|
code paths. In this way the topmost value on the stack is always belong
|
||
|
to the current backtrack code path. The backtrack path must check
|
||
|
whether there is a next alternative. If so, it needs to jump back to
|
||
|
the matching path eventually. Otherwise it needs to clear out its own stack
|
||
|
frame and continue the execution on the backtrack code paths.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
Saved stack frames:
|
||
|
|
||
|
Atomic blocks and asserts require reloading the values of private data
|
||
|
when the backtrack mechanism performed. Because of OP_RECURSE, the data
|
||
|
are not necessarly known in compile time, thus we need a dynamic restore
|
||
|
mechanism.
|
||
|
|
||
|
The stack frames are stored in a chain list, and have the following format:
|
||
|
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
|
||
|
|
||
|
Thus we can restore the private data to a particular point in the stack.
|
||
|
*/
|
||
|
|
||
|
typedef struct jit_arguments {
|
||
|
/* Pointers first. */
|
||
|
struct sljit_stack *stack;
|
||
|
const pcre_uchar *str;
|
||
|
const pcre_uchar *begin;
|
||
|
const pcre_uchar *end;
|
||
|
int *offsets;
|
||
|
pcre_uchar *mark_ptr;
|
||
|
void *callout_data;
|
||
|
/* Everything else after. */
|
||
|
sljit_u32 limit_match;
|
||
|
int real_offset_count;
|
||
|
int offset_count;
|
||
|
sljit_u8 notbol;
|
||
|
sljit_u8 noteol;
|
||
|
sljit_u8 notempty;
|
||
|
sljit_u8 notempty_atstart;
|
||
|
} jit_arguments;
|
||
|
|
||
|
typedef struct executable_functions {
|
||
|
void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
|
||
|
void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
|
||
|
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
|
||
|
PUBL(jit_callback) callback;
|
||
|
void *userdata;
|
||
|
sljit_u32 top_bracket;
|
||
|
sljit_u32 limit_match;
|
||
|
} executable_functions;
|
||
|
|
||
|
typedef struct jump_list {
|
||
|
struct sljit_jump *jump;
|
||
|
struct jump_list *next;
|
||
|
} jump_list;
|
||
|
|
||
|
typedef struct stub_list {
|
||
|
struct sljit_jump *start;
|
||
|
struct sljit_label *quit;
|
||
|
struct stub_list *next;
|
||
|
} stub_list;
|
||
|
|
||
|
typedef struct label_addr_list {
|
||
|
struct sljit_label *label;
|
||
|
sljit_uw *update_addr;
|
||
|
struct label_addr_list *next;
|
||
|
} label_addr_list;
|
||
|
|
||
|
enum frame_types {
|
||
|
no_frame = -1,
|
||
|
no_stack = -2
|
||
|
};
|
||
|
|
||
|
enum control_types {
|
||
|
type_mark = 0,
|
||
|
type_then_trap = 1
|
||
|
};
|
||
|
|
||
|
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
|
||
|
|
||
|
/* The following structure is the key data type for the recursive
|
||
|
code generator. It is allocated by compile_matchingpath, and contains
|
||
|
the arguments for compile_backtrackingpath. Must be the first member
|
||
|
of its descendants. */
|
||
|
typedef struct backtrack_common {
|
||
|
/* Concatenation stack. */
|
||
|
struct backtrack_common *prev;
|
||
|
jump_list *nextbacktracks;
|
||
|
/* Internal stack (for component operators). */
|
||
|
struct backtrack_common *top;
|
||
|
jump_list *topbacktracks;
|
||
|
/* Opcode pointer. */
|
||
|
pcre_uchar *cc;
|
||
|
} backtrack_common;
|
||
|
|
||
|
typedef struct assert_backtrack {
|
||
|
backtrack_common common;
|
||
|
jump_list *condfailed;
|
||
|
/* Less than 0 if a frame is not needed. */
|
||
|
int framesize;
|
||
|
/* Points to our private memory word on the stack. */
|
||
|
int private_data_ptr;
|
||
|
/* For iterators. */
|
||
|
struct sljit_label *matchingpath;
|
||
|
} assert_backtrack;
|
||
|
|
||
|
typedef struct bracket_backtrack {
|
||
|
backtrack_common common;
|
||
|
/* Where to coninue if an alternative is successfully matched. */
|
||
|
struct sljit_label *alternative_matchingpath;
|
||
|
/* For rmin and rmax iterators. */
|
||
|
struct sljit_label *recursive_matchingpath;
|
||
|
/* For greedy ? operator. */
|
||
|
struct sljit_label *zero_matchingpath;
|
||
|
/* Contains the branches of a failed condition. */
|
||
|
union {
|
||
|
/* Both for OP_COND, OP_SCOND. */
|
||
|
jump_list *condfailed;
|
||
|
assert_backtrack *assert;
|
||
|
/* For OP_ONCE. Less than 0 if not needed. */
|
||
|
int framesize;
|
||
|
} u;
|
||
|
/* Points to our private memory word on the stack. */
|
||
|
int private_data_ptr;
|
||
|
} bracket_backtrack;
|
||
|
|
||
|
typedef struct bracketpos_backtrack {
|
||
|
backtrack_common common;
|
||
|
/* Points to our private memory word on the stack. */
|
||
|
int private_data_ptr;
|
||
|
/* Reverting stack is needed. */
|
||
|
int framesize;
|
||
|
/* Allocated stack size. */
|
||
|
int stacksize;
|
||
|
} bracketpos_backtrack;
|
||
|
|
||
|
typedef struct braminzero_backtrack {
|
||
|
backtrack_common common;
|
||
|
struct sljit_label *matchingpath;
|
||
|
} braminzero_backtrack;
|
||
|
|
||
|
typedef struct char_iterator_backtrack {
|
||
|
backtrack_common common;
|
||
|
/* Next iteration. */
|
||
|
struct sljit_label *matchingpath;
|
||
|
union {
|
||
|
jump_list *backtracks;
|
||
|
struct {
|
||
|
unsigned int othercasebit;
|
||
|
pcre_uchar chr;
|
||
|
BOOL enabled;
|
||
|
} charpos;
|
||
|
} u;
|
||
|
} char_iterator_backtrack;
|
||
|
|
||
|
typedef struct ref_iterator_backtrack {
|
||
|
backtrack_common common;
|
||
|
/* Next iteration. */
|
||
|
struct sljit_label *matchingpath;
|
||
|
} ref_iterator_backtrack;
|
||
|
|
||
|
typedef struct recurse_entry {
|
||
|
struct recurse_entry *next;
|
||
|
/* Contains the function entry. */
|
||
|
struct sljit_label *entry;
|
||
|
/* Collects the calls until the function is not created. */
|
||
|
jump_list *calls;
|
||
|
/* Points to the starting opcode. */
|
||
|
sljit_sw start;
|
||
|
} recurse_entry;
|
||
|
|
||
|
typedef struct recurse_backtrack {
|
||
|
backtrack_common common;
|
||
|
BOOL inlined_pattern;
|
||
|
} recurse_backtrack;
|
||
|
|
||
|
#define OP_THEN_TRAP OP_TABLE_LENGTH
|
||
|
|
||
|
typedef struct then_trap_backtrack {
|
||
|
backtrack_common common;
|
||
|
/* If then_trap is not NULL, this structure contains the real
|
||
|
then_trap for the backtracking path. */
|
||
|
struct then_trap_backtrack *then_trap;
|
||
|
/* Points to the starting opcode. */
|
||
|
sljit_sw start;
|
||
|
/* Exit point for the then opcodes of this alternative. */
|
||
|
jump_list *quit;
|
||
|
/* Frame size of the current alternative. */
|
||
|
int framesize;
|
||
|
} then_trap_backtrack;
|
||
|
|
||
|
#define MAX_RANGE_SIZE 4
|
||
|
|
||
|
typedef struct compiler_common {
|
||
|
/* The sljit ceneric compiler. */
|
||
|
struct sljit_compiler *compiler;
|
||
|
/* First byte code. */
|
||
|
pcre_uchar *start;
|
||
|
/* Maps private data offset to each opcode. */
|
||
|
sljit_s32 *private_data_ptrs;
|
||
|
/* Chain list of read-only data ptrs. */
|
||
|
void *read_only_data_head;
|
||
|
/* Tells whether the capturing bracket is optimized. */
|
||
|
sljit_u8 *optimized_cbracket;
|
||
|
/* Tells whether the starting offset is a target of then. */
|
||
|
sljit_u8 *then_offsets;
|
||
|
/* Current position where a THEN must jump. */
|
||
|
then_trap_backtrack *then_trap;
|
||
|
/* Starting offset of private data for capturing brackets. */
|
||
|
sljit_s32 cbra_ptr;
|
||
|
/* Output vector starting point. Must be divisible by 2. */
|
||
|
sljit_s32 ovector_start;
|
||
|
/* Points to the starting character of the current match. */
|
||
|
sljit_s32 start_ptr;
|
||
|
/* Last known position of the requested byte. */
|
||
|
sljit_s32 req_char_ptr;
|
||
|
/* Head of the last recursion. */
|
||
|
sljit_s32 recursive_head_ptr;
|
||
|
/* First inspected character for partial matching.
|
||
|
(Needed for avoiding zero length partial matches.) */
|
||
|
sljit_s32 start_used_ptr;
|
||
|
/* Starting pointer for partial soft matches. */
|
||
|
sljit_s32 hit_start;
|
||
|
/* Pointer of the match end position. */
|
||
|
sljit_s32 match_end_ptr;
|
||
|
/* Points to the marked string. */
|
||
|
sljit_s32 mark_ptr;
|
||
|
/* Recursive control verb management chain. */
|
||
|
sljit_s32 control_head_ptr;
|
||
|
/* Points to the last matched capture block index. */
|
||
|
sljit_s32 capture_last_ptr;
|
||
|
/* Fast forward skipping byte code pointer. */
|
||
|
pcre_uchar *fast_forward_bc_ptr;
|
||
|
/* Locals used by fast fail optimization. */
|
||
|
sljit_s32 fast_fail_start_ptr;
|
||
|
sljit_s32 fast_fail_end_ptr;
|
||
|
|
||
|
/* Flipped and lower case tables. */
|
||
|
const sljit_u8 *fcc;
|
||
|
sljit_sw lcc;
|
||
|
/* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
|
||
|
int mode;
|
||
|
/* TRUE, when minlength is greater than 0. */
|
||
|
BOOL might_be_empty;
|
||
|
/* \K is found in the pattern. */
|
||
|
BOOL has_set_som;
|
||
|
/* (*SKIP:arg) is found in the pattern. */
|
||
|
BOOL has_skip_arg;
|
||
|
/* (*THEN) is found in the pattern. */
|
||
|
BOOL has_then;
|
||
|
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
|
||
|
BOOL has_skip_in_assert_back;
|
||
|
/* Currently in recurse or negative assert. */
|
||
|
BOOL local_exit;
|
||
|
/* Currently in a positive assert. */
|
||
|
BOOL positive_assert;
|
||
|
/* Newline control. */
|
||
|
int nltype;
|
||
|
sljit_u32 nlmax;
|
||
|
sljit_u32 nlmin;
|
||
|
int newline;
|
||
|
int bsr_nltype;
|
||
|
sljit_u32 bsr_nlmax;
|
||
|
sljit_u32 bsr_nlmin;
|
||
|
/* Dollar endonly. */
|
||
|
int endonly;
|
||
|
/* Tables. */
|
||
|
sljit_sw ctypes;
|
||
|
/* Named capturing brackets. */
|
||
|
pcre_uchar *name_table;
|
||
|
sljit_sw name_count;
|
||
|
sljit_sw name_entry_size;
|
||
|
|
||
|
/* Labels and jump lists. */
|
||
|
struct sljit_label *partialmatchlabel;
|
||
|
struct sljit_label *quit_label;
|
||
|
struct sljit_label *forced_quit_label;
|
||
|
struct sljit_label *accept_label;
|
||
|
struct sljit_label *ff_newline_shortcut;
|
||
|
stub_list *stubs;
|
||
|
label_addr_list *label_addrs;
|
||
|
recurse_entry *entries;
|
||
|
recurse_entry *currententry;
|
||
|
jump_list *partialmatch;
|
||
|
jump_list *quit;
|
||
|
jump_list *positive_assert_quit;
|
||
|
jump_list *forced_quit;
|
||
|
jump_list *accept;
|
||
|
jump_list *calllimit;
|
||
|
jump_list *stackalloc;
|
||
|
jump_list *revertframes;
|
||
|
jump_list *wordboundary;
|
||
|
jump_list *anynewline;
|
||
|
jump_list *hspace;
|
||
|
jump_list *vspace;
|
||
|
jump_list *casefulcmp;
|
||
|
jump_list *caselesscmp;
|
||
|
jump_list *reset_match;
|
||
|
BOOL jscript_compat;
|
||
|
#ifdef SUPPORT_UTF
|
||
|
BOOL utf;
|
||
|
#ifdef SUPPORT_UCP
|
||
|
BOOL use_ucp;
|
||
|
jump_list *getucd;
|
||
|
#endif
|
||
|
#ifdef COMPILE_PCRE8
|
||
|
jump_list *utfreadchar;
|
||
|
jump_list *utfreadchar16;
|
||
|
jump_list *utfreadtype8;
|
||
|
#endif
|
||
|
#endif /* SUPPORT_UTF */
|
||
|
} compiler_common;
|
||
|
|
||
|
/* For byte_sequence_compare. */
|
||
|
|
||
|
typedef struct compare_context {
|
||
|
int length;
|
||
|
int sourcereg;
|
||
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
|
||
|
int ucharptr;
|
||
|
union {
|
||
|
sljit_s32 asint;
|
||
|
sljit_u16 asushort;
|
||
|
#if defined COMPILE_PCRE8
|
||
|
sljit_u8 asbyte;
|
||
|
sljit_u8 asuchars[4];
|
||
|
#elif defined COMPILE_PCRE16
|
||
|
sljit_u16 asuchars[2];
|
||
|
#elif defined COMPILE_PCRE32
|
||
|
sljit_u32 asuchars[1];
|
||
|
#endif
|
||
|
} c;
|
||
|
union {
|
||
|
sljit_s32 asint;
|
||
|
sljit_u16 asushort;
|
||
|
#if defined COMPILE_PCRE8
|
||
|
sljit_u8 asbyte;
|
||
|
sljit_u8 asuchars[4];
|
||
|
#elif defined COMPILE_PCRE16
|
||
|
sljit_u16 asuchars[2];
|
||
|
#elif defined COMPILE_PCRE32
|
||
|
sljit_u32 asuchars[1];
|
||
|
#endif
|
||
|
} oc;
|
||
|
#endif
|
||
|
} compare_context;
|
||
|
|
||
|
/* Undefine sljit macros. */
|
||
|
#undef CMP
|
||
|
|
||
|
/* Used for accessing the elements of the stack. */
|
||
|
#define STACK(i) ((i) * (int)sizeof(sljit_sw))
|
||
|
|
||
|
#ifdef SLJIT_PREF_SHIFT_REG
|
||
|
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
|
||
|
/* Nothing. */
|
||
|
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
|
||
|
#define SHIFT_REG_IS_R3
|
||
|
#else
|
||
|
#error "Unsupported shift register"
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#define TMP1 SLJIT_R0
|
||
|
#ifdef SHIFT_REG_IS_R3
|
||
|
#define TMP2 SLJIT_R3
|
||
|
#define TMP3 SLJIT_R2
|
||
|
#else
|
||
|
#define TMP2 SLJIT_R2
|
||
|
#define TMP3 SLJIT_R3
|
||
|
#endif
|
||
|
#define STR_PTR SLJIT_S0
|
||
|
#define STR_END SLJIT_S1
|
||
|
#define STACK_TOP SLJIT_R1
|
||
|
#define STACK_LIMIT SLJIT_S2
|
||
|
#define COUNT_MATCH SLJIT_S3
|
||
|
#define ARGUMENTS SLJIT_S4
|
||
|
#define RETURN_ADDR SLJIT_R4
|
||
|
|
||
|
/* Local space layout. */
|
||
|
/* These two locals can be used by the current opcode. */
|
||
|
#define LOCALS0 (0 * sizeof(sljit_sw))
|
||
|
#define LOCALS1 (1 * sizeof(sljit_sw))
|
||
|
/* Two local variables for possessive quantifiers (char1 cannot use them). */
|
||
|
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
|
||
|
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
|
||
|
/* Max limit of recursions. */
|
||
|
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
|
||
|
/* The output vector is stored on the stack, and contains pointers
|
||
|
to characters. The vector data is divided into two groups: the first
|
||
|
group contains the start / end character pointers, and the second is
|
||
|
the start pointers when the end of the capturing group has not yet reached. */
|
||
|
#define OVECTOR_START (common->ovector_start)
|
||
|
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
|
||
|
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
|
||
|
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
|
||
|
|
||
|
#if defined COMPILE_PCRE8
|
||
|
#define MOV_UCHAR SLJIT_MOV_U8
|
||
|
#elif defined COMPILE_PCRE16
|
||
|
#define MOV_UCHAR SLJIT_MOV_U16
|
||
|
#elif defined COMPILE_PCRE32
|
||
|
#define MOV_UCHAR SLJIT_MOV_U32
|
||
|
#else
|
||
|
#error Unsupported compiling mode
|
||
|
#endif
|
||
|
|
||
|
/* Shortcuts. */
|
||
|
#define DEFINE_COMPILER \
|
||
|
struct sljit_compiler *compiler = common->compiler
|
||
|
#define OP1(op, dst, dstw, src, srcw) \
|
||
|
sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
|
||
|
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
|
||
|
sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
|
||
|
#define LABEL() \
|
||
|
sljit_emit_label(compiler)
|
||
|
#define JUMP(type) \
|
||
|
sljit_emit_jump(compiler, (type))
|
||
|
#define JUMPTO(type, label) \
|
||
|
sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
|
||
|
#define JUMPHERE(jump) \
|
||
|
sljit_set_label((jump), sljit_emit_label(compiler))
|
||
|
#define SET_LABEL(jump, label) \
|
||
|
sljit_set_label((jump), (label))
|
||
|
#define CMP(type, src1, src1w, src2, src2w) \
|
||
|
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
|
||
|
#define CMPTO(type, src1, src1w, src2, src2w, label) \
|
||
|
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
|
||
|
#define OP_FLAGS(op, dst, dstw, type) \
|
||
|
sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
|
||
|
#define GET_LOCAL_BASE(dst, dstw, offset) \
|
||
|
sljit_get_local_base(compiler, (dst), (dstw), (offset))
|
||
|
|
||
|
#define READ_CHAR_MAX 0x7fffffff
|
||
|
|
||
|
#define INVALID_UTF_CHAR 888
|
||
|
|
||
|
static pcre_uchar *bracketend(pcre_uchar *cc)
|
||
|
{
|
||
|
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
|
||
|
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||
|
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
|
||
|
cc += 1 + LINK_SIZE;
|
||
|
return cc;
|
||
|
}
|
||
|
|
||
|
static int no_alternatives(pcre_uchar *cc)
|
||
|
{
|
||
|
int count = 0;
|
||
|
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
|
||
|
do
|
||
|
{
|
||
|
cc += GET(cc, 1);
|
||
|
count++;
|
||
|
}
|
||
|
while (*cc == OP_ALT);
|
||
|
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
|
||
|
return count;
|
||
|
}
|
||
|
|
||
|
/* Functions whose might need modification for all new supported opcodes:
|
||
|
next_opcode
|
||
|
check_opcode_types
|
||
|
set_private_data_ptrs
|
||
|
get_framesize
|
||
|
init_frame
|
||
|
get_private_data_copy_length
|
||
|
copy_private_data
|
||
|
compile_matchingpath
|
||
|
compile_backtrackingpath
|
||
|
*/
|
||
|
|
||
|
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
|
||
|
{
|
||
|
SLJIT_UNUSED_ARG(common);
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_SOD:
|
||
|
case OP_SOM:
|
||
|
case OP_SET_SOM:
|
||
|
case OP_NOT_WORD_BOUNDARY:
|
||
|
case OP_WORD_BOUNDARY:
|
||
|
case OP_NOT_DIGIT:
|
||
|
case OP_DIGIT:
|
||
|
case OP_NOT_WHITESPACE:
|
||
|
case OP_WHITESPACE:
|
||
|
case OP_NOT_WORDCHAR:
|
||
|
case OP_WORDCHAR:
|
||
|
case OP_ANY:
|
||
|
case OP_ALLANY:
|
||
|
case OP_NOTPROP:
|
||
|
case OP_PROP:
|
||
|
case OP_ANYNL:
|
||
|
case OP_NOT_HSPACE:
|
||
|
case OP_HSPACE:
|
||
|
case OP_NOT_VSPACE:
|
||
|
case OP_VSPACE:
|
||
|
case OP_EXTUNI:
|
||
|
case OP_EODN:
|
||
|
case OP_EOD:
|
||
|
case OP_CIRC:
|
||
|
case OP_CIRCM:
|
||
|
case OP_DOLL:
|
||
|
case OP_DOLLM:
|
||
|
case OP_CRSTAR:
|
||
|
case OP_CRMINSTAR:
|
||
|
case OP_CRPLUS:
|
||
|
case OP_CRMINPLUS:
|
||
|
case OP_CRQUERY:
|
||
|
case OP_CRMINQUERY:
|
||
|
case OP_CRRANGE:
|
||
|
case OP_CRMINRANGE:
|
||
|
case OP_CRPOSSTAR:
|
||
|
case OP_CRPOSPLUS:
|
||
|
case OP_CRPOSQUERY:
|
||
|
case OP_CRPOSRANGE:
|
||
|
case OP_CLASS:
|
||
|
case OP_NCLASS:
|
||
|
case OP_REF:
|
||
|
case OP_REFI:
|
||
|
case OP_DNREF:
|
||
|
case OP_DNREFI:
|
||
|
case OP_RECURSE:
|
||
|
case OP_CALLOUT:
|
||
|
case OP_ALT:
|
||
|
case OP_KET:
|
||
|
case OP_KETRMAX:
|
||
|
case OP_KETRMIN:
|
||
|
case OP_KETRPOS:
|
||
|
case OP_REVERSE:
|
||
|
case OP_ASSERT:
|
||
|
case OP_ASSERT_NOT:
|
||
|
case OP_ASSERTBACK:
|
||
|
case OP_ASSERTBACK_NOT:
|
||
|
case OP_ONCE:
|
||
|
case OP_ONCE_NC:
|
||
|
case OP_BRA:
|
||
|
case OP_BRAPOS:
|
||
|
case OP_CBRA:
|
||
|
case OP_CBRAPOS:
|
||
|
case OP_COND:
|
||
|
case OP_SBRA:
|
||
|
case OP_SBRAPOS:
|
||
|
case OP_SCBRA:
|
||
|
case OP_SCBRAPOS:
|
||
|
case OP_SCOND:
|
||
|
case OP_CREF:
|
||
|
case OP_DNCREF:
|
||
|
case OP_RREF:
|
||
|
case OP_DNRREF:
|
||
|
case OP_DEF:
|
||
|
case OP_BRAZERO:
|
||
|
case OP_BRAMINZERO:
|
||
|
case OP_BRAPOSZERO:
|
||
|
case OP_PRUNE:
|
||
|
case OP_SKIP:
|
||
|
case OP_THEN:
|
||
|
case OP_COMMIT:
|
||
|
case OP_FAIL:
|
||
|
case OP_ACCEPT:
|
||
|
case OP_ASSERT_ACCEPT:
|
||
|
case OP_CLOSE:
|
||
|
case OP_SKIPZERO:
|
||
|
return cc + PRIV(OP_lengths)[*cc];
|
||
|
|
||
|
case OP_CHAR:
|
||
|
case OP_CHARI:
|
||
|
case OP_NOT:
|
||
|
case OP_NOTI:
|
||
|
case OP_STAR:
|
||
|
case OP_MINSTAR:
|
||
|
case OP_PLUS:
|
||
|
case OP_MINPLUS:
|
||
|
case OP_QUERY:
|
||
|
case OP_MINQUERY:
|
||
|
case OP_UPTO:
|
||
|
case OP_MINUPTO:
|
||
|
case OP_EXACT:
|
||
|
case OP_POSSTAR:
|
||
|
case OP_POSPLUS:
|
||
|
case OP_POSQUERY:
|
||
|
case OP_POSUPTO:
|
||
|
case OP_STARI:
|
||
|
case OP_MINSTARI:
|
||
|
case OP_PLUSI:
|
||
|
case OP_MINPLUSI:
|
||
|
case OP_QUERYI:
|
||
|
case OP_MINQUERYI:
|
||
|
case OP_UPTOI:
|
||
|
case OP_MINUPTOI:
|
||
|
case OP_EXACTI:
|
||
|
case OP_POSSTARI:
|
||
|
case OP_POSPLUSI:
|
||
|
case OP_POSQUERYI:
|
||
|
case OP_POSUPTOI:
|
||
|
case OP_NOTSTAR:
|
||
|
case OP_NOTMINSTAR:
|
||
|
case OP_NOTPLUS:
|
||
|
case OP_NOTMINPLUS:
|
||
|
case OP_NOTQUERY:
|
||
|
case OP_NOTMINQUERY:
|
||
|
case OP_NOTUPTO:
|
||
|
case OP_NOTMINUPTO:
|
||
|
case OP_NOTEXACT:
|
||
|
case OP_NOTPOSSTAR:
|
||
|
case OP_NOTPOSPLUS:
|
||
|
case OP_NOTPOSQUERY:
|
||
|
case OP_NOTPOSUPTO:
|
||
|
case OP_NOTSTARI:
|
||
|
case OP_NOTMINSTARI:
|
||
|
case OP_NOTPLUSI:
|
||
|
case OP_NOTMINPLUSI:
|
||
|
case OP_NOTQUERYI:
|
||
|
case OP_NOTMINQUERYI:
|
||
|
case OP_NOTUPTOI:
|
||
|
case OP_NOTMINUPTOI:
|
||
|
case OP_NOTEXACTI:
|
||
|
case OP_NOTPOSSTARI:
|
||
|
case OP_NOTPOSPLUSI:
|
||
|
case OP_NOTPOSQUERYI:
|
||
|
case OP_NOTPOSUPTOI:
|
||
|
cc += PRIV(OP_lengths)[*cc];
|
||
|
#ifdef SUPPORT_UTF
|
||
|
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||
|
#endif
|
||
|
return cc;
|
||
|
|
||
|
/* Special cases. */
|
||
|
case OP_TYPESTAR:
|
||
|
case OP_TYPEMINSTAR:
|
||
|
case OP_TYPEPLUS:
|
||
|
case OP_TYPEMINPLUS:
|
||
|
case OP_TYPEQUERY:
|
||
|
case OP_TYPEMINQUERY:
|
||
|
case OP_TYPEUPTO:
|
||
|
case OP_TYPEMINUPTO:
|
||
|
case OP_TYPEEXACT:
|
||
|
case OP_TYPEPOSSTAR:
|
||
|
case OP_TYPEPOSPLUS:
|
||
|
case OP_TYPEPOSQUERY:
|
||
|
case OP_TYPEPOSUPTO:
|
||
|
return cc + PRIV(OP_lengths)[*cc] - 1;
|
||
|
|
||
|
case OP_ANYBYTE:
|
||
|
#ifdef SUPPORT_UTF
|
||
|
if (common->utf) return NULL;
|
||
|
#endif
|
||
|
return cc + 1;
|
||
|
|
||
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||
|
case OP_XCLASS:
|
||
|
return cc + GET(cc, 1);
|
||
|
#endif
|
||
|
|
||
|
case OP_MARK:
|
||
|
case OP_PRUNE_ARG:
|
||
|
case OP_SKIP_ARG:
|
||
|
case OP_THEN_ARG:
|
||
|
return cc + 1 + 2 + cc[1];
|
||
|
|
||
|
default:
|
||
|
/* All opcodes are supported now! */
|
||
|
SLJIT_UNREACHABLE();
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
|
||
|
{
|
||
|
int count;
|
||
|
pcre_uchar *slot;
|
||
|
pcre_uchar *assert_back_end = cc - 1;
|
||
|
|
||
|
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
|
||
|
while (cc < ccend)
|
||
|
{
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_SET_SOM:
|
||
|
common->has_set_som = TRUE;
|
||
|
common->might_be_empty = TRUE;
|
||
|
cc += 1;
|
||
|
break;
|
||
|
|
||
|
case OP_REF:
|
||
|
case OP_REFI:
|
||
|
common->optimized_cbracket[GET2(cc, 1)] = 0;
|
||
|
cc += 1 + IMM2_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_CBRAPOS:
|
||
|
case OP_SCBRAPOS:
|
||
|
common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
|
||
|
cc += 1 + LINK_SIZE + IMM2_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_COND:
|
||
|
case OP_SCOND:
|
||
|
/* Only AUTO_CALLOUT can insert this opcode. We do
|
||
|
not intend to support this case. */
|
||
|
if (cc[1 + LINK_SIZE] == OP_CALLOUT)
|
||
|
return FALSE;
|
||
|
cc += 1 + LINK_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_CREF:
|
||
|
common->optimized_cbracket[GET2(cc, 1)] = 0;
|
||
|
cc += 1 + IMM2_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_DNREF:
|
||
|
case OP_DNREFI:
|
||
|
case OP_DNCREF:
|
||
|
count = GET2(cc, 1 + IMM2_SIZE);
|
||
|
slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
|
||
|
while (count-- > 0)
|
||
|
{
|
||
|
common->optimized_cbracket[GET2(slot, 0)] = 0;
|
||
|
slot += common->name_entry_size;
|
||
|
}
|
||
|
cc += 1 + 2 * IMM2_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_RECURSE:
|
||
|
/* Set its value only once. */
|
||
|
if (common->recursive_head_ptr == 0)
|
||
|
{
|
||
|
common->recursive_head_ptr = common->ovector_start;
|
||
|
common->ovector_start += sizeof(sljit_sw);
|
||
|
}
|
||
|
cc += 1 + LINK_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_CALLOUT:
|
||
|
if (common->capture_last_ptr == 0)
|
||
|
{
|
||
|
common->capture_last_ptr = common->ovector_start;
|
||
|
common->ovector_start += sizeof(sljit_sw);
|
||
|
}
|
||
|
cc += 2 + 2 * LINK_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_ASSERTBACK:
|
||
|
slot = bracketend(cc);
|
||
|
if (slot > assert_back_end)
|
||
|
assert_back_end = slot;
|
||
|
cc += 1 + LINK_SIZE;
|
||
|
break;
|
||
|
|
||
|
case OP_THEN_ARG:
|
||
|
common->has_then = TRUE;
|
||
|
common->control_head_ptr = 1;
|
||
|
/* Fall through. */
|
||
|
|
||
|
case OP_PRUNE_ARG:
|
||
|
case OP_MARK:
|
||
|
if (common->mark_ptr == 0)
|
||
|
{
|
||
|
common->mark_ptr = common->ovector_start;
|
||
|
common->ovector_start += sizeof(sljit_sw);
|
||
|
}
|
||
|
cc += 1 + 2 + cc[1];
|
||
|
break;
|
||
|
|
||
|
case OP_THEN:
|
||
|
common->has_then = TRUE;
|
||
|
common->control_head_ptr = 1;
|
||
|
cc += 1;
|
||
|
break;
|
||
|
|
||
|
case OP_SKIP:
|
||
|
if (cc < assert_back_end)
|
||
|
common->has_skip_in_assert_back = TRUE;
|
||
|
cc += 1;
|
||
|
break;
|
||
|
|
||
|
case OP_SKIP_ARG:
|
||
|
common->control_head_ptr = 1;
|
||
|
common->has_skip_arg = TRUE;
|
||
|
if (cc < assert_back_end)
|
||
|
common->has_skip_in_assert_back = TRUE;
|
||
|
cc += 1 + 2 + cc[1];
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
cc = next_opcode(common, cc);
|
||
|
if (cc == NULL)
|
||
|
return FALSE;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
static BOOL is_accelerated_repeat(pcre_uchar *cc)
|
||
|
{
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_TYPESTAR:
|
||
|
case OP_TYPEMINSTAR:
|
||
|
case OP_TYPEPLUS:
|
||
|
case OP_TYPEMINPLUS:
|
||
|
case OP_TYPEPOSSTAR:
|
||
|
case OP_TYPEPOSPLUS:
|
||
|
return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
|
||
|
|
||
|
case OP_STAR:
|
||
|
case OP_MINSTAR:
|
||
|
case OP_PLUS:
|
||
|
case OP_MINPLUS:
|
||
|
case OP_POSSTAR:
|
||
|
case OP_POSPLUS:
|
||
|
|
||
|
case OP_STARI:
|
||
|
case OP_MINSTARI:
|
||
|
case OP_PLUSI:
|
||
|
case OP_MINPLUSI:
|
||
|
case OP_POSSTARI:
|
||
|
case OP_POSPLUSI:
|
||
|
|
||
|
case OP_NOTSTAR:
|
||
|
case OP_NOTMINSTAR:
|
||
|
case OP_NOTPLUS:
|
||
|
case OP_NOTMINPLUS:
|
||
|
case OP_NOTPOSSTAR:
|
||
|
case OP_NOTPOSPLUS:
|
||
|
|
||
|
case OP_NOTSTARI:
|
||
|
case OP_NOTMINSTARI:
|
||
|
case OP_NOTPLUSI:
|
||
|
case OP_NOTMINPLUSI:
|
||
|
case OP_NOTPOSSTARI:
|
||
|
case OP_NOTPOSPLUSI:
|
||
|
return TRUE;
|
||
|
|
||
|
case OP_CLASS:
|
||
|
case OP_NCLASS:
|
||
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||
|
case OP_XCLASS:
|
||
|
cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
|
||
|
#else
|
||
|
cc += (1 + (32 / sizeof(pcre_uchar)));
|
||
|
#endif
|
||
|
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_CRSTAR:
|
||
|
case OP_CRMINSTAR:
|
||
|
case OP_CRPLUS:
|
||
|
case OP_CRMINPLUS:
|
||
|
case OP_CRPOSSTAR:
|
||
|
case OP_CRPOSPLUS:
|
||
|
return TRUE;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
|
||
|
{
|
||
|
pcre_uchar *cc = common->start;
|
||
|
pcre_uchar *end;
|
||
|
|
||
|
/* Skip not repeated brackets. */
|
||
|
while (TRUE)
|
||
|
{
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_SOD:
|
||
|
case OP_SOM:
|
||
|
case OP_SET_SOM:
|
||
|
case OP_NOT_WORD_BOUNDARY:
|
||
|
case OP_WORD_BOUNDARY:
|
||
|
case OP_EODN:
|
||
|
case OP_EOD:
|
||
|
case OP_CIRC:
|
||
|
case OP_CIRCM:
|
||
|
case OP_DOLL:
|
||
|
case OP_DOLLM:
|
||
|
/* Zero width assertions. */
|
||
|
cc++;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (*cc != OP_BRA && *cc != OP_CBRA)
|
||
|
break;
|
||
|
|
||
|
end = cc + GET(cc, 1);
|
||
|
if (*end != OP_KET || PRIVATE_DATA(end) != 0)
|
||
|
return FALSE;
|
||
|
if (*cc == OP_CBRA)
|
||
|
{
|
||
|
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
|
||
|
return FALSE;
|
||
|
cc += IMM2_SIZE;
|
||
|
}
|
||
|
cc += 1 + LINK_SIZE;
|
||
|
}
|
||
|
|
||
|
if (is_accelerated_repeat(cc))
|
||
|
{
|
||
|
common->fast_forward_bc_ptr = cc;
|
||
|
common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
|
||
|
*private_data_start += sizeof(sljit_sw);
|
||
|
return TRUE;
|
||
|
}
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
|
||
|
{
|
||
|
pcre_uchar *next_alt;
|
||
|
|
||
|
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
|
||
|
|
||
|
if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
|
||
|
return;
|
||
|
|
||
|
next_alt = bracketend(cc) - (1 + LINK_SIZE);
|
||
|
if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
|
||
|
return;
|
||
|
|
||
|
do
|
||
|
{
|
||
|
next_alt = cc + GET(cc, 1);
|
||
|
|
||
|
cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
|
||
|
|
||
|
while (TRUE)
|
||
|
{
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_SOD:
|
||
|
case OP_SOM:
|
||
|
case OP_SET_SOM:
|
||
|
case OP_NOT_WORD_BOUNDARY:
|
||
|
case OP_WORD_BOUNDARY:
|
||
|
case OP_EODN:
|
||
|
case OP_EOD:
|
||
|
case OP_CIRC:
|
||
|
case OP_CIRCM:
|
||
|
case OP_DOLL:
|
||
|
case OP_DOLLM:
|
||
|
/* Zero width assertions. */
|
||
|
cc++;
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
|
||
|
detect_fast_fail(common, cc, private_data_start, depth - 1);
|
||
|
|
||
|
if (is_accelerated_repeat(cc))
|
||
|
{
|
||
|
common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
|
||
|
|
||
|
if (common->fast_fail_start_ptr == 0)
|
||
|
common->fast_fail_start_ptr = *private_data_start;
|
||
|
|
||
|
*private_data_start += sizeof(sljit_sw);
|
||
|
common->fast_fail_end_ptr = *private_data_start;
|
||
|
|
||
|
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
cc = next_alt;
|
||
|
}
|
||
|
while (*cc == OP_ALT);
|
||
|
}
|
||
|
|
||
|
static int get_class_iterator_size(pcre_uchar *cc)
|
||
|
{
|
||
|
sljit_u32 min;
|
||
|
sljit_u32 max;
|
||
|
switch(*cc)
|
||
|
{
|
||
|
case OP_CRSTAR:
|
||
|
case OP_CRPLUS:
|
||
|
return 2;
|
||
|
|
||
|
case OP_CRMINSTAR:
|
||
|
case OP_CRMINPLUS:
|
||
|
case OP_CRQUERY:
|
||
|
case OP_CRMINQUERY:
|
||
|
return 1;
|
||
|
|
||
|
case OP_CRRANGE:
|
||
|
case OP_CRMINRANGE:
|
||
|
min = GET2(cc, 1);
|
||
|
max = GET2(cc, 1 + IMM2_SIZE);
|
||
|
if (max == 0)
|
||
|
return (*cc == OP_CRRANGE) ? 2 : 1;
|
||
|
max -= min;
|
||
|
if (max > 2)
|
||
|
max = 2;
|
||
|
return max;
|
||
|
|
||
|
default:
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
|
||
|
{
|
||
|
pcre_uchar *end = bracketend(begin);
|
||
|
pcre_uchar *next;
|
||
|
pcre_uchar *next_end;
|
||
|
pcre_uchar *max_end;
|
||
|
pcre_uchar type;
|
||
|
sljit_sw length = end - begin;
|
||
|
int min, max, i;
|
||
|
|
||
|
/* Detect fixed iterations first. */
|
||
|
if (end[-(1 + LINK_SIZE)] != OP_KET)
|
||
|
return FALSE;
|
||
|
|
||
|
/* Already detected repeat. */
|
||
|
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
|
||
|
return TRUE;
|
||
|
|
||
|
next = end;
|
||
|
min = 1;
|
||
|
while (1)
|
||
|
{
|
||
|
if (*next != *begin)
|
||
|
break;
|
||
|
next_end = bracketend(next);
|
||
|
if (next_end - next != length || memcmp(begin, next, |