11914 lines
356 KiB
C
11914 lines
356 KiB
C
/*************************************************
|
|
* Perl-Compatible Regular Expressions *
|
|
*************************************************/
|
|
|
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
and semantics are as close as possible to those of the Perl 5 language.
|
|
|
|
Written by Philip Hazel
|
|
Copyright (c) 1997-2013 University of Cambridge
|
|
|
|
The machine code generator part (this module) was written by Zoltan Herczeg
|
|
Copyright (c) 2010-2013
|
|
|
|
-----------------------------------------------------------------------------
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
-----------------------------------------------------------------------------
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "pcre_internal.h"
|
|
|
|
#if defined SUPPORT_JIT
|
|
|
|
/* All-in-one: Since we use the JIT compiler only from here,
|
|
we just include it. This way we don't need to touch the build
|
|
system files. */
|
|
|
|
#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
|
|
#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
|
|
#define SLJIT_CONFIG_AUTO 1
|
|
#define SLJIT_CONFIG_STATIC 1
|
|
#define SLJIT_VERBOSE 0
|
|
#define SLJIT_DEBUG 0
|
|
|
|
#include "sljit/sljitLir.c"
|
|
|
|
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
|
|
#error Unsupported architecture
|
|
#endif
|
|
|
|
/* Defines for debugging purposes. */
|
|
|
|
/* 1 - Use unoptimized capturing brackets.
|
|
2 - Enable capture_last_ptr (includes option 1). */
|
|
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
|
|
|
|
/* 1 - Always have a control head. */
|
|
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
|
|
|
|
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
|
|
|
|
/*
|
|
Short summary about the backtracking mechanism employed by the JIT code generator:
|
|
|
|
The code generator follows the recursive nature of the PERL compatible regular
|
|
expressions. The basic blocks of regular expressions are condition checkers
|
|
which execute different commands depending on the result of the condition check.
|
|
The relationship between the operators can be horizontal (concatenation) and
|
|
vertical (sub-expression) (See struct backtrack_common for more details).
|
|
|
|
'ab' - 'a' and 'b' regexps are concatenated
|
|
'a+' - 'a' is the sub-expression of the '+' operator
|
|
|
|
The condition checkers are boolean (true/false) checkers. Machine code is generated
|
|
for the checker itself and for the actions depending on the result of the checker.
|
|
The 'true' case is called as the matching path (expected path), and the other is called as
|
|
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
|
|
branches on the matching path.
|
|
|
|
Greedy star operator (*) :
|
|
Matching path: match happens.
|
|
Backtrack path: match failed.
|
|
Non-greedy star operator (*?) :
|
|
Matching path: no need to perform a match.
|
|
Backtrack path: match is required.
|
|
|
|
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
|
|
we have the following regular expression:
|
|
|
|
A(B|C)D
|
|
|
|
The generated code will be the following:
|
|
|
|
A matching path
|
|
'(' matching path (pushing arguments to the stack)
|
|
B matching path
|
|
')' matching path (pushing arguments to the stack)
|
|
D matching path
|
|
return with successful match
|
|
|
|
D backtrack path
|
|
')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
|
|
B backtrack path
|
|
C expected path
|
|
jump to D matching path
|
|
C backtrack path
|
|
A backtrack path
|
|
|
|
Notice, that the order of backtrack code paths are the opposite of the fast
|
|
code paths. In this way the topmost value on the stack always belongs
|
|
to the current backtrack code path. The backtrack path must check
|
|
whether there is a next alternative. If so, it needs to jump back to
|
|
the matching path eventually. Otherwise it needs to clear out its own stack
|
|
frame and continue the execution on the backtrack code paths.
|
|
*/
|
|
|
|
/*
|
|
Saved stack frames:
|
|
|
|
Atomic blocks and asserts require reloading the values of private data
|
|
when backtracking is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
|
|
mechanism.
|
|
|
|
The stack frames are stored in a chain list, and have the following format:
|
|
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
|
|
|
|
Thus we can restore the private data to a particular point in the stack.
|
|
*/
|
|
|
|
typedef struct jit_arguments {
|
|
/* Pointers first. */
|
|
struct sljit_stack *stack;
|
|
const pcre_uchar *str;
|
|
const pcre_uchar *begin;
|
|
const pcre_uchar *end;
|
|
int *offsets;
|
|
pcre_uchar *mark_ptr;
|
|
void *callout_data;
|
|
/* Everything else after. */
|
|
sljit_u32 limit_match;
|
|
int real_offset_count;
|
|
int offset_count;
|
|
sljit_u8 notbol;
|
|
sljit_u8 noteol;
|
|
sljit_u8 notempty;
|
|
sljit_u8 notempty_atstart;
|
|
} jit_arguments;
|
|
|
|
typedef struct executable_functions {
|
|
void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
|
|
void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
|
|
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
|
|
PUBL(jit_callback) callback;
|
|
void *userdata;
|
|
sljit_u32 top_bracket;
|
|
sljit_u32 limit_match;
|
|
} executable_functions;
|
|
|
|
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
|
|
|
|
/* Singly linked list node pairing a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
|
|
|
|
typedef struct label_addr_list {
|
|
struct sljit_label *label;
|
|
sljit_uw *update_addr;
|
|
struct label_addr_list *next;
|
|
} label_addr_list;
|
|
|
|
/* Special (negative) frame size markers. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
|
|
|
|
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
|
|
|
|
/* Signature of a generated matcher: takes the argument block, returns int. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
|
|
|
|
/* The following structure is the key data type for the recursive
|
|
code generator. It is allocated by compile_matchingpath, and contains
|
|
the arguments for compile_backtrackingpath. Must be the first member
|
|
of its descendants. */
|
|
typedef struct backtrack_common {
|
|
/* Concatenation stack. */
|
|
struct backtrack_common *prev;
|
|
jump_list *nextbacktracks;
|
|
/* Internal stack (for component operators). */
|
|
struct backtrack_common *top;
|
|
jump_list *topbacktracks;
|
|
/* Opcode pointer. */
|
|
pcre_uchar *cc;
|
|
} backtrack_common;
|
|
|
|
typedef struct assert_backtrack {
|
|
backtrack_common common;
|
|
jump_list *condfailed;
|
|
/* Less than 0 if a frame is not needed. */
|
|
int framesize;
|
|
/* Points to our private memory word on the stack. */
|
|
int private_data_ptr;
|
|
/* For iterators. */
|
|
struct sljit_label *matchingpath;
|
|
} assert_backtrack;
|
|
|
|
typedef struct bracket_backtrack {
|
|
backtrack_common common;
|
|
/* Where to coninue if an alternative is successfully matched. */
|
|
struct sljit_label *alternative_matchingpath;
|
|
/* For rmin and rmax iterators. */
|
|
struct sljit_label *recursive_matchingpath;
|
|
/* For greedy ? operator. */
|
|
struct sljit_label *zero_matchingpath;
|
|
/* Contains the branches of a failed condition. */
|
|
union {
|
|
/* Both for OP_COND, OP_SCOND. */
|
|
jump_list *condfailed;
|
|
assert_backtrack *assert;
|
|
/* For OP_ONCE. Less than 0 if not needed. */
|
|
int framesize;
|
|
} u;
|
|
/* Points to our private memory word on the stack. */
|
|
int private_data_ptr;
|
|
} bracket_backtrack;
|
|
|
|
typedef struct bracketpos_backtrack {
|
|
backtrack_common common;
|
|
/* Points to our private memory word on the stack. */
|
|
int private_data_ptr;
|
|
/* Reverting stack is needed. */
|
|
int framesize;
|
|
/* Allocated stack size. */
|
|
int stacksize;
|
|
} bracketpos_backtrack;
|
|
|
|
typedef struct braminzero_backtrack {
|
|
backtrack_common common;
|
|
struct sljit_label *matchingpath;
|
|
} braminzero_backtrack;
|
|
|
|
typedef struct char_iterator_backtrack {
|
|
backtrack_common common;
|
|
/* Next iteration. */
|
|
struct sljit_label *matchingpath;
|
|
union {
|
|
jump_list *backtracks;
|
|
struct {
|
|
unsigned int othercasebit;
|
|
pcre_uchar chr;
|
|
BOOL enabled;
|
|
} charpos;
|
|
} u;
|
|
} char_iterator_backtrack;
|
|
|
|
typedef struct ref_iterator_backtrack {
|
|
backtrack_common common;
|
|
/* Next iteration. */
|
|
struct sljit_label *matchingpath;
|
|
} ref_iterator_backtrack;
|
|
|
|
typedef struct recurse_entry {
|
|
struct recurse_entry *next;
|
|
/* Contains the function entry. */
|
|
struct sljit_label *entry;
|
|
/* Collects the calls until the function is not created. */
|
|
jump_list *calls;
|
|
/* Points to the starting opcode. */
|
|
sljit_sw start;
|
|
} recurse_entry;
|
|
|
|
typedef struct recurse_backtrack {
|
|
backtrack_common common;
|
|
BOOL inlined_pattern;
|
|
} recurse_backtrack;
|
|
|
|
#define OP_THEN_TRAP OP_TABLE_LENGTH
|
|
|
|
typedef struct then_trap_backtrack {
|
|
backtrack_common common;
|
|
/* If then_trap is not NULL, this structure contains the real
|
|
then_trap for the backtracking path. */
|
|
struct then_trap_backtrack *then_trap;
|
|
/* Points to the starting opcode. */
|
|
sljit_sw start;
|
|
/* Exit point for the then opcodes of this alternative. */
|
|
jump_list *quit;
|
|
/* Frame size of the current alternative. */
|
|
int framesize;
|
|
} then_trap_backtrack;
|
|
|
|
#define MAX_RANGE_SIZE 4
|
|
|
|
typedef struct compiler_common {
|
|
/* The sljit ceneric compiler. */
|
|
struct sljit_compiler *compiler;
|
|
/* First byte code. */
|
|
pcre_uchar *start;
|
|
/* Maps private data offset to each opcode. */
|
|
sljit_s32 *private_data_ptrs;
|
|
/* Chain list of read-only data ptrs. */
|
|
void *read_only_data_head;
|
|
/* Tells whether the capturing bracket is optimized. */
|
|
sljit_u8 *optimized_cbracket;
|
|
/* Tells whether the starting offset is a target of then. */
|
|
sljit_u8 *then_offsets;
|
|
/* Current position where a THEN must jump. */
|
|
then_trap_backtrack *then_trap;
|
|
/* Starting offset of private data for capturing brackets. */
|
|
sljit_s32 cbra_ptr;
|
|
/* Output vector starting point. Must be divisible by 2. */
|
|
sljit_s32 ovector_start;
|
|
/* Points to the starting character of the current match. */
|
|
sljit_s32 start_ptr;
|
|
/* Last known position of the requested byte. */
|
|
sljit_s32 req_char_ptr;
|
|
/* Head of the last recursion. */
|
|
sljit_s32 recursive_head_ptr;
|
|
/* First inspected character for partial matching.
|
|
(Needed for avoiding zero length partial matches.) */
|
|
sljit_s32 start_used_ptr;
|
|
/* Starting pointer for partial soft matches. */
|
|
sljit_s32 hit_start;
|
|
/* Pointer of the match end position. */
|
|
sljit_s32 match_end_ptr;
|
|
/* Points to the marked string. */
|
|
sljit_s32 mark_ptr;
|
|
/* Recursive control verb management chain. */
|
|
sljit_s32 control_head_ptr;
|
|
/* Points to the last matched capture block index. */
|
|
sljit_s32 capture_last_ptr;
|
|
/* Fast forward skipping byte code pointer. */
|
|
pcre_uchar *fast_forward_bc_ptr;
|
|
/* Locals used by fast fail optimization. */
|
|
sljit_s32 fast_fail_start_ptr;
|
|
sljit_s32 fast_fail_end_ptr;
|
|
|
|
/* Flipped and lower case tables. */
|
|
const sljit_u8 *fcc;
|
|
sljit_sw lcc;
|
|
/* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
|
|
int mode;
|
|
/* TRUE, when minlength is greater than 0. */
|
|
BOOL might_be_empty;
|
|
/* \K is found in the pattern. */
|
|
BOOL has_set_som;
|
|
/* (*SKIP:arg) is found in the pattern. */
|
|
BOOL has_skip_arg;
|
|
/* (*THEN) is found in the pattern. */
|
|
BOOL has_then;
|
|
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
|
|
BOOL has_skip_in_assert_back;
|
|
/* Currently in recurse or negative assert. */
|
|
BOOL local_exit;
|
|
/* Currently in a positive assert. */
|
|
BOOL positive_assert;
|
|
/* Newline control. */
|
|
int nltype;
|
|
sljit_u32 nlmax;
|
|
sljit_u32 nlmin;
|
|
int newline;
|
|
int bsr_nltype;
|
|
sljit_u32 bsr_nlmax;
|
|
sljit_u32 bsr_nlmin;
|
|
/* Dollar endonly. */
|
|
int endonly;
|
|
/* Tables. */
|
|
sljit_sw ctypes;
|
|
/* Named capturing brackets. */
|
|
pcre_uchar *name_table;
|
|
sljit_sw name_count;
|
|
sljit_sw name_entry_size;
|
|
|
|
/* Labels and jump lists. */
|
|
struct sljit_label *partialmatchlabel;
|
|
struct sljit_label *quit_label;
|
|
struct sljit_label *forced_quit_label;
|
|
struct sljit_label *accept_label;
|
|
struct sljit_label *ff_newline_shortcut;
|
|
stub_list *stubs;
|
|
label_addr_list *label_addrs;
|
|
recurse_entry *entries;
|
|
recurse_entry *currententry;
|
|
jump_list *partialmatch;
|
|
jump_list *quit;
|
|
jump_list *positive_assert_quit;
|
|
jump_list *forced_quit;
|
|
jump_list *accept;
|
|
jump_list *calllimit;
|
|
jump_list *stackalloc;
|
|
jump_list *revertframes;
|
|
jump_list *wordboundary;
|
|
jump_list *anynewline;
|
|
jump_list *hspace;
|
|
jump_list *vspace;
|
|
jump_list *casefulcmp;
|
|
jump_list *caselesscmp;
|
|
jump_list *reset_match;
|
|
BOOL jscript_compat;
|
|
#ifdef SUPPORT_UTF
|
|
BOOL utf;
|
|
#ifdef SUPPORT_UCP
|
|
BOOL use_ucp;
|
|
jump_list *getucd;
|
|
#endif
|
|
#ifdef COMPILE_PCRE8
|
|
jump_list *utfreadchar;
|
|
jump_list *utfreadchar16;
|
|
jump_list *utfreadtype8;
|
|
#endif
|
|
#endif /* SUPPORT_UTF */
|
|
} compiler_common;
|
|
|
|
/* For byte_sequence_compare. The character buffers (c and oc) exist only
when SLJIT_UNALIGNED is defined and non-zero; the asuchars member is
sized so that it spans four bytes in every character width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
|
|
|
|
/* Undefine sljit macros. (CMP is redefined below as a shortcut.) */
#undef CMP

/* Used for accessing the elements of the stack: byte offset of the
i-th machine word. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))
|
|
|
|
/* Register aliases. When sljit reports a preferred shift register, only
SLJIT_R2 and SLJIT_R3 are accepted; if it is SLJIT_R3, TMP2 and TMP3
swap registers so that TMP2 is the preferred shift register. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_S0
#define STR_END       SLJIT_S1
#define STACK_TOP     SLJIT_R1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
|
|
|
|
/* Local space layout. All values are byte offsets in units of
sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. The macros below require a "common" variable in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
|
|
|
|
/* Move operation matching the width of one pattern/subject character. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR  SLJIT_MOV_U8
#elif defined COMPILE_PCRE16
#define MOV_UCHAR  SLJIT_MOV_U16
#elif defined COMPILE_PCRE32
#define MOV_UCHAR  SLJIT_MOV_U32
#else
#error Unsupported compiling mode
#endif
|
|
|
|
/* Shortcuts. DEFINE_COMPILER needs a "common" variable in scope; every
other macro needs the "compiler" variable that DEFINE_COMPILER declares. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
|
|
|
|
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
|
|
|
|
/* Returns the address just past a bracket-like group.

cc must point to the opening opcode (asserted to be in the range
OP_ASSERT..OP_ASSERTBACK_NOT or OP_ONCE..OP_SCOND). The link stored at
offset 1 is followed across every OP_ALT until the closing opcode
(asserted to be in OP_KET..OP_KETRPOS) is reached; the result points
after that opcode and its link (1 + LINK_SIZE units). */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do cc += GET(cc, 1); while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
cc += 1 + LINK_SIZE;
return cc;
}
|
|
|
|
/* Returns the number of alternatives of a bracket-like group.

cc must point to the opening opcode (same asserted ranges as in
bracketend). Each followed link counts as one alternative, so the result
is at least 1; counting stops at the first opcode that is not OP_ALT,
which is asserted to be a closing KET opcode. */
static int no_alternatives(pcre_uchar *cc)
{
int count = 0;
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do
  {
  cc += GET(cc, 1);
  count++;
  }
while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
|
|
|
|
/* Functions which might need modification for every newly supported opcode:
|
|
next_opcode
|
|
check_opcode_types
|
|
set_private_data_ptrs
|
|
get_framesize
|
|
init_frame
|
|
get_private_data_copy_length
|
|
copy_private_data
|
|
compile_matchingpath
|
|
compile_backtrackingpath
|
|
*/
|
|
|
|
/* Returns the address of the opcode that follows the one at cc.

The opcodes fall into these groups:
  - fixed length opcodes: advance by the PRIV(OP_lengths) table entry;
  - opcodes carrying a character: table length, plus the extra length
    units of the character in UTF mode;
  - OP_TYPE* repeats: one less than the table length;
  - OP_ANYBYTE: one unit, but NULL is returned in UTF mode;
  - OP_XCLASS: the length stored at offset 1;
  - OP_MARK and the *_ARG verbs: 1 + 2 + the length stored in cc[1].

Returns NULL for OP_ANYBYTE in UTF mode and for any opcode not listed
here (the latter also triggers SLJIT_UNREACHABLE). */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ONCE:
  case OP_ONCE_NC:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  /* cc[-1] is the last unit counted by the table length; in UTF mode
     it tells whether (and how many) extra units follow. */
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  /* NOTE(review): the result stops one unit short of the table length,
     presumably so that the trailing type opcode is visited as the next
     opcode - confirm against the callers. */
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf) return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  case OP_MARK:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
|
|
|
|
/* Scans the byte code in [cc, ccend) once and collects pattern-wide
facts into "common":
  - clears optimized_cbracket[] for every group number that is
    referenced (OP_REF*, OP_CREF, OP_DN*REF*) or opened by a possessive
    capturing bracket;
  - reserves one machine word at ovector_start (advancing it) the first
    time a recursion, callout or mark-like opcode is seen;
  - sets the has_set_som / might_be_empty / has_then / has_skip_arg /
    has_skip_in_assert_back flags and marks control_head_ptr as needed.

Returns FALSE when the pattern cannot be handled (a callout directly
inside a condition, or next_opcode() returning NULL), TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
int count;
pcre_uchar *slot;
/* End of the farthest reaching lookbehind seen so far; starts before
   the first opcode so that nothing is inside it initially. */
pcre_uchar *assert_back_end = cc - 1;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_REF:
    case OP_REFI:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* Walk "count" consecutive name table entries starting at the
       given index; each entry begins with a group number. */
    count = GET2(cc, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    while (count-- > 0)
      {
      common->optimized_cbracket[GET2(slot, 0)] = 0;
      slot += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 2 + 2 * LINK_SIZE;
    break;

    case OP_ASSERTBACK:
    slot = bracketend(cc);
    if (slot > assert_back_end)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_PRUNE_ARG:
    case OP_MARK:
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }
return TRUE;
}
|
|
|
|
/* Tells whether the opcode at cc is a star or plus repeat (greedy, lazy
or possessive) of a kind accepted by the fast forward / fast fail
detection below.

  - OP_TYPE* star/plus: TRUE unless the repeated type is OP_ANYNL or
    OP_EXTUNI;
  - single character and negated character star/plus (caseful and
    caseless): always TRUE;
  - character classes: TRUE when the opcode after the class data is a
    star/plus class repeat.

Every other opcode yields FALSE. */
static BOOL is_accelerated_repeat(pcre_uchar *cc)
{
switch(*cc)
  {
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);

  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_POSSTAR:
  case OP_POSPLUS:

  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_POSSTARI:
  case OP_POSPLUSI:

  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:

  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  return TRUE;

  case OP_CLASS:
  case OP_NCLASS:
  /* Skip the class: XCLASS stores its own length, the other two are
     followed by a 32 byte bitmap. */
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  case OP_XCLASS:
  cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
#else
  cc += (1 + (32 / sizeof(pcre_uchar)));
#endif

  switch(*cc)
    {
    case OP_CRSTAR:
    case OP_CRMINSTAR:
    case OP_CRPLUS:
    case OP_CRMINPLUS:
    case OP_CRPOSSTAR:
    case OP_CRPOSPLUS:
    return TRUE;
    }
  break;
  }
return FALSE;
}
|
|
|
|
/* Checks whether the pattern starts with an accelerated repeat.

Starting at common->start, zero width assertions are skipped and
non-repeated OP_BRA / OP_CBRA brackets are entered. A bracket is only
entered when its link leads directly to an OP_KET without private data
(which excludes brackets with alternatives) and, for OP_CBRA, when the
capture is still marked as optimized.

On success the repeat is stored in common->fast_forward_bc_ptr, one
machine word of private data is assigned to it (recorded at the unit
following the opcode), *private_data_start is advanced by that word and
TRUE is returned. Otherwise nothing is changed and FALSE is returned. */
static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
{
pcre_uchar *cc = common->start;
pcre_uchar *end;

/* Skip not repeated brackets. */
while (TRUE)
  {
  switch(*cc)
    {
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;
    }

  if (*cc != OP_BRA && *cc != OP_CBRA)
    break;

  end = cc + GET(cc, 1);
  if (*end != OP_KET || PRIVATE_DATA(end) != 0)
    return FALSE;
  if (*cc == OP_CBRA)
    {
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      return FALSE;
    cc += IMM2_SIZE;
    }
  cc += 1 + LINK_SIZE;
  }

if (is_accelerated_repeat(cc))
  {
  common->fast_forward_bc_ptr = cc;
  common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
  *private_data_start += sizeof(sljit_sw);
  return TRUE;
  }
return FALSE;
}
|
|
|
|
/* Assigns private data words to the accelerated repeats found at the
beginning of each alternative of the bracket at cc.

cc must point to OP_BRA or OP_CBRA. Nothing is done when the bracket is
a capture that is not optimized, or when its closing opcode is not a
plain OP_KET without private data. For each alternative the leading
zero width assertions are skipped; a nested OP_BRA / OP_CBRA found there
is processed recursively while depth > 0; an accelerated repeat found
there receives one machine word at *private_data_start. The range of
words handed out is recorded in common->fast_fail_start_ptr (set once)
and common->fast_fail_end_ptr. The scan stops as soon as
*private_data_start exceeds SLJIT_MAX_LOCAL_SIZE. */
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
{
  pcre_uchar *next_alt;

  SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);

  if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
    return;

  next_alt = bracketend(cc) - (1 + LINK_SIZE);
  if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
    return;

  do
    {
    next_alt = cc + GET(cc, 1);

    /* Step over the opening opcode (OP_BRA, OP_CBRA or OP_ALT). */
    cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

    while (TRUE)
      {
      switch(*cc)
        {
        case OP_SOD:
        case OP_SOM:
        case OP_SET_SOM:
        case OP_NOT_WORD_BOUNDARY:
        case OP_WORD_BOUNDARY:
        case OP_EODN:
        case OP_EOD:
        case OP_CIRC:
        case OP_CIRCM:
        case OP_DOLL:
        case OP_DOLLM:
        /* Zero width assertions. */
        cc++;
        continue;
        }
      break;
      }

    if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
      detect_fast_fail(common, cc, private_data_start, depth - 1);

    if (is_accelerated_repeat(cc))
      {
      common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;

      if (common->fast_fail_start_ptr == 0)
        common->fast_fail_start_ptr = *private_data_start;

      *private_data_start += sizeof(sljit_sw);
      common->fast_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return;
      }

    cc = next_alt;
    }
  while (*cc == OP_ALT);
}
|
|
|
|
/* Returns the iterator size for the class repeat opcode at cc
(NOTE(review): presumably counted in private data words - confirm
against the callers):

  OP_CRSTAR, OP_CRPLUS                         2
  OP_CRMINSTAR, OP_CRMINPLUS,
  OP_CRQUERY, OP_CRMINQUERY                    1
  OP_CRRANGE, OP_CRMINRANGE with max == 0      2 (greedy) or 1 (lazy)
  OP_CRRANGE, OP_CRMINRANGE otherwise          max - min, capped at 2
  anything else (including possessive repeats) 0 */
static int get_class_iterator_size(pcre_uchar *cc)
{
sljit_u32 min;
sljit_u32 max;
switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1);
  max = GET2(cc, 1 + IMM2_SIZE);
  if (max == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  max -= min;
  if (max > 2)
    max = 2;
  return max;

  default:
  return 0;
  }
}
|
|
|
|
/* Checks whether the bracket at begin is followed by byte-identical copies
of itself, optionally followed by nested BRAZERO / BRAMINZERO copies. When
such a sequence is found, three consecutive private_data_ptrs entries are
filled starting at the link field of the relevant OP_KET: the distance to
the end of the repeated sequence, the repeat type (OP_EXACT, OP_UPTO or
OP_MINUPTO) and the repeat count.

Returns TRUE when a repeat was recorded (now or by an earlier call). */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
sljit_sw length = end - begin;
sljit_sw exact_slot;
sljit_sw upto_slot;
int min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
exact_slot = end - common->start - LINK_SIZE;
if (common->private_data_ptrs[exact_slot] != 0)
  return TRUE;

/* Count the identical copies which directly follow the bracket. */
for (next = end, min = 1; *next == *begin; next = next_end, min++)
  {
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  }

/* Exactly two copies are not converted. */
if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;

  /* Count the "zero, OP_BRA, copy" levels nested into each other. */
  while (next[0] == type && next[1] == OP_BRA && next[2 + LINK_SIZE] == *begin)
    {
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost level has no wrapping OP_BRA. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapping OP_BRA must be closed by a plain OP_KET. */
      i = 0;
      while (i < max && *next_end == OP_KET)
        {
        i++;
        next_end += 1 + LINK_SIZE;
        }

      if (i == max)
        {
        upto_slot = max_end - common->start - LINK_SIZE;
        common->private_data_ptrs[upto_slot] = next_end - max_end;
        common->private_data_ptrs[upto_slot + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[upto_slot + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last fixed copy now belongs to the bounded repeat. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (min >= 3)
  {
  common->private_data_ptrs[exact_slot] = max_end - end;
  common->private_data_ptrs[exact_slot + 1] = OP_EXACT;
  common->private_data_ptrs[exact_slot + 2] = min;
  return TRUE;
  }

return FALSE;
}
|
|
|
|
/* Case label groups shared by the private data walkers which follow.
Each macro expands to a run of "case OP_xxx:" labels and must be placed
directly inside a switch on the opcode. */

/* Single character iterators for which one word is reserved. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators for which two words are reserved. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* As 2A, but the opcode is also followed by an IMM2_SIZE count. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators for which one word is reserved. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators for which up to two words are reserved. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* As TYPE_2A, but the opcode is also followed by an IMM2_SIZE count. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
|
|
|
|
/* Walks the compiled pattern from common->start up to ccend and hands out
private data offsets. The offset assigned to an opcode is stored in
common->private_data_ptrs at the index of that opcode.

  common              compiler state
  private_data_start  in/out: first free offset on entry, first free
                      offset after the assignments on return
  ccend               end of the pattern area to process

The walk stops early once the next free offset exceeds
SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* Character iterators located before this position get no private slot. */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words reserved for a character iterator.
size: distance to the next opcode; a negative value marks an opcode
  followed by a character, which may have extra units in UTF mode.
bracketlen: header length of a bracket opcode. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* The check is suppressed only for the opcode right after a BRAZERO. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the ket was stored by detect_repeat: the
    ket gets a slot and the repeated copies are skipped. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode follows the class bitmap, so the size has to be
    known before the iterator is inspected. Calling the helper with a
    zero size would examine the class opcode itself and always give 0. */
    size = 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* Same ordering requirement as for OP_CLASS above. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
  gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only brackets which do not end with a plain OP_KET keep
      blocking the private slots of their character iterators. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
|
|
|
|
/* Returns with a frame_types (always < 0) if no need for frame. */
|
|
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
|
|
{
|
|
int length = 0;
|
|
int possessive = 0;
|
|
BOOL stack_restore = FALSE;
|
|
BOOL setsom_found = recursive;
|
|
BOOL setmark_found = recursive;
|
|
/* The last capture is a local variable even for recursions. */
|
|
BOOL capture_last_found = FALSE;
|
|
|
|
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
|
|
SLJIT_ASSERT(common->control_head_ptr != 0);
|
|
*needs_control_head = TRUE;
|
|
#else
|
|
*needs_control_head = FALSE;
|
|
#endif
|
|
|
|
if (ccend == NULL)
|
|
{
|
|
ccend = bracketend(cc) - (1 + LINK_SIZE);
|
|
if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
|
|
{
|
|
possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
|
|
/* This is correct regardless of common->capture_last_ptr. */
|
|
capture_last_found = TRUE;
|
|
}
|
|
cc = next_opcode(common, cc);
|
|
}
|
|
|
|
SLJIT_ASSERT(cc != NULL);
|
|
while (cc < ccend)
|
|
switch(*cc)
|
|
{
|
|
case OP_SET_SOM:
|
|
SLJIT_ASSERT(common->has_set_som);
|
|
stack_restore = TRUE;
|
|
if (!setsom_found)
|
|
{
|
|
length += 2;
|
|
setsom_found = TRUE;
|
|
}
|
|
cc += 1;
|
|
break;
|
|
|
|
case OP_MARK:
|
|
case OP_PRUNE_ARG:
|
|
case OP_THEN_ARG:
|
|
SLJIT_ASSERT(common->mark_ptr != 0);
|
|
stack_restore = TRUE;
|
|
if (!setmark_found)
|
|
{
|
|
length += 2;
|
|
setmark_found = TRUE;
|
|
}
|
|
if (common->control_head_ptr != 0)
|
|
*needs_control_head = TRUE;
|
|
cc += 1 + 2 + cc[1];
|
|
break;
|
|
|
|
case OP_RECURSE:
|
|
stack_restore = TRUE;
|
|
if (common->has_set_som && !setsom_found)
|
|
{
|
|
length += 2;
|
|
setsom_found = TRUE;
|
|
}
|
|
if (common->mark_ptr != 0 && !setmark_found)
|
|
{
|
|
length += 2;
|
|
setmark_found = TRUE;
|
|
}
|
|
if (common->capture_last_ptr != 0 && !capture_last_found)
|
|
{
|
|
length += 2;
|
|
capture_last_found = TRUE;
|
|
}
|
|
cc += 1 + LINK_SIZE;
|
|
break;
|
|
|
|
case OP_CBRA:
|
|
case OP_CBRAPOS:
|
|
case OP_SCBRA:
|
|
case OP_SCBRAPOS:
|
|
stack_restore = TRUE;
|
|
if (common->capture_last_ptr != 0 && !capture_last_found)
|
|
{
|
|
length += 2;
|
|
capture_last_found = TRUE;
|
|
}
|
|
length += 3;
|
|
cc += 1 + LINK_SIZE + IMM2_SIZE;
|
|
break;
|
|
|
|
case OP_THEN:
|
|
stack_restore = TRUE;
|
|
if (common->control_head_ptr != 0)
|
|
*needs_control_head = TRUE;
|
|
cc ++;
|
|
break;
|
|
|
|
default:
|
|
stack_restore = TRUE;
|
|
/* Fall through. */
|
|
|
|
case OP_NOT_WORD_BOUNDARY:
|
|
case OP_WORD_BOUNDARY:
|
|
case OP_NOT_DIGIT:
|
|
case OP_DIGIT:
|
|
case OP_NOT_WHITESPACE:
|
|
case OP_WHITESPACE:
|
|
case OP_NOT_WORDCHAR:
|
|
case OP_WORDCHAR:
|
|
case OP_ANY:
|
|
case OP_ALLANY:
|
|
case OP_ANYBYTE:
|
|
case OP_NOTPROP:
|
|
case OP_PROP:
|
|
case OP_ANYNL:
|
|
case OP_NOT_HSPACE:
|
|
case OP_HSPACE:
|
|
case OP_NOT_VSPACE:
|
|
case OP_VSPACE:
|
|
case OP_EXTUNI:
|
|
case OP_EODN:
|
|
case OP_EOD:
|
|
case OP_CIRC:
|
|
case OP_CIRCM:
|
|
case OP_DOLL:
|
|
case OP_DOLLM:
|
|
case OP_CHAR:
|
|
case OP_CHARI:
|
|
case OP_NOT:
|
|
case OP_NOTI:
|
|
|
|
case OP_EXACT:
|
|
case OP_POSSTAR:
|
|
case OP_POSPLUS:
|
|
case OP_POSQUERY:
|
|
case OP_POSUPTO:
|
|
|
|
case OP_EXACTI:
|
|
case OP_POSSTARI:
|
|
case OP_POSPLUSI:
|
|
case OP_POSQUERYI:
|
|
case OP_POSUPTOI:
|
|
|
|
case OP_NOTEXACT:
|
|
case OP_NOTPOSSTAR:
|
|
case OP_NOTPOSPLUS:
|
|
case OP_NOTPOSQUERY:
|
|
case OP_NOTPOSUPTO:
|
|
|
|
case OP_NOTEXACTI:
|
|
case OP_NOTPOSSTARI:
|
|
case OP_NOTPOSPLUSI:
|
|
case OP_NOTPOSQUERYI:
|
|
case OP_NOTPOSUPTOI:
|
|
|
|
case OP_TYPEEXACT:
|
|
case OP_TYPEPOSSTAR:
|
|
case OP_TYPEPOSPLUS:
|
|
case OP_TYPEPOSQUERY:
|
|
case OP_TYPEPOSUPTO:
|
|
|
|
case OP_CLASS:
|
|
case OP_NCLASS:
|
|
case OP_XCLASS:
|
|
case OP_CALLOUT:
|
|
|
|
cc = next_opcode(common, cc);
|
|
SLJIT_ASSERT(cc != NULL);
|
|
break;
|
|
}
|
|
|
|
/* Possessive quantifiers can use a special case. */
|
|
if (SLJIT_UNLIKELY(possessive == length))
|
|
return stack_restore ? no_frame : no_stack;
|
|
|
|
if (length > 0)
|
|
return length + 1;
|
|
return stack_restore ? no_frame : no_stack;
|
|
}
|
|
|
|
/* Emits the code which fills a frame on the machine stack for the opcodes
between cc and ccend (or for the content of the bracket at cc when ccend
is NULL). The walk mirrors get_framesize: each tracked value is written
as an identifier word followed by its current content, and a zero word
closes the frame.

  stackpos  stack index of the first frame word
  stacktop  stack index at which the frame has to end (checked by an
            assertion only) */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int ovector_index;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here stackpos is an offset relative to STACK_TOP. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A non-recursive (S)CBRAPOS is kept, so it is processed below. */
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Identifier word (negated offset), then the saved value. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Save every tracked value which has not been saved yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Three words: the (positive) ovector offset and both ovector values. */
    ovector_index = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovector_index));
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }

/* A zero word terminates the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
|
|
|
|
/* Returns the number of machine words copy_private_data has to transfer
for the opcodes between cc and ccend. The count starts at 2, or at 3 when
needs_control_head is set, and grows by the private words of every opcode
which owns private data. */
static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
{
int words = needs_control_head ? 3 : 2;
int size;
pcre_uchar *alternative;

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      /* A ket with private data also stores a skip distance after it. */
      words++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words++;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      words++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    words += 2;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      words++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      words++;
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      words += 2;
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      words++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
    size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
    /* The repeat opcode, if any, is located right after the class. */
    if (PRIVATE_DATA(cc))
      words += get_class_iterator_size(cc + size);
    cc += size;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);
return words;
}
|
|
|
|
/* Emits the code which copies the private data words of the opcodes
between cc and ccend to the machine stack (save is TRUE) or back from the
machine stack (save is FALSE). The opcode walk matches
get_private_data_copy_length. TMP1 and TMP2 are used alternately as
transfer registers, so every word is loaded one step before it is stored.

  stackptr  stack index of the first word of the copy area
  stacktop  stack index which bounds the copy area */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets of the words belonging to the current opcode. They are
processed from the highest index down to index 0. */
int srcw[2];
int count, size;
/* tmp1next selects the register used for the next word; the empty flags
tell whether a register currently holds a pending word. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  loop,
  end
} status;

status = loop;
stackptr = STACK(stackptr);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The head words stored last by the save pass are not restored here. */
  stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload up to two words, so both registers are ready for the loop. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

SLJIT_ASSERT(common->recursive_head_ptr != 0);

do
  {
  count = 0;
  if (cc >= ccend)
    {
    /* End of the opcodes: only the save pass appends the head words. */
    if (!save)
      break;

    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[0] = common->control_head_ptr;
      srcw[1] = common->recursive_head_ptr;
      }
    status = end;
    }
  else
    {
    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
        cc += PRIVATE_DATA(cc + 1);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* A class with private data must be followed by a repeat which
      uses one or two words. */
      if (PRIVATE_DATA(cc))
        {
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_UNREACHABLE();
          break;
          }
        }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    }

  /* Transfer the collected words, alternating between TMP1 and TMP2. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Store the pending word of the chosen register, then load the
      next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Write the preloaded word to the local, then refill the
      register from the stack if any word is left. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the words still held in the registers, oldest first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
|
|
|
|
/* Scans the bracket at cc (recursing into nested assertions and
brackets) and sets the common->then_offsets entry of the enclosing
alternative to 1 for every opcode in the OP_THEN .. OP_THEN_ARG range.

  current_offset  entry of the alternative which currently receives the
                  mark, or NULL when there is none

Returns the end of the processed bracket. */
static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
{
pcre_uchar *bracket_end = bracketend(cc);
BOOL multiple_alts = cc[GET(cc, 1)] == OP_ALT;

/* Assert captures then. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  current_offset = NULL;
/* Conditional block does not. */
if (*cc == OP_COND || *cc == OP_SCOND)
  multiple_alts = FALSE;

cc = next_opcode(common, cc);
if (multiple_alts)
  current_offset = common->then_offsets + (cc - common->start);

while (cc < bracket_end)
  {
  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
    {
    /* Nested group: the recursive call returns its end. */
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* A new alternative starts right after the OP_ALT header. */
  if (*cc == OP_ALT && multiple_alts)
    current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
  if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
    *current_offset = 1;
  cc = next_opcode(common, cc);
  }

return bracket_end;
}
|
|
|
|
/* The case label groups are not used past this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
|
|
|
|
/* Returns non-zero when at most one bit of value is set. Note that zero
passes this test as well. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit must leave nothing behind. */
unsigned int remaining_bits = value & (value - 1);

return remaining_bits == 0;
}
|
|
|
|
/* Sets label as the target of every jump stored in list. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
for (item = list; item; item = item->next)
  SET_LABEL(item->jump, label);
}
|
|
|
|
/* Prepends jump to *list. The list item is allocated from the compiler;
when that allocation fails the list is left unchanged. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *new_item = sljit_alloc_memory(compiler, sizeof(jump_list));

if (!new_item)
  return;

new_item->jump = jump;
new_item->next = *list;
*list = new_item;
}
|
|
|
|
/* Records a stub on common->stubs: start is the jump which enters the
stub, and a label created at the current position is where the stub
continues afterwards. Nothing is recorded if the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *new_stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!new_stub)
  return;

new_stub->start = start;
new_stub->quit = LABEL();
new_stub->next = common->stubs;
common->stubs = new_stub;
}
|
|
|
|
/* Emits the code of every recorded stub and empties the stub list. Each
stub is entered through its start jump, performs a fast call which is
added to the common->stackalloc jump list, and then jumps back to its
quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }
common->stubs = NULL;
}
|
|
|
|
/* Creates a label at the current position and records it, together with
update_addr, on common->label_addrs. Nothing happens (no label is created
either) when the list item cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

if (entry != NULL)
  {
  entry->label = LABEL();
  entry->update_addr = update_addr;
  entry->next = common->label_addrs;
  common->label_addrs = entry;
  }
}
|
|
|
|
/* Emits code which decrements COUNT_MATCH by one and, when the result
is zero, takes a jump recorded on the common->calllimit list. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
|
|
|
|
/* Emits code which reserves size machine words by lowering STACK_TOP.
A stub is registered which is entered when the new STACK_TOP is below
STACK_LIMIT.

May destroy all locals and registers except TMP2. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
struct sljit_jump *limit_exceeded;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything a caller must not rely on. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
limit_exceeded = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_exceeded);
}
|
|
|
|
/* Emits code which releases size machine words by raising STACK_TOP;
the counterpart of allocate_stack. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
|
|
|
|
/* Allocates size bytes of data and links the allocation into the
common->read_only_data_head list. One extra sljit_uw at the front of the
allocation holds the list link; the returned pointer refers to the area
after it.

Returns NULL if the compiler is already in an error state, or if the
allocation fails (a compiler memory error is set in the latter case). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Push the new block to the front of the list. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;
return block + 1;
}
|
|
|
|
static void free_read_only_data(void *current, void *allocator_data)
|
|
{
|
|
void *next;
|
|
|
|
SLJIT_UNUSED_ARG(allocator_data);
|
|
|
|
while (current != NULL)
|
|
{
|
|
next = *(void**)current;
|
|
SLJIT_FREE(current, allocator_data);
|
|
current = next;
|
|
}
|
|
}
|
|
|
|
/* Emits code which stores "begin - 1" (begin is read from the
jit_arguments structure addressed by SLJIT_S0) into OVECTOR(1) ..
OVECTOR(length - 1). Short vectors are filled by individual stores,
longer ones by a loop.

At this point we can freely use all temporary registers. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* SLJIT_R0 holds begin - 1 from here on. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  return;
  }

/* First ask (SLJIT_MEM_SUPP) whether a store with pre-update
addressing is available. */
if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  {
  /* The base is one word lower here than in the fallback loop below. */
  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  loop = LABEL();
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  return;
  }

/* Fallback: plain store followed by an explicit pointer increment. */
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
|
|
|
|
/* Emits code that writes "STR_PTR - IN_UCHARS(1)" into every word-sized
stack slot in the range [fast_fail_start_ptr, fast_fail_end_ptr). The value
is computed into TMP1, which is therefore overwritten. */
static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
{
DEFINE_COMPILER;
sljit_s32 slot_offset;

SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);

OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

slot_offset = common->fast_fail_start_ptr;
while (slot_offset < common->fast_fail_end_ptr)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slot_offset, TMP1, 0);
  slot_offset += sizeof(sljit_sw);
  }
}
|
|
|
|
/* Emits code that resets the match state: OVECTOR(2) .. OVECTOR(length - 1)
are overwritten with the value held in OVECTOR(1), the mark and control head
slots (when present) are cleared, STACK_TOP is reloaded from the stack
descriptor in the arguments, and TMP1 is loaded from the start_ptr slot.

Arguments:
  common   compiler state
  length   number of ovector slots, must be greater than 1
*/
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *fill_loop;
BOOL pre_update;
int slot;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length < 8)
  {
  /* Short vector: one store per slot, no loop. */
  for (slot = 2; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call is used as a capability check: the pre-update
  store form is emitted only when it reports SLJIT_SUCCESS. */
  pre_update = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  /* TMP2 is the write pointer. With pre-update stores it starts one slot
  earlier than with the plain store + add sequence. */
  if (pre_update)
    {
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    }
  else
    {
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    }

  /* STACK_TOP serves as the loop counter; it is reloaded further down. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  fill_loop = LABEL();

  if (pre_update)
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }

/* STACK_TOP temporarily holds the arguments pointer. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
/* arguments -> stack descriptor -> its "end" field. The TMP1 load is
emitted between the two dependent loads. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
|
|
|
|
/* Walks a chain of frames looking for a mark whose name equals skip_arg.

Frame layout as used here: word 0 is the link to the next frame (0 ends the
chain), word 1 is the frame type. For type_mark frames word 2 points to the
mark name and word 3 is the value handed back to the caller.

Arguments:
  current    first frame of the chain (may be NULL)
  skip_arg   name to look for

Returns:     word 3 of the first matching type_mark frame, or 0 if none
*/
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
{
sljit_sw *frame;

for (frame = current; frame != NULL; frame = (sljit_sw *)frame[0])
  {
  if (frame[1] == type_mark)
    {
    if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)frame[2]) == 0)
      return frame[3];
    }
  else if (frame[1] != type_then_trap)
    {
    /* Only the two frame types above are expected in this chain. */
    SLJIT_UNREACHABLE();
    }

  /* A non-terminating link must point to a higher address. */
  SLJIT_ASSERT(frame[0] == 0 || frame < (sljit_sw *)frame[0]);
  }

return 0;
}
|
|
|
|
/* Emits the code that publishes the ovector to the caller: STR_PTR is stored
into OVECTOR(1), the mark slot (if any) is copied to jit_arguments.mark_ptr,
offset_count entries are converted to offsets relative to
jit_arguments.begin and written as 32 bit values to jit_arguments.offsets,
and finally the return value is computed.

Arguments:
  common      compiler state
  topbracket  when greater than 1 the return value is found by scanning
              the ovector, otherwise the generated code returns 1
*/
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop_head;
struct sljit_jump *nothing_to_copy;
BOOL pre_update;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the previous content of OVECTOR(1); ovector entries are
compared against it when the return value is computed. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* R0: arguments, R1: offset_count, R2: mark value first, then the output
pointer placed one int before the offsets array. */
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

/* The SLJIT_MEM_SUPP call is used as a capability check for pre-update
loads; with them the read pointer starts one word before the ovector. */
pre_update = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (pre_update ? sizeof(sljit_sw) : 0));

/* Unlikely, but possible */
nothing_to_copy = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop_head = LABEL();

/* Fetch the next ovector entry into SLJIT_S1. */
if (!pre_update)
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
else
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));

/* Advance the output pointer and make the entry relative to begin. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop_head);
JUMPHERE(nothing_to_copy);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

/* The ovector is scanned downwards in steps of two words. SLJIT_R1 starts
at topbracket + 1 and is decremented once per entry examined; the scan
continues while the entry equals the value saved in SLJIT_S2. */
pre_update = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS;

if (pre_update)
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  }
else
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

/* OVECTOR(0) is never equal to SLJIT_S2. */
loop_head = LABEL();
if (pre_update)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
else
  {
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
  }
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop_head);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
|
|
|
|
/* Emits the exit sequence for a partial match. The generated code sets the
return register to PCRE_ERROR_PARTIAL and jumps to quit. Before that, when
real_offset_count is at least 2, it stores offsets relative to
jit_arguments.begin into the caller's offsets array:

  offsets[0]  value of the start_used_ptr slot (hard mode) or of the
              hit_start slot (otherwise)
  offsets[1]  STR_END
  offsets[2]  only when real_offset_count is at least 3: value of the
              start_ptr slot (hard mode) or of the slot that follows
              hit_start (otherwise)

Arguments:
  common   compiler state
  quit     label the generated code jumps to when it is done
*/
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
struct sljit_jump *skip_third;
BOOL hard_mode;
sljit_sw third_slot;
sljit_sw first_slot;

/* SLJIT_S1 is used as scratch below, which overwrites STR_END. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

/* Frame slots that supply the values, selected at compile time. */
hard_mode = common->mode == JIT_PARTIAL_HARD_COMPILE;
third_slot = hard_mode ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw));
first_slot = hard_mode ? common->start_used_ptr : common->hit_start;

OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored. */
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
/* S0: subject begin, R1: output array. */
OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is written only when at least three slots are available. */
skip_third = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), third_slot, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(skip_third);

/* offsets[1] = STR_END - begin. The value for offsets[0] is loaded first. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), first_slot);
OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

/* offsets[0] = loaded value - begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
|
|
|
|
/* Emits code that replaces the value in the start_used_ptr slot with STR_PTR
unless the comparison described below decides to keep it. Nothing is emitted
unless one of the two partial matching modes is active. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *keep_value;

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  keep_value = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
  {
  /* Here the slot is compared directly: keep it if it is <= STR_PTR. */
  keep_value = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }
else
  return;

/* Both modes finish with the same store and jump target. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_value);
}
|
|
|
|
/* Detects if the character has an othercase.

Arguments:
  common   compiler state (supplies the utf flag and the fcc table)
  cc       points to the character

Returns:   TRUE when the othercase differs from the character itself
*/
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
{
unsigned int c;

#ifdef SUPPORT_UTF
if (!common->utf)
  c = *cc;
else
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
    /* Above 127 only the Unicode property data can supply an othercase. */
#ifdef SUPPORT_UCP
    return UCD_OTHERCASE(c) != c;
#else
    return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
  /* c <= 127 here, so the table can be indexed without a range check. */
  return common->fcc[c] != c;
#endif
  }
#else
c = *cc;
#endif

/* Characters for which MAX_255 fails are not looked up in the table. */
if (!MAX_255(c))
  return FALSE;
return common->fcc[c] != c;
}
|
|
|
|
/* Returns with the othercase.

Arguments:
  common   compiler state (supplies the utf flag and the fcc table)
  c        the character

Returns:   in UTF mode for c > 127: UCD_OTHERCASE(c) when SUPPORT_UCP is
           defined, otherwise c itself; in every other case the result of
           TABLE_GET on the fcc table with c as the fallback
*/
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
unsigned int other;

#ifdef SUPPORT_UTF
if (c > 127 && common->utf)
  {
#ifdef SUPPORT_UCP
  other = UCD_OTHERCASE(c);
#else
  other = c;
#endif
  return other;
  }
#endif

other = TABLE_GET(c, common->fcc, c);
return other;
}
|
|
|
|
/* Detects if the character and its othercase has only 1 bit difference.

Arguments:
  common   compiler state (supplies the utf flag and the fcc table)
  cc       points to the character; it must have an othercase

Returns:   0 when the two code points differ in more than one bit.
           Otherwise (index << 8) | mask, where mask is the differing bit
           reduced to a value below 256 and index is selected by the
           code below (0 for the plain single-unit case).
*/
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  else
    oc = common->fcc[c];
  }
else
#endif
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* n starts at GET_EXTRALEN of the first unit and is decreased once for
  every 6 bit group that has to be shifted out before the differing bit
  falls into the low 6 bits. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; n--)
    bit >>= 6;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
  {
  /* A difference in the low 10 bits is reported with index 2 or 3. A
  difference above them is shifted down by 10 and then reported with index
  0 or 1 by the common return below. */
  if (bit < (1 << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  bit >>= 10;
  }
#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
|
|
|
|
/* Checks whether a partial matching is occurred. Does not modify registers.

Nothing is emitted in JIT_COMPILE mode. Otherwise the generated code either
stores 0 into the hit_start slot (soft partial mode) or jumps to the partial
match exit (any other mode).

Arguments:
  common   compiler state
  force    FALSE: the action is skipped when the start_used_ptr slot is
           greater than or equal to STR_PTR.
           TRUE: in soft partial mode the action is skipped when the slot
           holds -1; in any other mode it is unconditional.
           Must not be TRUE in JIT_COMPILE mode.
*/
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
const BOOL soft_mode = common->mode == JIT_PARTIAL_SOFT_COMPILE;

SLJIT_ASSERT(!(force && common->mode == JIT_COMPILE));

if (common->mode == JIT_COMPILE)
  return;

if (force)
  {
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  {
  /* The exit label is not set yet: queue the jump on common->partialmatch. */
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

if (skip != NULL)
  JUMPHERE(skip);
}
|
|
|
|
/* Emits a test for STR_PTR having reached STR_END.

In JIT_COMPILE mode a single jump to end_reached is added, taken when
STR_PTR >= STR_END. In the partial modes the generated code, when
STR_PTR >= STR_END, goes to end_reached if the start_used_ptr slot is
greater than or equal to STR_PTR; otherwise it stores 0 into the hit_start
slot and goes to end_reached (soft mode), or jumps to the partial match exit
(other modes).

Arguments:
  common       compiler state
  end_reached  jump list that receives the "end of subject" jumps
*/
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == JIT_COMPILE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Skips the whole end-of-subject handling while input remains. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* This check is the same in both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  {
  /* The exit label is not set yet: queue the jump on common->partialmatch. */
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
|
|
|
|
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
|
|
{
|
|
DEFINE_COMPILER;
|
|
struct sljit_jump *jump;
|
|
|
|
if (common->mode == JIT_COMPILE)
|
|
{
|
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
|
return;
|
|
}
|
|
|