1
1
Fork 0

Official tarball pcre-8.45.tar.gz

This commit is contained in:
commit 0c5004f693
368 changed files with 302041 additions and 0 deletions

313
132html Executable file
View File

@ -0,0 +1,313 @@
#! /usr/bin/perl -w
# Script to turn PCRE man pages into HTML
# Subroutine to handle font changes and other escapes
sub do_line {
my($s) = $_[0];
$s =~ s/</&#60;/g; # Deal with < and >
$s =~ s/>/&#62;/g;
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
$s =~ s"\\e"\\"g;
$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
$s;
}
# Subroutine to ensure not in a paragraph
sub end_para {
if ($inpara)
{
print TEMP "</PRE>\n" if ($inpre);
print TEMP "</P>\n";
}
$inpara = $inpre = 0;
$wrotetext = 0;
}
# Subroutine to start a new paragraph
sub new_para {
&end_para();
print TEMP "<P>\n";
$inpara = 1;
}
# Main program
$innf = 0;
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
{
$toc = 1 if $ARGV[0] eq "-toc";
shift;
}
# Initial output to STDOUT
print <<End ;
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End
print "<ul>\n" if ($toc);
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
while (<STDIN>)
{
# Handle lines beginning with a dot
if (/^\./)
{
# Some of the PCRE man pages used to contain instances of .br. However,
# they should have all been removed because they cause trouble in some
# (other) automated systems that translate man pages to HTML. Complain if
# we find .br or .in (another macro that is deprecated).
if (/^\.br/ || /^\.in/)
{
print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
print STDERR "*** $_\n";
die "*** Processing abandoned\n";
}
# Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
elsif (/^\.nf/)
{
$innf = 1;
}
elsif (/^\.fi/)
{
$innf = 0;
}
# Handling .sp is subtle. If it is inside a literal section, do nothing if
# the next line is a non literal text line; similarly, if not inside a
# literal section, do nothing if a literal follows, unless we are inside
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
# literal sections will do the spacing. Always skip if no previous output.
elsif (/^\.sp/)
{
if ($wrotetext)
{
$_ = <STDIN>;
if ($inpre)
{
print TEMP "\n" if (/^[\s.]/);
}
else
{
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
}
redo; # Now process the lookahead line we just read
}
}
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
{
&new_para();
}
elsif (/^\.SH\s*("?)(.*)\1/)
{
# Ignore the NAME section
if ($2 =~ /^NAME\b/)
{
<STDIN>;
next;
}
&end_para();
my($title) = &do_line($2);
if ($toc)
{
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
$ref, $ref);
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
$ref);
$ref++;
}
else
{
print TEMP "<br><b>\n$title\n</b><br>\n";
}
}
elsif (/^\.SS\s*("?)(.*)\1/)
{
&end_para();
my($title) = &do_line($2);
print TEMP "<br><b>\n$title\n</b><br>\n";
}
elsif (/^\.B\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<b>$_</b>\n";
$wrotetext = 1;
}
elsif (/^\.I\s*(.*)/)
{
&new_para() if (!$inpara);
$_ = &do_line($1);
s/"(.*?)"/$1/g;
print TEMP "<i>$_</i>\n";
$wrotetext = 1;
}
# A comment that starts "HREF" takes the next line as a name that
# is turned into a hyperlink, using the text given, which might be
# in a special font. If it ends in () or (digits) or punctuation, they
# aren't part of the link.
elsif (/^\.\\"\s*HREF/)
{
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
$_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
print TEMP "<a href=\"$1.html\">$_</a>\n";
}
# A comment that starts "HTML" inserts literal HTML
elsif (/^\.\\"\s*HTML\s*(.*)/)
{
print TEMP $1;
}
# A comment that starts < inserts that HTML at the end of the
# *next* input line - so as not to get a newline between them.
elsif (/^\.\\"\s*(<.*>)/)
{
my($markup) = $1;
$_=<STDIN>;
chomp;
$_ = &do_line($_);
$_ =~ s/\s+$//;
print TEMP "$_$markup\n";
}
# A comment that starts JOIN joins the next two lines together, with one
# space between them. Then that line is processed. This is used in some
# displays where two lines are needed for the "man" version. JOINSH works
# the same, except that it assumes this is a shell command, so removes
# continuation backslashes.
elsif (/^\.\\"\s*JOIN(SH)?/)
{
my($one,$two);
$one = <STDIN>;
$two = <STDIN>;
$one =~ s/\s*\\e\s*$// if (defined($1));
chomp($one);
$two =~ s/^\s+//;
$_ = "$one $two";
redo; # Process the joined lines
}
# .EX/.EE are used in the pcredemo page to bracket the entire program,
# which is unmodified except for turning backslash into "\e".
elsif (/^\.EX\s*$/)
{
print TEMP "<PRE>\n";
while (<STDIN>)
{
last if /^\.EE\s*$/;
s/\\e/\\/g;
s/&/&amp;/g;
s/</&lt;/g;
s/>/&gt;/g;
print TEMP;
}
}
# Ignore anything not recognized
next;
}
# Line does not begin with a dot. Replace blank lines with new paragraphs
if (/^\s*$/)
{
&end_para() if ($wrotetext);
next;
}
# Convert fonts changes and output an ordinary line. Ensure that indented
# lines are marked as literal.
$_ = &do_line($_);
&new_para() if (!$inpara);
if (/^\s/)
{
if (!$inpre)
{
print TEMP "<pre>\n";
$inpre = 1;
}
}
elsif ($inpre)
{
print TEMP "</pre>\n";
$inpre = 0;
}
# Add <br> to the end of a non-literal line if we are within .nf/.fi
$_ .= "<br>\n" if (!$inpre && $innf);
print TEMP;
$wrotetext = 1;
}
# The TOC, if present, will have been written - terminate it
print "</ul>\n" if ($toc);
# Copy the remainder to the standard output
close(TEMP);
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
print while (<TEMP>);
print <<End ;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End
close(TEMP);
unlink("/tmp/$$");
# End

45
AUTHORS Normal file
View File

@ -0,0 +1,45 @@
THE MAIN PCRE LIBRARY
---------------------
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2021 University of Cambridge
All rights reserved
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2021 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2021 Zoltan Herczeg
All rights reserved.
THE C++ WRAPPER LIBRARY
-----------------------
Written by: Google Inc.
Copyright (c) 2007-2012 Google Inc
All rights reserved
####

1067
CMakeLists.txt Normal file

File diff suppressed because it is too large Load Diff

5
COPYING Normal file
View File

@ -0,0 +1,5 @@
PCRE LICENCE
Please see the file LICENCE in the PCRE distribution for licensing details.
End

6246
ChangeLog Normal file

File diff suppressed because it is too large Load Diff

67
CheckMan Executable file
View File

@ -0,0 +1,67 @@
#! /usr/bin/perl
# A script to scan PCRE's man pages to check for typos in the control
# sequences. I use only a small set of the available repertoire, so it is
# straightforward to check that nothing else has slipped in by mistake. This
# script should be called in the doc directory.
$yield = 0;
while (scalar(@ARGV) > 0)
{
$line = 0;
$file = shift @ARGV;
open (IN, $file) || die "Failed to open $file\n";
while (<IN>)
{
$line++;
if (/^\s*$/)
{
printf "Empty line $line of $file\n";
$yield = 1;
}
elsif (/^\./)
{
if (!/^\.\s*$|
^\.B\s+\S|
^\.TH\s\S|
^\.SH\s\S|
^\.SS\s\S|
^\.TP(?:\s?\d+)?\s*$|
^\.SM\s*$|
^\.br\s*$|
^\.rs\s*$|
^\.sp\s*$|
^\.nf\s*$|
^\.fi\s*$|
^\.P\s*$|
^\.PP\s*$|
^\.\\"(?:\ HREF)?\s*$|
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
^\.\\"\s<\/a>\s*$|
^\.\\"\sJOINSH\s*$|
^\.\\"\sJOIN\s*$/x
)
{
printf "Bad control line $line of $file\n";
$yield = 1;
}
}
else
{
if (/\\[^ef]|\\f[^IBP]/)
{
printf "Bad backslash in line $line of $file\n";
$yield = 1;
}
}
}
close(IN);
}
exit $yield;
# End

113
CleanTxt Executable file
View File

@ -0,0 +1,113 @@
#! /usr/bin/perl -w
# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
$blankcount = 0;
$lastwascut = 0;
$firstheader = 1;
# Input on STDIN; output to STDOUT.
while (<STDIN>)
{
s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
s/.\x8//g; # Remove "char, backspace"
# Handle header lines. Retain only the first one we encounter, but remove
# the blank line that follows. Any others (e.g. at end of document) and the
# following blank line are dropped.
if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
{
if ($firstheader)
{
$firstheader = 0;
print;
$lastprinted = $_;
$lastwascut = 0;
}
$_=<STDIN>; # Remove a blank that follows
next;
}
# Count runs of empty lines
if (/^\s*$/)
{
$blankcount++;
$lastwascut = 0;
next;
}
# If a chunk of lines has been cut out (page footer) and the next line
# has a different indentation, put back one blank line.
if ($lastwascut && $blankcount < 1 && defined($lastprinted))
{
($a) = $lastprinted =~ /^(\s*)/;
($b) = $_ =~ /^(\s*)/;
$blankcount++ if ($a ne $b);
}
# We get here only when we have a non-blank line in hand. If it was preceded
# by 3 or more blank lines, read the next 3 lines and see if they are blank.
# If so, remove all 7 lines, and remember that we have just done a cut.
if ($blankcount >= 3)
{
for ($i = 0; $i < 3; $i++)
{
$next[$i] = <STDIN>;
$next[$i] = "" if !defined $next[$i];
$next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
$next[$i] =~ s/.\x8//g; # Remove "char, backspace"
}
# Cut out chunks of the form <3 blanks><non-blank><3 blanks>
if ($next[0] =~ /^\s*$/ &&
$next[1] =~ /^\s*$/ &&
$next[2] =~ /^\s*$/)
{
$blankcount -= 3;
$lastwascut = 1;
}
# Otherwise output the saved blanks, the current, and the next three
# lines. Remember the last printed line.
else
{
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
for ($i = 0; $i < 3; $i++)
{
$next[$i] =~ s/.\x8//g;
print $next[$i];
$lastprinted = $_;
}
$lastwascut = 0;
$blankcount = 0;
}
}
# This non-blank line is not preceded by 3 or more blank lines. Output
# any blanks there are, and the line. Remember it. Force two blank lines
# before headings.
else
{
$blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
defined($lastprinted);
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
print;
$lastprinted = $_;
$lastwascut = 0;
$blankcount = 0;
}
}
# End

35
Detrail Executable file
View File

@ -0,0 +1,35 @@
#!/usr/bin/perl
# This is a script for removing trailing whitespace from lines in files that
# are listed on the command line.
# This subroutine does the work for one file.
sub detrail {
my($file) = $_[0];
my($changed) = 0;
open(IN, "$file") || die "Can't open $file for input";
@lines = <IN>;
close(IN);
foreach (@lines)
{
if (/\s+\n$/)
{
s/\s+\n$/\n/;
$changed = 1;
}
}
if ($changed)
{
open(OUT, ">$file") || die "Can't open $file for output";
print OUT @lines;
close(OUT);
}
}
# This is the main program
$, = ""; # Output field separator
for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
# End

528
HACKING Normal file
View File

@ -0,0 +1,528 @@
Technical Notes about PCRE
--------------------------
These are very rough technical notes that record potentially useful information
about PCRE internals. For information about testing PCRE, see the pcretest
documentation and the comment at the head of the RunTest file.
Historical note 1
-----------------
Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. These were not Unix-like in form, and were quite
restricted in what they could do by comparison with Perl. The interesting part
about the algorithm was that the amount of space required to hold the compiled
form of an expression was known in advance. The code to apply an expression did
not operate by backtracking, as the original Henry Spencer code and current
Perl code does, but instead checked all possibilities simultaneously by keeping
a list of current states and checking all of them as it advanced through the
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
algorithm", though it was not a traditional Finite State Machine (FSM). When
the pattern was all used up, all remaining states were possible matches, and
the one matching the longest subset of the subject string was chosen. This did
not necessarily maximize the individual wild portions of the pattern, as is
expected in Unix and Perl-style regular expressions.
Historical note 2
-----------------
By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version probably doesn't do this any more; I'm talking about
the original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.
OK, here's the real stuff
-------------------------
For the set of functions that form the "basic" PCRE library (which are
unrelated to those mentioned above), I tried at first to invent an algorithm
that used an amount of store bounded by a multiple of the number of characters
in the pattern, to save on compiling time. However, because of the greater
complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons.
Support for 16-bit and 32-bit data strings
-------------------------------------------
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
different libraries. In the description that follows, the word "short" is used
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
However, so as not to over-complicate the text, the names of PCRE functions are
given in 8-bit form only.
Computing the memory requirement: how it was
--------------------------------------------
Up to and including release 6.7, PCRE worked by running a very degenerate first
pass to calculate a maximum store size, and then a second pass to do the real
compile - which might use a bit less than the predicted amount of memory. The
idea was that this would turn out faster than the Henry Spencer code because
the first pass is degenerate and the second pass can just store stuff straight
into the vector, which it knows is big enough.
Computing the memory requirement: how it is
-------------------------------------------
By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
actually only ever using a few hundred bytes of working memory, and without too
many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source. It
should make future maintenance and development easier. As this was such a major
change, I never released 6.8, instead upping the number to 7.0 (other quite
major changes were also present in the 7.0 release).
A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there is a downside: pcre_compile()
runs more slowly than before (30% or more, depending on the pattern) because it
is doing a full analysis of the pattern. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by pcre_compile(). This is a safety feature for environments
with small stacks where the patterns are provided by users.
Traditional matching function
-----------------------------
The "traditional", and original, matching function is called pcre_exec(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE
will use most of the time. From release 8.20, if PCRE is compiled with
just-in-time (JIT) support, and studying a compiled pattern with JIT is
successful, the JIT code is run instead of the normal pcre_exec() code, but the
result is the same.
Supplementary matching function
-------------------------------
From PCRE 6.0, there is also a supplementary matching function called
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
simultaneously for all possible matches that start at one point in the subject
string. (Going back to my roots: see Historical Note 1 above.) This function
intreprets the same compiled pattern data as pcre_exec(); however, not all the
facilities are available, and those that are do not always work in quite the
same way. See the user documentation for details.
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
others) may change in the middle of patterns. From PCRE 8.13, their processing
is handled entirely at compile time by generating different opcodes for the
different settings. The runtime functions do not need to keep track of an
options state any more.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
variable length. The first unit in an item contains an opcode, and the length
of the item is either implicit in the opcode or contained in the data that
follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
4-byte values for these offsets, although this impairs the performance. (3-byte
LINK_SIZE values are available only in 8-bit mode.) Specifing a LINK_SIZE
larger than 2 is necessary only when patterns whose compiled length is greater
than 64K are going to be processed. In this description, we assume the "normal"
compilation options. Data values that are counts (e.g. quantifiers) are two
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
and 32-bit modes.
Opcodes with no following data
------------------------------
These items are all just one unit long
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K)
OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \W
OP_WORD_BOUNDARY \w
OP_NOT_DIGIT \D
OP_DIGIT \d
OP_NOT_HSPACE \H
OP_HSPACE \h
OP_NOT_WHITESPACE \S
OP_WHITESPACE \s
OP_NOT_VSPACE \V
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, each followed by a count that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match.
Backtracking control verbs with optional data
---------------------------------------------
(*THEN) without an argument generates the opcode OP_THEN and no following data.
OP_MARK is followed by the mark name, preceded by a one-unit length, and
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
following in the same format as OP_MARK.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
the character may be more than one unit long. In UTF-32 mode, characters
are always exactly one unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Repeating single characters
---------------------------
The common repeats (*, +, ?), when applied to a single character, use the
following opcodes, which come in caseful and caseless versions:
Caseful Caseless
OP_STAR OP_STARI
OP_MINSTAR OP_MINSTARI
OP_POSSTAR OP_POSSTARI
OP_PLUS OP_PLUSI
OP_MINPLUS OP_MINPLUSI
OP_POSPLUS OP_POSPLUSI
OP_QUERY OP_QUERYI
OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
Each opcode is followed by the character that is to be repeated. In ASCII mode,
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
minimizing versions. Those with "POS" in their names are possessive versions.
Other repeats make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
OP_MINUPTO OP_MINUPTOI
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a count and then the repeated character. OP_UPTO
matches from 0 to the given number. A repeat with a non-zero minimum and a
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
OPT_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
-------------------------
Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type is stored in the data
unit. The opcodes are:
OP_TYPESTAR
OP_TYPEMINSTAR
OP_TYPEPOSSTAR
OP_TYPEPLUS
OP_TYPEMINPLUS
OP_TYPEPOSPLUS
OP_TYPEQUERY
OP_TYPEMINQUERY
OP_TYPEPOSQUERY
OP_TYPEUPTO
OP_TYPEMINUPTO
OP_TYPEPOSUPTO
OP_TYPEEXACT
Match by Unicode property
-------------------------
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two units that encode the desired property as a type and a
value. The types are a set of #defines of the form PT_xxx, and the values are
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
PT_SC (Script).
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
value.
Character classes
-----------------
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]).
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
mode, subject characters with values greater than 255 can be handled correctly.
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
are less than 256, followed by a list of pairs (for a range) and single
characters. In caseless mode, both cases are explicitly listed.
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
this is a negative class, and XCL_MAP indicates that a bit map is present.
There follows the bit map, if XCL_MAP is set, and then a sequence of items
coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
This means that if no other items in the class set bits in the map, a map is
not needed.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number if the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference by name generates OP_DNREF
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
in the group name table of the first entry for the requred name, followed by
the number of groups with the same name.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last three are single-unit items, with no data. The others are
followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
capturing brackets and it used a different opcode for each one. From release
3.5, the limit was removed by putting the bracket number into the data for
higher-numbered brackets. From release 7.0 all capturing brackets are handled
this way, using the single opcode OP_CBRA.
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
number is a count that immediately follows the offset.
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively (see below for possessive repetitions). All three are followed by
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid branch. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the regex.
A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.
A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
has the same number.
When a repeated subpattern has an unbounded upper limit, it is checked to see
whether it could match an empty string. If this is the case, the opcode in the
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCPBRPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
within the atomic group; the latter when there are. The distinction is needed
for when there is a backtrack to before the group - any captures within the
group must be reset, so it is necessary to retain backtracking points inside
the group even after it is complete in order to do this. When there are no
captures in an atomic group, all the backtracking can be discarded when it is
complete. This is more efficient, and also uses less stack.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there are just these two opcodes for atomic groups.
Assertions
----------
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a count of the number of characters to move back the
pointer in the subject string. In ASCII mode, the count is a number of units,
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
mode each character occupies exactly one unit. A separate count is present in
each alternative of a lookbehind assertion, allowing them to have different
fixed lengths.
Conditional subpatterns
-----------------------
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat. If
the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF (with a value of zero for "the whole
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
conditional subpattern always starts with one of the assertions.
Recursion
---------
Recursion either matches the current regex, or some subexpression. The opcode
OP_RECURSE is followed by aLINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
not strictly a recursion.
Callout
-------
OP_CALLOUT is followed by one unit of data that holds a callout number in the
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
cases there follows a count giving the offset in the pattern string to the
start of the following item, and another count giving the length of this item.
These values make is possible for pcretest to output useful tracing information
using automatic callouts.
Philip Hazel
November 2013

368
INSTALL Normal file
View File

@ -0,0 +1,368 @@
Installation Instructions
*************************
Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software
Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation
==================
Briefly, the shell command './configure && make && make install'
should configure, build, and install this package. The following
more-detailed instructions are generic; see the 'README' file for
instructions specific to this package. Some packages provide this
'INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The 'configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a 'Makefile' in each directory of the package.
It may also create one or more '.h' files containing system-dependent
definitions. Finally, it creates a shell script 'config.status' that
you can run in the future to recreate the current configuration, and a
file 'config.log' containing compiler output (useful mainly for
debugging 'configure').
It can also use an optional file (typically called 'config.cache' and
enabled with '--cache-file=config.cache' or simply '-C') that saves the
results of its tests to speed up reconfiguring. Caching is disabled by
default to prevent problems with accidental use of stale cache files.
If you need to do unusual things to compile the package, please try
to figure out how 'configure' could check whether to do them, and mail
diffs or instructions to the address given in the 'README' so they can
be considered for the next release. If you are using the cache, and at
some point 'config.cache' contains results you don't want to keep, you
may remove or edit it.
The file 'configure.ac' (or 'configure.in') is used to create
'configure' by a program called 'autoconf'. You need 'configure.ac' if
you want to change it or regenerate 'configure' using a newer version of
'autoconf'.
The simplest way to compile this package is:
1. 'cd' to the directory containing the package's source code and type
'./configure' to configure the package for your system.
Running 'configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type 'make' to compile the package.
3. Optionally, type 'make check' to run any self-tests that come with
the package, generally using the just-built uninstalled binaries.
4. Type 'make install' to install the programs and any data files and
documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the 'make install' phase executed with root
privileges.
5. Optionally, type 'make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior 'make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing 'make clean'. To also remove the
files that 'configure' created (so you can compile the package for
a different kind of computer), type 'make distclean'. There is
also a 'make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
7. Often, you can also type 'make uninstall' to remove the installed
files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide 'make
distcheck', which can by used by developers to test that all other
targets like 'make install' and 'make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that
the 'configure' script does not know about. Run './configure --help'
for details on some of the pertinent environment variables.
You can give 'configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here is
an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU 'make'. 'cd' to the
directory where you want the object files and executables to go and run
the 'configure' script. 'configure' automatically checks for the source
code in the directory that 'configure' is in and in '..'. This is known
as a "VPATH" build.
With a non-GNU 'make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use 'make distclean' before
reconfiguring for another architecture.
On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple '-arch' options to the
compiler but only a single '-arch' option to the preprocessor. Like
this:
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
CPP="gcc -E" CXXCPP="g++ -E"
This is not guaranteed to produce working output in all cases, you
may have to build one architecture at a time and combine the results
using the 'lipo' tool if you have problems.
Installation Names
==================
By default, 'make install' installs the package's commands under
'/usr/local/bin', include files under '/usr/local/include', etc. You
can specify an installation prefix other than '/usr/local' by giving
'configure' the option '--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option '--exec-prefix=PREFIX' to 'configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like '--bindir=DIR' to specify different values for particular
kinds of files. Run 'configure --help' for a list of the directories
you can set and what kinds of files go in them. In general, the default
for these options is expressed in terms of '${prefix}', so that
specifying just '--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to 'configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
'make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, 'make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
'${prefix}'. Any directories that were specified during 'configure',
but not in terms of '${prefix}', must each be overridden at install time
for the entire installation to be relocated. The approach of makefile
variable overrides for each directory variable is required by the GNU
Coding Standards, and ideally causes no recompilation. However, some
platforms have known limitations with the semantics of shared libraries
that end up requiring recompilation when using this method, particularly
noticeable in packages that use GNU Libtool.
The second method involves providing the 'DESTDIR' variable. For
example, 'make install DESTDIR=/alternate/directory' will prepend
'/alternate/directory' before all installation names. The approach of
'DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of '${prefix}'
at 'configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving 'configure' the
option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'.
Some packages pay attention to '--enable-FEATURE' options to
'configure', where FEATURE indicates an optional part of the package.
They may also pay attention to '--with-PACKAGE' options, where PACKAGE
is something like 'gnu-as' or 'x' (for the X Window System). The
'README' should mention any '--enable-' and '--with-' options that the
package recognizes.
For packages that use the X Window System, 'configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the 'configure' options '--x-includes=DIR' and
'--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of 'make' will be. For these packages, running './configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with 'make V=1'; while running './configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with 'make V=0'.
Particular systems
==================
On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC
is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
HP-UX 'make' updates targets which have the same time stamps as their
prerequisites, which makes it generally unusable when shipped generated
files such as 'configure' are involved. Use GNU 'make' instead.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its '<wchar.h>' header file. The option '-nodtk' can be used as a
workaround. If GNU CC is not installed, it is therefore recommended to
try
./configure CC="cc"
and if that doesn't work, try
./configure CC="cc -nodtk"
On Solaris, don't put '/usr/ucb' early in your 'PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in '/usr/bin'. So, if you need '/usr/ucb'
in your 'PATH', put it _after_ '/usr/bin'.
On Haiku, software installed for all users goes in '/boot/common',
not '/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type
==========================
There may be some features 'configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on. Usually, assuming the package is built to be run on the
_same_ architectures, 'configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
'--build=TYPE' option. TYPE can either be a short name for the system
type, such as 'sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS
KERNEL-OS
See the file 'config.sub' for the possible values of each field. If
'config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option '--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with '--host=TYPE'.
Sharing Defaults
================
If you want to set default values for 'configure' scripts to share,
you can create a site shell script called 'config.site' that gives
default values for variables like 'CC', 'cache_file', and 'prefix'.
'configure' looks for 'PREFIX/share/config.site' if it exists, then
'PREFIX/etc/config.site' if it exists. Or, you can set the
'CONFIG_SITE' environment variable to the location of the site script.
A warning: not all 'configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to 'configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the 'configure' command line, using 'VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified 'gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an
Autoconf limitation. Until the limitation is lifted, you can use this
workaround:
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
'configure' Invocation
======================
'configure' recognizes the following options to control how it
operates.
'--help'
'-h'
Print a summary of all of the options to 'configure', and exit.
'--help=short'
'--help=recursive'
Print a summary of the options unique to this package's
'configure', and exit. The 'short' variant lists options used only
in the top level, while the 'recursive' variant lists options also
present in any nested packages.
'--version'
'-V'
Print the version of Autoconf used to generate the 'configure'
script, and exit.
'--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally 'config.cache'. FILE defaults to '/dev/null' to
disable caching.
'--config-cache'
'-C'
Alias for '--cache-file=config.cache'.
'--quiet'
'--silent'
'-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to '/dev/null' (any error
messages will still be shown).
'--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
'configure' can determine that directory automatically.
'--prefix=DIR'
Use DIR as the installation prefix. *note Installation Names:: for
more details, including other options available for fine-tuning the
installation locations.
'--no-create'
'-n'
Run the configure checks, but stop before creating any output
files.
'configure' also accepts some other, not widely useful, options. Run
'configure --help' for more details.

93
LICENCE Normal file
View File

@ -0,0 +1,93 @@
PCRE LICENCE
------------
PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself. The data
in the testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions, and a
just-in-time compiler that can be used to optimize pattern matching. These
are both optional features that can be omitted when the library is built.
THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: Philip.Hazel
Email domain: gmail.com
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2021 University of Cambridge
All rights reserved.
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2021 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2009-2021 Zoltan Herczeg
All rights reserved.
THE C++ WRAPPER FUNCTIONS
-------------------------
Contributed by: Google Inc.
Copyright (c) 2007-2012, Google Inc.
All rights reserved.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the name of Google
Inc. nor the names of their contributors may be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
End

895
Makefile.am Normal file
View File

@ -0,0 +1,895 @@
## Process this file with automake to produce Makefile.in.
ACLOCAL_AMFLAGS = -I m4
dist_doc_DATA = \
doc/pcre.txt \
doc/pcre-config.txt \
doc/pcregrep.txt \
doc/pcretest.txt \
AUTHORS \
COPYING \
ChangeLog \
LICENCE \
NEWS \
README
# Note that pcrecpp.html is not in this list; it is listed separately below.
dist_html_DATA = \
doc/html/NON-AUTOTOOLS-BUILD.txt \
doc/html/README.txt \
doc/html/index.html \
doc/html/pcre-config.html \
doc/html/pcre.html \
doc/html/pcre16.html \
doc/html/pcre32.html \
doc/html/pcre_assign_jit_stack.html \
doc/html/pcre_compile.html \
doc/html/pcre_compile2.html \
doc/html/pcre_config.html \
doc/html/pcre_copy_named_substring.html \
doc/html/pcre_copy_substring.html \
doc/html/pcre_dfa_exec.html \
doc/html/pcre_exec.html \
doc/html/pcre_free_study.html \
doc/html/pcre_free_substring.html \
doc/html/pcre_free_substring_list.html \
doc/html/pcre_fullinfo.html \
doc/html/pcre_get_named_substring.html \
doc/html/pcre_get_stringnumber.html \
doc/html/pcre_get_stringtable_entries.html \
doc/html/pcre_get_substring.html \
doc/html/pcre_get_substring_list.html \
doc/html/pcre_jit_exec.html \
doc/html/pcre_jit_stack_alloc.html \
doc/html/pcre_jit_stack_free.html \
doc/html/pcre_maketables.html \
doc/html/pcre_pattern_to_host_byte_order.html \
doc/html/pcre_refcount.html \
doc/html/pcre_study.html \
doc/html/pcre_utf16_to_host_byte_order.html \
doc/html/pcre_utf32_to_host_byte_order.html \
doc/html/pcre_version.html \
doc/html/pcreapi.html \
doc/html/pcrebuild.html \
doc/html/pcrecallout.html \
doc/html/pcrecompat.html \
doc/html/pcredemo.html \
doc/html/pcregrep.html \
doc/html/pcrejit.html \
doc/html/pcrelimits.html \
doc/html/pcrematching.html \
doc/html/pcrepartial.html \
doc/html/pcrepattern.html \
doc/html/pcreperform.html \
doc/html/pcreposix.html \
doc/html/pcreprecompile.html \
doc/html/pcresample.html \
doc/html/pcrestack.html \
doc/html/pcresyntax.html \
doc/html/pcretest.html \
doc/html/pcreunicode.html
pcrecpp_html = doc/html/pcrecpp.html
dist_noinst_DATA = $(pcrecpp_html)
if WITH_PCRE_CPP
html_DATA = $(pcrecpp_html)
endif
# The Libtool libraries to install. We'll add to this later.
lib_LTLIBRARIES =
# Unit tests you want to run when people type 'make check'.
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
TESTS =
check_SCRIPTS =
dist_noinst_SCRIPTS =
# Some of the binaries we make are to be installed, and others are
# (non-user-visible) helper programs needed to build libpcre, libpcre16
# or libpcre32.
bin_PROGRAMS =
noinst_PROGRAMS =
# Additional files to delete on 'make clean' and 'make maintainer-clean'.
CLEANFILES =
MAINTAINERCLEANFILES =
# Additional files to bundle with the distribution, over and above what
# the Autotools include by default.
EXTRA_DIST =
# These files contain additional m4 macros that are used by autoconf.
EXTRA_DIST += \
m4/ax_pthread.m4 m4/pcre_visibility.m4
# These files contain maintenance information
EXTRA_DIST += \
doc/perltest.txt \
NON-UNIX-USE \
NON-AUTOTOOLS-BUILD \
HACKING
# These files are used in the preparation of a release
EXTRA_DIST += \
PrepareRelease \
CheckMan \
CleanTxt \
Detrail \
132html \
doc/index.html.src
# These files are to do with building for Virtual Pascal
EXTRA_DIST += \
makevp.bat \
makevp_c.txt \
makevp_l.txt \
pcregexp.pas
# These files are usable versions of pcre.h and config.h that are distributed
# for the benefit of people who are building PCRE manually, without the
# Autotools support.
EXTRA_DIST += \
pcre.h.generic \
config.h.generic
# The only difference between pcre.h.in and pcre.h is the setting of the PCRE
# version number. Therefore, we can create the generic version just by copying.
pcre.h.generic: pcre.h.in configure.ac
rm -f $@
cp -p pcre.h $@
# It is more complicated for config.h.generic. We need the version that results
# from a default configuration so as to get all the default values for PCRE
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
# doing a configure in a temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you just try
# running configure in a new directory, it complains. For this reason, we move
# config.status out of the way while doing the default configuration. The
# resulting config.h is munged by perl to put #ifdefs round any #defines for
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
config.h.generic: configure.ac
rm -rf $@ _generic
mkdir _generic
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
cd _generic && $(abs_top_srcdir)/configure || :
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
test -f _generic/config.h
perl -n \
-e 'BEGIN{$$blank=0;}' \
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
-e 'if(/to make a symbol visible/){next;}' \
-e 'if(/__attribute__ \(\(visibility/){next;}' \
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
_generic/config.h >$@
rm -rf _generic
MAINTAINERCLEANFILES += pcre.h.generic config.h.generic
# These are the header files we'll install. We do not distribute pcre.h because
# it is generated from pcre.h.in.
nodist_include_HEADERS = \
pcre.h
include_HEADERS = \
pcreposix.h
# These additional headers will be be installed if C++ support is enabled. We
# do not distribute pcrecpparg.h or pcre_stringpiece.h, as these are generated
# from corresponding .h.in files (which we do distribute).
if WITH_PCRE_CPP
nodist_include_HEADERS += \
pcrecpparg.h \
pcre_stringpiece.h
include_HEADERS += \
pcrecpp.h \
pcre_scanner.h
endif # WITH_PCRE_CPP
bin_SCRIPTS = pcre-config
## ---------------------------------------------------------------
## The dftables program is used to rebuild character tables before compiling
## PCRE, if --enable-rebuild-chartables is specified. It is not a user-visible
## program. The default (when --enable-rebuild-chartables is not specified) is
## to copy a distributed set of tables that are defined for ASCII code. In this
## case, dftables is not needed.
if WITH_REBUILD_CHARTABLES
noinst_PROGRAMS += dftables
dftables_SOURCES = dftables.c
pcre_chartables.c: dftables$(EXEEXT)
./dftables$(EXEEXT) $@
else
pcre_chartables.c: $(srcdir)/pcre_chartables.c.dist
rm -f $@
$(LN_S) $(srcdir)/pcre_chartables.c.dist $@