Official tarball pcre-8.45.tar.gz
This commit is contained in:
commit
0c5004f693
313
132html
Executable file
313
132html
Executable file
@ -0,0 +1,313 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to turn PCRE man pages into HTML
|
||||
|
||||
|
||||
# Subroutine to handle font changes and other escapes
|
||||
|
||||
sub do_line {
|
||||
my($s) = $_[0];
|
||||
|
||||
$s =~ s/</</g; # Deal with < and >
|
||||
$s =~ s/>/>/g;
|
||||
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
|
||||
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
|
||||
$s =~ s"\\e"\\"g;
|
||||
$s =~ s/(?<=Copyright )\(c\)/©/g;
|
||||
$s;
|
||||
}
|
||||
|
||||
# Subroutine to ensure not in a paragraph
|
||||
|
||||
sub end_para {
|
||||
if ($inpara)
|
||||
{
|
||||
print TEMP "</PRE>\n" if ($inpre);
|
||||
print TEMP "</P>\n";
|
||||
}
|
||||
$inpara = $inpre = 0;
|
||||
$wrotetext = 0;
|
||||
}
|
||||
|
||||
# Subroutine to start a new paragraph
|
||||
|
||||
sub new_para {
|
||||
&end_para();
|
||||
print TEMP "<P>\n";
|
||||
$inpara = 1;
|
||||
}
|
||||
|
||||
|
||||
# Main program
|
||||
|
||||
$innf = 0;
|
||||
$inpara = 0;
|
||||
$inpre = 0;
|
||||
$wrotetext = 0;
|
||||
$toc = 0;
|
||||
$ref = 1;
|
||||
|
||||
while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
|
||||
{
|
||||
$toc = 1 if $ARGV[0] eq "-toc";
|
||||
shift;
|
||||
}
|
||||
|
||||
# Initial output to STDOUT
|
||||
|
||||
print <<End ;
|
||||
<html>
|
||||
<head>
|
||||
<title>$ARGV[0] specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>$ARGV[0] man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
End
|
||||
|
||||
print "<ul>\n" if ($toc);
|
||||
|
||||
open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
|
||||
|
||||
while (<STDIN>)
|
||||
{
|
||||
# Handle lines beginning with a dot
|
||||
|
||||
if (/^\./)
|
||||
{
|
||||
# Some of the PCRE man pages used to contain instances of .br. However,
|
||||
# they should have all been removed because they cause trouble in some
|
||||
# (other) automated systems that translate man pages to HTML. Complain if
|
||||
# we find .br or .in (another macro that is deprecated).
|
||||
|
||||
if (/^\.br/ || /^\.in/)
|
||||
{
|
||||
print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
|
||||
print STDERR "*** $_\n";
|
||||
die "*** Processing abandoned\n";
|
||||
}
|
||||
|
||||
# Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
|
||||
|
||||
elsif (/^\.nf/)
|
||||
{
|
||||
$innf = 1;
|
||||
}
|
||||
|
||||
elsif (/^\.fi/)
|
||||
{
|
||||
$innf = 0;
|
||||
}
|
||||
|
||||
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
||||
# the next line is a non literal text line; similarly, if not inside a
|
||||
# literal section, do nothing if a literal follows, unless we are inside
|
||||
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
|
||||
# literal sections will do the spacing. Always skip if no previous output.
|
||||
|
||||
elsif (/^\.sp/)
|
||||
{
|
||||
if ($wrotetext)
|
||||
{
|
||||
$_ = <STDIN>;
|
||||
if ($inpre)
|
||||
{
|
||||
print TEMP "\n" if (/^[\s.]/);
|
||||
}
|
||||
else
|
||||
{
|
||||
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
|
||||
}
|
||||
redo; # Now process the lookahead line we just read
|
||||
}
|
||||
}
|
||||
elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
|
||||
{
|
||||
&new_para();
|
||||
}
|
||||
elsif (/^\.SH\s*("?)(.*)\1/)
|
||||
{
|
||||
# Ignore the NAME section
|
||||
if ($2 =~ /^NAME\b/)
|
||||
{
|
||||
<STDIN>;
|
||||
next;
|
||||
}
|
||||
|
||||
&end_para();
|
||||
my($title) = &do_line($2);
|
||||
if ($toc)
|
||||
{
|
||||
printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
|
||||
$ref, $ref);
|
||||
printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
|
||||
$ref);
|
||||
$ref++;
|
||||
}
|
||||
else
|
||||
{
|
||||
print TEMP "<br><b>\n$title\n</b><br>\n";
|
||||
}
|
||||
}
|
||||
elsif (/^\.SS\s*("?)(.*)\1/)
|
||||
{
|
||||
&end_para();
|
||||
my($title) = &do_line($2);
|
||||
print TEMP "<br><b>\n$title\n</b><br>\n";
|
||||
}
|
||||
elsif (/^\.B\s*(.*)/)
|
||||
{
|
||||
&new_para() if (!$inpara);
|
||||
$_ = &do_line($1);
|
||||
s/"(.*?)"/$1/g;
|
||||
print TEMP "<b>$_</b>\n";
|
||||
$wrotetext = 1;
|
||||
}
|
||||
elsif (/^\.I\s*(.*)/)
|
||||
{
|
||||
&new_para() if (!$inpara);
|
||||
$_ = &do_line($1);
|
||||
s/"(.*?)"/$1/g;
|
||||
print TEMP "<i>$_</i>\n";
|
||||
$wrotetext = 1;
|
||||
}
|
||||
|
||||
# A comment that starts "HREF" takes the next line as a name that
|
||||
# is turned into a hyperlink, using the text given, which might be
|
||||
# in a special font. If it ends in () or (digits) or punctuation, they
|
||||
# aren't part of the link.
|
||||
|
||||
elsif (/^\.\\"\s*HREF/)
|
||||
{
|
||||
$_=<STDIN>;
|
||||
chomp;
|
||||
$_ = &do_line($_);
|
||||
$_ =~ s/\s+$//;
|
||||
$_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
|
||||
print TEMP "<a href=\"$1.html\">$_</a>\n";
|
||||
}
|
||||
|
||||
# A comment that starts "HTML" inserts literal HTML
|
||||
|
||||
elsif (/^\.\\"\s*HTML\s*(.*)/)
|
||||
{
|
||||
print TEMP $1;
|
||||
}
|
||||
|
||||
# A comment that starts < inserts that HTML at the end of the
|
||||
# *next* input line - so as not to get a newline between them.
|
||||
|
||||
elsif (/^\.\\"\s*(<.*>)/)
|
||||
{
|
||||
my($markup) = $1;
|
||||
$_=<STDIN>;
|
||||
chomp;
|
||||
$_ = &do_line($_);
|
||||
$_ =~ s/\s+$//;
|
||||
print TEMP "$_$markup\n";
|
||||
}
|
||||
|
||||
# A comment that starts JOIN joins the next two lines together, with one
|
||||
# space between them. Then that line is processed. This is used in some
|
||||
# displays where two lines are needed for the "man" version. JOINSH works
|
||||
# the same, except that it assumes this is a shell command, so removes
|
||||
# continuation backslashes.
|
||||
|
||||
elsif (/^\.\\"\s*JOIN(SH)?/)
|
||||
{
|
||||
my($one,$two);
|
||||
$one = <STDIN>;
|
||||
$two = <STDIN>;
|
||||
$one =~ s/\s*\\e\s*$// if (defined($1));
|
||||
chomp($one);
|
||||
$two =~ s/^\s+//;
|
||||
$_ = "$one $two";
|
||||
redo; # Process the joined lines
|
||||
}
|
||||
|
||||
# .EX/.EE are used in the pcredemo page to bracket the entire program,
|
||||
# which is unmodified except for turning backslash into "\e".
|
||||
|
||||
elsif (/^\.EX\s*$/)
|
||||
{
|
||||
print TEMP "<PRE>\n";
|
||||
while (<STDIN>)
|
||||
{
|
||||
last if /^\.EE\s*$/;
|
||||
s/\\e/\\/g;
|
||||
s/&/&/g;
|
||||
s/</</g;
|
||||
s/>/>/g;
|
||||
print TEMP;
|
||||
}
|
||||
}
|
||||
|
||||
# Ignore anything not recognized
|
||||
|
||||
next;
|
||||
}
|
||||
|
||||
# Line does not begin with a dot. Replace blank lines with new paragraphs
|
||||
|
||||
if (/^\s*$/)
|
||||
{
|
||||
&end_para() if ($wrotetext);
|
||||
next;
|
||||
}
|
||||
|
||||
# Convert fonts changes and output an ordinary line. Ensure that indented
|
||||
# lines are marked as literal.
|
||||
|
||||
$_ = &do_line($_);
|
||||
&new_para() if (!$inpara);
|
||||
|
||||
if (/^\s/)
|
||||
{
|
||||
if (!$inpre)
|
||||
{
|
||||
print TEMP "<pre>\n";
|
||||
$inpre = 1;
|
||||
}
|
||||
}
|
||||
elsif ($inpre)
|
||||
{
|
||||
print TEMP "</pre>\n";
|
||||
$inpre = 0;
|
||||
}
|
||||
|
||||
# Add <br> to the end of a non-literal line if we are within .nf/.fi
|
||||
|
||||
$_ .= "<br>\n" if (!$inpre && $innf);
|
||||
|
||||
print TEMP;
|
||||
$wrotetext = 1;
|
||||
}
|
||||
|
||||
# The TOC, if present, will have been written - terminate it
|
||||
|
||||
print "</ul>\n" if ($toc);
|
||||
|
||||
# Copy the remainder to the standard output
|
||||
|
||||
close(TEMP);
|
||||
open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
|
||||
|
||||
print while (<TEMP>);
|
||||
|
||||
print <<End ;
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
End
|
||||
|
||||
close(TEMP);
|
||||
unlink("/tmp/$$");
|
||||
|
||||
# End
|
45
AUTHORS
Normal file
45
AUTHORS
Normal file
@ -0,0 +1,45 @@
|
||||
THE MAIN PCRE LIBRARY
|
||||
---------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||
-------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER LIBRARY
|
||||
-----------------------
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2012 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
1067
CMakeLists.txt
Normal file
1067
CMakeLists.txt
Normal file
File diff suppressed because it is too large
Load Diff
5
COPYING
Normal file
5
COPYING
Normal file
@ -0,0 +1,5 @@
|
||||
PCRE LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
67
CheckMan
Executable file
67
CheckMan
Executable file
@ -0,0 +1,67 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# A script to scan PCRE's man pages to check for typos in the control
|
||||
# sequences. I use only a small set of the available repertoire, so it is
|
||||
# straightforward to check that nothing else has slipped in by mistake. This
|
||||
# script should be called in the doc directory.
|
||||
|
||||
$yield = 0;
|
||||
|
||||
while (scalar(@ARGV) > 0)
|
||||
{
|
||||
$line = 0;
|
||||
$file = shift @ARGV;
|
||||
|
||||
open (IN, $file) || die "Failed to open $file\n";
|
||||
|
||||
while (<IN>)
|
||||
{
|
||||
$line++;
|
||||
if (/^\s*$/)
|
||||
{
|
||||
printf "Empty line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
elsif (/^\./)
|
||||
{
|
||||
if (!/^\.\s*$|
|
||||
^\.B\s+\S|
|
||||
^\.TH\s\S|
|
||||
^\.SH\s\S|
|
||||
^\.SS\s\S|
|
||||
^\.TP(?:\s?\d+)?\s*$|
|
||||
^\.SM\s*$|
|
||||
^\.br\s*$|
|
||||
^\.rs\s*$|
|
||||
^\.sp\s*$|
|
||||
^\.nf\s*$|
|
||||
^\.fi\s*$|
|
||||
^\.P\s*$|
|
||||
^\.PP\s*$|
|
||||
^\.\\"(?:\ HREF)?\s*$|
|
||||
^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
|
||||
^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
|
||||
^\.\\"\s<\/a>\s*$|
|
||||
^\.\\"\sJOINSH\s*$|
|
||||
^\.\\"\sJOIN\s*$/x
|
||||
)
|
||||
{
|
||||
printf "Bad control line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (/\\[^ef]|\\f[^IBP]/)
|
||||
{
|
||||
printf "Bad backslash in line $line of $file\n";
|
||||
$yield = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
close(IN);
|
||||
}
|
||||
|
||||
exit $yield;
|
||||
# End
|
113
CleanTxt
Executable file
113
CleanTxt
Executable file
@ -0,0 +1,113 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Script to take the output of nroff -man and remove all the backspacing and
|
||||
# the page footers and the screen commands etc so that it is more usefully
|
||||
# readable online. In fact, in the latest nroff, intermediate footers don't
|
||||
# seem to be generated any more.
|
||||
|
||||
$blankcount = 0;
|
||||
$lastwascut = 0;
|
||||
$firstheader = 1;
|
||||
|
||||
# Input on STDIN; output to STDOUT.
|
||||
|
||||
while (<STDIN>)
|
||||
{
|
||||
s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
|
||||
s/.\x8//g; # Remove "char, backspace"
|
||||
|
||||
# Handle header lines. Retain only the first one we encounter, but remove
|
||||
# the blank line that follows. Any others (e.g. at end of document) and the
|
||||
# following blank line are dropped.
|
||||
|
||||
if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
|
||||
{
|
||||
if ($firstheader)
|
||||
{
|
||||
$firstheader = 0;
|
||||
print;
|
||||
$lastprinted = $_;
|
||||
$lastwascut = 0;
|
||||
}
|
||||
$_=<STDIN>; # Remove a blank that follows
|
||||
next;
|
||||
}
|
||||
|
||||
# Count runs of empty lines
|
||||
|
||||
if (/^\s*$/)
|
||||
{
|
||||
$blankcount++;
|
||||
$lastwascut = 0;
|
||||
next;
|
||||
}
|
||||
|
||||
# If a chunk of lines has been cut out (page footer) and the next line
|
||||
# has a different indentation, put back one blank line.
|
||||
|
||||
if ($lastwascut && $blankcount < 1 && defined($lastprinted))
|
||||
{
|
||||
($a) = $lastprinted =~ /^(\s*)/;
|
||||
($b) = $_ =~ /^(\s*)/;
|
||||
$blankcount++ if ($a ne $b);
|
||||
}
|
||||
|
||||
# We get here only when we have a non-blank line in hand. If it was preceded
|
||||
# by 3 or more blank lines, read the next 3 lines and see if they are blank.
|
||||
# If so, remove all 7 lines, and remember that we have just done a cut.
|
||||
|
||||
if ($blankcount >= 3)
|
||||
{
|
||||
for ($i = 0; $i < 3; $i++)
|
||||
{
|
||||
$next[$i] = <STDIN>;
|
||||
$next[$i] = "" if !defined $next[$i];
|
||||
$next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m"
|
||||
$next[$i] =~ s/.\x8//g; # Remove "char, backspace"
|
||||
}
|
||||
|
||||
# Cut out chunks of the form <3 blanks><non-blank><3 blanks>
|
||||
|
||||
if ($next[0] =~ /^\s*$/ &&
|
||||
$next[1] =~ /^\s*$/ &&
|
||||
$next[2] =~ /^\s*$/)
|
||||
{
|
||||
$blankcount -= 3;
|
||||
$lastwascut = 1;
|
||||
}
|
||||
|
||||
# Otherwise output the saved blanks, the current, and the next three
|
||||
# lines. Remember the last printed line.
|
||||
|
||||
else
|
||||
{
|
||||
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
|
||||
print;
|
||||
for ($i = 0; $i < 3; $i++)
|
||||
{
|
||||
$next[$i] =~ s/.\x8//g;
|
||||
print $next[$i];
|
||||
$lastprinted = $_;
|
||||
}
|
||||
$lastwascut = 0;
|
||||
$blankcount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# This non-blank line is not preceded by 3 or more blank lines. Output
|
||||
# any blanks there are, and the line. Remember it. Force two blank lines
|
||||
# before headings.
|
||||
|
||||
else
|
||||
{
|
||||
$blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
|
||||
defined($lastprinted);
|
||||
for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
|
||||
print;
|
||||
$lastprinted = $_;
|
||||
$lastwascut = 0;
|
||||
$blankcount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# End
|
35
Detrail
Executable file
35
Detrail
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# This is a script for removing trailing whitespace from lines in files that
|
||||
# are listed on the command line.
|
||||
|
||||
# This subroutine does the work for one file.
|
||||
|
||||
sub detrail {
|
||||
my($file) = $_[0];
|
||||
my($changed) = 0;
|
||||
open(IN, "$file") || die "Can't open $file for input";
|
||||
@lines = <IN>;
|
||||
close(IN);
|
||||
foreach (@lines)
|
||||
{
|
||||
if (/\s+\n$/)
|
||||
{
|
||||
s/\s+\n$/\n/;
|
||||
$changed = 1;
|
||||
}
|
||||
}
|
||||
if ($changed)
|
||||
{
|
||||
open(OUT, ">$file") || die "Can't open $file for output";
|
||||
print OUT @lines;
|
||||
close(OUT);
|
||||
}
|
||||
}
|
||||
|
||||
# This is the main program
|
||||
|
||||
$, = ""; # Output field separator
|
||||
for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); }
|
||||
|
||||
# End
|
528
HACKING
Normal file
528
HACKING
Normal file
@ -0,0 +1,528 @@
|
||||
Technical Notes about PCRE
|
||||
--------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE internals. For information about testing PCRE, see the pcretest
|
||||
documentation and the comment at the head of the RunTest file.
|
||||
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
Perl code does, but instead checked all possibilities simultaneously by keeping
|
||||
a list of current states and checking all of them as it advanced through the
|
||||
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
|
||||
algorithm", though it was not a traditional Finite State Machine (FSM). When
|
||||
the pattern was all used up, all remaining states were possible matches, and
|
||||
the one matching the longest subset of the subject string was chosen. This did
|
||||
not necessarily maximize the individual wild portions of the pattern, as is
|
||||
expected in Unix and Perl-style regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that form the "basic" PCRE library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is helpful for other reasons.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
-------------------------------------------
|
||||
|
||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||
different libraries. In the description that follows, the word "short" is used
|
||||
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||
given in 8-bit form only.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE worked by running a very degenerate first
|
||||
pass to calculate a maximum store size, and then a second pass to do the real
|
||||
compile - which might use a bit less than the predicted amount of memory. The
|
||||
idea was that this would turn out faster than the Henry Spencer code because
|
||||
the first pass is degenerate and the second pass can just store stuff straight
|
||||
into the vector, which it knows is big enough.
|
||||
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
actually only ever using a few hundred bytes of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I refactored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source. It
|
||||
should make future maintenance and development easier. As this was such a major
|
||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||
major changes were also present in the 7.0 release).
|
||||
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there is a downside: pcre_compile()
|
||||
runs more slowly than before (30% or more, depending on the pattern) because it
|
||||
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||
big issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by pcre_compile(). This is a safety feature for environments
|
||||
with small stacks where the patterns are provided by users.
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre_exec(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. This is not surprising, since it is intended to be
|
||||
as compatible with Perl as possible. This is the function most users of PCRE
|
||||
will use most of the time. From release 8.20, if PCRE is compiled with
|
||||
just-in-time (JIT) support, and studying a compiled pattern with JIT is
|
||||
successful, the JIT code is run instead of the normal pcre_exec() code, but the
|
||||
result is the same.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
From PCRE 6.0, there is also a supplementary matching function called
|
||||
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
|
||||
simultaneously for all possible matches that start at one point in the subject
|
||||
string. (Going back to my roots: see Historical Note 1 above.) This function
|
||||
intreprets the same compiled pattern data as pcre_exec(); however, not all the
|
||||
facilities are available, and those that are do not always work in quite the
|
||||
same way. See the user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||
is handled entirely at compile time by generating different opcodes for the
|
||||
different settings. The runtime functions do not need to keep track of an
|
||||
options state any more.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||
variable length. The first unit in an item contains an opcode, and the length
|
||||
of the item is either implicit in the opcode or contained in the data that
|
||||
follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
||||
4-byte values for these offsets, although this impairs the performance. (3-byte
|
||||
LINK_SIZE values are available only in 8-bit mode.) Specifing a LINK_SIZE
|
||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||
than 64K are going to be processed. In this description, we assume the "normal"
|
||||
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||
and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
OP_CIRC ^ (start of data)
|
||||
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||
OP_NOT_WORD_BOUNDARY \W
|
||||
OP_WORD_BOUNDARY \w
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match.
|
||||
|
||||
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||
following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||
the character may be more than one unit long. In UTF-32 mode, characters
|
||||
are always exactly one unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?), when applied to a single character, use the
|
||||
following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Caseful Caseless
|
||||
OP_STAR OP_STARI
|
||||
OP_MINSTAR OP_MINSTARI
|
||||
OP_POSSTAR OP_POSSTARI
|
||||
OP_PLUS OP_PLUSI
|
||||
OP_MINPLUS OP_MINPLUSI
|
||||
OP_POSPLUS OP_POSPLUSI
|
||||
OP_QUERY OP_QUERYI
|
||||
OP_MINQUERY OP_MINQUERYI
|
||||
OP_POSQUERY OP_POSQUERYI
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||
Other repeats make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
OP_MINUPTO OP_MINUPTOI
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||
OPT_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type is stored in the data
|
||||
unit. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two units that encode the desired property as a type and a
|
||||
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||
PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||
mode, subject characters with values greater than 255 can be handled correctly.
|
||||
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||
are less than 256, followed by a list of pairs (for a range) and single
|
||||
characters. In caseless mode, both cases are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||
coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||
This means that if no other items in the class set bits in the map, a map is
|
||||
not needed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number if the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||
in the group name table of the first entry for the requred name, followed by
|
||||
the number of groups with the same name.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last three are single-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||
"parentheses".]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
|
||||
capturing brackets and it used a different opcode for each one. From release
|
||||
3.5, the limit was removed by putting the bracket number into the data for
|
||||
higher-numbered brackets. From release 7.0 all capturing brackets are handled
|
||||
this way, using the single opcode OP_CBRA.
|
||||
|
||||
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-unit opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid branch. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the regex.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCPBRPOS instead
|
||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group even after it is complete in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||
mode each character occupies exactly one unit. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by aLINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||
not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a count giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make is possible for pcretest to output useful tracing information
|
||||
using automatic callouts.
|
||||
|
||||
Philip Hazel
|
||||
November 2013
|
368
INSTALL
Normal file
368
INSTALL
Normal file
@ -0,0 +1,368 @@
|
||||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. This file is offered as-is,
|
||||
without warranty of any kind.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell command './configure && make && make install'
|
||||
should configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the 'README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
'INSTALL' file but do not implement all of the features documented
|
||||
below. The lack of an optional feature in a given package is not
|
||||
necessarily a bug. More recommendations for GNU packages can be found
|
||||
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||
|
||||
The 'configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a 'Makefile' in each directory of the package.
|
||||
It may also create one or more '.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script 'config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file 'config.log' containing compiler output (useful mainly for
|
||||
debugging 'configure').
|
||||
|
||||
It can also use an optional file (typically called 'config.cache' and
|
||||
enabled with '--cache-file=config.cache' or simply '-C') that saves the
|
||||
results of its tests to speed up reconfiguring. Caching is disabled by
|
||||
default to prevent problems with accidental use of stale cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how 'configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the 'README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point 'config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file 'configure.ac' (or 'configure.in') is used to create
|
||||
'configure' by a program called 'autoconf'. You need 'configure.ac' if
|
||||
you want to change it or regenerate 'configure' using a newer version of
|
||||
'autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. 'cd' to the directory containing the package's source code and type
|
||||
'./configure' to configure the package for your system.
|
||||
|
||||
Running 'configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type 'make' to compile the package.
|
||||
|
||||
3. Optionally, type 'make check' to run any self-tests that come with
|
||||
the package, generally using the just-built uninstalled binaries.
|
||||
|
||||
4. Type 'make install' to install the programs and any data files and
|
||||
documentation. When installing into a prefix owned by root, it is
|
||||
recommended that the package be configured and built as a regular
|
||||
user, and only the 'make install' phase executed with root
|
||||
privileges.
|
||||
|
||||
5. Optionally, type 'make installcheck' to repeat any self-tests, but
|
||||
this time using the binaries in their final installed location.
|
||||
This target does not install anything. Running this target as a
|
||||
regular user, particularly if the prior 'make install' required
|
||||
root privileges, verifies that the installation completed
|
||||
correctly.
|
||||
|
||||
6. You can remove the program binaries and object files from the
|
||||
source code directory by typing 'make clean'. To also remove the
|
||||
files that 'configure' created (so you can compile the package for
|
||||
a different kind of computer), type 'make distclean'. There is
|
||||
also a 'make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
7. Often, you can also type 'make uninstall' to remove the installed
|
||||
files again. In practice, not all packages have tested that
|
||||
uninstallation works correctly, even though it is required by the
|
||||
GNU Coding Standards.
|
||||
|
||||
8. Some packages, particularly those that use Automake, provide 'make
|
||||
distcheck', which can by used by developers to test that all other
|
||||
targets like 'make install' and 'make uninstall' work correctly.
|
||||
This target is generally not run by end users.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the 'configure' script does not know about. Run './configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give 'configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here is
|
||||
an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU 'make'. 'cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the 'configure' script. 'configure' automatically checks for the source
|
||||
code in the directory that 'configure' is in and in '..'. This is known
|
||||
as a "VPATH" build.
|
||||
|
||||
With a non-GNU 'make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use 'make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple '-arch' options to the
|
||||
compiler but only a single '-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases, you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the 'lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, 'make install' installs the package's commands under
|
||||
'/usr/local/bin', include files under '/usr/local/include', etc. You
|
||||
can specify an installation prefix other than '/usr/local' by giving
|
||||
'configure' the option '--prefix=PREFIX', where PREFIX must be an
|
||||
absolute file name.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option '--exec-prefix=PREFIX' to 'configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like '--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run 'configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them. In general, the default
|
||||
for these options is expressed in terms of '${prefix}', so that
|
||||
specifying just '--prefix' will affect all of the other directory
|
||||
specifications that were not explicitly provided.
|
||||
|
||||
The most portable way to affect installation locations is to pass the
|
||||
correct locations to 'configure'; however, many packages provide one or
|
||||
both of the following shortcuts of passing variable assignments to the
|
||||
'make install' command line to change installation locations without
|
||||
having to reconfigure or recompile.
|
||||
|
||||
The first method involves providing an override variable for each
|
||||
affected directory. For example, 'make install
|
||||
prefix=/alternate/directory' will choose an alternate location for all
|
||||
directory configuration variables that were expressed in terms of
|
||||
'${prefix}'. Any directories that were specified during 'configure',
|
||||
but not in terms of '${prefix}', must each be overridden at install time
|
||||
for the entire installation to be relocated. The approach of makefile
|
||||
variable overrides for each directory variable is required by the GNU
|
||||
Coding Standards, and ideally causes no recompilation. However, some
|
||||
platforms have known limitations with the semantics of shared libraries
|
||||
that end up requiring recompilation when using this method, particularly
|
||||
noticeable in packages that use GNU Libtool.
|
||||
|
||||
The second method involves providing the 'DESTDIR' variable. For
|
||||
example, 'make install DESTDIR=/alternate/directory' will prepend
|
||||
'/alternate/directory' before all installation names. The approach of
|
||||
'DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||
does not work on platforms that have drive letters. On the other hand,
|
||||
it does better at avoiding recompilation issues, and works well even
|
||||
when some directory options were not specified in terms of '${prefix}'
|
||||
at 'configure' time.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving 'configure' the
|
||||
option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'.
|
||||
|
||||
Some packages pay attention to '--enable-FEATURE' options to
|
||||
'configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to '--with-PACKAGE' options, where PACKAGE
|
||||
is something like 'gnu-as' or 'x' (for the X Window System). The
|
||||
'README' should mention any '--enable-' and '--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, 'configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the 'configure' options '--x-includes=DIR' and
|
||||
'--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Some packages offer the ability to configure how verbose the
|
||||
execution of 'make' will be. For these packages, running './configure
|
||||
--enable-silent-rules' sets the default to minimal output, which can be
|
||||
overridden with 'make V=1'; while running './configure
|
||||
--disable-silent-rules' sets the default to verbose, which can be
|
||||
overridden with 'make V=0'.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC
|
||||
is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
HP-UX 'make' updates targets which have the same time stamps as their
|
||||
prerequisites, which makes it generally unusable when shipped generated
|
||||
files such as 'configure' are involved. Use GNU 'make' instead.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its '<wchar.h>' header file. The option '-nodtk' can be used as a
|
||||
workaround. If GNU CC is not installed, it is therefore recommended to
|
||||
try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
On Solaris, don't put '/usr/ucb' early in your 'PATH'. This
|
||||
directory contains several dysfunctional programs; working variants of
|
||||
these programs are available in '/usr/bin'. So, if you need '/usr/ucb'
|
||||
in your 'PATH', put it _after_ '/usr/bin'.
|
||||
|
||||
On Haiku, software installed for all users goes in '/boot/common',
|
||||
not '/usr/local'. It is recommended to use the following options:
|
||||
|
||||
./configure --prefix=/boot/common
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features 'configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, 'configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
'--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as 'sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS
|
||||
KERNEL-OS
|
||||
|
||||
See the file 'config.sub' for the possible values of each field. If
|
||||
'config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option '--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with '--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for 'configure' scripts to share,
|
||||
you can create a site shell script called 'config.site' that gives
|
||||
default values for variables like 'CC', 'cache_file', and 'prefix'.
|
||||
'configure' looks for 'PREFIX/share/config.site' if it exists, then
|
||||
'PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
'CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all 'configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to 'configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the 'configure' command line, using 'VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified 'gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an
|
||||
Autoconf limitation. Until the limitation is lifted, you can use this
|
||||
workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
'configure' Invocation
|
||||
======================
|
||||
|
||||
'configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
'--help'
|
||||
'-h'
|
||||
Print a summary of all of the options to 'configure', and exit.
|
||||
|
||||
'--help=short'
|
||||
'--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
'configure', and exit. The 'short' variant lists options used only
|
||||
in the top level, while the 'recursive' variant lists options also
|
||||
present in any nested packages.
|
||||
|
||||
'--version'
|
||||
'-V'
|
||||
Print the version of Autoconf used to generate the 'configure'
|
||||
script, and exit.
|
||||
|
||||
'--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally 'config.cache'. FILE defaults to '/dev/null' to
|
||||
disable caching.
|
||||
|
||||
'--config-cache'
|
||||
'-C'
|
||||
Alias for '--cache-file=config.cache'.
|
||||
|
||||
'--quiet'
|
||||
'--silent'
|
||||
'-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to '/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
'--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
'configure' can determine that directory automatically.
|
||||
|
||||
'--prefix=DIR'
|
||||
Use DIR as the installation prefix. *note Installation Names:: for
|
||||
more details, including other options available for fine-tuning the
|
||||
installation locations.
|
||||
|
||||
'--no-create'
|
||||
'-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
'configure' also accepts some other, not widely useful, options. Run
|
||||
'configure --help' for more details.
|
93
LICENCE
Normal file
93
LICENCE
Normal file
@ -0,0 +1,93 @@
|
||||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself. The data
|
||||
in the testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions, and a
|
||||
just-in-time compiler that can be used to optimize pattern matching. These
|
||||
are both optional features that can be omitted when the library is built.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||
-------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2012, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
895
Makefile.am
Normal file
895
Makefile.am
Normal file
@ -0,0 +1,895 @@
|
||||
## Process this file with automake to produce Makefile.in.
|
||||
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
dist_doc_DATA = \
|
||||
doc/pcre.txt \
|
||||
doc/pcre-config.txt \
|
||||
doc/pcregrep.txt \
|
||||
doc/pcretest.txt \
|
||||
AUTHORS \
|
||||
COPYING \
|
||||
ChangeLog \
|
||||
LICENCE \
|
||||
NEWS \
|
||||
README
|
||||
|
||||
# Note that pcrecpp.html is not in this list; it is listed separately below.
|
||||
|
||||
dist_html_DATA = \
|
||||
doc/html/NON-AUTOTOOLS-BUILD.txt \
|
||||
doc/html/README.txt \
|
||||
doc/html/index.html \
|
||||
doc/html/pcre-config.html \
|
||||
doc/html/pcre.html \
|
||||
doc/html/pcre16.html \
|
||||
doc/html/pcre32.html \
|
||||
doc/html/pcre_assign_jit_stack.html \
|
||||
doc/html/pcre_compile.html \
|
||||
doc/html/pcre_compile2.html \
|
||||
doc/html/pcre_config.html \
|
||||
doc/html/pcre_copy_named_substring.html \
|
||||
doc/html/pcre_copy_substring.html \
|
||||
doc/html/pcre_dfa_exec.html \
|
||||
doc/html/pcre_exec.html \
|
||||
doc/html/pcre_free_study.html \
|
||||
doc/html/pcre_free_substring.html \
|
||||
doc/html/pcre_free_substring_list.html \
|
||||
doc/html/pcre_fullinfo.html \
|
||||
doc/html/pcre_get_named_substring.html \
|
||||
doc/html/pcre_get_stringnumber.html \
|
||||
doc/html/pcre_get_stringtable_entries.html \
|
||||
doc/html/pcre_get_substring.html \
|
||||
doc/html/pcre_get_substring_list.html \
|
||||
doc/html/pcre_jit_exec.html \
|
||||
doc/html/pcre_jit_stack_alloc.html \
|
||||
doc/html/pcre_jit_stack_free.html \
|
||||
doc/html/pcre_maketables.html \
|
||||
doc/html/pcre_pattern_to_host_byte_order.html \
|
||||
doc/html/pcre_refcount.html \
|
||||
doc/html/pcre_study.html \
|
||||
doc/html/pcre_utf16_to_host_byte_order.html \
|
||||
doc/html/pcre_utf32_to_host_byte_order.html \
|
||||
doc/html/pcre_version.html \
|
||||
doc/html/pcreapi.html \
|
||||
doc/html/pcrebuild.html \
|
||||
doc/html/pcrecallout.html \
|
||||
doc/html/pcrecompat.html \
|
||||
doc/html/pcredemo.html \
|
||||
doc/html/pcregrep.html \
|
||||
doc/html/pcrejit.html \
|
||||
doc/html/pcrelimits.html \
|
||||
doc/html/pcrematching.html \
|
||||
doc/html/pcrepartial.html \
|
||||
doc/html/pcrepattern.html \
|
||||
doc/html/pcreperform.html \
|
||||
doc/html/pcreposix.html \
|
||||
doc/html/pcreprecompile.html \
|
||||
doc/html/pcresample.html \
|
||||
doc/html/pcrestack.html \
|
||||
doc/html/pcresyntax.html \
|
||||
doc/html/pcretest.html \
|
||||
doc/html/pcreunicode.html
|
||||
|
||||
pcrecpp_html = doc/html/pcrecpp.html
|
||||
dist_noinst_DATA = $(pcrecpp_html)
|
||||
|
||||
if WITH_PCRE_CPP
|
||||
html_DATA = $(pcrecpp_html)
|
||||
endif
|
||||
|
||||
# The Libtool libraries to install. We'll add to this later.
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
# Unit tests you want to run when people type 'make check'.
|
||||
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
|
||||
TESTS =
|
||||
check_SCRIPTS =
|
||||
dist_noinst_SCRIPTS =
|
||||
|
||||
# Some of the binaries we make are to be installed, and others are
|
||||
# (non-user-visible) helper programs needed to build libpcre, libpcre16
|
||||
# or libpcre32.
|
||||
bin_PROGRAMS =
|
||||
noinst_PROGRAMS =
|
||||
|
||||
# Additional files to delete on 'make clean' and 'make maintainer-clean'.
|
||||
CLEANFILES =
|
||||
MAINTAINERCLEANFILES =
|
||||
|
||||
# Additional files to bundle with the distribution, over and above what
|
||||
# the Autotools include by default.
|
||||
EXTRA_DIST =
|
||||
|
||||
# These files contain additional m4 macros that are used by autoconf.
|
||||
EXTRA_DIST += \
|
||||
m4/ax_pthread.m4 m4/pcre_visibility.m4
|
||||
|
||||
# These files contain maintenance information
|
||||
EXTRA_DIST += \
|
||||
doc/perltest.txt \
|
||||
NON-UNIX-USE \
|
||||
NON-AUTOTOOLS-BUILD \
|
||||
HACKING
|
||||
|
||||
# These files are used in the preparation of a release
|
||||
EXTRA_DIST += \
|
||||
PrepareRelease \
|
||||
CheckMan \
|
||||
CleanTxt \
|
||||
Detrail \
|
||||
132html \
|
||||
doc/index.html.src
|
||||
|
||||
# These files are to do with building for Virtual Pascal
|
||||
EXTRA_DIST += \
|
||||
makevp.bat \
|
||||
makevp_c.txt \
|
||||
makevp_l.txt \
|
||||
pcregexp.pas
|
||||
|
||||
# These files are usable versions of pcre.h and config.h that are distributed
|
||||
# for the benefit of people who are building PCRE manually, without the
|
||||
# Autotools support.
|
||||
EXTRA_DIST += \
|
||||
pcre.h.generic \
|
||||
config.h.generic
|
||||
|
||||
# The only difference between pcre.h.in and pcre.h is the setting of the PCRE
|
||||
# version number. Therefore, we can create the generic version just by copying.
|
||||
pcre.h.generic: pcre.h.in configure.ac
|
||||
rm -f $@
|
||||
cp -p pcre.h $@
|
||||
|
||||
# It is more complicated for config.h.generic. We need the version that results
|
||||
# from a default configuration so as to get all the default values for PCRE
|
||||
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
|
||||
# doing a configure in a temporary directory. However, some trickery is needed,
|
||||
# because the source directory may already be configured. If you just try
|
||||
# running configure in a new directory, it complains. For this reason, we move
|
||||
# config.status out of the way while doing the default configuration. The
|
||||
# resulting config.h is munged by perl to put #ifdefs round any #defines for
|
||||
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
|
||||
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
|
||||
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
||||
config.h.generic: configure.ac
|
||||
rm -rf $@ _generic
|
||||
mkdir _generic
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
|
||||
cd _generic && $(abs_top_srcdir)/configure || :
|
||||
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
|
||||
test -f _generic/config.h
|
||||
perl -n \
|
||||
-e 'BEGIN{$$blank=0;}' \
|
||||
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||
-e 'if(/to make a symbol visible/){next;}' \
|
||||
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
|
||||
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
|
||||
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
|
||||
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
|
||||
_generic/config.h >$@
|
||||
rm -rf _generic
|
||||
|
||||
MAINTAINERCLEANFILES += pcre.h.generic config.h.generic
|
||||
|
||||
# These are the header files we'll install. We do not distribute pcre.h because
|
||||
# it is generated from pcre.h.in.
|
||||
nodist_include_HEADERS = \
|
||||
pcre.h
|
||||
include_HEADERS = \
|
||||
pcreposix.h
|
||||
|
||||
# These additional headers will be be installed if C++ support is enabled. We
|
||||
# do not distribute pcrecpparg.h or pcre_stringpiece.h, as these are generated
|
||||
# from corresponding .h.in files (which we do distribute).
|
||||
if WITH_PCRE_CPP
|
||||
nodist_include_HEADERS += \
|
||||
pcrecpparg.h \
|
||||
pcre_stringpiece.h
|
||||
include_HEADERS += \
|
||||
pcrecpp.h \
|
||||
pcre_scanner.h
|
||||
endif # WITH_PCRE_CPP
|
||||
|
||||
bin_SCRIPTS = pcre-config
|
||||
|
||||
## ---------------------------------------------------------------
|
||||
## The dftables program is used to rebuild character tables before compiling
|
||||