| #!/usr/bin/perl -w |
| # |
| # This is JavaScriptCore's variant of the PCRE library. While this library |
| # started out as a copy of PCRE, many of the features of PCRE have been |
| # removed. This library now supports only the regular expression features |
| # required by the JavaScript language specification, and has only the functions |
| # needed by JavaScriptCore and the rest of WebKit. |
| # |
| # Originally written by Philip Hazel |
| # Copyright (c) 1997-2006 University of Cambridge |
| # Copyright (C) 2002, 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. |
| # |
| # ----------------------------------------------------------------------------- |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are met: |
| # |
| # * Redistributions of source code must retain the above copyright notice, |
| # this list of conditions and the following disclaimer. |
| # |
| # * Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # |
| # * Neither the name of the University of Cambridge nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| # POSSIBILITY OF SUCH DAMAGE. |
| # ----------------------------------------------------------------------------- |
| |
| # This is a freestanding support program to generate a file containing |
| # character tables. The tables are built according to the default C |
| # locale. |
| |
| use strict; |
| |
| use File::Basename; |
| use File::Spec; |
| use File::Temp; |
| |
# Forward declaration: readHeaderValues is defined at the bottom of the file
# but is called, without arguments, before that point.
sub readHeaderValues();

# Table layout constants (offsets, lengths and bit masks), keyed by the macro
# name used in pcre_internal.h. Filled in by readHeaderValues().
my %pcre_internal;

# Exactly one argument is expected: the path of the file to generate.
if (scalar(@ARGV) != 1) {
    print STDERR "Usage: ", basename($0), " output-file\n";
    exit 1;
}

my $outputFile = shift @ARGV;

# Read the header values before opening (and thereby truncating) the output
# file, so a failure in this step leaves any existing output untouched.
readHeaderValues();

# Name the file in the diagnostic; a bare "$!" does not say what failed.
open(OUT, ">", $outputFile) or die "Could not open $outputFile for writing: $!";
binmode(OUT);
| |
# Banner comment at the top of the generated C file, followed by the opening
# of the table array and the heading of its first section.
print OUT
    "/*************************************************\n",
    "* Perl-Compatible Regular Expressions *\n",
    "*************************************************/\n\n",
    "/* This file is automatically written by the dftables auxiliary \n",
    "program. If you edit it by hand, you might like to edit the Makefile to \n",
    "prevent its ever being regenerated.\n\n",
    "This file contains the default tables for characters with codes less than\n",
    "128 (ASCII characters). These tables are used when no external tables are\n",
    "passed to PCRE. */\n\n";
printf(OUT "const unsigned char kjs_pcre_default_tables[%d] = {\n\n",
    $pcre_internal{tables_length});
print OUT "/* This table is a lower casing table. */\n\n";

# The lower casing table is written first, so its offset must be zero.
die "lcc_offset != 0" if $pcre_internal{lcc_offset} != 0;

{
    # Sixteen rows of eight entries each: the lower-case form of every
    # character code from 0 to 127, written as two hex digits.
    my @rows;
    for my $rowStart (map { $_ * 8 } 0 .. 15) {
        my @cells = map { sprintf("0x%02X", ord(lc(chr($_)))) }
            $rowStart .. $rowStart + 7;
        push @rows, join(", ", @cells);
    }

    # Rows are separated by ", " plus a line break; the final entry is
    # followed by the comma that separates this table from the next one.
    print OUT " ", join(", \n ", @rows), ",\n\n";
}
| |
print OUT "/* This table is a case flipping table. */\n\n";

# The case flipping table is written right after the 128-entry lower casing
# table, so its offset must be 128.
die "fcc_offset != 128" if $pcre_internal{fcc_offset} != 128;

{
    # Code of the opposite-case form of a character: lower-case letters map
    # to upper case, everything else goes through lc() (which leaves
    # characters that are not upper-case letters unchanged).
    my $flippedCode = sub {
        my ($ch) = @_;
        return ord($ch =~ /[[:lower:]]/ ? uc($ch) : lc($ch));
    };

    # Sixteen rows of eight entries covering character codes 0 to 127.
    my @rows;
    for my $rowStart (map { $_ * 8 } 0 .. 15) {
        my @cells = map { sprintf("0x%02X", $flippedCode->(chr($_))) }
            $rowStart .. $rowStart + 7;
        push @rows, join(", ", @cells);
    }

    print OUT " ", join(", \n ", @rows), ",\n\n";
}
| |
print OUT
    "/* This table contains bit maps for various character classes.\n",
    "Each map is 32 bytes long and the bits run from the least\n",
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# The bit maps follow the 128-entry case flipping table.
die "cbits_offset != fcc_offset + 128"
    if $pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128;

# One byte per eight character codes for each class map, all initially clear.
my @cbit_table = (0) x $pcre_internal{cbit_length};

{
    # Set the bit for character code $code in the map starting at $mapOffset:
    # the byte is selected by code / 8 and the bit within it by code % 8.
    my $mark = sub {
        my ($mapOffset, $code) = @_;
        $cbit_table[$mapOffset + ($code >> 3)] |= 1 << ($code & 7);
    };

    # Digit map: '0' through '9'.
    $mark->($pcre_internal{cbit_digit}, $_) for ord('0') .. ord('9');

    # Word map: underscore plus every alphanumeric; space map: whitespace.
    $mark->($pcre_internal{cbit_word}, ord('_'));
    for my $code (0 .. 127) {
        my $char = chr($code);
        $mark->($pcre_internal{cbit_word}, $code)  if $char =~ /[[:alnum:]]/;
        $mark->($pcre_internal{cbit_space}, $code) if $char =~ /[[:space:]]/;
    }

    # Eight bytes per output line, with an extra blank line after every 32
    # bytes to separate one class map from the next.
    my $lastIndex = $pcre_internal{cbit_length} - 1;
    my $text = " ";
    for my $index (0 .. $lastIndex) {
        if ($index != 0 && ($index & 7) == 0) {
            $text .= "\n" if ($index & 31) == 0;
            $text .= "\n ";
        }
        $text .= sprintf("0x%02X", $cbit_table[$index]);
        $text .= ", " unless $index == $lastIndex;
    }
    print OUT $text, ",\n\n";
}
| |
printf(OUT
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    @pcre_internal{qw(ctype_space ctype_xdigit ctype_word)});

# The ctypes table follows directly after the class bit maps.
die "ctypes_offset != cbits_offset + cbit_length"
    if $pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length};

{
    # Each character class paired with the flag value added for its members.
    my @classFlags = (
        [ qr/[[:space:]]/,  $pcre_internal{ctype_space}  ],
        [ qr/[[:xdigit:]]/, $pcre_internal{ctype_xdigit} ],
        [ qr/[[:alnum:]_]/, $pcre_internal{ctype_word}   ],
    );

    # Label for one end of a row's trailing range comment: the character
    # itself when it is printable, otherwise its decimal code.
    my $label = sub {
        my ($code, $printableFormat, $numericFormat) = @_;
        my $format = chr($code) =~ /[[:print:]]/ ? $printableFormat : $numericFormat;
        return sprintf($format, $code);
    };

    print OUT " ";
    for my $code (0 .. 127) {
        my $char = chr($code);

        my $flags = 0;
        for my $class (@classFlags) {
            my ($pattern, $value) = @$class;
            $flags += $value if $char =~ $pattern;
        }

        # The very last entry closes the array instead of adding a comma.
        my $isLast = ($code == 127);
        printf(OUT "0x%02X%s", $flags, $isLast ? "};" : ", ");

        # After every eighth entry, append a comment giving the range of
        # characters on that row and start the next row.
        next unless ($code & 7) == 7;

        print OUT
            " /* ",
            $label->($code - 7, " %c -", "%3d-"),
            $label->($code, " %c ", "%3d"),
            " */\n";
        print OUT " " unless $isLast;
    }
}

# The ctypes table has 128 entries and is the last table in the array.
die "tables_length != ctypes_offset + 128"
    if $pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128;
| |
print OUT "\n\n/* End of chartables.c */\n";

# Buffered write errors (for example a full disk) are only reported when the
# handle is closed, so check the result rather than exiting successfully with
# a truncated file.
close(OUT) or die "Error closing output file: $!";

exit 0;
| |
# Populate %pcre_internal with the table layout constants defined as macros
# in pcre_internal.h, which is looked up in the same directory as this script.
#
# The header is copied into a temporary file, preceded by "#define DFTABLES"
# (presumably so that the header exposes only the table constants; confirm
# against pcre_internal.h) and followed by one line of Perl per constant:
#
#     $pcre_internal{"name"} = name;
#
# The temporary file is run through the C preprocessor, which replaces each
# bare macro name with its definition, and the preprocessed text is then
# evaluated as Perl code to perform the assignments.
#
# Takes no arguments and returns nothing. Dies if the header cannot be read,
# the temporary file cannot be written, cpp cannot be run or fails, or the
# preprocessed text does not evaluate cleanly.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # Slurp the header first so a missing header is reported before any
    # temporary file is created.
    open(my $header, "<", $headerPath) or die "Could not open $headerPath: $!";
    my $headerText = do { local $/ = undef; <$header> };
    close($header);
    $headerText = "" unless defined $headerText;

    # UNLINK => 1 removes the temporary file when $fh goes out of scope, on
    # both the success and the failure paths, so nothing is left behind.
    my $fh = File::Temp->new(
        TEMPLATE => basename($0) . "-XXXXXXXX",
        DIR      => File::Spec->tmpdir(),
        SUFFIX   => ".in",
        UNLINK   => 1,
    );
    my $tempFile = $fh->filename();

    print $fh "#define DFTABLES\n\n", $headerText, "\n\n";

    for my $v (@variables) {
        print $fh "\$pcre_internal{\"$v\"} = $v;\n";
    }

    # Closing flushes the data to disk for cpp to read; the File::Temp object
    # itself stays alive until the end of this sub.
    close($fh) or die "Could not write $tempFile: $!";

    # List-form pipe open runs cpp directly, without a shell, so unusual
    # characters in the temporary path cannot break or alter the command.
    open(my $cpp, "-|", "cpp", $tempFile) or die "Could not run cpp: $!";
    my $content = do { local $/ = undef; <$cpp> };
    close($cpp)
        or die "cpp failed on $tempFile" . ($! ? ": $!" : " (exit status " . ($? >> 8) . ")");

    die "cpp produced no output for $headerPath"
        unless defined $content && length $content;

    # NOTE(review): string eval of preprocessor output. The input is this
    # project's own header plus the assignments written above, so it is
    # treated as trusted build input; do not point this at untrusted files.
    eval $content;
    die "Could not evaluate preprocessed $headerPath: $@" if $@;

    return;
}