blob: 565753182879dee9882b9dfae3c4c661dc370ca0 [file] [log] [blame]
#! /usr/bin/perl -w
#
# Static Hashtable Generator
#
# (c) 2000-2002 by Harri Porten <porten@kde.org> and
# David Faure <faure@kde.org>
# Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
#
# Part of the KJS library.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
use strict;
# Command line:
#   <script> <table-file> [-s | -i | -n <namespace>]
#
#   -s              try many hash sizes and report statistics for each
#   -i              emit an #include of "lookup.h" in the generated code
#   -n <namespace>  wrap the generated code in <namespace> instead of KJS
#
# Only one option is recognised, and it must directly follow the file name.
my $file = shift(@ARGV);
die "Usage: $0 <table-file> [-s | -i | -n <namespace>]\n" unless (defined($file));

my $option = defined($ARGV[0]) ? $ARGV[0] : "";
my $findSize = ($option eq "-s") ? 1 : 0;
my $includelookup = ($option eq "-i") ? 1 : 0;

# Declared unconditionally and assigned separately: "my $x = ... if COND"
# has undefined behaviour in Perl.  Stays undef when -n is absent or has
# no value after it.
my $useNameSpace;
$useNameSpace = $ARGV[1] if ($option eq "-n");

print STDERR "Creating hashtable for $file\n";

# Three-argument open so the file name can never be read as a mode or a
# pipe; the handle name IN is what the parsing loop reads from.
open(IN, '<', $file) or die "Cannot open $file: $!";

# State of the table currently being parsed.  The arrays @keys, @values,
# @attrs, @params and @hashes are parallel (one element per entry).
my @keys = ();
my @values = ();
my @attrs = ();
my @params = ();
my @hashes = ();
# Slot layout computed for the current table.
my @table = ();
my @links = ();
my $inside = 0;     # true between @begin and @end
my $name;           # table name taken from @begin
my $size;           # total number of slots, overflow slots included
my $hashsize;       # number of primary slots
my $banner = 0;     # true once the "Automatically generated" line is out

# Forward declarations: these subs are called before their definitions.
sub calcTable();
sub output();
sub hashValue($);
# Parse the table description line by line.
#
# Recognised lines (leading whitespace is ignored):
#   # ...                          comment; skipped, as are empty lines
#   @begin <name> <hashsize>       opens a table
#   <key> <value> [attrs] [nargs]  one entry of the table that is open
#   @end                           closes the table: lay it out and print it
#
# Any other line inside a table is a fatal error.
while (<IN>) {
    # chomp, not chop: a final line without a trailing newline must keep
    # its last character (otherwise a closing "@end" turns into "@en").
    chomp;
    s/^\s*//g;
    if (/^\#|^$/) {
        # comment or blank line: nothing to do
    } elsif (/^\@begin/ && !$inside) {
        if (/^\@begin\s*([:_\w]+)\s*(\d+)\s*$/) {
            $inside = 1;
            $name = $1;
            $hashsize = $2;
        } else {
            # print, not printf: the line is data and may contain '%'.
            print STDERR "WARNING: \@begin without table name and hashsize, skipping $_\n";
        }
    } elsif (/^\@end\s*$/ && $inside) {
        if ($findSize) {
            my $entriesnum = @keys;
            print STDERR "Table: $name $entriesnum entries\n";
            # Report statistics for every candidate size; the table that is
            # printed afterwards uses the last size tried.
            for (my $i = 3; $i < 79; ++$i) {
                $hashsize = $i;
                calcTable();
            }
        } else {
            calcTable();
        }
        output();

        # Forget everything about this table before the next @begin.
        @keys = ();
        @values = ();
        @attrs = ();
        @params = ();
        @table = ();
        @links = ();
        @hashes = ();
        $inside = 0;
    } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) {
        my ($key, $val, $att, $param) = ($1, $2, $3, $4);

        push(@keys, $key);
        push(@values, $val);
        push(@hashes, hashValue($key));

        # print, not printf: key and value are data and may contain '%'.
        print STDERR "WARNING: Number of arguments missing for $key/$val\n"
            if ($att =~ m/Function/ && length($param) == 0);

        # Missing attribute / parameter columns default to "0".
        push(@attrs, length($att) > 0 ? $att : "0");
        push(@params, length($param) > 0 ? $param : "0");
    } elsif ($inside) {
        die "invalid data {" . $_ . "}";
    }
}
close(IN);

die "missing closing \@end" if ($inside);
# Lay out the entries of the current table.
#
# Reads : @keys, $hashsize, $name (and $findSize for the statistics line).
# Writes: @table - slot index -> index into @keys, undef for an empty slot
#         @links - slot index -> index of the next slot in its chain
#         $size  - total slot count: $hashsize primary slots plus one
#                  overflow slot per collision
#
# A key belongs in slot hashValue(key) % $hashsize.  If that slot is taken
# its chain is followed and a fresh overflow slot is appended at the end.
sub calcTable() {
    # Always start from an empty layout: with -s this sub runs many times
    # in a row for the same keys, and slots left over from the previous
    # size would otherwise be counted as collisions.
    @table = ();
    @links = ();

    die "hash size must be greater than 0 for non-empty table $name"
        if (@keys && $hashsize == 0);

    $size = $hashsize;
    my $collisions = 0;
    my $maxdepth = 0;

    for my $i (0 .. $#keys) {
        my $depth = 0;
        my $h = hashValue($keys[$i]) % $hashsize;
        while (defined($table[$h])) {
            if (defined($links[$h])) {
                # Slot taken and already chained: move along the chain.
                $h = $links[$h];
                $depth++;
            } else {
                # End of the chain: append a new overflow slot.
                $collisions++;
                $links[$h] = $size;
                $h = $size;
                $size++;
            }
        }
        #print STDERR "table[$h] = $i\n";
        $table[$h] = $i;
        $maxdepth = $depth if ($depth > $maxdepth);
    }

    # Ensure table is big enough (in case of undef entries at the end)
    $#table = $size - 1 if ($#table + 1 < $size);

    #print STDERR "After loop: size=$size table=".($#table+1)."\n";
    if ($findSize) {
        my $emptycount = grep { !defined($_) } @table;
        print STDERR "Hashsize: $hashsize Total Size: $size Empty: $emptycount MaxDepth: $maxdepth Collisions: $collisions\n";
    }

    # my $debugtable = 0;
    # foreach my $entry (@table) {
    #     print STDERR "$debugtable " . (defined $entry ? $entry : '<undefined>');
    #     print STDERR " -> " . $links[$debugtable] if (defined($links[$debugtable]));
    #     print STDERR "\n";
    #     $debugtable++;
    # }
}
# Shift $value left by $distance bits and keep only the low 32 bits of
# the result.
sub leftShift($$) {
    my $value = shift;
    my $distance = shift;

    my $shifted = $value << $distance;
    return $shifted & 0xFFFFFFFF;
}
# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
#
# Returns the 32-bit hash of the string passed as the only argument.
#
# The algorithm consumes 16-bit chunks two at a time; here every character
# of the string stands in for one chunk (its ord() value).
#
# The algorithm is defined on 32-bit unsigned arithmetic, so every sum is
# reduced modulo 2**32 straight away.  Without that, on a perl with wider
# integers a carry into bit 32 would leak into the following shift or xor
# and change the result.
#
# The result is never 0: a computed 0 is replaced by 0x80000000.
sub hashValue($) {
    # One element per character.  The empty pattern keeps every character,
    # spaces included.
    my @chars = split(//, $_[0]);

    # Sums are reduced with '%' rather than '&'.
    # NOTE(review): presumably this also keeps perls with 32-bit integers
    # correct, where an oversized sum is held as a float -- confirm.
    my $EXP2_32 = 4294967296;

    my $hash = 0x9e3779b9;
    my $pairs = scalar(@chars) >> 1;    # number of complete two-chunk rounds
    my $rem = scalar(@chars) & 1;       # 1 when one chunk is left over
    my $s = 0;                          # index of the next unread chunk

    # Main loop
    for (; $pairs > 0; $pairs--) {
        $hash = ($hash + ord($chars[$s])) % $EXP2_32;
        my $tmp = leftShift(ord($chars[$s + 1]), 11) ^ $hash;
        $hash = leftShift($hash, 16) ^ $tmp;
        $s += 2;
        $hash = ($hash + ($hash >> 11)) % $EXP2_32;
    }

    # Handle end case
    if ($rem != 0) {
        $hash = ($hash + ord($chars[$s])) % $EXP2_32;
        $hash ^= leftShift($hash, 11);
        $hash = ($hash + ($hash >> 17)) % $EXP2_32;
    }

    # Force "avalanching" of final 127 bits
    $hash ^= leftShift($hash, 3);
    $hash = ($hash + ($hash >> 5)) % $EXP2_32;
    $hash ^= leftShift($hash, 2);
    $hash = ($hash + ($hash >> 15)) % $EXP2_32;
    $hash ^= leftShift($hash, 10);

    # this avoids ever returning a hash code of 0, since that is used to
    # signal "hash not computed yet", using a value that is likely to be
    # effectively the same as 0 when the low bits are masked
    $hash = 0x80000000 if ($hash == 0);

    return $hash;
}
# Print the C++ source for the current table on standard output.
#
# Reads : $file, $name, $includelookup, $useNameSpace, @table, @links,
#         @keys, @values, @attrs, @params, @hashes, $size, $hashsize.
# Writes: $banner (set once the banner line is printed); $hashsize and
#         $size are both forced to 1 when the table was declared with
#         size 0.
#
# Emitted, in order: the one-off banner, an optional #include, the
# namespace opening, the HashEntry array, the HashTable definition and the
# namespace closing.
sub output() {
    if (!$banner) {
        $banner = 1;
        print "/* Automatically generated from $file using $0. DO NOT EDIT ! */\n";
    }

    # The entries array is named after the table; ':' is replaced by '_'
    # in that name.
    my $nameEntries = "${name}Entries";
    $nameEntries =~ s/:/_/g;

    print "\n#include \"lookup.h\"\n" if ($includelookup);
    if ($useNameSpace) {
        print "\nnamespace ${useNameSpace}\n{\n";
        print "\nusing namespace KJS;";
    } else {
        print "\nnamespace KJS {\n";
    }

    print "\nstatic const struct HashEntry ".$nameEntries."[] = {\n";
    #print STDERR "writing out table with ".($#table+1)." entries\n";

    # Numeric comparison: the size comes from the input as a digit string,
    # and spellings such as "00" must be treated as zero too.
    if ($hashsize == 0) {
        # To make the hash table lookup code fast, we don't allow tables of size 0.
        # That way it can do a modulo by the size without a special case to avoid division by 0.
        print " \{ 0, 0, 0, 0, 0 \}\n";
        $hashsize = 1;
        $size = 1;
    } else {
        for my $i (0 .. $#table) {
            my $entry = $table[$i];
            my $line;
            if (defined($entry)) {
                # Last field: address of the next entry in the collision
                # chain, or 0 at the end of the chain.
                my $next = defined($links[$i])
                    ? "&" . $nameEntries . "[" . $links[$i] . "]"
                    : "0";
                $line = " \{ \"" . $keys[$entry] . "\""
                    . ", " . $values[$entry]
                    . ", " . $attrs[$entry]
                    . ", " . $params[$entry]
                    . ", " . $next . " \}"
                    . "/* " . $hashes[$entry] . " */ ";
            } else {
                $line = " \{ 0, 0, 0, 0, 0 \}";
            }
            # Separator after every slot except the last one.
            $line .= "," unless ($i == $size - 1);
            print $line . "\n";
        }
    }
    print "};\n\n";

    print "const struct HashTable $name = ";
    print "\{ 2, $size, ".$nameEntries.", $hashsize \};\n\n";
    print "} // namespace\n";
}