#!/usr/bin/perl
use CGI qw(:standard);
use DB_File;
use Compress::Zlib;
# Directory containing the per-project data files that the subs below open
# as "$datadir/$langProject.idx", ".rev" and ".seq".
$datadir = "data";
# NOTE(review): $lws is not referenced anywhere in the visible code.
$lws = 5;
# Copy every CGI parameter into package Q (CGI.pm's default namespace), so
# the request's lang, project and page arrive as $Q::lang, $Q::project and
# $Q::page.
import_names;
$langProject = $Q::lang . $Q::project;
# $langProject comes straight from the request and is interpolated into file
# names. Refuse path separators, NUL bytes and ".." so a request cannot
# point the script at files outside $datadir.
if ($langProject =~ m{[/\\\0]} || $langProject =~ /\.\./) {
    print header(-status => '400 Bad Request', -charset => 'utf-8');
    exit;
}
print header(-charset => 'utf-8');
# Without a page parameter only the HTTP header is sent.
if (defined $Q::page) {
    initDB();           # locate the page's record in the index
    loadRevInfo();      # parse its revisions
    loadHashes();       # load the sequence hashes
    printAuthorInfo();  # emit "var histAuInfo = ..."
    printHistInfo();    # emit "var histInfo = ..."
    # Trailing call in the emitted script; presumably rhic2() is defined by
    # the page that loads this output -- TODO confirm.
    print "rhic2();\n";
}
# Look up the requested page in the project's index file and publish the
# fields of its record as globals.
#
# Reads:  $Q::page (spaces are mapped to underscores to form the key),
#         $datadir, $langProject.
# Writes: $offset, $len, $cm, $posSeq, $nSeq -- the space-separated fields
#         of the index record, in that order.  All five are left undefined
#         when the page has no record or the index cannot be opened.
#
# A failed tie is reported on STDERR instead of being silently treated like
# an unknown page; the script still carries on and produces empty output.
sub initDB {
    my $page = $Q::page;
    $page =~ s/ /_/g;

    my $idxFile = "$datadir/$langProject.idx";
    my $record;
    my %index;
    if (tie %index, "DB_File", $idxFile, O_RDONLY) {
        $record = $index{$page};
        untie %index;
    }
    else {
        warn "initDB: cannot open index $idxFile: $!\n";
    }

    # An empty string splits to an empty list, leaving every field undef.
    ($offset, $len, $cm, $posSeq, $nSeq) =
        split / /, (defined $record ? $record : '');
}
# Stream this page's record out of the .rev file and collect per-revision
# data from it.
#
# Reads:  $datadir, $langProject, and the globals set by initDB:
#         $offset (start of the record), $len (its stored length) and
#         $cm (0 = stored as-is, 1 = raw deflate stream).
# Writes: %user{revId}      author, escaped for a single-quoted JS literal
#         %userNr{author}   number assigned in order of first appearance
#         %timestamp{revId} content of the revision's <timestamp> element
#         %length{revId}    length="..." attribute of the <text> element
#         $len              incremented by the byte length of each line read
#
# A forked child reads (and, for $cm==1, inflates) the record and writes it
# to a pipe; the parent parses the XML line by line.
#
# Fixes over the earlier version:
#  * a failed fork is detected -- previously the parent took the child
#    branch, sent the raw record to the client and exited;
#  * the author name is escaped into a copy, so a revision without a
#    <username>/<ip> line (which inherits the previous author) no longer
#    escapes the same name a second time and registers a bogus author;
#  * backslashes are escaped as well as single quotes, so a name cannot
#    terminate the JavaScript string literal early;
#  * the .rev file is opened with 3-arg open and the result is checked.
sub loadRevInfo {
    my $pid = open my $hist, '-|';
    if (!defined $pid) {
        warn "loadRevInfo: cannot fork reader: $!\n";
        return;
    }

    if ($pid == 0) {
        # Child: STDOUT is the pipe read by the parent.
        my $revFile = "$datadir/$langProject.rev";
        my $rev;
        if (!open $rev, '<', $revFile) {
            warn "loadRevInfo: cannot open $revFile: $!\n";
            exit 1;
        }
        binmode $rev;
        seek $rev, $offset, 0;
        my $buf;
        if ($cm == 0) {
            read $rev, $buf, $len;
            print $buf;
        }
        elsif ($cm == 1) {
            # Negative window bits select a raw deflate stream (no header).
            my ($inflater, $status) = inflateInit(-WindowBits => 0 - MAX_WBITS);
            for (my $bufsize = 16*1024; $len > 0; $len -= $bufsize) {
                read $rev, $buf, $len < $bufsize ? $len : $bufsize;
                my ($xml, $inflStatus) = $inflater->inflate($buf);
                print $xml;
            }
        }
        close $rev;
        exit;
    }

    # Parent: parse the XML produced by the child.
    my $userNr = 0;     # next author number to hand out
    my $revId  = -1;
    while (<$hist>) {
        $len += do { use bytes; length };
        if ($inRev) {
            if (/^\s*<\/revision/) {
                # Escape a copy: $user survives into the next revision when
                # that revision names no contributor.  Backslash and quote
                # are handled in one pass so neither is escaped twice.
                (my $jsUser = $user) =~ s/([\\'])/\\$1/g;
                $user{$revId} = $jsUser;
                $userNr{$jsUser} = $userNr++ unless defined $userNr{$jsUser};
                $timestamp{$revId} = $timestamp;
                $length{$revId} = $length;
                # $diffId,$comment,$md5 are not used
                $comment = "";
                $inRev = 0;
            } elsif (/^\s*<timestamp>(.*?)<\/timestamp>/) {
                $timestamp = $1;
            } elsif (/\s*<id>(\d+)<\/id>/) {
                # An <id> inside <contributor> is the user's id, not the
                # revision's, so it is ignored.
                if (!$inContributor) {
                    ($revId, $diffId) = ($1, $revId);
                }
            } elsif (/^\s*<comment>(.*?)<\/comment>/) {
                $comment = $1;
            } elsif (/^\s*<contributor>/) {
                $inContributor = 1;
            } elsif (/^\s*<\/contributor/) {
                $inContributor = 0;
            } elsif (/^\s*<(username|ip)>(.*?)<\/\1>/) {
                $user = $2;
            } elsif (/^\s*<text type="sectionlist" length="(\d+)" md5="(.*?)">(.*?)<\/text>/) {
                $length = $1;
                $md5 = $2;
            } elsif (/^\s*<text offset=\"(\d+?)\"(?: lengthGz=\"(\d+)\")? length=\"(\d+)\" md5=\"(.*?)\" \/>/) {
                $length = $3;
                $md5 = $4;
            }
        } elsif ($inSgr) {
            # Everything inside a <sectiongroup> is skipped.
            if (/^\s*<\/sectiongroup>/) {
                $inSgr = 0;
            }
        } else {
            if (/^\s*<revision/) {
                $inRev = 1;
            } elsif (/^\s*<sectiongroup offset="(\d+)" length="(\d+)">/) {
                $inSgr = 1;
            }
        }
    }
    close $hist;
}
# Load the page's sequence records from the .seq file.
#
# Reads:  $datadir, $langProject, $posSeq (byte offset of the first record)
#         and $nSeq (number of 12-byte records expected).
# Writes: for each record -- three big-endian 32-bit values (hash, first,
#         last) -- the hash formatted as 8 hex digits is appended to
#         @{ $hashesFirst{first} } and to @{ $hashesLast{last} }.
#
# The file is opened with a checked 3-arg open, and only records that were
# actually read in full are decoded.  Previously a failed open or a short
# read produced empty records, which were stored as "00000000" under the
# key ''.
sub loadHashes {
    my $seqFile = "$datadir/$langProject.seq";
    my $seq;
    if (!open $seq, '<', $seqFile) {
        warn "loadHashes: cannot open $seqFile: $!\n";
        return;
    }
    binmode $seq;    # records are binary; keep $seqs a plain byte string

    my $seqs = '';
    seek $seq, $posSeq, 0;
    my $got = read $seq, $seqs, 12 * ($nSeq || 0);
    close $seq;
    $got = 0 unless defined $got;    # read error: treat as nothing read

    for (my $i = 0; 12 * ($i + 1) <= $got; $i++) {
        my ($hash, $first, $last) = unpack "N3", substr($seqs, 12 * $i, 12);
        my $hex = sprintf "%08x", $hash;
        push @{ $hashesFirst{$first} }, $hex;
        push @{ $hashesLast{$last} },  $hex;
    }
}
# Emit the JavaScript array "histAuInfo": every key of %userNr as a
# single-quoted string, ordered by ascending author number.  An entry is
# preceded by ", " whenever its number is greater than zero.
sub printAuthorInfo {
    my @byNumber = sort { $userNr{$a} <=> $userNr{$b} } keys %userNr;
    my $items = join "",
        map { ($userNr{$_} > 0 ? ", " : "") . "'$_'" } @byNumber;
    print "var histAuInfo = new Array(\n" . $items . ");\n";
}
# Emit the JavaScript array "histInfo".
#
# Revisions (the keys of %user) are written in ascending numeric order, one
# line each: revision id, timestamp, author number, and the concatenation
# of the hashes in @{ $hashesFirst{revId} }.
#
# The array constructor in Firefox accepts at most 2^16 arguments, so the
# revisions are wrapped in nested arrays of up to 1000 revisions each.
sub printHistInfo {
    my $perGroup = 1000;
    my @revIds = sort { $a <=> $b } keys %user;

    print "var histInfo = new Array(\n";
    for (my $from = 0; $from < @revIds; $from += $perGroup) {
        my $to = $from + $perGroup - 1;
        $to = $#revIds if $to > $#revIds;

        my @rows = map {
            sprintf "%d,'%s',%d,'%s'\n",
                $_, $timestamp{$_}, $userNr{ $user{$_} },
                join("", @{ $hashesFirst{$_} || [] })
        } @revIds[$from .. $to];

        print ", " if $from > 0;    # separator between inner arrays
        print "new Array(\n";
        print join(",", @rows);
        print ")\n";
    }
    print ");\n";
}