#!/usr/bin/perl
#
# Print a time series of the number of distinct "quotes" found in the
# main-namespace pages of the `cytaty` database.
#
# Every row of the `cur` and `old` tables is loaded as a revision event,
# the events are replayed in timestamp order, and after each main-namespace
# revision one line is printed:
#
#     <epoch seconds> <number of distinct quotes currently present>
#
# A "quote" is the remainder of any line that starts with `*` followed by
# one character that is not `*` (see extract_quotes below).

use strict;
use warnings;

use DBI;
use Compress::Zlib;
use Time::Local;

# How many times each quote text currently occurs across all replayed pages.
my %quote_mult;

# Number of distinct quotes currently present (entries of %quote_mult > 0).
my $quote_cnt = 0;

# DBI reports failures through errstr, not through $@.
my $dbh = DBI->connect("dbi:mysql:dbname=cytaty", "USERNAME", "PASSWORD")
    or die "Can't open database: $DBI::errstr";

# Each event is [timestamp, namespace, title, text].
my @events;
push @events, fetch_revisions(
    $dbh,
    "SELECT cur_timestamp,cur_namespace,cur_title,cur_text FROM cur",
);
push @events, fetch_revisions(
    $dbh,
    "SELECT old_timestamp,old_namespace,old_title,old_text,old_flags FROM old",
);

$dbh->disconnect;

# Replay revisions oldest first; timestamps are compared numerically.
@events = sort { $a->[0] <=> $b->[0] } @events;

my %db;             # title => text of the latest revision replayed so far
my $sum_len = 0;    # total length of the current texts (tracked, not printed)

for my $event (@events) {
    my ($ts, $ns, $title, $text) = @$event;
    next unless $ns == 0;    # main namespace only

    my $epoch = timestamp_to_epoch($ts);

    # Retire the quotes of the revision this one replaces, if any.
    my $old = $db{$title};
    if (defined $old) {
        $sum_len -= length $old;
        unregister_quote($_) for extract_quotes($old);
    }

    $sum_len += length $text;
    register_quote($_) for extract_quotes($text);
    $db{$title} = $text;

    print "$epoch $quote_cnt\n";
}

# fetch_revisions($dbh, $sql)
#
# Run $sql and return one [timestamp, namespace, title, text] array ref per
# row. The query must select those four columns in that order; an optional
# fifth column is treated as a flags string, and when it contains "gzip"
# the text is inflated before being returned. Dies on any DBI error.
sub fetch_revisions {
    my ($handle, $sql) = @_;

    my $sth = $handle->prepare($sql)
        or die "Can't prepare query: " . $handle->errstr;
    $sth->execute
        or die "Can't execute query: " . $sth->errstr;

    my @revisions;
    while (my @row = $sth->fetchrow_array) {
        my ($ts, $ns, $title, $text, $flags) = @row;

        if (!defined $text) {
            # NULL text: treat as an empty page rather than warn later.
            $text = '';
        }
        elsif (defined $flags and $flags =~ /gzip/) {
            $text = inflate_text($text);
        }

        push @revisions, [$ts, $ns, $title, $text];
    }

    # fetchrow_array returns an empty list both at end of data and on
    # error, so the error state has to be checked explicitly.
    die "Fetch failed: " . $sth->errstr if $sth->err;

    return @revisions;
}

# inflate_text($compressed)
#
# Return the inflated form of $compressed. The negative WindowBits value
# makes zlib expect a raw deflate stream without a zlib header.
# Dies if the stream cannot be inflated.
sub inflate_text {
    my ($compressed) = @_;

    my ($inflater, $init_status) = inflateInit(-WindowBits => 0 - MAX_WBITS);
    die "inflateInit failed" unless defined $inflater;

    # inflate() consumes its argument in place; $compressed is a local copy.
    my ($out, $status) = $inflater->inflate($compressed);
    die "inflate failed $status"
        if $status != Z_OK and $status != Z_STREAM_END;

    return $out;
}

# timestamp_to_epoch($ts)
#
# Convert a 14-digit YYYYMMDDhhmmss timestamp to epoch seconds.
# Dies if $ts does not have that shape.
#
# NOTE(review): timelocal interprets the fields in the local time zone of
# the machine running the script. If the stored timestamps are UTC, timegm
# would be the right call -- confirm before changing, since it shifts
# every printed value.
sub timestamp_to_epoch {
    my ($ts) = @_;

    my ($year, $mon, $mday, $hour, $min, $sec) =
        $ts =~ /^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/
        or die "Bad timestamp: $ts";

    return timelocal($sec, $min, $hour, $mday, $mon - 1, $year);
}

# extract_quotes($text)
#
# Return the list of quotes in $text: for every line that starts with `*`
# followed by one character other than `*`, the rest of that line.
sub extract_quotes {
    my ($text) = @_;
    return $text =~ m/^\*[^*](.*)/mg;
}

# unregister_quote($quote)
#
# Record that one occurrence of $quote has disappeared. When the last
# occurrence goes away the distinct-quote counter is decremented and the
# entry is dropped so the hash does not keep dead keys.
sub unregister_quote {
    my $q = shift;

    # Never drive a multiplicity negative; that would corrupt $quote_cnt
    # on a later register_quote for the same text.
    return unless $quote_mult{$q};

    $quote_mult{$q}--;
    unless ($quote_mult{$q}) {
        delete $quote_mult{$q};
        $quote_cnt--;
    }
    return;
}

# register_quote($quote)
#
# Record one more occurrence of $quote, incrementing the distinct-quote
# counter when this is its first occurrence.
sub register_quote {
    my $q = shift;

    $quote_cnt++ unless $quote_mult{$q};
    $quote_mult{$q}++;
    return;
}