#!/usr/bin/perl -w
# Convert Sources.gz files into Sleepycat db files for efficient usage of
# data
#
# Copyright (C) 2006  Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

use strict;
use lib './lib';

# Unbuffer the currently selected output handle so the progress messages
# show up as soon as they are printed.
$| = 1;

# Component part of the Sources.gz path: the glob "*" by default, or just
# "non-free" when the first command line argument is true.
my $what = "*";
$what = "non-free" if $ARGV[0];

# max. distinct results for a given package postfix
my $MAX_SOURCE_POSTFIXES = 100;

use DB_File;
use File::Path;
use Packages::Config qw( $TOPDIR $DBDIR @ARCHIVES @SUITES );
&Packages::Config::init( './' );

# In-memory accumulators, filled while reading the Sources files:
#   %sources_small    - package name => NUL-terminated
#                       "archive suite section subsection priority version"
#                       records, concatenated
#   %source_names     - set of all source package names seen
#   %source_postfixes - package name suffix => NUL-terminated prefixes
my (%sources_small, %source_names, %source_postfixes);

# Paragraph mode: every read returns one blank-line separated stanza.
$/ = "";

# Make sure the database directory exists before anything is written to it.
mkpath( $DBDIR ) unless -d $DBDIR;

# Read the Sources.gz of every archive/suite combination.  For each source
# package stanza this
#   - records the package name in %source_names (and in a per-suite set),
#   - appends a short summary record to %sources_small,
#   - stores all remaining fields, NUL-separated, in the per-suite
#     sources_all_<suite>.db.new database,
# and finally appends the sorted per-suite package names to
# source_names_<suite>.txt.new.
#
# Relies on the input record separator being in paragraph mode ($/ = ""),
# so that each read yields one complete stanza.
for my $archive (@ARCHIVES) {
    for my $suite (@SUITES) {

        print "Reading $archive/$suite...\n";
        my %source_names_suite = ();
        my %sources_all_db;
        tie %sources_all_db, "DB_File", "$DBDIR/sources_all_$suite.db.new",
            O_RDWR|O_CREAT, 0666, $DB_BTREE
            or die "Error creating DB: $!";
        if (!-d "$TOPDIR/archive/$archive/$suite/") {
            print "\tseems not to exist, skipping...\n";
            untie %sources_all_db;
            next;
        }

        # The command is run through the shell on purpose: $what may be
        # the glob "*", which the shell has to expand.
        my $sources_cmd =
            "zcat $TOPDIR/archive/$archive/$suite/$what/source/Sources.gz";
        open my $pkg_fh, '-|', $sources_cmd
            or die "Error running '$sources_cmd': $!";

        while (my $stanza = <$pkg_fh>) {
            next if $stanza =~ /^\s*$/;
            my $record = "";
            my %data = ();
            chomp $stanza;

            # Temporarily fold continuation lines into their field so the
            # line-oriented match below sees each field as a single line.
            $stanza =~ s/\n /\377/g;
            while ($stanza =~ /^(\S+):\s*(.*)\s*$/mg) {
                my ($key, $value) = ($1, $2);
                $value =~ s/\377/\n /g;
                $key =~ tr [A-Z] [a-z];
                $data{$key} = $value;
            }

            # Without a package name there is nothing to index the stanza by.
            unless (defined $data{'package'}) {
                print STDERR "WARN: stanza without Package field ($suite/$archive), skipping\n";
                next;
            }

            $source_names{$data{'package'}} = 1;
            $source_names_suite{$data{'package'}} = 1;
            delete $data{'binary'};

            if (defined $data{files}) {
                # One file entry per \01-separated item, no leading separator.
                $data{files} =~ s/\s*\n\s*/\01/sg;
                $data{files} =~ s/^\01//sg;
            }
            else {
                # Keep the key present so the undefined-value WARN below
                # still reports the missing field.
                $data{files} = undef;
            }

            # "Section: foo/bar" is split into section "foo" and subsection
            # "bar"; a plain "Section: bar" is subsection "bar" of "main".
            my $section = 'main';
            my $subsection = $data{section} || '-';
            if ($data{section} && ($data{section} =~ m=/=)) {
                ($section, $subsection) = split m=/=, $data{section}, 2;
            }
            $data{'section'} = $section;
            $data{'subsection'} = $subsection;
            $data{'priority'} ||= "-";
            $sources_small{$data{'package'}} .=
                "$archive $suite $section $subsection $data{'priority'} $data{'version'}\000";

            while (my ($key, $value) = each (%data)) {
                next if $key eq 'package' or $key eq 'archive' or $key eq 'suite';
                unless (defined $value) {
                    my $arch = defined $data{architecture}
                        ? $data{architecture} : '';
                    print STDERR "WARN: $key ($suite/$archive/$data{package}/$arch)\n";
                    # Store an empty value instead of concatenating undef.
                    $value = '';
                }
                $record .= "$key\00$value\00";
            }
            # Drop the trailing NUL separator.
            $record =~ s/.$//s;
            $sources_all_db{"$archive $suite $data{'package'}"}
                = $record;
        }

        # close() on a pipe also reports a non-zero exit status of the
        # command; a failing zcat is reported but does not abort the run.
        close $pkg_fh
            or warn "WARN: '$sources_cmd' failed: "
                . ($! ? $! : "exit status " . ($? >> 8)) . "\n";

        open my $names_fh, '>>', "$DBDIR/source_names_$suite.txt.new"
            or die "Error creating source names list: $!";
        foreach my $name (sort keys %source_names_suite) {
            print {$names_fh} "$name\n";
        }
        close $names_fh
            or die "Error writing source names list: $!";

        untie %sources_all_db;
    }
}

# Dump the accumulated per-package summary records into sources_small.db.new,
# stripping the trailing separator character from each value first.
print "Writing databases...\n";
my %sources_small_db;
tie(%sources_small_db, "DB_File", "$DBDIR/sources_small.db.new",
    O_RDWR|O_CREAT, 0666, $DB_BTREE)
    or die "Error creating DB: $!";
foreach my $name (keys %sources_small) {
    (my $entries = $sources_small{$name}) =~ s/.$//s;
    $sources_small_db{$name} = $entries;
}
untie %sources_small_db;

# Package name index: for every source package name, map each of its
# suffixes of at least two characters to the part of the name preceding it.
foreach my $pkg (keys %source_names) {
    foreach my $cut (0 .. length($pkg) - 2) {
        # An empty prefix (the suffix is the whole name) is stored as "^",
        # otherwise split doesn't work properly on the stored list.
        my $head = $cut == 0 ? "^" : substr($pkg, 0, $cut);
        my $tail = substr($pkg, $cut);
        $source_postfixes{$tail} .= "$head\0";
    }
}
# Write the suffix index to source_postfixes.db.new.  A suffix with more than
# $MAX_SOURCE_POSTFIXES prefixes is not stored in full: its value becomes
# "\001<count>", preceded by "^" when the prefix list contained a "^".
my %source_postfixes_db;
tie(%source_postfixes_db, "DB_File", "$DBDIR/source_postfixes.db.new",
    O_RDWR|O_CREAT, 0666, $DB_BTREE)
    or die "Error creating DB: $!";
foreach my $postfix (keys %source_postfixes) {
    # Strip the trailing separator, then count entries (separators + 1).
    (my $prefixes = $source_postfixes{$postfix}) =~ s/.$//s;
    my $count = 1 + ($prefixes =~ tr/\000//);
    if ($count > $MAX_SOURCE_POSTFIXES) {
        my $summary = "\001" . $count;
        $summary = "^" . $summary if $prefixes =~ /\^/;
        $prefixes = $summary;
    }
    $source_postfixes_db{$postfix} = $prefixes;
}
untie %source_postfixes_db;

# Move the freshly written ".new" files over the live ones: first the
# per-suite files, then the two global databases.  A failed rename is
# reported on STDERR instead of being silently ignored; the remaining
# files are still processed.
for my $suite (@SUITES) {
    for my $file ("sources_all_$suite.db", "source_names_$suite.txt") {
        rename("$DBDIR/$file.new", "$DBDIR/$file")
            or warn "Could not rename $DBDIR/$file.new to $DBDIR/$file: $!\n";
    }
}
for my $file ("sources_small.db", "source_postfixes.db") {
    rename("$DBDIR/$file.new", "$DBDIR/$file")
        or warn "Could not rename $DBDIR/$file.new to $DBDIR/$file: $!\n";
}
