#!/usr/bin/perl

# This script can be used on a mirror (or e.g. coccia.debian.org) to
# produce an overview of the size of the archive. The numbers reflect
# the "raw" size of the archive: the size of packages and source files.
# It does not include the size of files containing meta data, nor of
# various separate directories.

# Copyright (c) 2009 Frans Pop <fjp@debian.org>

use warnings;
use strict;
use Cwd;
use Getopt::Long;
use File::Temp qw/ tempfile /;
use Compress::Zlib;

our $root="/srv/ftp.debian.org/ftp/dists";
our @arches= qw(source all amd64 arm64 armel armhf i386 mips mips64el mipsel ppc64el riscv64 s390x);
our @comps= qw(main contrib non-free non-free-firmware main/debian-installer);
our @suites= qw(oldoldstable oldstable stable testing unstable experimental);
our %dists;
our (@source_files, @package_files);
our (%files, %sizes, %total, %width);

GetOptions ("root=s" => \$root) or die "Error in command line arguments\n";

chdir($root) or die "chdir $root: $!";

my ($tfh, $tfile) = tempfile();
END { unlink $tfile if $tfile }

### Collect the data

foreach my $suite (@suites) {
  next unless -f "$root/$suite/Release";
  if (open my $release, "<", "$root/$suite/Release") {
    while (<$release>) {
      # Describe suites using their Codename, except for experimental; its
      # Codename is technically "rc-buggy", but it's rarely called that in
      # practice and we want something shorter anyway to make the table look
      # nicer.
      if ($suite eq "experimental") {
	$dists{$suite} = "exp";
      } elsif (/^Codename:/) {
	($dists{$suite}) = m/^Codename:\s+(.*)/i;
	last;
      }
    }
    close $release;
  }

  foreach my $comp (@comps) {
    next unless -d "$root/$suite/$comp";
    print(STDERR "Processing $suite $comp\n");
    foreach my $arch (@arches) {
      my $file;
      if ($arch eq "source") {
	$file = "source/Sources.xz";
	next unless -f "$root/$suite/$comp/$file";
	parse_sources($file, $suite, $comp);
      } else {
	$file = "binary-$arch/Packages.xz";
	next unless -f "$root/$suite/$comp/$file";
	parse_packages($file, $suite, $comp, $arch);
      }
    }
  }
}

### Print the tables

foreach my $suite (@suites) {
  next unless exists $dists{$suite};
  $width{$suite} = exists $sizes{"d$suite"} ? 16 : 6;
}

print("Total archive size (binary + source) per component:\n\n");
printf("%10s ", "(in MiB)");
foreach my $suite (@suites) {
  next unless exists $dists{$suite};
  printf("| %$width{$suite}s ", $dists{$suite});
}
printf("| %6s\n", "all");
print_ruler();
foreach my $comp (@comps) {
  next unless exists $sizes{all}{$comp};
  if ($comp eq "main/debian-installer") {
    printf("%-10s", "main/d-i");
  } elsif ($comp eq "non-free-firmware") {
    printf("%-10s", "non-free-f");
  } else {
    printf("%-10s", $comp);
  }
  foreach my $suite (@suites) {
    next unless exists $dists{$suite};
    if (exists $sizes{$suite}{$comp}) {
      $total{$suite} += $sizes{$suite}{$comp};
      printf(" | %6i", int((1 + $sizes{$suite}{$comp}) /1024/1024));
      if (exists $sizes{"d$suite"}{$comp}) {
        $total{"d$suite"} += $sizes{"d$suite"}{$comp};
        printf(" (+%6i)", int((1 + $sizes{"d$suite"}{$comp}) /1024/1024));
      }
    } else {
      print(" | " . (" " x $width{$suite}));
    }
  }
  printf(" | %6i\n", int((1 + $sizes{all}{$comp}) /1024/1024));
  $total{all} += $sizes{all}{$comp};
}
print_ruler();
printf("%-9s ", "total");
foreach my $suite (@suites) {
  next unless exists $dists{$suite};
  printf(" | %6i", int((1 + $total{$suite}) /1024/1024));
  printf(" (+%6i)", int((1 + $total{"d$suite"}) /1024/1024)) if exists $total{"d$suite"};
}
printf(" | %6i", int((1 + $total{all}) /1024/1024)."\n");

print("\n\n");
print("Archive size per architecture (source and arch=all packages are shown separately):\n\n");
printf("%10s ", "(in MiB)");
foreach my $suite (@suites) {
  next unless exists $dists{$suite};
  printf("| %$width{$suite}s ", $dists{$suite});
}
printf("| %6s\n", "all");
print_ruler();
foreach my $arch (@arches) {
  next unless exists $sizes{all}{$arch};
  my $parch = $arch;
  $parch =~ s/kfree/k/;
  printf("%-10s", $parch);
  foreach my $suite (@suites) {
    next unless exists $dists{$suite};
    if (exists $sizes{$suite}{$arch}) {
      printf(" | %6i", int((1 + $sizes{$suite}{$arch}) /1024/1024));
      printf(" (+%6i)", int((1 + $sizes{"d$suite"}{$arch}) /1024/1024)) if exists $sizes{"d$suite"}{$arch};
    } else {
      printf(" | " . (" " x $width{$suite}));
    }
  }
  printf(" | %6i\n", int((1 + $sizes{all}{$arch}) /1024/1024));
}

my @ts=gmtime(time());
printf("\nAll numbers reflect the state of the archive per %i %s %i.\n", $ts[3],
       (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$ts[4]], $ts[5] + 1900);

### Functions

sub print_ruler {
  print("-" x 11);
  foreach my $suite (@suites) {
    next unless exists $dists{$suite};
    print("|" . "-" x ($width{$suite} + 2));
  }
  print("|" . "-" x 7 . "\n");
  return;
}

sub parse_packages {
  my ($file, $suite, $comp, $arch) = @_;
  my ($line, $res, $size, $filename, $architecture);
  local $/ = "\n\n";
  system_redirect_io("unxz", "$root/$suite/$comp/$file", "$tfile");
  open(my $tfh, "<", $tfile) or die "$tfile: $!";
  for (;;) {
    my $buf;
    unless (defined( $buf = <$tfh> )) {
      last if eof;
      die "$file: $!" if $!;
    }
    $_ = $buf;
    ($filename) = m/^Filename:\s+(.*)/im;
    $filename =~ s:/+:/:; # remove redundant slashes in paths
    ($architecture) = m/^Architecture:\s+(.*)/im;
    ($size) = m/^Size:\s+(\d+)/im;

    if (! exists $files{$filename}{$suite}) {
      $sizes{$suite}{$comp} += $size;
      $sizes{$suite}{$architecture} += $size;
      if (($suite eq "oldstable" && exists $dists{oldoldstable} &&
	   ! exists $files{$filename}{oldoldstable}) ||
	  ($suite eq "stable" && exists $dists{oldstable} &&
	   ! exists $files{$filename}{oldstable}) ||
	  ($suite eq "testing" && exists $dists{stable} &&
	   ! exists $files{$filename}{stable}) ||
	  ($suite eq "unstable" && exists $dists{testing} &&
	   ! exists $files{$filename}{testing})) {
	$sizes{"d$suite"}{$comp} += $size;
	$sizes{"d$suite"}{$architecture} += $size;
      }
    }
    if (! exists $files{$filename}{x}) {
      $sizes{all}{$comp} += $size;
      $sizes{all}{$architecture} += $size;
    }
    $files{$filename}{x} = 1;
    $files{$filename}{$suite} = 1;
  }
  close($tfh);
  return;
}
sub parse_sources {
  my ($file, $suite, $comp) = @_;
  my ($line, $res, $directory, $checksums, $hash, $size, $filename);
  local $/ = "\n\n";
  system_redirect_io("unxz", "$root/$suite/$comp/$file", "$tfile");
  open(my $tfh, "<", $tfile) or die "$tfile: $!";
  for (;;) {
    my $buf;
    unless (defined( $buf = <$tfh> )) {
      last if eof;
      die "$file: $!" if $!;
    }
    $_ = $buf;
    ($directory) = m/^Directory:\s+(.*)/im;
    ($checksums) = m/^Checksums-Sha256:\n((?: .*\n)+)/m;
    foreach my $checksum_line (split /^/m, $checksums) {
      ($hash, $size, $filename)=split(' ', $checksum_line, 3);
      $filename = "$directory/$filename";
      $filename =~ s:/+:/:; # remove redundant slashes in paths

      if (! exists $files{$filename}{$suite}) {
	$sizes{$suite}{$comp} += $size;
	$sizes{$suite}{source} += $size;
	if (($suite eq "oldstable" && exists $dists{oldoldstable} &&
	     ! exists $files{$filename}{oldoldstable}) ||
	    ($suite eq "stable" && exists $dists{oldstable} &&
	     ! exists $files{$filename}{oldstable}) ||
	    ($suite eq "testing" && exists $dists{stable} &&
	     ! exists $files{$filename}{stable}) ||
	    ($suite eq "unstable" && exists $dists{testing} &&
	     ! exists $files{$filename}{testing})) {
	  $sizes{"d$suite"}{$comp} += $size;
	  $sizes{"d$suite"}{source} += $size;
	}
      }
      if (! exists $files{$filename}{x}) {
	$sizes{all}{$comp} += $size;
	$sizes{all}{source} += $size;
      }
      $files{$filename}{x} = 1;
      $files{$filename}{$suite} = 1;
    }
  }
  close($tfh);
  return;
}

# run system() with stdin and stdout redirected to files
# unlinks stdout target file first to break hard links
sub system_redirect_io {
  my ($command, $fromfile, $tofile) = @_;

  if (-f $tofile) {
    unlink($tofile) or die "unlink($tofile) failed: $!";
  }
  system("$command <$fromfile >$tofile");
  return;
}
