ports/infrastructure/lib/DPB/Fetch.pm

322 lines
8 KiB
Perl

# ex:ts=8 sw=4:
# $OpenBSD: Fetch.pm,v 1.95 2023/09/25 17:10:16 espie Exp $
#
# Copyright (c) 2010-2013 Marc Espie <espie@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
use v5.36;
use DPB::Clock;
use DPB::Distfile;
use OpenBSD::md5;
use DPB::User;
# handles fetch information, if required
package DPB::Fetch;
our @ISA = (qw(DPB::UserProxy));
sub new($class, $distdir, $logger, $state, $ftp_only)
{
my $o = bless {distdir => $distdir, sha => {}, reverse => {},
logger => $logger,
known_sha => {}, known_files => {},
known_short => {},
user => $state->{fetch_user},
state => $state,
ftp_only => $ftp_only,
cache => {},
build_user => $state->{build_user},
fetch_only => $state->{fetch_only}}, $class;
my $fh = $o->open('<', "$distdir/distinfo");
if (defined $fh) {
print "Reading distinfo...";
while (<$fh>) {
if (m/^SHA256\s*\((.*)\) \= (.*)/) {
next unless -f "$distdir/$1";
$o->{sha}{$1} = OpenBSD::sha->fromstring($2);
$o->{reverse}{$2} = $1;
}
}
close $fh;
}
# bsd.port.mk gave us a FETCH_CMD, remove options we will replace
# to avoid confusing the command line
for my $w (split /\s+/, $state->{fetch_cmd}) {
next if $w eq '-C';
next if $w eq '-v';
next if $w eq '-V';
next if $w eq '-m';
push(@{$o->{fetch_cmd}}, $w);
}
print "zap duplicates...";
# rewrite "more or less" the same info, so we flush duplicates,
# e.g., keep only most recent checksum seen
$o->make_path($distdir);
my $name = "$distdir/distinfo";
$fh = $o->open('>', "$name.new");
if (defined $fh) {
for my $k (sort keys %{$o->{sha}}) {
print $fh "SHA256 ($k) = ", $o->{sha}{$k}->stringize,
"\n" or $state->fatal("Can't write #1: #2",
$name, $!);
}
close ($fh);
}
print "Done\n";
$o->rename("$name.new", $name);
$o->{log} = $o->open(">>", $name);
DPB::Util->make_hot($o->{log});
return $o;
}
sub mark_sha($self, $sha, $file)
{
$self->{known_sha}{$sha}{$file} = 1;
# next cases are only needed to weed out by_cipher of extra links
if ($file =~ m/^.*\/([^\/]+)$/) {
$self->{known_short}{$sha}{$1} = 1;
}
# in particular, double / in $sha will vanish thanks to the fs
my $do = 0;
if ($sha =~ s/\/\//\//g) {
$do++;
}
if ($sha =~ s/^\///) {
$do++;
}
if ($do) {
if ($file =~ m/^.*\/([^\/]+)$/) {
$self->{known_short}{$sha}{$1} = 1;
} else {
$self->{known_short}{$sha}{$file} = 1;
}
}
}
sub known_file($self, $sha, $file)
{
$self->mark_sha($sha->stringize, $file);
$self->{known_file}{$file} = 1;
}
sub run_expire_old($self, $core, $opt_e)
{
$core->unsquiggle;
$core->start_job(DPB::Job::Normal->new(
sub($shell) {
$self->expire_old;
},
sub($core) {
# and we will never need this again
delete $self->{known_file};
delete $self->{known_sha};
delete $self->{known_short};
if (!$opt_e) {
$core->mark_ready;
}
return 0;
},
"UPDATING DISTFILES HISTORY"));
return 1;
}
sub parse_old($self, $fh, $fh2)
{
while (<$fh>) {
if (my ($ts, $file, $sha) =
m/^(\d+(?:\.\d+)?)\s+SHA256\s*\((.*)\) \= (.*\=)$/) {
$file = DPB::Distfile->normalize($file);
if (!$self->{known_sha}{$sha}{$file}) {
$self->mark_sha($sha, $file);
$self->{known_file}{$file} = 1;
print $fh2 "$ts SHA256 ($file) = $sha\n"
or return 0;
}
}
}
return 1;
}
sub expire_old($self)
{
my $ts = CORE::time();
my $distdir = $self->distdir;
chdir($distdir) or die "can't change to distdir: $!";
my $fh2 = $self->open(">", "history.new");
return if !$fh2;
if (my $fh = $self->open('<', "history")) {
$self->parse_old($fh, $fh2) or return;
close $fh;
}
while (my ($sha, $file) = each %{$self->{reverse}}) {
next if $self->{known_sha}{$sha}{$file};
print $fh2 "$ts SHA256 ($file) = $sha\n" or return;
$self->{known_file}{$file} = 1;
}
for my $special (qw(Makefile distinfo history history.new)) {
$self->{known_file}{$special} = 1;
}
my $fatal = 0;
# let's also scan the directory proper
require File::Find;
File::Find::find(
sub() {
if (-d $_ &&
($File::Find::name eq "./by_cipher" ||
$File::Find::name eq "./list" ||
$File::Find::name eq "./build-stats")) {
$File::Find::prune = 1;
return;
}
return unless -f _;
return if $fatal;
return if m/\.part$/;
my $actual = $File::Find::name;
$actual =~ s/^.\///;
return if $self->{known_file}{$actual};
my $sha = OpenBSD::sha->new($_)->stringize;
print $fh2 "$ts SHA256 ($actual) = $sha\n" or $fatal = 1;
$self->mark_sha($sha, $actual);
}, ".");
my $c = "by_cipher/sha256";
if (-d $c && !$fatal) {
# and scan the ciphers as well !
File::Find::find(
sub() {
return unless -f $_;
return if $fatal;
if ($File::Find::dir =~
m/^\.\/by_cipher\/sha256\/..?\/(.*)$/) {
my $sha = $1;
return if $self->{known_sha}{$sha}{$_};
return if $self->{known_short}{$sha}{$_};
print $fh2 "$ts SHA256 ($_) = ", $sha, "\n"
or $fatal = 1;
}
}, $c);
}
return if $fatal;
close $fh2 && $self->rename("history.new", "history");
}
sub forget_cache($self)
{
$self->{cache} = {};
}
sub distdir($self)
{
return $self->{distdir};
}
sub read_checksums($self, $filename)
{
# XXX the fetch user might not have read access there ?
my $fh = $self->{build_user}->open('<', $filename);
if (!defined $fh) {
return { error => $! };
}
my $r = { size => {}, sha => {}};
while (<$fh>) {
if (my ($file, $sz) = m/^SIZE \((.*)\) \= (\d+)$/) {
$r->{size}{DPB::Distfile->normalize($file)} = $sz;
} elsif (my ($file2, $sha) = m/^SHA256 \((.*)\) \= (.*)$/) {
$r->{sha}{DPB::Distfile->normalize($file2)} =
OpenBSD::sha->fromstring($sha);
}
# next!
}
return $r;
}
sub build1info($self, $v, $mirror, $roach)
{
my $info = $v->{info};
return unless defined $info->{distfiles};
my $dir = $info->{DIST_SUBDIR};
my $checksum_file = $info->{CHECKSUM_FILE};
if (!defined $checksum_file) {
$v->break("No checksum file");
return;
}
$checksum_file = $checksum_file->string;
# collapse identical checksum files together
$checksum_file =~ s,/[^/]+/\.\./,/,g;
my $fname = $self->{state}->anchor($checksum_file);
$self->{cache}{$checksum_file} //=
$self->read_checksums($fname);
my $checksums = $self->{cache}{$checksum_file};
my $files = {};
my $build = sub($arg, $k) {
my $site = 'SITES';
my $url;
if ($k =~ m/^.*?FILES(.+)/) {
$site .= $1;
}
if ($arg =~ m/^(.*)\{(.*)\}(.*)$/) {
$arg = $1 . $3;
$url = $2 . $3;
}
return DPB::Distfile->new($arg, $url, $dir,
$info->{sites}{$site},
$checksums, $fname, $v, $self);
};
while (my ($k, $o) = each %{$info->{distfiles}}) {
if ($k =~ m/^SUPDISTFILES/ && !$mirror) {
next;
}
for my $d ($o->list) {
my $file = &$build($d, $k);
$files->{$file} = $file if defined $file;
}
}
$roach->build1info($v);
for my $k (qw(DIST_SUBDIR CHECKSUM_FILE distfiles sites)) {
delete $info->{$k};
}
bless $files, "AddDepends";
$info->{DIST} = $files;
if ($self->{ftp_only} && defined $info->{PERMIT_DISTFILES}) {
$info->{DISTIGNORE} = 1;
$info->{IGNORE} //= AddIgnore->new(
"Distfile not allowed for ftp");
}
}
sub build_distinfo($self, $h, $mirror, $roach)
{
for my $v (values %$h) {
$self->build1info($v, $mirror, $roach);
}
}
sub fetch($self, $file, $core, $endcode)
{
require DPB::Job::Fetch;
my $job = DPB::Job::Fetch->new($file, $endcode, $self,
$self->{logger});
$core->start_job($job);
}
1;