#!/usr/bin/perl -w
## written by Maarten Vanraes (c) 2009-2012
## urpmi-proxy is GPLv2+
##
## CGI script that serves the file named by PATH_INFO. The file is looked up
## in a local cache and, when a check is needed, fetched from the configured
## sources (urpmi media, mirrorlists, rsync mirrors, local directories or
## anything curl can fetch). Whatever is sent to the client is also written
## to a temporary file which is moved into the cache on success.

use strict;
use warnings;

# Non-core dependencies. 'use' is resolved at compile time, so they are
# always required, wherever the statement is written.
use urpm;
use urpm::cfg;
use WWW::Curl::Easy;

my $debug = 0;
my $proxy = 0;
my $config_file = '/etc/urpmi-proxy.conf';

# config defaults
my $cache_tmp_path = '/var/tmp/urpmi-proxy';
my $cache_path = '/var/cache/urpmi-proxy';
my $logfile = '/var/log/urpmi-proxy.log';
my $check_updates_only_files = '(MD5SUM|descriptions)';
my $check_no_updates_files;
my $merge_files = 'media.cfg';
my $sources = [ 'urpmi' ];
my $connect_timeout = 120;
my $ftp_response_timeout = 30;
my $max_stall_speed = 8192;
my $max_stall_time = 60;

# load config file
# NOTE(review): the config file is executed as Perl code (string eval) so it
# can assign the lexicals declared above. Anyone who can write to
# $config_file can run arbitrary code as the CGI user; keep it root-owned.
# The eval must stay at file scope so that those lexicals are visible to it.
if (-R $config_file) {
    if (open(my $config_fh, '<', $config_file)) {
        my $config_code = '';
        while (my $config_line = <$config_fh>) {
            $config_code .= $config_line;
        }
        close($config_fh);
        eval $config_code;
        # a broken config used to be ignored silently
        print STDERR "urpmi-proxy: error in config file '$config_file': $@" if $@;
    }
}
print STDERR "logfile: $logfile\n" if $debug;
_debug_sources('orig sources', @$sources);

# prepare cache path (list form: no shell involved)
system('mkdir', '-p', '--', $cache_tmp_path);

# check for valid request
my $file = $ENV{PATH_INFO};
return_error(500, 'Server error') if !$file;

# PATH_INFO is client controlled and gets appended to the cache path and to
# file:// source paths, so refuse anything that could escape those roots.
return_error(403, 'Forbidden') if $file =~ m{(?:^|/)\.\.(?:/|$)} || $file =~ m/\0/;

# split up request
my $dest_path;
my $filename;
if ($file =~ m!^(.*)/([^/]*)$!) {
    $dest_path = $1;
    $filename = $2;
}
else {
    return_error(500, 'Server error');
}
my $file_type = "";
my $merge = 0;
print STDERR "file: $file\n" if $debug;
print STDERR "dest_path: $dest_path\n" if $debug;
print STDERR "filename: $filename\n" if $debug;

# check if there's a time condition
my $modified_since = 0;
if (defined $ENV{HTTP_IF_MODIFIED_SINCE}) {
    $modified_since = _date_to_epoch($ENV{HTTP_IF_MODIFIED_SINCE});
    print STDERR "is modified since: $ENV{HTTP_IF_MODIFIED_SINCE} ($modified_since) ?\n" if $debug && $modified_since;
}

# set request ENV vars
my $ip = $ENV{REMOTE_ADDR};
my $user_agent = '';
$user_agent = $ENV{HTTP_USER_AGENT} if defined $ENV{HTTP_USER_AGENT};

# check if request needs update checking
my $check_file = 1;
$check_file = 0 if defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/;
if (defined $check_updates_only_files) {
    $check_file = 0;
    $check_file = 1 if $filename =~ m/$check_updates_only_files/;
}

# check if request needs merging
$merge = 1 if $filename =~ m/$merge_files/;
$check_file = 1 if $merge;
print STDERR "check_file: $check_file\n" if $debug;
print STDERR "merge: $merge\n" if $debug;

# get datetime from local file if it exists
my @stat = lstat($cache_path . $file);
if ($filename && scalar(@stat) > 0) {
    print STDERR "timestamp: $stat[9]\n" if $debug;
    # if the file needs no update checks, check in cache
    return_file($cache_path, $file, $logfile, 'HIT_NO_CHECK', \@stat, $ip, $user_agent) if !$check_file;
}

# state shared between the source loop and the curl callbacks
my $curl;
my $r = 0;                  # 0 when the last transfer succeeded
my $file_sent = 0;          # number of body bytes sent to the client so far
my $file_unmodified = 0;
my $file_time = -1;
my $err = 200;              # last HTTP-ish status code

# prepare curl transfer
my $tmp_file = $cache_tmp_path . "/" . rand() . $$;
open(my $tmp_fh, '>', $tmp_file) or do {
    _log($logfile, $file, 500, 'MISS', -1, $ip, $user_agent);
    return_error(500, 'Server error');
};
binmode($tmp_fh);
my %curldata = (
    fh           => $tmp_fh,
    file_sent    => \$file_sent,
    content_type => $file_type,
    size         => -1,
    merge        => \$merge,
);
_debug_sources('sources', @$sources);

# filter out duplicate sources (and expand urpmi)
my @sources;
my %seen;
foreach my $s (@$sources) {
    next if $seen{$s}++;
    if ($s ne 'urpmi') {
        push @sources, $s;
        next;
    }
    # urpmi support is required
    my $urpm = urpm->new();
    urpm::get_global_options($urpm);
    my $config = urpm::cfg::load_config($urpm->{config});
    my %urls;
    foreach my $media (@{$config->{media}}) {
        next if $media->{ignore};
        if ($media->{mirrorlist} && !($seen{'mirrorlist:' . $media->{mirrorlist}}++)) {
            # push mirrorlists now so they'll be first
            push @sources, 'mirrorlist://' . $media->{mirrorlist};
        }
        elsif ($media->{url} && !($seen{$media->{url}}++)) {
            $urls{$media->{url}} = 1;
        }
    }
    # push the urls
    push @sources, keys %urls if scalar(keys %urls);
}

# check for source
_debug_sources('interpolated sources', @sources);
foreach my $source (@sources) {
    my ($type, @loc) = split('://', $source);
    $type = '' if !defined $type;
    my $loc = join('://', @loc);
    print STDERR "source of type $type: '" . $source . "'\n" if $debug;

    if ($type eq 'mirrorlist') {
        # get exact url from cache and parse
        my $mirrorcache = _slurp_file('/var/cache/urpmi/mirrors.cache');
        if (defined $mirrorcache) {
            # \Q..\E: the mirrorlist name is matched literally ('$', '?', '.')
            if ($mirrorcache =~ m/'\Q$loc\E'\s+=>\s+\{[\r\n]+\s+'chosen'\s+=>\s+'([^']+)'/m) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "mirrorlist returns source '$source'\n" if $debug;
                ($type, @loc) = split('://', $source);
                $loc = join('://', @loc);
                if (defined $type) {
                    print STDERR "mirrorlist returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: mirrorlist is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: mirrorlist has no chosen url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
        }
    }

    # not an elsif: a mirrorlist may just have resolved to an rsync url
    if ($type eq 'rsync') {
        # find the equivalent ftp mirror location by hostname
        my $mirrorcache = _slurp_file('/var/cache/urpmi/mirrors.cache');
        if (defined $mirrorcache) {
            my $loc_host = $loc;
            $loc_host =~ s!/.+!!;
            if ($mirrorcache =~ m!'url'\s+=>\s+'((?:ftp|http)://\Q$loc_host\E/[^']+)'!) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "rsync switch returns source '$source'\n" if $debug;
                ($type, @loc) = split('://', $source);
                $loc = join('://', @loc);
                if (defined $type) {
                    print STDERR "rsync switch returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: rsync switch is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: rsync switch has no suitable url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
        }
    }

    if ($type eq 'file') {
        my $local_path = $loc . $file;
        my @statl = lstat($local_path);
        if ($filename && scalar(@statl) > 0) {
            my $ft = _mime_type($local_path);
            my $t = _http_date($statl[9]);
            print STDERR "HTTP Header: 200 OK\n" if $debug;
            print STDERR "Content-Type: $ft\n" if $debug;
            print STDERR "Content-Length: " . $statl[7] . "\n" if $debug;
            print STDERR "Last-Modified: " . $t . "\n" if $debug;
            if (open(my $src_fh, '<', $local_path)) {
                print STDERR "file fetch url '" . $local_path . "'\n" if $debug;
                if (!$file_sent) {
                    # first data for this request: emit the CGI headers
                    $file_sent = $statl[7];
                    print "Status: 200 OK\r\n";
                    print "Content-Type: " . $ft . "\r\n";
                    # total length is unknown when several sources are merged
                    print "Content-Length: " . $statl[7] . "\r\n" if !$merge;
                    print "Last-Modified: " . $t . "\r\n";
                    print "\r\n";
                }
                else {
                    $file_sent += $statl[7];
                }
                binmode($src_fh);
                my $buf;
                while (read($src_fh, $buf, 1024)) {
                    print {$tmp_fh} $buf;
                    print $buf;
                }
                close($src_fh);
                $r = 0;
            }
            else {
                print STDERR "transfer error: couldn't open file '" . $local_path . "'.\n" if $debug;
                $r = 1;
                $err = 404;
            }
        }
        else {
            print STDERR "transfer error: couldn't read file '" . $local_path . "'.\n" if $debug;
        }
    }
    elsif ($type) {
        if (!defined $curl) {
            # set up curl stuff (once, reused for every remaining source)
            $curl = WWW::Curl::Easy->new;
            if (scalar(@stat) > 0 && $stat[9] > $modified_since) {
                $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
                $curl->setopt(CURLOPT_TIMEVALUE, $stat[9]);
            }
            elsif ($modified_since > 0) {
                $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
                $curl->setopt(CURLOPT_TIMEVALUE, $modified_since);
            }
            $curl->setopt(CURLOPT_USERAGENT, $user_agent) if $user_agent;
            $curl->setopt(CURLOPT_PROXY, $proxy) if $proxy;
            $curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout);
            $curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout);
            $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed);
            $curl->setopt(CURLOPT_LOW_SPEED_TIME, $max_stall_time);
            $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
            $curl->setopt(CURLOPT_FILETIME, 1);

            # hook curl transfer functions for local caching
            $curl->setopt(CURLOPT_WRITEDATA, \%curldata);
            $curl->setopt(CURLOPT_WRITEFUNCTION, \&write_function);
            $curl->setopt(CURLOPT_WRITEHEADER, \%curldata);
            $curl->setopt(CURLOPT_HEADERFUNCTION, \&header_function);
        }

        # depending on type check if remote file is newer
        print STDERR "curl fetch url '" . $source . $file . "'\n" if $debug;
        $curl->setopt(CURLOPT_URL, $source . $file);
        $r = $curl->perform;
        print STDERR "curl return value: " . $r . "\n" if $debug;

        # use curl to get it and output it directly
        # NOTE(review): CURLOPT_FAILONERROR is not set, so the body of a
        # non-2xx answer appears to be streamed to the client by
        # write_function - confirm that this is intended.
        if ($r == 0) {
            $err = $curl->getinfo(CURLINFO_HTTP_CODE);
            if ($err =~ m/^2/ || $err == 304) {
                if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) {
                    $file_unmodified = 1;
                    print STDERR "condition unmet\n" if $debug;
                }
                $file_time = $curl->getinfo(CURLINFO_FILETIME);
            }
            else {
                # error stuff ?
                print STDERR "transfer error: http code " . $err . "\n" if $debug;
            }
        }
        else {
            # error stuff ?
            print STDERR "transfer error: " . $curl->strerror($r) . " ($r)\n" if $debug;
        }
    }
    else {
        print STDERR "transfer error: this source does not have a type\n" if $debug;
    }

    print STDERR "file_sent: $file_sent\n" if $debug;
    # merged files collect the output of every source
    last if $file_sent && !$merge;
}

my $extra = '';
close($tmp_fh);
if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) {
    # clean up file and move to correct location
    if (system('mkdir', '-p', '--', $cache_path . $dest_path) == 0) {
        if (rename($tmp_file, $cache_path . $file)) {
            utime(time(), $file_time, $cache_path . $file) if $file_time > 0;
        }
        else {
            print STDERR "WARNING: file '$tmp_file' could not be moved to '$cache_path$file'\n";
            # don't leave orphaned temp files behind
            unlink($tmp_file);
        }
    }
    else {
        print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n";
        unlink($tmp_file);
    }
    _log($logfile, $file, 200, 'MISS', $file_sent, $ip, $user_agent);
}
else {
    unlink($tmp_file);
    if ($file_sent) {
        if ($filename) {
            _log($logfile, $file, $err, 'MISS_FAIL_SENT', $file_sent, $ip, $user_agent);
        }
        else {
            # It was actually successful, but paths can't be saved...
            print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug;
            _log($logfile, $file, $err, 'MISS', $file_sent, $ip, $user_agent);
        }
        exit 0;
    }
    if ($file_unmodified) {
        $extra = '_UNMODIFIED';
        if ($modified_since > 0 && (scalar(@stat) == 0 || $stat[9] <= $modified_since)) {
            # it's been requested, so we can answer unmodified
            _log($logfile, $file, 304, 'MISS' . $extra, 0, $ip, $user_agent);
            return_error(304, 'Unmodified');
        }
    }
    else {
        # NOTE(review): no leading underscore, so the log tag reads
        # 'HITAFTER_FAIL'; kept as is in case log parsers rely on it.
        $extra = 'AFTER_FAIL';
    }
    return_file($cache_path, $file, $logfile, 'HIT' . $extra, \@stat, $ip, $user_agent) if $filename && scalar(@stat) > 0;
    _log($logfile, $file, 404, 'MISS_FAIL', -1, $ip, $user_agent);
    return_error(404, 'File not found');
}
print STDERR "finished." if $debug;
exit 0;

# curl header callback: remembers the interesting response headers in the
# shared data hash until the first body bytes have been sent.
#   $ptr  - one raw header line
#   $data - hash ref registered with CURLOPT_WRITEHEADER
# Returns the number of bytes handled, as curl expects.
sub header_function {
    my ($ptr, $data) = @_;
    if (!${$data->{file_sent}}) {
        # header names are case-insensitive (HTTP/2 delivers them in lower
        # case) and the whitespace after the colon is optional
        $data->{http_header} = $1 if $ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!;
        $data->{content_type} = $1 if $ptr =~ m/^Content-Type:\s*(.+?)[\s\r\n]*$/i;
        $data->{size} = $1 if $ptr =~ m/^Content-Length:\s*(.+?)[\s\r\n]*$/i;
        $data->{date} = $1 if $ptr =~ m/^Last-Modified:\s*(.+?)[\s\r\n]*$/i;
        # FTP answers the SIZE command with a 213 reply
        $data->{size} = $1 if $ptr =~ m/^213\s+(.+?)[\s\r\n]*$/;
    }
    return length($ptr);
}

# curl body callback: writes every chunk to the temp file and to the client;
# the CGI response headers are emitted just before the first chunk.
#   $ptr  - chunk of body data
#   $data - hash ref registered with CURLOPT_WRITEDATA
# Returns the number of bytes handled, as curl expects.
sub write_function {
    my ($ptr, $data) = @_;
    my $f = $data->{fh};
    print {$f} $ptr;
    if (!${$data->{file_sent}}) {
        ${$data->{file_sent}} = length($ptr);
        print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header};
        print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type};
        print STDERR "Content-Length: " . $data->{size} . "\n" if $debug;
        print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date};
        print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/;
        print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type};
        # total length is unknown when several sources are merged
        print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}};
        print "Last-Modified: " . $data->{date} . "\r\n" if $data->{date};
        print "\r\n";
    }
    else {
        ${$data->{file_sent}} += length($ptr);
    }
    print $ptr;
    return length($ptr);
}

# Appends one line to the access log; does nothing when the log cannot be
# opened. A negative $size is logged as '-'.
sub _log {
    my ($logfile, $file, $code, $cached, $size, $ip, $user_agent) = @_;
    my $date = `date`;
    $date =~ s/[\s\r\n]*$//;
    $size = '-' if $size < 0;
    open(my $log_fh, '>>', $logfile) or return;
    print {$log_fh} "[" . $date . "] $ip $code $size $cached '$file' '$user_agent'\n";
    close($log_fh);
    return;
}

# Sends $cache_path . $file to the client with a 200 status, logs the
# request with the tag $cached and exits. $stat is a reference to the lstat()
# list of the file. Answers 500 when the file cannot be opened.
sub return_file {
    my ($cache_path, $file, $logfile, $cached, $stat, $ip, $user_agent) = @_;
    open(my $cache_fh, '<', $cache_path . $file) or do {
        _log($logfile, $file, 500, $cached, -1, $ip, $user_agent);
        return_error(500, 'Server error');
    };
    my $ft = _mime_type($cache_path . $file);
    my $t = _http_date($stat->[9]);
    print STDERR "HTTP header: 200 OK\n" if $debug;
    print STDERR "Content-Type: '$ft'\n" if $debug;
    print STDERR "Content-Length: " . $stat->[7] . "\n" if $debug;
    print STDERR "Last-Modified: " . $t . "\n" if $debug;
    print "Status: 200 OK\r\n";
    print "Content-Type: " . $ft . "\r\n";
    print "Content-Length: " . $stat->[7] . "\r\n";
    print "Last-Modified: " . $t . "\r\n";
    print "\r\n";
    binmode($cache_fh);
    my $buf;
    while (read($cache_fh, $buf, 1024)) {
        print $buf;
    }
    close($cache_fh);
    _log($logfile, $file, 200, $cached, $stat->[7], $ip, $user_agent);
    exit 0;
}

# Sends a body-less response with the given status code and text, then exits.
sub return_error {
    my ($code, $text) = @_;
    print "Status: $code $text\r\n\r\n";
    print STDERR "$code $text.\n" if $debug;
    exit 0;
}

# Prints "<label>: <count>" followed by one line per entry to STDERR when
# debugging is enabled.
sub _debug_sources {
    my ($label, @list) = @_;
    return if !$debug;
    print STDERR $label . ": " . scalar(@list) . "\n";
    foreach my $sou (@list) {
        print STDERR " - " . $sou . "\n";
    }
    return;
}

# Returns the whole content of $path as one string, or undef when the file
# cannot be opened.
sub _slurp_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my $content = '';
    while (my $line = <$fh>) {
        $content .= $line;
    }
    close($fh);
    return $content;
}

# Runs a command WITHOUT a shell (list-form pipe open) and returns its
# standard output with trailing whitespace removed; '' when it cannot be
# started. Arguments are passed verbatim, so request data is safe here.
sub _run_command {
    my (@command) = @_;
    my $pid = open(my $pipe, '-|', @command);
    return '' if !$pid;
    my $output = '';
    while (my $line = <$pipe>) {
        $output .= $line;
    }
    close($pipe);
    $output =~ s/[\s\r\n]*$//;
    return $output;
}

# Returns the MIME type file(1) reports for $path.
sub _mime_type {
    my ($path) = @_;
    return _run_command('file', '-b', '--mime-type', '--', $path);
}

# Converts a date string (e.g. an If-Modified-Since header) to epoch seconds
# using date(1); returns 0 when the string cannot be parsed.
sub _date_to_epoch {
    my ($date_string) = @_;
    my $epoch = _run_command('date', '--date=' . $date_string, '+%s');
    return $epoch =~ m/^(-?\d+)$/ ? $1 : 0;
}

# Formats epoch seconds as an HTTP-date (IMF-fixdate, always GMT). The names
# are spelled out here so the result does not depend on the locale.
sub _http_date {
    my ($epoch) = @_;
    my @days = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($epoch);
    return sprintf('%s, %02d %s %04d %02d:%02d:%02d GMT',
        $days[$wday], $mday, $months[$mon], $year + 1900, $hour, $min, $sec);
}