1 |
#!/usr/bin/perl -w |
2 |
## written by Maarten Vanraes (c) 2009-2012 |
3 |
## urpmi-proxy is GPLv2+ |
4 |
|
5 |
use strict; |
6 |
use warnings; |
7 |
|
8 |
my $debug = 0;
my $config_file = '/etc/urpmi-proxy.conf';

# config defaults; every one of these may be overridden by the config file
my $cache_tmp_path = '/var/tmp/urpmi-proxy';
my $cache_path = '/var/cache/urpmi-proxy';
my $logfile = '/var/log/urpmi-proxy.log';
my $check_updates_only_files = '(MD5SUM|descriptions)';
my $check_no_updates_files;
my $merge_files = 'media.cfg';
my $sources = [
    'urpmi'
];
my $connect_timeout = 120;
my $ftp_response_timeout = 30;
my $max_stall_speed = 8192;
my $max_stall_time = 60;

# load config file
# The config file is Perl code that assigns to the lexicals declared above,
# so it has to be evaluated as a string inside this scope.
# NOTE(review): this executes whatever the file contains; the file must only
# be writable by a trusted administrator.
if (-R $config_file) {
    if (open(my $config_fh, '<', $config_file)) {
        # slurp the whole file at once
        my $config_code = do { local $/; <$config_fh> };
        close $config_fh;
        if (defined $config_code) {
            eval $config_code;
            if ($@) {
                # a broken config used to be ignored silently; report it
                my $eval_error = $@;
                $eval_error =~ s/[\s\r\n]*$//;
                print STDERR "WARNING: error in config file '$config_file': $eval_error\n";
            }
        }
    }
    else {
        print STDERR "WARNING: config file '$config_file' could not be opened: $!\n";
    }
}
print STDERR "logfile: $logfile\n" if $debug;

print STDERR "orig sources: " . scalar(@$sources) . "\n" if $debug;
if ($debug) {
    foreach my $sou (@$sources) {
        print STDERR " - " . $sou . "\n";
    }
}
46 |
|
47 |
# prepare cache path
# (list-form system: the configured path is passed as one argument and is
# never parsed by a shell)
system('mkdir', '-p', $cache_tmp_path);

# check for valid request
my $file = $ENV{PATH_INFO};
return_error(500, 'Server error') if !$file;

# The request path is appended to the cache path and to file:// sources
# further down, so '..' components must not be allowed to climb out of them.
return_error(403, 'Forbidden') if $file =~ m!(?:^|/)\.\.(?:/|$)!;

# split up request into directory part and file name
return_error(500, 'Server error') if $file !~ m!^(.*)/([^/]*)$!;
my $dest_path = $1;
my $filename = $2;
my $file_type = "";
my $merge = 0;

print STDERR "file: $file\n" if $debug;
print STDERR "dest_path: $dest_path\n" if $debug;
print STDERR "filename: $filename\n" if $debug;
64 |
|
65 |
# check if there's a time condition
# $modified_since is the If-Modified-Since header as epoch seconds, or 0 when
# the header is absent or could not be parsed.
my $modified_since = 0;
if (defined $ENV{HTTP_IF_MODIFIED_SINCE}) {
    # The header value is client-controlled. Hand it to date(1) as a single
    # argv element through a list-form pipe open so that no shell ever gets
    # to interpret it.
    my $epoch = '';
    if (open(my $date_fh, '-|', 'date', '--date=' . $ENV{HTTP_IF_MODIFIED_SINCE}, '+%s')) {
        my $output = <$date_fh>;
        close $date_fh;
        $epoch = $output if defined $output;
    }
    $epoch =~ s/[\s\r\n]*$//;
    # only accept a plain integer; anything else means "no time condition"
    $modified_since = $epoch if $epoch =~ m/^-?[0-9]+$/;
    print STDERR "is modified since: $ENV{HTTP_IF_MODIFIED_SINCE} ($modified_since) ?\n" if $debug && $modified_since;
}
72 |
|
73 |
# client details taken from the CGI environment (used for logging and
# forwarded as curl's user agent)
my $ip = $ENV{REMOTE_ADDR};
my $user_agent = defined $ENV{HTTP_USER_AGENT} ? $ENV{HTTP_USER_AGENT} : '';

# decide whether the upstream copy has to be checked for this request:
# the "no updates" pattern switches checking off, and when an "updates only"
# pattern is configured it alone decides
my $check_file = 1;
if (defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/) {
    $check_file = 0;
}
if (defined $check_updates_only_files) {
    $check_file = ($filename =~ m/$check_updates_only_files/) ? 1 : 0;
}

# files that get merged from all sources are always checked
if ($filename =~ m/$merge_files/) {
    $merge = 1;
}
if ($merge) {
    $check_file = 1;
}

if ($debug) {
    print STDERR "check_file: $check_file\n";
    print STDERR "merge: $merge\n";
}

# look for a cached copy; @stat stays empty when there is none
my @stat = lstat($cache_path . $file);
if ($filename && @stat) {
    print STDERR "timestamp: $stat[9]\n" if $debug;
    # no update check wanted: answer straight from the cache
    unless ($check_file) {
        return_file($cache_path, $file, $logfile, 'HIT_NO_CHECK', \@stat, $ip, $user_agent);
    }
}
100 |
|
101 |
# state shared between the source loop, the curl callbacks and the final
# bookkeeping
my $curl;
my ($r, $file_sent, $file_unmodified) = (0, 0, 0);
my $file_time = -1;
my $err = 200;

# everything that gets sent to the client is also written to a temporary
# file, which may become the cached copy afterwards
my $tmp_file = join('/', $cache_tmp_path, rand() . $$);
unless (open(FILEHANDLE, ">", $tmp_file)) {
    _log($logfile, $file, 500, 'MISS', -1, $ip, $user_agent);
    return_error(500, 'Server error');
}
binmode(FILEHANDLE);

# data handed to the curl header and write callbacks
my %curldata = (
    fh           => \*FILEHANDLE,
    file_sent    => \$file_sent,
    content_type => $file_type,
    size         => -1,
    merge        => \$merge,
);

if ($debug) {
    print STDERR "sources: " . scalar(@$sources) . "\n";
    foreach my $entry (@$sources) {
        print STDERR " - " . $entry . "\n";
    }
}
124 |
|
125 |
# filter out duplicate sources (and expand urpmi)
# @sources ends up holding every configured source once; the special entry
# 'urpmi' is replaced by the media configured in urpmi itself.
my @sources;
my %seen;
foreach my $s (@$sources) {
    # skip anything that was listed before
    next if $seen{$s}++;

    if ($s ne "urpmi") {
        push @sources, $s;
        next;
    }

    # urpmi support is required
    use urpm;
    use urpm::cfg;
    my $urpm = urpm->new();
    urpm::get_global_options($urpm);
    my $config = urpm::cfg::load_config($urpm->{config});
    my %media_urls;
    foreach my $media (@{$config->{media}}) {
        next if $media->{ignore};
        if ($media->{mirrorlist} && !($seen{'mirrorlist:' . $media->{mirrorlist}}++)) {
            # push mirrorlists now so they'll be first
            push @sources, 'mirrorlist://' . $media->{mirrorlist};
        }
        elsif ($media->{url} && !($seen{$media->{url}}++)) {
            $media_urls{$media->{url}} = 1;
        }
    }
    # push the urls; sorted so the order is the same on every request
    push @sources, sort keys %media_urls;
}
157 |
|
158 |
# check for source
print STDERR "interpolated sources: " . scalar(@sources) . "\n" if $debug;
if ($debug) {
    foreach my $sou (@sources) {
        print STDERR " - " . $sou . "\n";
    }
}

# Returns the whole content of urpmi's mirror cache as one string, or undef
# when the cache file cannot be opened.
sub _slurp_mirrors_cache {
    open(my $cache_fh, '<', '/var/cache/urpmi/mirrors.cache') or return;
    my $content = do { local $/; <$cache_fh> };
    close $cache_fh;
    return defined $content ? $content : '';
}

# Formats epoch seconds as an HTTP-date (always GMT, fixed English names),
# which is what a Last-Modified header has to carry.
sub _http_date {
    my ($epoch) = @_;
    my @gmt = gmtime($epoch);
    return sprintf('%s, %02d %s %04d %02d:%02d:%02d GMT',
        (qw(Sun Mon Tue Wed Thu Fri Sat))[$gmt[6]], $gmt[3],
        (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$gmt[4]],
        $gmt[5] + 1900, $gmt[2], $gmt[1], $gmt[0]);
}

# Try the sources in order. Each one is sent to the client and written to the
# temporary file at the same time; the loop stops after the first source that
# sent data, unless the file is one that gets merged from all sources.
# NOTE(review): write_function also streams the body of an HTTP error
# response, which ends this loop as well - confirm that this is intended.
foreach my $source (@sources) {
    my ($type, @loc) = split('://', $source);
    my $loc = join('://', @loc);
    # an empty source string yields no type at all
    $type = '' if !defined $type;
    print STDERR "source of type $type: '" . $source . "'\n" if $debug;

    if ($type eq 'mirrorlist') {
        # get exact url from cache and parse
        my $mirrorcache = _slurp_mirrors_cache();
        if (defined $mirrorcache) {
            # \Q..\E: the mirrorlist name is data, not a pattern
            if ($mirrorcache =~ m/'\Q$loc\E'\s+=>\s+\{[\r\n]+\s+'chosen'\s+=>\s+'([^']+)'/m) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "mirrorlist returns source '$source'\n" if $debug;
                ($type, @loc) = split('://', $source);
                $loc = join('://', @loc);
                if (defined $type) {
                    print STDERR "mirrorlist returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: mirrorlist is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: mirrorlist has no chosen url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
            # unresolved, so it must not be handed to curl as a url
            $type = '';
        }
    }

    if ($type eq 'rsync') {
        # find the equivalent ftp mirror location by hostname
        my $mirrorcache = _slurp_mirrors_cache();
        if (defined $mirrorcache) {
            my $loc_host = $loc;
            $loc_host =~ s!/.+!!;
            if ($mirrorcache =~ m!'url'\s+=>\s+'((?:ftp|http)://\Q$loc_host\E/[^']+)'!) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "rsync switch returns source '$source'\n" if $debug;
                ($type, @loc) = split('://', $source);
                $loc = join('://', @loc);
                if (defined $type) {
                    print STDERR "rsync switch returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: rsync switch is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: rsync switch has no suitable url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
            # unresolved, so it must not be handed to curl as a url
            $type = '';
        }
    }

    if ($type eq 'file') {
        my $path = $loc . $file;
        my @statl = lstat($path);
        if ($filename && scalar(@statl) > 0) {
            # ask file(1) for the mime type; list-form pipe open keeps the
            # request-derived path away from any shell
            my $ft = '';
            if (open(my $mime_fh, '-|', 'file', '-b', '--mime-type', '--', $path)) {
                $ft = join('', <$mime_fh>);
                close $mime_fh;
            }
            $ft =~ s/[\s\r\n]*$//;
            my $t = _http_date($statl[9]);
            print STDERR "HTTP Header: 200 OK\n" if $debug;
            print STDERR "Content-Type: $ft\n" if $debug;
            print STDERR "Content-Length: " . $statl[7] . "\n" if $debug;
            print STDERR "Last-Modified: " . $t . "\n" if $debug;
            $r = open(my $src_fh, "<", $path);
            if ($r) {
                print STDERR "file fetch url '" . $path . "'\n" if $debug;
                if (!$file_sent) {
                    # first data for this request: send the response headers
                    $file_sent = $statl[7];
                    print "Status: 200 OK\r\n";
                    print "Content-Type: " . $ft . "\r\n";
                    print "Content-Length: " . $statl[7] . "\r\n" if !$merge;
                    print "Last-Modified: " . $t . "\r\n";
                    print "\r\n";
                }
                else {
                    $file_sent += $statl[7];
                }
                binmode($src_fh);
                my $buf;
                while (read($src_fh, $buf, 1024)) {
                    print FILEHANDLE $buf;
                    print $buf;
                }
                close $src_fh;
                # 0 means success, like a curl return code
                $r = 0;
            }
            else {
                print STDERR "transfer error: couldn't open file '" . $path . "'.\n" if $debug;
                $r = 1;
                $err = 404;
            }
        }
        else {
            print STDERR "transfer error: couldn't read file '" . $path . "'.\n" if $debug;
        }
    }
    elsif ($type) {
        if (!defined $curl) {
            use WWW::Curl::Easy;
            # set up curl stuff (once; the handle is reused for every source)
            $curl = WWW::Curl::Easy->new;
            if (scalar(@stat) > 0 && $stat[9] > $modified_since) {
                # the cached copy is newer than what the client has
                $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
                $curl->setopt(CURLOPT_TIMEVALUE, $stat[9]);
            }
            elsif ($modified_since > 0) {
                $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
                $curl->setopt(CURLOPT_TIMEVALUE, $modified_since);
            }
            $curl->setopt(CURLOPT_USERAGENT, $user_agent) if $user_agent;
            $curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout);
            $curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout);
            $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed);
            $curl->setopt(CURLOPT_LOW_SPEED_TIME, $max_stall_time);
            $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
            $curl->setopt(CURLOPT_FILETIME, 1);
            # hook curl transfer functions for local caching
            $curl->setopt(CURLOPT_WRITEDATA, \%curldata);
            $curl->setopt(CURLOPT_WRITEFUNCTION, \&write_function);
            $curl->setopt(CURLOPT_WRITEHEADER, \%curldata);
            $curl->setopt(CURLOPT_HEADERFUNCTION, \&header_function);
        }
        # depending on type check if remote file is newer
        print STDERR "curl fetch url '" . $source . $file . "'\n" if $debug;
        $curl->setopt(CURLOPT_URL, $source . $file);
        $r = $curl->perform;
        print STDERR "curl return value: " . $r . "\n" if $debug;
        # use curl to get it and output it directly
        if ($r == 0) {
            $err = $curl->getinfo(CURLINFO_HTTP_CODE);
            if ($err =~ m/^2/ || $err == 304) {
                if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) {
                    $file_unmodified = 1;
                    print STDERR "condition unmet\n" if $debug;
                }
                $file_time = $curl->getinfo(CURLINFO_FILETIME);
            }
            else {
                # error stuff ?
                print STDERR "transfer error: http code " . $err . "\n" if $debug;
            }
        }
        else {
            # error stuff ?
            print STDERR "transfer error: " . $curl->strerror($r) . " ($r)\n" if $debug;
        }
    }
    else {
        print STDERR "transfer error: this source does not have a type\n" if $debug;
    }
    print STDERR "file_sent: $file_sent\n" if $debug;
    last if $file_sent && !$merge;
}
339 |
|
340 |
my $extra = '';

# Buffered write errors only surface on close, so its result tells whether
# the temporary file really holds everything that was sent.
my $cache_complete = close(FILEHANDLE);

if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) {
    # clean up file and move to correct location
    if (!$cache_complete) {
        # never put a truncated file into the cache
        print STDERR "WARNING: file '$tmp_file' could not be written completely, '$cache_path$file' is not cached\n";
        unlink($tmp_file);
    }
    elsif (system('mkdir', '-p', $cache_path . $dest_path) == 0) {
        # (list-form system: the request-derived path never reaches a shell)
        if (rename($tmp_file, $cache_path . $file)) {
            utime(time(), $file_time, $cache_path . $file) if $file_time > 0;
        }
        else {
            print STDERR "WARNING: file '$tmp_file' could not be moved to '$cache_path$file'\n";
            # don't leave the temporary file behind
            unlink($tmp_file);
        }
    }
    else {
        print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n";
        # don't leave the temporary file behind
        unlink($tmp_file);
    }
    _log($logfile, $file, 200, 'MISS', $file_sent, $ip, $user_agent);
}
else {
    unlink($tmp_file);
    if ($file_sent) {
        # something already went out to the client, nothing more can be done
        if ($filename) {
            _log($logfile, $file, $err, 'MISS_FAIL_SENT', $file_sent, $ip, $user_agent);
        }
        else {
            # It was actually successful, but paths can't be saved...
            print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug;
            _log($logfile, $file, $err, 'MISS', $file_sent, $ip, $user_agent);
        }
        exit 0;
    }
    if ($file_unmodified) {
        $extra = '_UNMODIFIED';
        if ($modified_since > 0 && (scalar(@stat) == 0 || $stat[9] <= $modified_since)) {
            # it's been requested, so we can answer unmodified
            _log($logfile, $file, 304, 'MISS' . $extra, 0, $ip, $user_agent);
            return_error(304, 'Unmodified');
        }
    }
    else {
        # leading underscore so the log tag reads HIT_AFTER_FAIL, in line
        # with HIT_UNMODIFIED
        $extra = '_AFTER_FAIL';
    }
    # nothing was sent: fall back to the cached copy when there is one
    return_file($cache_path, $file, $logfile, 'HIT' . $extra, \@stat, $ip, $user_agent) if $filename && scalar(@stat) > 0;
    _log($logfile, $file, 404, 'MISS_FAIL', -1, $ip, $user_agent);
    return_error(404, 'File not found');
}

print STDERR "finished." if $debug;

exit 0;
391 |
|
392 |
# curl header callback: called once per response header line.
#   $ptr  - the raw header line
#   $data - hash ref shared with write_function; this sub fills in
#           http_header, content_type, size and date
# Header lines are only looked at until the first body data has been sent
# (file_sent is still 0). Returns the length of the line, which tells curl
# that the line was consumed.
sub header_function {
    my ($ptr, $data) = @_;
    if (!${$data->{file_sent}}) {
        if ($ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!) {
            $data->{http_header} = $1;
            # A status line starts a new response (e.g. after a redirect):
            # forget what the previous response said about its own body.
            $data->{content_type} = '';
            $data->{size} = -1;
            delete $data->{date};
        }
        # header names are case-insensitive (HTTP/2 sends them in lower
        # case) and the whitespace after the colon is optional
        elsif ($ptr =~ m/^Content-Type:\s*(.+?)[\s\r\n]*$/i) {
            $data->{content_type} = $1;
        }
        elsif ($ptr =~ m/^Content-Length:\s*([0-9]+)[\s\r\n]*$/i) {
            $data->{size} = $1;
        }
        elsif ($ptr =~ m/^Last-Modified:\s*(.+?)[\s\r\n]*$/i) {
            $data->{date} = $1;
        }
        # reply code 213 followed by a number is taken as the file size
        elsif ($ptr =~ m/^213\s+([0-9]+)[\s\r\n]*$/) {
            $data->{size} = $1;
        }
    }
    return length($ptr);
}
403 |
|
404 |
# curl write callback: called for every chunk of body data.
#   $ptr  - the chunk
#   $data - hash ref shared with header_function (fh, file_sent, merge and
#           the header values collected so far)
# The chunk goes to the temporary file and to the client. Before the very
# first chunk the response headers are sent to the client. Returns the chunk
# length.
sub write_function {
    my ($ptr, $data) = @_;
    my $chunk_length = length($ptr);
    my $sent = $data->{file_sent};

    # copy for the cache
    my $cache_fh = ${$data->{fh}};
    print {$cache_fh} $ptr;

    if ($$sent) {
        # headers are out already, just keep counting
        $$sent += $chunk_length;
    }
    else {
        $$sent = $chunk_length;
        if ($debug) {
            print STDERR "HTTP header: " . $data->{http_header} . "\n" if defined $data->{http_header};
            print STDERR "Content-Type: " . $data->{content_type} . "\n" if defined $data->{content_type};
            print STDERR "Content-Length: " . $data->{size} . "\n";
            print STDERR "Last-Modified: " . $data->{date} . "\n" if defined $data->{date};
        }
        # a status line is only needed when it is not a 2xx one
        if ($data->{http_header} && $data->{http_header} !~ m/^2/) {
            print "Status: " . $data->{http_header} . "\r\n";
        }
        if ($data->{content_type}) {
            print "Content-Type: " . $data->{content_type} . "\r\n";
        }
        # no length for merged files
        if ($data->{size} > -1 && !${$data->{merge}}) {
            print "Content-Length: " . $data->{size} . "\r\n";
        }
        if ($data->{date}) {
            print "Last-Modified: " . $data->{date} . "\r\n";
        }
        print "\r\n";
    }

    # copy for the client
    print $ptr;
    return $chunk_length;
}
426 |
|
427 |
# Appends one line to the log file:
#   [date] ip code size cached 'file' 'user_agent'
#   $logfile    - path of the log file
#   $file       - requested path
#   $code       - HTTP status code that was answered
#   $cached     - cache result tag (e.g. 'MISS', 'HIT_NO_CHECK')
#   $size       - number of bytes sent; negative or undefined is logged as '-'
#   $ip         - client address; undefined is logged as '-'
#   $user_agent - client user agent
# Logging is best effort: when the log file can't be opened nothing happens.
sub _log {
    my ($logfile, $file, $code, $cached, $size, $ip, $user_agent) = @_;
    use POSIX ();
    # same layout as date(1) prints by default in the C locale, without
    # starting a process for every log line
    my $date = POSIX::strftime('%a %b %e %H:%M:%S %Z %Y', localtime());
    $date =~ s/[\s\r\n]*$//;
    $size = '-' if !defined $size || $size < 0;
    # the address is missing when the script is not run by a web server
    $ip = '-' if !defined $ip;
    $user_agent = '' if !defined $user_agent;
    open(my $log_fh, '>>', $logfile) or return;
    print {$log_fh} "[" . $date . "] $ip $code $size $cached '$file' '$user_agent'\n";
    return close($log_fh);
}
436 |
|
437 |
# Sends a cached file to the client, logs the request and ends the script.
#   $cache_path - root of the cache
#   $file       - requested path, appended to $cache_path
#   $logfile    - log file path, passed on to _log
#   $cached     - cache result tag for the log
#   $stat       - array ref with the lstat result of the cached file
#   $ip, $user_agent - client details for the log
# Does not return: it exits after the file was sent, and answers with a 500
# error (which exits as well) when the file can't be opened.
sub return_file {
    my ($cache_path, $file, $logfile, $cached, $stat, $ip, $user_agent)=@_;
    my $path = $cache_path . $file;
    open(my $cached_fh, "<", $path) or do {
        _log($logfile, $file, 500, $cached, -1, $ip, $user_agent);
        return_error(500, 'Server error');
    };
    # ask file(1) for the mime type; the path contains the request path, so
    # it is passed as a single argument (list-form pipe open), not through a
    # shell
    my $ft = '';
    if (open(my $mime_fh, '-|', 'file', '-b', '--mime-type', '--', $path)) {
        $ft = join('', <$mime_fh>);
        close($mime_fh);
    }
    $ft =~ s/[\s\r\n]*$//;
    # Last-Modified has to be an HTTP-date: GMT, with fixed English names
    my @gmt = gmtime($stat->[9]);
    my $t = sprintf('%s, %02d %s %04d %02d:%02d:%02d GMT',
        (qw(Sun Mon Tue Wed Thu Fri Sat))[$gmt[6]], $gmt[3],
        (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$gmt[4]],
        $gmt[5] + 1900, $gmt[2], $gmt[1], $gmt[0]);
    print STDERR "HTTP header: 200 OK\n" if $debug;
    print STDERR "Content-Type: '$ft'\n" if $debug;
    print STDERR "Content-Length: " . $stat->[7] . "\n" if $debug;
    print STDERR "Last-Modified: " . $t . "\n" if $debug;
    print "Status: 200 OK\r\n";
    print "Content-Type: " . $ft . "\r\n";
    print "Content-Length: " . $stat->[7] . "\r\n";
    print "Last-Modified: " . $t . "\r\n";
    print "\r\n";
    binmode($cached_fh);
    my $buf;
    while (read($cached_fh, $buf, 1024)) {
        print $buf;
    }
    close $cached_fh;
    _log($logfile, $file, 200, $cached, $stat->[7], $ip, $user_agent);
    exit 0;
}
464 |
|
465 |
# Answers the request with a bare status line (no body) and ends the script.
#   $code - HTTP status code
#   $text - reason phrase sent along with the code
sub return_error {
    my ($code, $text) = @_;
    my $status = "$code $text";
    print "Status: " . $status . "\r\n\r\n";
    if ($debug) {
        print STDERR $status . ".\n";
    }
    exit 0;
}