1 |
#!/usr/bin/perl -w |
2 |
## written by Maarten Vanraes (c) 2009-2012 |
3 |
## urpmi-proxy is GPLv2+ |
4 |
|
5 |
use strict; |
6 |
use warnings; |
7 |
|
8 |
my $debug = 0; |
9 |
my $proxy = 0; |
10 |
my $config_file = '/etc/urpmi-proxy.conf'; |
11 |
|
12 |
# config defaults |
13 |
my $cache_tmp_path = '/var/tmp/urpmi-proxy'; |
14 |
my $cache_path = '/var/cache/urpmi-proxy'; |
15 |
my $logfile = '/var/log/urpmi-proxy.log'; |
16 |
my $check_updates_only_files = '(MD5SUM|descriptions)'; |
17 |
my $check_no_updates_files; |
18 |
my $merge_files = 'media.cfg'; |
19 |
my $sources = [ |
20 |
'urpmi' |
21 |
]; |
22 |
my $connect_timeout = 120; |
23 |
my $ftp_response_timeout = 30; |
24 |
my $max_stall_speed = 8192; |
25 |
my $max_stall_time = 60; |
26 |
|
27 |
# load config file |
28 |
if (-R $config_file) { |
29 |
my $r = open(FILE, '<', $config_file); |
30 |
if ($r) { |
31 |
my $l = ''; |
32 |
while (my $i = <FILE>) { |
33 |
$l .= $i; |
34 |
} |
35 |
eval $l; |
36 |
close FILE; |
37 |
} |
38 |
} |
39 |
print STDERR "logfile: $logfile\n" if $debug; |
40 |
|
41 |
print STDERR "orig sources: " . scalar(@$sources) . "\n" if $debug; |
42 |
if ($debug) { |
43 |
foreach my $sou (@$sources) { |
44 |
print STDERR " - " . $sou . "\n"; |
45 |
} |
46 |
} |
47 |
|
48 |
# prepare cache path |
49 |
system("mkdir -p $cache_tmp_path"); |
50 |
|
51 |
# check for valid request |
52 |
my $file = $ENV{PATH_INFO}; |
53 |
return_error(500, 'Server error') if !$file; |
54 |
|
55 |
# split up request |
56 |
return_error(500, 'Server error') if $file !~ m!^(.*)/([^/]*)$!; |
57 |
my $dest_path = $1; |
58 |
my $filename = $2; |
59 |
my $file_type = ""; |
60 |
my $merge = 0; |
61 |
|
62 |
print STDERR "file: $file\n" if $debug; |
63 |
print STDERR "dest_path: $dest_path\n" if $debug; |
64 |
print STDERR "filename: $filename\n" if $debug; |
65 |
|
66 |
# check if there's a time condition |
67 |
my $modified_since = 0; |
68 |
if (defined $ENV{HTTP_IF_MODIFIED_SINCE}) { |
69 |
$modified_since = `date --date="$ENV{HTTP_IF_MODIFIED_SINCE}" "+%s"` if defined $ENV{HTTP_IF_MODIFIED_SINCE}; |
70 |
$modified_since =~ s/[\s\r\n]*$//; |
71 |
print STDERR "is modified since: $ENV{HTTP_IF_MODIFIED_SINCE} ($modified_since) ?\n" if $debug && $modified_since; |
72 |
} |
73 |
|
74 |
# set request ENV vars |
75 |
my $ip = $ENV{REMOTE_ADDR}; |
76 |
my $user_agent = ''; |
77 |
$user_agent = $ENV{HTTP_USER_AGENT} if defined $ENV{HTTP_USER_AGENT}; |
78 |
|
79 |
# check if request needs update checking |
80 |
my $check_file = 1; |
81 |
$check_file = 0 if defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/; |
82 |
if (defined $check_updates_only_files) { |
83 |
$check_file = 0; |
84 |
$check_file = 1 if $filename =~ m/$check_updates_only_files/; |
85 |
} |
86 |
|
87 |
# check if request needs merging |
88 |
$merge = 1 if $filename =~ m/$merge_files/; |
89 |
$check_file = 1 if $merge; |
90 |
|
91 |
print STDERR "check_file: $check_file\n" if $debug; |
92 |
print STDERR "merge: $merge\n" if $debug; |
93 |
|
94 |
# get datetime from local file if it exists |
95 |
my @stat = lstat($cache_path . $file); |
96 |
if ($filename && scalar(@stat) > 0) { |
97 |
print STDERR "timestamp: $stat[9]\n" if $debug; |
98 |
# if the file needs no update checks, check in cache |
99 |
return_file($cache_path, $file, $logfile, 'HIT_NO_CHECK', \@stat, $ip, $user_agent) if !$check_file; |
100 |
} |
101 |
|
102 |
# set up curl with timecheck |
103 |
my $curl; |
104 |
my $r = 0; |
105 |
my $file_sent = 0; |
106 |
my $file_unmodified = 0; |
107 |
my $file_time = -1; |
108 |
my $err = 200; |
109 |
|
110 |
# prepare curl transfer |
111 |
my $tmp_file = $cache_tmp_path . "/" . rand() . $$; |
112 |
open(FILEHANDLE, ">", $tmp_file) or do { |
113 |
_log($logfile, $file, 500, 'MISS', -1, $ip, $user_agent); |
114 |
return_error(500, 'Server error'); |
115 |
}; |
116 |
binmode(FILEHANDLE); |
117 |
my %curldata = (fh => \*FILEHANDLE, file_sent => \$file_sent, content_type => $file_type, size => -1, merge => \$merge); |
118 |
|
119 |
print STDERR "sources: " . scalar(@$sources) . "\n" if $debug; |
120 |
if ($debug) { |
121 |
foreach my $sou (@$sources) { |
122 |
print STDERR " - " . $sou . "\n"; |
123 |
} |
124 |
} |
125 |
|
126 |
# filter out duplicate sources (and expand urpmi) |
127 |
my @sources; |
128 |
my %seen; |
129 |
foreach my $s (@$sources) { |
130 |
# heh |
131 |
next if $seen{$s}++; |
132 |
if ($s eq "urpmi") { |
133 |
# urpmi support is required |
134 |
use urpm; |
135 |
use urpm::cfg; |
136 |
my $urpm = new urpm(); |
137 |
urpm::get_global_options($urpm); |
138 |
my $config = urpm::cfg::load_config($urpm->{config}); |
139 |
my %s; |
140 |
foreach my $media (@{$config->{media}}) { |
141 |
if (!$media->{ignore}) { |
142 |
if ($media->{mirrorlist} && !($seen{'mirrorlist:' . $media->{mirrorlist}}++)) { |
143 |
# push mirrorlists now so they'll be first |
144 |
push @sources, 'mirrorlist://' . $media->{mirrorlist}; |
145 |
} |
146 |
elsif ($media->{url} && !($seen{$media->{url}}++)) { |
147 |
$s{$media->{url}} = 1; |
148 |
} |
149 |
} |
150 |
} |
151 |
# push the urls |
152 |
push @sources, keys %s if scalar(keys %s); |
153 |
} |
154 |
else { |
155 |
push @sources, $s; |
156 |
} |
157 |
} |
158 |
|
159 |
# check for source |
160 |
print STDERR "interpolated sources: " . scalar(@sources) . "\n" if $debug; |
161 |
if ($debug) { |
162 |
foreach my $sou (@sources) { |
163 |
print STDERR " - " . $sou . "\n"; |
164 |
} |
165 |
} |
166 |
foreach my $source (@sources) { |
167 |
my ($type, @loc) = split('://', $source); |
168 |
my $loc = join('://', @loc); |
169 |
print STDERR "source of type $type: '" . $source . "'\n" if $debug; |
170 |
if ($type eq 'mirrorlist') { |
171 |
# get exact url from cache and parse |
172 |
my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache'); |
173 |
if ($res) { |
174 |
my $mirrorcache = ''; |
175 |
while (my $i = <FILE>) { |
176 |
$mirrorcache .= $i; |
177 |
} |
178 |
close FILE; |
179 |
my $host_loc = $loc; |
180 |
$host_loc =~ s/\$/\\\$/g; |
181 |
if ($mirrorcache =~ m/'$host_loc'\s+=>\s+{[\r\n]+\s+'chosen'\s+=>\s+'([^']+)'/m) { |
182 |
$source = $1; |
183 |
# rectify source to remove '/distrib/version/arch' |
184 |
$source =~ s!/[^/]+/[^/]+/[^/]+$!!; |
185 |
print STDERR "mirrorlist returns source '$source'\n" if $debug; |
186 |
($type, @loc) = split('://', $source); |
187 |
$loc = join('://', @loc); |
188 |
if (defined $type) { |
189 |
print STDERR "mirrorlist returns type $type: '" . $source . "'\n" if $debug; |
190 |
} |
191 |
else { |
192 |
print STDERR "transfer error: mirrorlist is no url '" . $source . "'.\n" if $debug; |
193 |
$type = ''; |
194 |
} |
195 |
} |
196 |
else { |
197 |
print STDERR "transfer error: mirrorlist has no chosen url '" . $source . "'.\n" if $debug; |
198 |
$type = ''; |
199 |
} |
200 |
} |
201 |
else { |
202 |
print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug; |
203 |
} |
204 |
} |
205 |
if ($type eq 'rsync') { |
206 |
# find the equivalent ftp mirror location by hostname |
207 |
my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache'); |
208 |
if ($res) { |
209 |
my $mirrorcache = ''; |
210 |
while (my $i = <FILE>) { |
211 |
$mirrorcache .= $i; |
212 |
} |
213 |
close FILE; |
214 |
my $loc_host = $loc; |
215 |
$loc_host =~ s!/.+!!; |
216 |
if ($mirrorcache =~ m!'url'\s+=>\s+'((ftp|http)://$loc_host/[^']+)'!) { |
217 |
$source = $1; |
218 |
# rectify source to remove '/distrib/version/arch' |
219 |
$source =~ s!/[^/]+/[^/]+/[^/]+$!!; |
220 |
print STDERR "rsync switch returns source '$source'\n" if $debug; |
221 |
($type, @loc) = split('://', $source); |
222 |
$loc = join('://', @loc); |
223 |
if (defined $type) { |
224 |
print STDERR "rsync switch returns type $type: '" . $source . "'\n" if $debug; |
225 |
} |
226 |
else { |
227 |
print STDERR "transfer error: rsync switch is no url '" . $source . "'.\n" if $debug; |
228 |
$type = ''; |
229 |
} |
230 |
} |
231 |
else { |
232 |
print STDERR "transfer error: rsync switch has no suitable url '" . $source . "'.\n" if $debug; |
233 |
$type = ''; |
234 |
} |
235 |
} |
236 |
else { |
237 |
print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug; |
238 |
} |
239 |
} |
240 |
if ($type eq 'file') { |
241 |
my @statl = lstat($loc . $file); |
242 |
if ($filename && scalar(@statl) > 0) { |
243 |
my $ft = `file -b --mime-type $loc$file`; |
244 |
$ft =~ s/[\s\r\n]*$//; |
245 |
my $t = localtime($statl[9]); |
246 |
print STDERR "HTTP Header: 200 OK\n" if $debug; |
247 |
print STDERR "Content-Type: $ft\n" if $debug; |
248 |
print STDERR "Content-Length: " . $statl[7] . "\n" if $debug; |
249 |
print STDERR "Last-Modified: " . $t . "\n" if $debug; |
250 |
$r = open(FILE, "<", $loc . $file); |
251 |
if ($r) { |
252 |
print STDERR "file fetch url '" . $loc . $file . "'\n" if $debug; |
253 |
if (!$file_sent) { |
254 |
$file_sent = $statl[7]; |
255 |
print "Status: 200 OK\r\n"; |
256 |
print "Content-Type: " . $ft . "\r\n"; |
257 |
print "Content-Length: " . $statl[7] . "\r\n" if !$merge; |
258 |
print "Last-Modified: " . $t . "\r\n"; |
259 |
print "\r\n"; |
260 |
} |
261 |
else { |
262 |
$file_sent += $statl[7]; |
263 |
} |
264 |
binmode(FILE); |
265 |
my $buf; |
266 |
while (read(FILE, $buf, 1024)) { |
267 |
print FILEHANDLE $buf; |
268 |
print $buf; |
269 |
} |
270 |
close FILE; |
271 |
$r = 0; |
272 |
} |
273 |
else { |
274 |
print STDERR "transfer error: couldn't open file '" . $loc . $file . "'.\n" if $debug; |
275 |
$r = 1; |
276 |
$err = 404; |
277 |
} |
278 |
} |
279 |
else { |
280 |
print STDERR "transfer error: couldn't read file '" . $loc . $file . "'.\n" if $debug; |
281 |
} |
282 |
} |
283 |
elsif ($type) { |
284 |
if (!defined $curl) { |
285 |
use WWW::Curl::Easy; |
286 |
# set up curl stuff |
287 |
$curl = new WWW::Curl::Easy; |
288 |
if (scalar(@stat) > 0 && $stat[9] > $modified_since) { |
289 |
$curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE |
290 |
$curl->setopt(CURLOPT_TIMEVALUE, $stat[9]); |
291 |
} |
292 |
elsif ($modified_since > 0) { |
293 |
$curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE |
294 |
$curl->setopt(CURLOPT_TIMEVALUE, $modified_since); |
295 |
} |
296 |
$curl->setopt(CURLOPT_USERAGENT, $user_agent) if $user_agent; |
297 |
$curl->setopt(CURLOPT_PROXY, $proxy) if $proxy; |
298 |
$curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout); |
299 |
$curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout); |
300 |
$curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed); |
301 |
$curl->setopt(CURLOPT_LOW_SPEED_TIME, $max_stall_time); |
302 |
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1); |
303 |
$curl->setopt(CURLOPT_FILETIME, 1); |
304 |
# hook curl transfer functions for local caching |
305 |
$curl->setopt(CURLOPT_WRITEDATA, \%curldata); |
306 |
$curl->setopt(CURLOPT_WRITEFUNCTION, \&write_function); |
307 |
$curl->setopt(CURLOPT_WRITEHEADER, \%curldata); |
308 |
$curl->setopt(CURLOPT_HEADERFUNCTION, \&header_function); |
309 |
} |
310 |
# depending on type check if remote file is newer |
311 |
print STDERR "curl fetch url '" . $source . $file . "'\n" if $debug; |
312 |
$curl->setopt(CURLOPT_URL, $source . $file); |
313 |
$r = $curl->perform; |
314 |
print STDERR "curl return value: " . $err . "\n" if $debug; |
315 |
# use curl to get it and output it directly |
316 |
if ($r == 0) { |
317 |
$err = $curl->getinfo(CURLINFO_HTTP_CODE); |
318 |
if ($err =~ m/^2/ || $err == 304) { |
319 |
if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) { |
320 |
$file_unmodified = 1; |
321 |
print STDERR "condition unmet\n" if $debug; |
322 |
} |
323 |
$file_time = $curl->getinfo(CURLINFO_FILETIME); |
324 |
} |
325 |
else { |
326 |
# error stuff ? |
327 |
print STDERR "transfer error: http code " . $err . "\n" if $debug; |
328 |
} |
329 |
} |
330 |
else { |
331 |
# error stuff ? |
332 |
print STDERR "transfer error: " . $curl->strerror($r) . " ($r)\n" if $debug; |
333 |
} |
334 |
} |
335 |
else { |
336 |
print STDERR "transfer error: this source does not have a type\n" if $debug; |
337 |
} |
338 |
print STDERR "file_sent: $file_sent\n" if $debug; |
339 |
last if $file_sent && !$merge; |
340 |
} |
341 |
|
342 |
my $extra = ''; |
343 |
|
344 |
close(FILEHANDLE); |
345 |
|
346 |
if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) { |
347 |
# clean up file and move to correct location |
348 |
if (system("mkdir -p $cache_path$dest_path") == 0) { |
349 |
if (rename($tmp_file, $cache_path . $file)) { |
350 |
utime(time(), $file_time, $cache_path . $file) if $file_time > 0; |
351 |
} |
352 |
else { |
353 |
print STDERR "WARNING: file '$tmp_file' could not be moved to '$cache_path$file'\n"; |
354 |
} |
355 |
} |
356 |
else { |
357 |
print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n"; |
358 |
} |
359 |
_log($logfile, $file, 200, 'MISS', $file_sent, $ip, $user_agent); |
360 |
} |
361 |
else { |
362 |
unlink($tmp_file); |
363 |
if ($file_sent) { |
364 |
if ($filename) { |
365 |
_log($logfile, $file, $err, 'MISS_FAIL_SENT', $file_sent, $ip, $user_agent); |
366 |
} |
367 |
else { |
368 |
# It was actually successful, but paths can't be saved... |
369 |
print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug; |
370 |
_log($logfile, $file, $err, 'MISS', $file_sent, $ip, $user_agent); |
371 |
} |
372 |
exit 0; |
373 |
} |
374 |
if ($file_unmodified) { |
375 |
$extra = '_UNMODIFIED'; |
376 |
if ($modified_since > 0 && (scalar(@stat) == 0 || $stat[9] <= $modified_since)) { |
377 |
# it's been requested, so we can answer unmodified |
378 |
_log($logfile, $file, 304, 'MISS' . $extra, 0, $ip, $user_agent); |
379 |
return_error(304, 'Unmodified'); |
380 |
} |
381 |
} |
382 |
else { |
383 |
$extra = 'AFTER_FAIL'; |
384 |
} |
385 |
return_file($cache_path, $file, $logfile, 'HIT' . $extra, \@stat, $ip, $user_agent) if $filename && scalar(@stat) > 0; |
386 |
_log($logfile, $file, 404, 'MISS_FAIL', -1, $ip, $user_agent); |
387 |
return_error(404, 'File not found'); |
388 |
} |
389 |
|
390 |
print STDERR "finished." if $debug; |
391 |
|
392 |
exit 0; |
393 |
|
394 |
sub header_function { |
395 |
my ($ptr, $data) = @_; |
396 |
if (!${$data->{file_sent}}) { |
397 |
$data->{http_header} = $1 if $ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!; |
398 |
$data->{content_type} = $1 if $ptr =~ m/^Content-[tT]ype:\s+(.+?)[\s\r\n]*$/; |
399 |
$data->{size} = $1 if $ptr =~ m/^Content-[lL]ength:\s+(.+?)[\s\r\n]*$/; |
400 |
$data->{date} = $1 if $ptr =~ m/^Last-[mM]odified:\s+(.+?)[\s\r\n]*$/; |
401 |
$data->{size} = $1 if $ptr =~ m/^213\s+(.+?)[\s\r\n]*$/; |
402 |
} |
403 |
return length($ptr); |
404 |
} |
405 |
|
406 |
sub write_function { |
407 |
my ($ptr, $data) = @_; |
408 |
my $f = ${$data->{fh}}; |
409 |
print $f ($ptr); |
410 |
if (!${$data->{file_sent}}) { |
411 |
${$data->{file_sent}} = length($ptr); |
412 |
print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header}; |
413 |
print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type}; |
414 |
print STDERR "Content-Length: " . $data->{size} . "\n" if $debug; |
415 |
print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date}; |
416 |
print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/; |
417 |
print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type}; |
418 |
print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}}; |
419 |
print "Last-Modified: " . $data->{date} . "\r\n" if $data->{date}; |
420 |
print "\r\n"; |
421 |
} |
422 |
else { |
423 |
${$data->{file_sent}} += length($ptr); |
424 |
} |
425 |
print $ptr; |
426 |
return length($ptr); |
427 |
} |
428 |
|
429 |
sub _log { |
430 |
my ($logfile, $file, $code, $cached, $size, $ip, $user_agent) = @_; |
431 |
my $date = `date`; |
432 |
$date =~ s/[\s\r\n]*$//; |
433 |
$size = '-' if $size < 0; |
434 |
open(FILE, ">>" . $logfile) or return; |
435 |
print FILE "[" . $date . "] $ip $code $size $cached '$file' '$user_agent'\n"; |
436 |
close(FILE); |
437 |
} |
438 |
|
439 |
sub return_file { |
440 |
my ($cache_path, $file, $logfile, $cached, $stat, $ip, $user_agent)=@_; |
441 |
open(FILE, "<", $cache_path . $file) or do { |
442 |
_log($logfile, $file, 500, $cached, -1, $ip, $user_agent); |
443 |
return_error(500, 'Server error'); |
444 |
}; |
445 |
my $ft = `file -b --mime-type $cache_path$file`; |
446 |
$ft =~ s/[\s\r\n]*$//; |
447 |
my $t = localtime($stat->[9]); |
448 |
print STDERR "HTTP header: 200 OK\n" if $debug; |
449 |
print STDERR "Content-Type: '$ft'\n" if $debug; |
450 |
print STDERR "Content-Length: " . $stat->[7] . "\n" if $debug; |
451 |
print STDERR "Last-Modified: " . $t . "\n" if $debug; |
452 |
print "Status: 200 OK\r\n"; |
453 |
print "Content-Type: " . $ft . "\r\n"; |
454 |
print "Content-Length: " . $stat->[7] . "\r\n"; |
455 |
print "Last-Modified: " . $t . "\r\n"; |
456 |
print "\r\n"; |
457 |
binmode(FILE); |
458 |
my $buf; |
459 |
while (read(FILE, $buf, 1024)) { |
460 |
print $buf; |
461 |
} |
462 |
close FILE; |
463 |
_log($logfile, $file, 200, $cached, $stat->[7], $ip, $user_agent); |
464 |
exit 0; |
465 |
} |
466 |
|
467 |
sub return_error { |
468 |
my ($code, $text) = @_; |
469 |
print "Status: $code $text\r\n\r\n"; |
470 |
print STDERR "$code $text.\n" if $debug; |
471 |
exit 0; |
472 |
} |