1 |
#!/usr/bin/perl |
2 |
# |
3 |
# Copyright (C) 2005,2006 Mandriva |
4 |
# |
5 |
# Author: Florent Villard <warly@mandriva.com> |
6 |
# |
7 |
# This program is free software; you can redistribute it and/or modify |
8 |
# it under the terms of the GNU General Public License as published by |
9 |
# the Free Software Foundation; either version 2, or (at your option) |
10 |
# any later version. |
11 |
# |
12 |
# This program is distributed in the hope that it will be useful, |
13 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
# GNU General Public License for more details. |
16 |
# |
17 |
# You should have received a copy of the GNU General Public License |
18 |
# along with this program; if not, write to the Free Software |
19 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
20 |
# |
21 |
# compare and rebuild packages on different architecture |
22 |
# |
23 |
# TODO |
24 |
# |
25 |
# - create a configuration file to handle the various iurt running |
26 |
# - get the content of the rebuild dir |
27 |
# - run as many iurt as machines are available and gather results |
28 |
# - the scheduler just take the results, launch new rebuild, and quit |
29 |
# - use perl ssh and try to work around the non-working timeout when the |
30 |
# remote machine is stalled |
31 |
# - use submitter as packager, not generic name |
32 |
# |
33 |
|
34 |
use strict; |
35 |
use MDK::Common qw(any cat_ if_ find); |
36 |
use Iurt::Config qw(config_usage get_date config_init get_author_email check_arch check_noarch); |
37 |
use Iurt::Process qw(check_pid); |
38 |
use Iurt::File qw(check_upload_tree); |
39 |
use Iurt::Mail qw(sendmail); |
40 |
use Iurt::Util qw(plog_init plog ssh_setup ssh sout sget sput); |
41 |
use File::Copy 'move'; |
42 |
use File::Path 'make_path'; |
43 |
use File::Temp 'mktemp'; |
44 |
use Filesys::Df qw(df); |
45 |
use Data::Dumper; |
46 |
use File::Slurp qw(read_file); |
47 |
|
48 |
# Run-time state shared with the Iurt helper functions (passed by reference
# to config_init() and check_pid() further down).
my %run;
my $program_name = 'ulri';
$run{program_name} = $program_name;

# Append to $ULRI_LOG_FILE when it is set and can be opened; otherwise log
# to a duplicate of STDERR.
my $LOG;
if (!$ENV{ULRI_LOG_FILE} || !open($LOG, '>>', $ENV{ULRI_LOG_FILE})) {
    open($LOG, ">&STDERR");
}

plog_init($program_name, $LOG, 7, 1);

my $HOME = $ENV{HOME};
my $configfile = "$HOME/.upload.conf";
my $sysconfigfile = "/etc/iurt/upload.conf";

# Load the first configuration file that exists: the per-user file takes
# precedence over the system-wide one. The file content is evaluated as
# Perl code and must yield a true value (the code below uses it as a hash
# reference).
# NOTE(review): this is a string eval of the whole file, so the
# configuration files must only be writable by trusted users.
my $config = {};
foreach my $f ($configfile, $sysconfigfile) {
    plog('DEBUG', "load config: $f");
    next unless -f $f;

    $config = eval(cat_($f));
    if (!$config) {
        # Report the actual eval error instead of a bare "syntax error".
        (my $reason = $@ || 'file did not evaluate to a true value') =~ s/\s+\z//;
        die "FATAL $program_name: syntax error in $f ($reason)";
    }
    last;
}
72 |
|
73 |
# Description and default value of every configuration key. The table is
# handed to config_usage() / config_init() together with the loaded $config.
my %config_usage = (
    admin => {
        desc => 'mail address of the bot administrator',
        default => 'distrib-admin@mandrivalinux.org'
    },
    'arch_translation' => {
        desc => "Renaming of arch",
        default => { 'sparc64' => 'sparcv9' }
    },
    bot => {
        desc => "List of bot able to compile the packages",
        # Layout: arch => host => bot name => { user, command, packages }.
        # The __DIR__, __TARGET__, __ARCH__ (and __PACKAGER__, __SECTION__,
        # __MEDIA__) placeholders in "command" are substituted when a build
        # is launched.
        default => {
            i586 => {
                n1 => {
                    iurt => {
                        user => 'mandrake' ,
                        command => 'sudo -u mandrake -H /usr/local/bin/iurt2.sh --copy_srpm --group -v 1 --config local_spool /export/home/mandrake/iurt/__DIR__ --no_rsync --chrooted-urpmi http://kenobi/dis/ -r __TARGET__ __ARCH__',
                        packages => '/export/home/mandrake/iurt/',
                    } ,
                },
            },
        },
    },
    media => {
        desc => 'Corresponding media to add given the current media',
        # Layout: target (or "default") => "section/subsection" => list of
        # media used when building for that medium.
        default => {
            default => {
                "main/release" => [ "main/release", "main/updates" ],
                "main/updates" => [ "main/release", "main/updates" ],
                "main/testing" => [ "main/release", "main/updates",
                                    "main/testing" ],
                "main/backports" => [ "main/release", "main/updates",
                                      "main/testing", "main/backports" ],
                "contrib/release" => [ "main/release", "main/updates",
                                       "contrib/release", "contrib/updates" ],
                "contrib/updates" => [ "main/release", "main/updates",
                                       "contrib/release", "contrib/updates" ],
                "contrib/testing" => [ "main/release", "main/updates",
                                       "main/testing", "contrib/release", "contrib/updates",
                                       "contrib/testing" ],
                "contrib/backports" => [ "main/release", "main/updates",
                                         "main/testing", "main/backports", "contrib/release",
                                         "contrib/updates", "contrib/testing",
                                         "contrib/backports" ],
                "non-free/release" => [ "main/release", "main/updates",
                                        "non-free/release", "non-free/updates" ],
                "non-free/updates" => [ "main/release", "main/updates",
                                        "non-free/release", "non-free/updates" ],
                # Fixed: this list used to name the non-existent medium
                # "main/tessting".
                "non-free/testing" => [ "main/release", "main/updates",
                                        "main/testing", "non-free/release",
                                        "non-free/updates", "non-free/testing" ],
                "non-free/backports" => [ "main/release", "main/updates",
                                          "main/testing", "main/backports", "non-free/release",
                                          "non-free/updates", "non-free/testing",
                                          "non-free/backports" ],
            },
        },
    },
    faildelay => {
        desc => "Time after which the rebuild is considered as a failure",
        default => 36000
    },
    http_queue => {
        desc => 'Address where log can be consulted',
        # Fixed: no trailing blank, it ended up in the middle of the URLs
        # built from this value.
        default => 'http://kenobi.mandriva.com/queue'
    },
    queue => {
        desc => "Root of the tree where the packages to compile are located",
        default => "$HOME/uploads"
    },
    tmp => {
        desc => "Temporary directory",
        default => "$HOME/tmp"
    },
    ssh_options => {
        desc => "SSH options",
        default => "-o ConnectTimeout=20 -o BatchMode=yes"
    },
    packager => {
        desc => 'Default packager tag user by bot',
        default => 'Mageia Team <http://www.mageia.org>'
    },
    'arch' => {
        desc => 'Architectures list for each target',
        default => {
            cauldron => [ 'i586', 'x86_64' ],
            default => [ 'i586', 'x86_64' ],
        },
    },
);
config_usage(\%config_usage, $config) if $run{config_usage};
config_init(\%config_usage, $config, \%run);
165 |
|
166 |
# Reverse view of the arch_translation table:
# translated arch name => list of the original names mapped onto it.
my %untranslated_arch;
while (my ($from, $to) = each %{$config->{arch_translation}}) {
    push @{$untranslated_arch{$to}}, $from;
}

# check_pid() is given the pid file location through %run; the value it
# returns is unlinked at the very end of the run.
@run{qw(pidfile_home pidfile)} = ($config->{tmp}, $program_name);
my $pidfile = check_pid(\%run);


my ($fulldate, $daydate) = get_date();
$run{daydate} = $daydate;

# Refuse to work on a completely full queue filesystem.
# FIXME should send a mail too
my $df = df($config->{queue});
die "FATAL $program_name: not enough space on the filesystem, only $df->{bavail} KB on $config->{queue}, full at $df->{per}%"
    if $df->{per} == 100;

($fulldate, $daydate) = get_date();

# %pkg_tree:   upload prefix => everything known about that upload
# $compildone: prefix => media => arch => true once handled or building
my %pkg_tree;
my $compildone = {};

# The three sub-trees of the queue.
my ($todo, $failure, $done) =
    map { "$config->{queue}/$_" } qw(todo failure done);

# Raise this when the noarch package starts to build on any bot
my %noarch_build;

#
# Part 0: gather data from upload tree
#

plog('MSG', "check uploads tree");

# A list of what is currently building so we can report at the end
#
my %build_list;

plog('DEBUG', "input queue is $todo");
209 |
|
210 |
# Callback given to check_upload_tree() for the todo tree.
#
# Arguments: root of the todo tree, then three directory levels (used here
# as target, media section and sub-section) and the file name found there.
#
# - "<prefix>_<name>.src.rpm" files are recorded in %pkg_tree (media path,
#   target, uploader and srpm list of the prefix).
# - "<prefix>_<arch>.<bot>.<host>.<date>.<pid>.lock" files describe a build
#   in progress: the bot is marked busy in %run, the arch is marked as
#   handled in $compildone and the lock details are stored in %pkg_tree.
sub todo_func {
    my ($todo, $f, $m, $s, $r) = @_;

    my $media = "$m/$s";

    if ($r =~ /(\d{14}\.(\w+)\.\w+\.\d+)_(.*\.src\.rpm)$/) {
        my $prefix = $1;
        my $user = $2;
        my $srpm = $3;

        plog('DEBUG', "found srpm $srpm ($prefix)");

        my $entry = $pkg_tree{$prefix} ||= {};
        $entry->{media}{$media}{path} = "/$f/$m/$s";
        $entry->{target} = $f;
        $entry->{user} = $user;
        push @{$entry->{srpms}}, $srpm;

        # Package name = file name without -version-release.src.rpm
        my ($name) = $srpm =~ /(.*)-[^-]+-[^-]+\.src\.rpm$/;

        return $entry->{srpm_name}{$name} = $srpm;
    }

    if ($r =~ /(\d{14}\.\w+\.\w+\.\d+)_([\w-]+)\.(\w+)\.(\w+)\.(\d{14})\.(\d+)\.lock$/) {
        my %lock = (bot => $3, host => $4, date => $5, pid => $6);
        my $prefix = $1;
        my $arch = $2;

        my $translated = $config->{arch_translation}{$arch};
        $arch = $translated if $translated;
        plog('DEBUG', "found lock on $lock{host}/$arch for $prefix");

        # Only for build status reporting
        #
        push @{$build_list{"$lock{host}/$arch"}}, $prefix;

        # A "<arch>-noarch" lock means the upload is being built as
        # noarch on a bot of that arch.
        if ($arch =~ /noarch/) {
            plog('DEBUG', "... and $prefix is noarch");
            $noarch_build{$prefix} = 1;
            $arch =~ s/-.*//;
        }

        $run{bot}{$lock{host}}{$lock{bot}} = $prefix;

        # this should be in the cache, but waiting for a cache-clean option
        $compildone->{$prefix}{$media}{$arch} = 1;

        # Third whitespace-separated field of the first line of the lock
        # file; the lock is written as "<program> <pid> <time>" when a
        # build is launched.
        my @fields = split ' ', read_line("$todo/$f/$m/$s/$r");
        $lock{arch} = $arch;
        $lock{time} = $fields[2];

        push @{$pkg_tree{$prefix}{media}{$media}{bot}}, \%lock;
    }
}
261 |
|
262 |
# Second callback given to check_upload_tree() for the todo tree: removes
# lock files whose upload prefix has no source package recorded in
# %pkg_tree (orphan locks).
#
# Arguments are the same as for todo_func().
sub todo_post {
    my ($todo, $f, $m, $s, $r) = @_;

    if ($r =~ /(\d{14}\.\w+\.\w+\.\d+)_([\w-]+)\.(\w+)\.(\w+)\.(\d{14})\.(\d+)\.lock$/) {
        my $prefix = $1;
        unless ($pkg_tree{$prefix}{srpms}) {
            my $orphan = "$todo/$f/$m/$s/$r";
            plog('INFO', "cleaning orphan $r");
            unlink $orphan;
        }
    }
}
272 |
|
273 |
# Callback given to check_upload_tree() for the done tree.
#
# - "<prefix>_<arch>.done|fail|excluded" marker files flag that arch as
#   already handled for the prefix/media in $compildone.
# - any other "<prefix>_*.rpm" file is remembered as an already built
#   package of the prefix.
#
# The first two arguments (tree root and first directory level) are unused.
sub done_func {
    my ($_todo, $_f, $m, $s, $r) = @_;

    my $media = "$m/$s";

    if ($r =~ /(\d{14}\.\w+\.\w+\.\d+)_(.*)\.(done|fail|excluded)$/) {
        my $prefix = $1;
        # Use the translated arch name when a translation is configured.
        my $arch = $config->{arch_translation}{$2} || $2;
        $compildone->{$prefix}{$media}{$arch} = 1;
    } elsif ($r =~ /(\d{14}\.\w+\.\w+\.\d+)_(.*\.([^.]+)\.rpm)$/) {
        my $prefix = $1;
        my $rpm = $2;
        plog('DEBUG', "found already built rpm $rpm ($prefix)");
        push @{$pkg_tree{$prefix}{rpms}}, $rpm;
    }
}
288 |
|
289 |
|
290 |
# Scan the todo tree: fills %pkg_tree, %build_list, %noarch_build,
# $run{bot} and $compildone, then drops orphan lock files.
check_upload_tree($todo, \&todo_func, \&todo_post);

# getting already compiled packages
# The cache should not be needed if the .done files are removed at the same
# time as the src.rpm in the todo tree
check_upload_tree($done, \&done_func);


#
# Part 1: get results from finished builds
#
# For every lock found in the todo tree, look at the remote status.log of
# the build:
#   - all entries "ok": copy the packages to done/ and write a .done marker
#   - "install_deps_failure": release the lock and retry in a later run
#   - other failure: copy the remote directory to failure/, move the
#     packages of the upload there and mail the uploader
#   - no status yet: keep waiting unless the remote process is gone, is a
#     zombie or exceeded the configured fail delay
#

plog('MSG', "check build bot results");

# %later: prefixes which must not be rescheduled during this run
my %later;
# set once a build succeeded; "emi" is executed at the end of the run then
my $something_finished;
foreach my $prefix (keys %pkg_tree) {
    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
        my $path = $ent->{media}{$media}{path};
        my $user = $ent->{user};

        # Local pathnames
        my $done_dir = "$done/$path";
        my $todo_dir = "$todo/$path";
        my $fail_dir = "$failure/$path";

      bot: foreach my $bot_list (@{$ent->{media}{$media}{bot}}) {
            my ($bot, $host, $date, $pid, $arch, $time) =
                @$bot_list{qw(bot host date pid arch time)};

            my $bot_conf = $config->{bot}{$arch}{$host}{$bot};
            my $remote = ssh_setup($config->{ssh_options},
                                   $bot_conf->{user}, $host);

            # The remote directory always carries the real arch of the
            # bot, even for a noarch build.
            my $prefix_dir = "$bot_conf->{packages}/$path/$prefix-$arch/";

            # If our build is noarch, set arch appropriately.
            #
            my $lock_file =
                "$todo_dir/${prefix}_$arch-noarch.$bot.$host.$date.$pid.lock";

            if (-f $lock_file) {
                plog('DEBUG', "$prefix is noarch");
                $arch = "noarch";
            } else {
                $lock_file =~ s/-noarch//;
            }

            my $status_file = "$prefix_dir/log/status.log";

            plog('INFO', "check status: $host/$arch ($bot [$pid])");
            my $status = sout($remote, "cat $status_file");
            my $success;
            my $fail;
            my $later;

            # Check if the build bot finished on the other side
            #
            if ($status) {
                plog('INFO', "check result: $host/$arch ($bot [$pid])");
                # One "<package>: <result>" entry per line; any result
                # other than "ok" (including an unparsable line) counts
                # as a failure.
                foreach my $res (split "\n", $status) {
                    my ($p, $r) = $res =~ /(.*):\s+(.*)/;
                    plog('DEBUG', $res);
                    if ($r eq 'install_deps_failure') {
                        plog('FAIL', "install deps failure, rebuild later: $p");
                        $later{$prefix} = 1;
                        $later = 1;
                    }
                    if ($r ne 'ok') {
                        plog('FAIL', "$r: $p");
                        $fail = 1;
                    }
                }

                if (!$fail) {
                    my @list = split "\n", sout($remote, "ls $prefix_dir");
                    my $error;
                    my $done;

                    my $arch_check = join '|', $arch, if_($untranslated_arch{$arch}, @{$untranslated_arch{$arch}});
                    # The alternation must be grouped: without the group,
                    # "a|b" would turn the pattern into /\.a/ or /b\.rpm$/.
                    my $arch_rpm_re = qr/\.(?:$arch_check)\.rpm$/;
                    plog('MSG', "checking for $arch_check arch");
                    foreach my $result (@list) {
                        $result =~ /\.(src|$arch_check|noarch)\.rpm$/ or next;

                        # do not copy the initial src package
                        # (\Q..\E: the prefix contains dots)
                        $result =~ /^\Q$prefix\E/ and next;

                        my $result_file = "$done_dir/${prefix}_$result";

                        plog('OK', "build ok: $result");
                        if ($result =~ $arch_rpm_re) {
                            $done = 1;
                        }

                        plog('DEBUG', "copy files to done");
                        make_path($done_dir);
                        # A true return value of sget() is treated as a
                        # failure. The file is fetched under a temporary
                        # name and renamed once complete.
                        if (sget($remote, "$prefix_dir/$result",
                                 "$result_file.new")) {
                            plog('ERROR', "copying $result from $host failed ($!)");
                            $error = 1;
                            last;
                        }
                        if (!move("$result_file.new", $result_file)) {
                            plog('ERROR', "moving $result_file.new to $result_file failed ($!)");
                            $error = 1;
                            last;
                        }
                    }
                    # Keep the lock, the copy is retried on the next run
                    next bot if $error;

                    if ($done) {
                        create_file("$done_dir/${prefix}_$arch.done", "$bot $host");
                        $success = 1;
                    }

                    if ($success) {
                        # Fetch build log and clean remote machine
                        make_path("$done_dir/$prefix");
                        sget($remote, "$prefix_dir/log/*", "$done_dir/$prefix");
                        ssh($remote, "rm -rf $prefix_dir");
                        $something_finished = 1;
                    }
                }
            } # if ($status)

            #
            # Handle build failure
            #

            my $proc_state;
            if (!$fail) {
                chomp($proc_state = sout($remote, "ps h -o state $pid"));
            }

            my $seconds = time()-$time;

            # Reasons for failure
            my $timeout = $seconds > $config->{faildelay};
            my $zombie = $proc_state eq 'Z';
            my $ended = !$proc_state;

            # Nothing to do yet: the build is still running normally
            unless ($success || $later || $fail || $timeout || $zombie || $ended) {
                next bot;
            }

            plog('INFO', "delete lock file for $prefix");
            unlink $lock_file;

            $run{bot}{$host}{$bot} = 0;

            next bot if $later;

            if (!$ended && !$fail) {
                plog('FAIL', "$bot timed out on $host/$arch ($seconds sec) or " .
                     "it's dead (status $proc_state), removing lock");
                $compildone->{$prefix}{$media}{$arch} = 0;
                next bot;
            }

            next bot if $success && !$fail;

            if (!$status) {
                plog('ERROR', "build bot died on $host, reschedule compilation");
                next bot;
            }

            plog('INFO', "Failure reason: $success || $later || $fail || $timeout || $zombie || $ended");

            plog('FAIL', "build failed");
            create_file("$done_dir/${prefix}_$arch.fail", "$bot $host");
            make_path($fail_dir);

            mkdir("$fail_dir/$prefix");
            if (sget($remote, "$prefix_dir/*", "$fail_dir/$prefix")) {
                plog('ERROR', "copying from $host:$prefix_dir/ " .
                     "to $fail_dir/ failed ($!)");
                $compildone->{$prefix}{$media}{$arch} = 0;
                # clean the log on the compilation machine
                ssh($remote, "rm -rf $prefix_dir");
                next bot;
            }

            # What to do with the previously build packages? Move them to
            # failure, rejected ?
            # 20061220 warly move them to failure for now

            foreach my $rpm (@{$ent->{rpms}}) {
                my $file = "$done_dir/${prefix}_$rpm";
                plog('DEBUG', "moving built rpm $file to $fail_dir/${prefix}_$rpm");
                link($file, "$fail_dir/${prefix}_$rpm")
                    or plog('WARN', "cannot link $file to $fail_dir/${prefix}_$rpm ($!)");
                unlink $file;
            }
            # Should clean the queue
            # Must remove the SRPM and the lock
            foreach my $srpm (@{$ent->{srpms}}) {
                my $file = "$todo_dir/${prefix}_$srpm";
                plog('DEBUG', "moving $file to $fail_dir/${prefix}_$srpm");
                link($file, "$fail_dir/${prefix}_$srpm")
                    or plog('WARN', "cannot link $file to $fail_dir/${prefix}_$srpm ($!)");
                # FIXME If another arch is currently building, we will not clean its logs (Bug #4343)
                delete $pkg_tree{$prefix};
                unlink $file;
                # If one arch has been generated, we also have a src.rpm in done
                $file = "$done_dir/${prefix}_$srpm";
                if (-f $file) {
                    plog('DEBUG', "deleting $file");
                    unlink $file;
                }
            }

            # Notify user if build failed
            #
            if ($user) {
                warn_about_failure($user, $ent, $arch, $fail_dir, $path, $prefix);
            }

            # clean the log on the compilation machine
            ssh($remote, "rm -rf $prefix_dir");

        } # end bot
    } # end path
} # end prefix
511 |
|
512 |
|
513 |
# |
514 |
# Part 2: check queue and start new jobs if a bot is available |
515 |
# |
516 |
|
517 |
plog('MSG', "launching new compilations");
# arch => number of uploads still waiting for a free bot (for the report)
my %to_compile;

# NOTE(review): an older comment at this place asked NOT to sort the keys,
# so that a package which crashes iurt or locks ulri is not always tried
# first; the loop does sort them, though - confirm which one is intended.

foreach my $prefix (sort keys %pkg_tree) {
    # Dependencies could not be installed during this run, retry later
    next if $later{$prefix};

    my $ent = $pkg_tree{$prefix};
    foreach my $media (keys %{$ent->{media}}) {
        my $path = $ent->{media}{$media}{path};
        my $target = $ent->{target};
        my $srpms = $ent->{srpms} or next;

        # Value for the __PACKAGER__ placeholder; < and > are
        # backslash-escaped because the value is pasted into the remote
        # command line.
        my $user = get_author_email($ent->{user}) || $config->{packager};
        $user =~ s/([<>])/\\$1/g;

        # Local pathnames
        my $done_dir = "$done/$path";
        my $todo_dir = "$todo/$path";

        # Make sure these exist
        make_path($done_dir);
        make_path($todo_dir);

        #plog('DEBUG', "searching a bot to compile @$srpms");

        # count noarch todos only once even if searching multiple bots
        my $noarch_countflag = 0;

        # The arch list is the first array reference among: the "arch"
        # setting itself, its entry for this target, its "default" entry.
        # Without any, every arch having a bot configured is used.
        my $arch_list = find { ref($_) eq 'ARRAY' } $config->{arch}, (ref($config->{arch}) eq 'HASH' ? ($config->{arch}{$target}, $config->{arch}{default}) : ());
        my @arch_list = $arch_list ? @$arch_list : keys %{$config->{bot}};
        # need to find a bot for each arch
        foreach my $arch (@arch_list) {

            # Skip this arch if package is building as noarch
            #
            next if $noarch_build{$prefix};

            next if $compildone->{$prefix}{$media}{noarch};
            next if $compildone->{$prefix}{$media}{$arch};

            # If all packages in a group are noarch, consider the entire group
            # as noarch
            #
            my $noarch = 1;
            $noarch = 0 if any { !check_noarch("$todo_dir/${prefix}_$_") } @$srpms;

            #plog("@$srpms is noarch") if $noarch;

            my $excluded = any { !check_arch("$todo_dir/${prefix}_$_", $arch) } @$srpms;
            if ($excluded) {
                # $excluded is only a boolean: log the packages instead
                plog('WARN', "excluding from $arch: @$srpms");
                create_file("$done_dir/${prefix}_$arch.excluded",
                            "ulri $arch excluded");
                next;
            }

            if ($noarch) {
                plog('DEBUG', "search any bot for @$srpms") unless $noarch_countflag;
            } else {
                plog('DEBUG', "search $arch bot for @$srpms");
            }

            foreach my $host (keys %{$config->{bot}{$arch}}) {
                foreach my $bot (keys %{$config->{bot}{$arch}{$host}}) {
                    # bot already busy with another upload
                    next if $run{bot}{$host}{$bot};

                    # Enable noarch lock after the first bot snarfs the package
                    #
                    $noarch_build{$prefix} = 1 if $noarch;

                    plog('INFO', "building on $host/$arch ($bot)");

                    $run{bot}{$host}{$bot} = $prefix;
                    $compildone->{$prefix}{$media}{$arch} = 1;

                    my $bot_conf = $config->{bot}{$arch}{$host}{$bot};
                    my $remote = ssh_setup($config->{ssh_options},
                                           $bot_conf->{user}, $host);

                    my $prefix_dir = "$bot_conf->{packages}/$path/$prefix-$arch/";
                    my $status_file = "$prefix_dir/log/status.log";

                    # Copy packages to build node
                    #
                    # create also the log dir for botcmd.log
                    # (a true return value of ssh() / sput() is treated
                    # as a failure)
                    next if ssh($remote, "mkdir -p $prefix_dir/log");
                    my $pkgs;
                    my $ok = 1;
                    foreach my $srpm (@$srpms) {
                        plog('NOTIFY', "Send to $host/$arch: $srpm");
                        $ok &&= !sput($remote, "$todo_dir/${prefix}_$srpm",
                                      "$prefix_dir/$srpm");
                        $pkgs .= " $prefix_dir/$srpm";
                    }
                    next unless $ok;

                    # spawn remote build bot and save output on local file
                    # (remove status.log before building, otherwise we can have
                    # a install_deps_failure and reschedule even if the package
                    # is currently building)
                    #
                    plog('DEBUG', "remove status file");
                    ssh($remote, "rm $status_file 2>/dev/null");

                    plog('INFO', "Execute build command on $host/$arch");

                    # Local file receiving the "PID=..." line printed by
                    # the remote shell
                    my $temp = mktemp("$config->{tmp}/ulri.tmp.$prefix.XXXXX");
                    my $cmd = $bot_conf->{command};
                    $cmd =~ s!__ARCH__!$arch!g;
                    $cmd =~ s!__DIR__!$path/$prefix-$arch!g;
                    $cmd =~ s!__TARGET__!$target!g;
                    $cmd =~ s!__PACKAGER__!$user!g;
                    # section = media name up to the first slash
                    my $section = $media;
                    $section =~ s!/.*$!!;
                    $cmd =~ s!__SECTION__!$section!g;

                    # Use the media list of the target when it has one for
                    # this medium, the "default" list otherwise
                    my $media_to_add;
                    my $medium = ref $config->{media}{$target}{$media} ? $target : 'default';
                    $media_to_add = join ' ', @{$config->{media}{$medium}{$media}};
                    plog('DEBUG', "Will compile only with media $media_to_add");
                    $cmd =~ s!__MEDIA__!$media_to_add!g;

                    #- allow x86_64 hosts to build i586 packages
                    if ($arch eq 'i586') {
                        $cmd = "setarch i586 $cmd";
                    }

                    plog('DEBUG', "Build $pkgs");
                    ssh($remote, "'echo PID=\$\$; exec $cmd $pkgs &>$prefix_dir/log/botcmd.\$(date +%s).\$(hostname -s).log' > $temp &");

                    # wait 10 seconds or until we have the log file
                    # plus 20 seconds if it timeouts.
                    #
                    if (check_file_timeout($temp, 10)) {
                        plog('WARN', "Timeout waiting for building start. Waiting more 20s.");
                        if (check_file_timeout($temp, 20)) {
                            plog('WARN', "Timeout! Abandoning the build.");
                            last;
                        }
                    }

                    # get remote PID from log file
                    #
                    my $pid = get_pid_from_file($temp);
                    unlink $temp;
                    plog('DEBUG', "remote pid $pid");
                    if (!$pid) {
                        plog('WARN', "pid is unknown, abandoning the build.");
                        last;
                    }

                    # create lock file
                    #
                    my $lock_arch = $noarch ? "$arch-noarch" : $arch;
                    my $lock_file = "$todo_dir/${prefix}_" .
                        "$lock_arch.$bot.$host.$fulldate.$pid.lock";
                    plog('DEBUG', "create lock $lock_file");
                    create_file($lock_file, "$program_name $$", time());

                    # Fork to wait for the build to finish
                    my $child = fork();
                    if (!defined $child) {
                        # fork() returns undef on failure; comparing that
                        # with 0 would make the scheduler itself run the
                        # child code below. The lock file exists, so the
                        # results are picked up by a later run.
                        plog('ERROR', "fork failed ($!), build results will be " .
                             "collected by a later $program_name run");
                    } elsif ($child == 0) {
                        local $SIG{ALRM} = sub {
                            # Run ourselves to kill the build
                            exec "ulri";
                        };
                        alarm $config->{faildelay};
                        # SSH to $host and wait up for $pid to exit
                        ssh($remote, "'while /bin/true; do ps $pid >/dev/null 2>&1 || exit; sleep 1; done'");
                        alarm 0;
                        # Fetch build results
                        exec "ulri";
                        # Only reached when exec failed: the child must not
                        # fall through and act as a second scheduler.
                        exit 1;
                    }

                    last;
                }
                last if $compildone->{$prefix}{$media}{$arch};
                last if $compildone->{$prefix}{$media}{noarch};
            }

            # Count packages to compile for each architecture. Count noarch
            # package only once.
            #
            $arch = 'noarch' if $noarch;
            unless ($compildone->{$prefix}{$media}{$arch}) {
                $to_compile{$arch}++ if !($noarch && $noarch_countflag);
            }
            $noarch_countflag = 1 if $noarch;
        }
    }
}
710 |
|
711 |
plog('MSG', "Current status");

# Builds which were already running when this run started (taken from the
# lock files found in the todo tree), one line per host/arch.
if (keys %build_list) {
    plog('INFO', "currently building:");
    foreach my $where (keys %build_list) {
        # Separate the prefixes; they used to be glued together.
        plog('INFO', " $where: " . join(', ', @{$build_list{$where}}));
    }
}

# Uploads still waiting for a free bot, counted per arch.
plog('INFO', "jobs in queue:", %to_compile ?
     map { sprintf("%s(%d)", $_, $to_compile{$_}) } keys %to_compile : "none");


# Drop the pid file, then run "emi" when at least one build finished
# successfully during this run.
unlink $pidfile;
exec "emi" if $something_finished;
exit();
725 |
|
726 |
|
727 |
# |
728 |
# Subroutines |
729 |
# |
730 |
|
731 |
# Mail a build failure report.
#
# Arguments:
#   $user      uploader login, resolved with get_author_email(); the
#              administrator address is used when that yields nothing
#   $ent       %pkg_tree entry of the upload (its {srpms} list is reported)
#   $arch      architecture the build failed on
#   $fail_dir  local failure directory of the media
#   $path      media path, used to build the log URLs
#   $prefix    upload prefix
#
# The report lists the failed source packages, the content of the fetched
# status.log and one URL for each file found in the sub-directories of the
# fetched log directory.
sub warn_about_failure {
    my ($user, $ent, $arch, $fail_dir, $path, $prefix) = @_;
    my $text = join("\n", "Build of the following packages failed:\n", map { "- $_" } @{$ent->{srpms}}) . "\n";
    my $srpms = join(' ', @{$ent->{srpms}});

    my $to = get_author_email($user) || "Unknown <$config->{admin}>";
    my $cc;

    # Surrounding blanks in the configured address would end up inside
    # the URLs.
    (my $base = $config->{http_queue}) =~ s/^\s+|\s+$//g;
    my $fpath = "$base/failure/$path/$prefix";
    $fpath =~ tr!/!!s; # Squash double slashes ...
    $fpath =~ s!/!//!; # ... except for http://

    my $log_dir = "$fail_dir/$prefix/log";

    $text .= "\nFailure details available in $fpath/log\n";
    $text .= "Reason:\n";
    # Still send the mail when the status log cannot be read.
    my $reason = eval { read_file("$log_dir/status.log") };
    $text .= defined $reason ? $reason : "(status.log could not be read)\n";
    $text .= "\nLog files generated:\n";

    if (opendir my $DP1, $log_dir) {
        foreach my $f1 (sort(readdir($DP1))) {
            # Only sub-directories, hidden entries are skipped
            next if !(-d "$log_dir/$f1") || $f1 =~ m/^\./;

            opendir my $DP2, "$log_dir/$f1" or next;
            foreach my $f2 (readdir $DP2) {
                next if $f2 =~ m/^\./;
                $text .= "$fpath/log/$f1/$f2\n";
            }
            closedir $DP2;
        }
        closedir $DP1;
    } else {
        plog('WARN', "cannot list $log_dir ($!)");
    }

    sendmail($to, $cc,
             "Rebuild failed on $arch for $srpms", $text,
             "Ulri the scheduler bot <$config->{admin}>", 0, $config);
}
764 |
|
765 |
# Return the process id announced by the first "PID=<digits>" line of
# $file, or undef when the file has no such line.
#
# Dies when the file cannot be opened. (The original check was written as
# "open ... $file || die", where || binds to $file, so it could never
# trigger.)
sub get_pid_from_file {
    my ($file) = @_;

    open(my $FILE, '<', $file) or die "FATAL: can't open $file: $!";

    my $pid;
    while (my $line = <$FILE>) {
        if ($line =~ /^PID=(\d+)/) {
            $pid = $1;
            last;
        }
    }
    close $FILE;

    return $pid;
}
775 |
|
776 |
# Create (or truncate) $file and write the remaining arguments to it,
# separated by single blanks, without a trailing newline.
#
# Dies when the file cannot be opened, written or closed. The file name is
# passed to the three-argument open, so it is never interpreted as a mode.
sub create_file {
    my $file = shift;
    my @contents = @_;

    open(my $FILE, '>', $file) or die "FATAL: can't open $file for writing: $!";
    print {$FILE} "@contents" or die "FATAL: can't write to $file: $!";
    # Buffered write errors only show up when the handle is closed
    close($FILE) or die "FATAL: can't close $file: $!";

    return 1;
}
783 |
|
784 |
# Return the first line of $file, including its line terminator, or undef
# for an empty file.
#
# Dies when the file cannot be opened. The file name is passed to the
# three-argument open, so it is never interpreted as a mode.
sub read_line {
    my $file = shift;

    open(my $FILE, '<', $file) or die "FATAL: can't open $file for reading: $!";
    my $contents = <$FILE>;
    close $FILE;

    return $contents;
}
792 |
|
793 |
# Wait until $file exists as a non-empty plain file, checking once per
# second for at most $time seconds.
#
# Returns true when the file is still missing or empty after the wait
# (timeout), false when it is ready.
#
# The result is taken from the state of the file, not from the elapsed
# time: a file showing up during the last second of the wait (or a ready
# file checked with a zero delay) is no longer reported as a timeout.
sub check_file_timeout {
    my ($file, $time) = @_;

    my $is_ready = sub { -f $file && -s $file };

    my $i = 0;
    while ($i < $time && !$is_ready->()) { sleep 1; $i++ }

    return !$is_ready->();
}
801 |
|
802 |
__END__ |
803 |
|
804 |
# ulri ends here |
805 |
|
806 |
Discussion |
807 |
---------- |
808 |
|
809 |
20060802 (Warly) |
810 |
|
811 |
* I prefer creating a separate scheduler, so that it can eventually call |
812 |
other bots. |
813 |
* bots should be able to take packages by themselves. |
814 |
* Iurt will perform several checks, they have to be standard and usable |
815 |
by the maintainer, the results must be put in a visible directory or path |
816 |
* We can put packages either in a dir or to prefix all files with the date |
817 |
and uploader. Having all files in a dir will make the listing simpler. |
818 |
Prefixing the files could be problematic if we base the rpm name and |
819 |
version parsing on the filename. |
820 |
* ulri knows the prefix, he could ask iurt to put the packages in a dir |
821 |
with the same prefix. |
822 |
|
823 |
20060806 (Warly) |
824 |
|
825 |
* All the packages are put in done, then the final youri is run to put them |
826 |
in queue/ |
827 |
|
828 |
20061104 (claudio) |
829 |
|
830 |
* Instead of having configuration defaults for our environment and using |
831 |
ulri with the defaults, it would be nicer to have minimalistic/generic |
832 |
defaults and install a configuration file in kenobi |
833 |
* Ulri's configuration file could be renamed to .ulri.conf instead of |
834 |
.upload.conf. ==> ok, it's also used by emi |
835 |
|