1 |
<?php |
2 |
/**
 * Cron job that turns the download requests found in a web access log into
 * tab-separated statistics rows (one row per successful download request).
 *
 * first_pass() reduces the raw log to the relevant requests and fields;
 * digest_log() converts each remaining line into a row enriched with
 * GeoIP data.
 *
 * NOTE(review): $this->log_file and $this->appdir are not declared in this
 * class; presumably the parent class sets them - confirm.
 */
class Mageia_Data_Cron_Downloads extends Mageia_Data_Cron
{
    /**
     * Passes every argument through to the parent constructor unchanged.
     */
    public function __construct($spec, $ts, $appdir, $logtmpdir)
    {
        parent::__construct($spec, $ts, $appdir, $logtmpdir);
    }

    /**
     * Filters $this->log_file in place.
     *
     * Keeps only the lines containing "downloads/get/?q=" that mention
     * neither "bot" nor "passwd", and reduces each of them to the
     * whitespace-separated fields 1, 4, 5, 7 and 9 (read by digest_log() as
     * IP, date, timezone, URL and HTTP status). The result is written to a
     * ".tmp" sibling file which then replaces the log file.
     *
     * @return bool true when the log file was replaced by its filtered
     *              version, false otherwise (the log file is then untouched)
     */
    public function first_pass()
    {
        $tmp_file = $this->log_file . '.tmp';

        // Both paths are shell-quoted so that spaces or shell metacharacters
        // in them can neither break nor inject into the pipeline.
        $cmd = sprintf(
            'grep "downloads/get/?q=" < %s | grep -v bot | grep -v passwd | awk \'{print $1" "$4" "$5" "$7" "$9}\'> %s',
            escapeshellarg($this->log_file),
            escapeshellarg($tmp_file)
        );
        exec($cmd, $out, $ret);

        if ($ret > 0) {
            echo "failed first pass\n";
            // The output redirection creates the temp file even when the
            // pipeline fails; do not leave a partial file behind.
            if (file_exists($tmp_file)) {
                unlink($tmp_file);
            }
            return false;
        }

        if (!rename($tmp_file, $this->log_file)) {
            echo "failed first pass (could not replace log file)\n";
            return false;
        }

        return true;
    }

    /**
     * Converts filtered log lines into tab-separated download records.
     *
     * Runs first_pass(), then reads $infile line by line and writes one row
     * per accepted line to $outfile. Columns, in order: date (Y-m-d), hour
     * (H), the literal "www", version, release, variant, arch, medium,
     * continent code, country code, region name, city. Region and city are
     * converted to ASCII.
     *
     * A line is accepted when it has the five fields produced by
     * first_pass(), its status is 200, its date can be parsed, and its "q"
     * (or, failing that, "product") query parameter starts with "mageia".
     *
     * @param string $infile  path/URL of the stream to read
     * @param string $outfile path/URL of the stream to write
     *
     * @return bool false when a stream or the GeoIP database could not be
     *              opened, true otherwise
     */
    public function digest_log($infile = 'php://stdin', $outfile = 'php://stdout')
    {
        // The region-name table is a plain variable (presumably defined by
        // the GeoIP includes loaded below). Binding the name to the global
        // scope *before* the includes keeps it reachable on every call; as a
        // local variable it could only exist in the single call that first
        // loads the includes, because require_once does nothing afterwards.
        global $GEOIP_REGION_NAME;

        echo "Digest. ";

        // The digest proceeds even when the first pass fails, as before:
        // lines that do not have the filtered five-field layout are simply
        // rejected by parse_download_line().
        $this->first_pass();

        require_once realpath($this->appdir . '/lib/maxmind/geoip/geoip.inc.php');
        require_once realpath($this->appdir . '/lib/maxmind/geoip/geoipcity.inc.php');

        $gi = geoip_open(realpath($this->appdir . '/lib/maxmind/geoip/GeoLiteCityv6.dat'), GEOIP_STANDARD);
        if (!$gi) {
            echo "failed to open GeoIP database\n";
            return false;
        }

        // Without these checks a failed fopen() made the read loop spin
        // forever (feof(false) never reports end-of-file).
        $in = fopen($infile, 'r');
        if ($in === false) {
            echo "failed to open input\n";
            geoip_close($gi);
            return false;
        }
        $out = fopen($outfile, 'w');
        if ($out === false) {
            echo "failed to open output\n";
            fclose($in);
            geoip_close($gi);
            return false;
        }

        $region_names = isset($GEOIP_REGION_NAME) ? $GEOIP_REGION_NAME : array();

        while (($line = fgets($in)) !== false) {
            $entry = $this->parse_download_line($line);
            if ($entry === null) {
                continue;
            }

            $place = $this->geolocate($gi, $entry['ip'], $region_names);

            $row = array_merge(
                array($entry['date'], $entry['hour'], 'www'),
                $this->describe_product($entry['product']),
                array(
                    $place['continent'],
                    $place['country'],
                    mb_convert_encoding((string) $place['region'], 'ASCII'),
                    mb_convert_encoding((string) $place['city'], 'ASCII')
                )
            );

            // No explicit length: fwrite() counts bytes, so passing a
            // character count would cut multibyte rows short.
            fwrite($out, implode("\t", $row) . "\n");
        }

        fclose($out);
        fclose($in);
        geoip_close($gi);

        return true;
    }

    /**
     * Extracts the interesting parts of one line produced by first_pass().
     *
     * @param string $line raw line: "ip [date tz] url status"
     *
     * @return array|null array with keys 'ip', 'date' (Y-m-d), 'hour' (H)
     *                    and 'product' (lower-cased, trimmed), or null when
     *                    the line must be skipped
     */
    private function parse_download_line($line)
    {
        $line = trim($line);
        if ($line === '') {
            return null;
        }

        $fields = explode(' ', $line);
        if (count($fields) < 5 || $fields[4] !== '200') {
            return null;
        }

        // NOTE (rda) dates are local time, not UTC here.
        // FIXME (rda) change dates to UTC
        // that involves getting logs of the day before, and filtering _after_ conversion.
        $stamp = strtotime(str_replace(array('[', ']'), '', $fields[1] . ' ' . $fields[2]));
        if ($stamp === false) {
            // An unparseable date would otherwise be recorded as 1970-01-01.
            return null;
        }

        $url = parse_url($fields[3]);
        if (!is_array($url) || !isset($url['query'])) {
            return null;
        }
        parse_str($url['query'], $query);

        // "q" takes precedence over "product".
        if (array_key_exists('q', $query)) {
            $product = $query['q'];
        } elseif (array_key_exists('product', $query)) {
            $product = $query['product'];
        } else {
            return null;
        }
        if (!is_string($product)) {
            // e.g. "q[]=..." yields an array
            return null;
        }

        $product = trim(strtolower($product));
        if (strncmp($product, 'mageia', 6) !== 0) {
            return null;
        }

        return array(
            'ip'      => $fields[0],
            'date'    => date('Y-m-d', $stamp),
            'hour'    => date('H', $stamp),
            'product' => $product
        );
    }

    /**
     * Builds the product columns of a row.
     *
     * @param string $product lower-cased product / image file name
     *
     * @return array list of (version, release, variant, arch, medium)
     */
    private function describe_product($product)
    {
        $image = $this->parse_image_file_name($product);
        if ($image !== null) {
            return array(
                $image['version'],
                $image['release'],
                $image['variant'],
                $image['arch'],
                $image['medium']
            );
        }

        // Not an image file name: split on dashes and take the second part
        // as version, the last part for the arch column and the one before
        // it for the medium column.
        $parts = explode('-', $product);
        $count = count($parts);

        return array(
            isset($parts[1]) ? $parts[1] : null,
            null,
            null,
            $parts[$count - 1],
            $count > 1 ? $parts[$count - 2] : null
        );
    }

    /**
     * Looks an IP address up in the GeoIP (v6) database.
     *
     * @param mixed  $gi           handle returned by geoip_open()
     * @param string $ip           IPv4 or IPv6 address
     * @param array  $region_names region names indexed by country code,
     *                             then region code
     *
     * @return array keys 'continent', 'country', 'region', 'city'; every
     *               value is null when no record is found
     */
    private function geolocate($gi, $ip, $region_names)
    {
        $place = array(
            'continent' => null,
            'country'   => null,
            'region'    => null,
            'city'      => null
        );

        // The lookup is IPv6-only: addresses without a colon are prefixed
        // with "::" before being queried.
        if (strpos($ip, ':') === false) {
            $ip = '::' . $ip;
        }

        $record = geoip_record_by_addr_v6($gi, $ip);
        if (!is_object($record)) {
            return $place;
        }

        $country = $record->country_code;
        $region  = $record->region;

        $place['continent'] = $record->continent_code;
        $place['country']   = $country;
        $place['city']      = $record->city;
        if ($country !== null && $region !== null && isset($region_names[$country][$region])) {
            $place['region'] = $region_names[$country][$region];
        }

        return $place;
    }

    /**
     * Splits an image file name such as
     * "mageia-4-beta1-livecd-kde4-x86_64-cd.iso" into its components.
     *
     * Matching is case-insensitive. Optional components that are absent
     * from the name are returned as empty strings.
     *
     * @param string $s file name
     *
     * @return array|null array with keys 'full' (name without extension),
     *                    'name', 'version', 'release', 'variant', 'arch',
     *                    'medium', 'build' and 'ext', or null when $s does
     *                    not look like an image file name
     */
    public function parse_image_file_name($s)
    {
        // Extended (x) syntax: whitespace and "#" comments are ignored.
        $re = '/^
            (
                (\w+)                                   # name
                -
                (\d+)                                   # version
                (?:-((?:nightly|alpha|beta|RC)\d*))?    # release
                (?:-(.+))?                              # variant
                -
                (i586|x86_64|dual)                      # arch
                (?:-(CD|DVD|BR))?                       # medium
                (?:-(build_\w+))?                       # build
            )
            \.
            (\w+)                                       # extension
        $/ix';

        // The pattern is anchored at both ends, so there is at most one
        // match; the last group always participates, hence indexes 1..9
        // are all present whenever the match succeeds.
        if (!preg_match($re, $s, $m)) {
            return null;
        }

        return array(
            'full'    => $m[1],
            'name'    => $m[2],
            'version' => $m[3],
            'release' => $m[4],
            'variant' => $m[5],
            'arch'    => $m[6],
            'medium'  => $m[7],
            'build'   => $m[8],
            'ext'     => $m[9]
        );
    }
}
146 |
|