<?php

/**
 * Cron job that turns download requests found in a web-server access log into
 * tab-separated rows (date, hour, product details, GeoIP location).
 *
 * Relies on $this->log_file and $this->appdir, which are not declared in this
 * class and are presumably provided by Mageia_Data_Cron (confirm against the
 * parent class).
 */
class Mageia_Data_Cron_Downloads extends Mageia_Data_Cron
{
    /**
     * Forwards all arguments unchanged to the parent constructor.
     *
     * @param mixed $spec      passed through to the parent
     * @param mixed $ts        passed through to the parent
     * @param mixed $appdir    passed through to the parent
     * @param mixed $logtmpdir passed through to the parent
     */
    public function __construct($spec, $ts, $appdir, $logtmpdir)
    {
        parent::__construct($spec, $ts, $appdir, $logtmpdir);
    }

    /**
     * Reduces $this->log_file, in place, to the download requests only.
     *
     * Keeps lines containing "downloads/get/?q=", drops lines containing "bot"
     * or "passwd", and keeps awk fields 1, 4, 5, 7 and 9 of each line. With the
     * usual access-log layout these would be client address, "[date:time",
     * "zone]", request URL and status; the layout is assumed, not checked.
     *
     * The result is written to "<log_file>.tmp" and renamed over the original
     * only when the pipeline exits with status 0; otherwise "failed first pass"
     * is printed and the original file is left untouched.
     *
     * @return void
     */
    public function first_pass()
    {
        $tmp_file = $this->log_file . '.tmp';

        // Both paths are shell-quoted so that spaces or metacharacters in the
        // log location cannot break (or hijack) the redirections.
        $cmd = sprintf(
            'grep "downloads/get/?q=" < %s | grep -v bot | grep -v passwd | awk \'{print $1" "$4" "$5" "$7" "$9}\'> %s',
            escapeshellarg($this->log_file),
            escapeshellarg($tmp_file)
        );

        exec($cmd, $out, $ret);

        if ($ret > 0) {
            echo "failed first pass\n";
            return;
        }

        rename($tmp_file, $this->log_file);
    }

    /**
     * Converts pre-filtered log lines into tab-separated download records.
     *
     * Runs first_pass() (which rewrites $this->log_file), then reads $infile
     * line by line. Each line is expected to hold five space-separated fields:
     * address, "[date:time", "zone]", URL, status. Lines that are empty, too
     * short, not status 200, or whose requested product does not start with
     * "mageia" are skipped.
     *
     * Output columns, in order: date (Y-m-d), hour (H), the literal "www",
     * version, release, variant, arch, medium, continent, country, region, city.
     *
     * NOTE (rda) dates are local time, not UTC here.
     * FIXME (rda) change dates to UTC; that involves getting logs of the day
     * before, and filtering _after_ conversion.
     *
     * @param string $infile  stream or path to read the filtered log lines from
     * @param string $outfile stream or path the records are written to
     *
     * @return void
     */
    public function digest_log($infile = 'php://stdin', $outfile = 'php://stdout')
    {
        echo "Digest. ";

        $this->first_pass();

        // These includes stay in this method on purpose: variables defined at
        // the top level of an included file take the scope of the including
        // function, and the region table below is read from that scope.
        require_once realpath($this->appdir . '/lib/maxmind/geoip/geoip.inc.php');
        require_once realpath($this->appdir . '/lib/maxmind/geoip/geoipcity.inc.php');

        // Presumably the MaxMind files define $GEOIP_REGION_NAME (confirm
        // against the bundled library). require_once only runs them once per
        // process, so the table may live in this scope or in the global one.
        if (isset($GEOIP_REGION_NAME)) {
            $regions = $GEOIP_REGION_NAME;
        } elseif (isset($GLOBALS['GEOIP_REGION_NAME'])) {
            $regions = $GLOBALS['GEOIP_REGION_NAME'];
        } else {
            $regions = array();
        }

        $gi = geoip_open(realpath($this->appdir . '/lib/maxmind/geoip/GeoLiteCityv6.dat'), GEOIP_STANDARD);
        $in = fopen($infile, 'r');
        $out = fopen($outfile, 'w');

        // Without this guard a failed fopen() makes the read loop spin forever
        // on old PHP versions, or abort with a TypeError on PHP 8.
        if ($in === false || $out === false) {
            echo "failed to open digest streams\n";
            if ($in !== false) {
                fclose($in);
            }
            if ($out !== false) {
                fclose($out);
            }
            geoip_close($gi);
            return;
        }

        while (($line = fgets($in)) !== false) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }

            // address, "[date:time", "zone]", URL, status
            $fields = explode(' ', $line);
            if (count($fields) < 5 || $fields[4] != 200) {
                continue;
            }

            $product = $this->requested_product($fields[3]);
            if ($product === null || substr($product, 0, 6) != 'mageia') {
                continue;
            }

            $stamp = strtotime(str_replace(array('[', ']'), '', $fields[1] . ' ' . $fields[2]));
            $date = date('Y-m-d', $stamp); // N for week day, or Y-m-d
            $hour = date('H', $stamp);

            $image = $this->parse_image_file_name($product);
            if (null !== $image) {
                $row = array(
                    $date, $hour, 'www',
                    $image['version'], $image['release'], $image['variant'],
                    $image['arch'], $image['medium'],
                );
            } else {
                // Fallback for names the pattern does not recognise: second
                // dash-separated part as version, last part as arch, the one
                // before it as medium.
                $parts = explode('-', $product);
                $count = count($parts);
                $row = array(
                    $date, $hour, 'www',
                    $count > 1 ? $parts[1] : null,
                    null,
                    null,
                    $parts[$count - 1],
                    $count > 1 ? $parts[$count - 2] : null,
                );
            }

            $row = array_merge($row, $this->location_columns($gi, $fields[0], $regions));

            // Write the whole row: a length limit here is counted in bytes, so
            // a character count would cut multibyte rows short.
            fwrite($out, implode("\t", $row) . "\n");
        }

        fclose($out);
        fclose($in);
        geoip_close($gi);
    }

    /**
     * Parses an image file name such as "mageia-2-beta1-i586-dvd.iso".
     *
     * Matching is case-insensitive and the name must end in ".<extension>".
     * Optional parts that are absent come back as empty strings.
     *
     * @param string $s file name to parse
     *
     * @return array|null array with keys full, name, version, release, variant,
     *                    arch, medium, build, ext; null when $s does not match
     */
    public function parse_image_file_name($s)
    {
        $re = '/^
            (
                (\w+)                                   # name
                -
                (\d+)                                   # version
                (?:-((?:nightly|alpha|beta|RC)\d*))?    # release
                (?:-(.+))?                              # variant
                -
                (i586|x86_64|dual)                      # arch
                (?:-(CD|DVD|BR))?                       # medium
                (?:-(build_\w+))?                       # build
            )
            \.
            (\w+)                                       # extension
        $/ix';

        // The last group (extension) is mandatory, so a successful match always
        // fills indexes 1..9; unmatched optional groups are empty strings.
        if (!preg_match($re, (string) $s, $m)) {
            return null;
        }

        return array(
            'full'    => $m[1],
            'name'    => $m[2],
            'version' => $m[3],
            'release' => $m[4],
            'variant' => $m[5],
            'arch'    => $m[6],
            'medium'  => $m[7],
            'build'   => $m[8],
            'ext'     => $m[9],
        );
    }

    /**
     * Extracts the requested product from a request URL.
     *
     * Reads the "q" query parameter, or "product" when "q" is absent, and
     * returns it lower-cased and trimmed.
     *
     * @param string $request request URL taken from the log line
     *
     * @return string|null the product, or null when the URL has no usable value
     */
    private function requested_product($request)
    {
        $url = parse_url($request);
        if (!is_array($url) || !isset($url['query'])) {
            return null;
        }

        parse_str($url['query'], $params);

        foreach (array('q', 'product') as $key) {
            if (!array_key_exists($key, $params)) {
                continue;
            }

            // "q[]=..." yields an array, which cannot name a product.
            if (!is_string($params[$key])) {
                return null;
            }

            $value = trim(strtolower($params[$key]));

            // The value is URL-decoded request data; embedded tabs or line
            // breaks would corrupt the tab-separated output.
            if (preg_match('/[\x00-\x1F\x7F]/', $value)) {
                return null;
            }

            return $value;
        }

        return null;
    }

    /**
     * Looks up an address and returns the location columns of an output row.
     *
     * @param mixed  $gi      handle returned by geoip_open()
     * @param string $ip      client address from the log line
     * @param array  $regions region names indexed by country code, then region code
     *
     * @return array continent, country, region, city (region and city converted
     *               to ASCII); missing values are null or empty strings
     */
    private function location_columns($gi, $ip, $regions)
    {
        // Addresses without a colon get a "::" prefix before the v6 lookup.
        if (strpos($ip, ':') === false) {
            $ip = '::' . $ip;
        }

        $record = geoip_record_by_addr_v6($gi, $ip);

        // isset() also covers a failed lookup, where $record is not an object.
        $country = isset($record->country_code) ? $record->country_code : null;
        $region_code = isset($record->region) ? $record->region : null;

        $region = '';
        if ($country !== null && $region_code !== null && isset($regions[$country][$region_code])) {
            $region = $regions[$country][$region_code];
        }

        return array(
            isset($record->continent_code) ? $record->continent_code : null,
            $country,
            mb_convert_encoding((string) $region, 'ASCII'),
            mb_convert_encoding(isset($record->city) ? (string) $record->city : '', 'ASCII'),
        );
    }
}
146 |
|
|
|