1 |
--- a/WWW/Library/Implementation/HTTP.c |
2 |
+++ b/WWW/Library/Implementation/HTTP.c |
3 |
@@ -415,27 +415,150 @@ |
4 |
#endif /* _WINDOWS */ |
5 |
|
6 |
/* |
7 |
+ * RFC-1738 says we can have user/password using these ASCII characters |
8 |
+ * safe = "$" | "-" | "_" | "." | "+" |
9 |
+ * extra = "!" | "*" | "'" | "(" | ")" | "," |
10 |
+ * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | |
11 |
+ * "a" | "b" | "c" | "d" | "e" | "f" |
12 |
+ * escape = "%" hex hex |
13 |
+ * unreserved = alpha | digit | safe | extra |
14 |
+ * uchar = unreserved | escape |
15 |
+ * user = *[ uchar | ";" | "?" | "&" | "=" ] |
16 |
+ * password = *[ uchar | ";" | "?" | "&" | "=" ] |
17 |
+ * and we cannot have a password without user, i.e., no leading ":" |
18 |
+ * and ":", "@", "/" must be encoded, i.e., will not appear as such. |
19 |
+ * |
20 |
+ * However, in a URL |
21 |
+ * //<user>:<password>@<host>:<port>/<url-path> |
22 |
+ * valid characters in the host are different, not allowing most of those |
23 |
+ * punctuation characters. |
24 |
+ * |
25 |
+ * RFC-3986 amends this, using |
26 |
+ * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) |
27 |
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
28 |
+ * reserved = gen-delims / sub-delims |
29 |
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
30 |
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
31 |
+ * / "*" / "+" / "," / ";" / "=" |
32 |
+ * and |
33 |
+ * host = IP-literal / IPv4address / reg-name |
34 |
+ * reg-name = *( unreserved / pct-encoded / sub-delims ) |
35 |
+ */ |
36 |
/*
 * Character-class tests for the RFC 3986 sets "unreserved", "gen-delims" and
 * "sub-delims".  Each expands to a nonzero value when c is a member of the
 * set and to zero otherwise.
 *
 * Every test rejects NUL explicitly: strchr() treats the terminator of the
 * set string as part of that string, so without the guard a NUL argument
 * would be reported as a member.
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define RFC_3986_UNRESERVED(c) ((c) != 0 && (isalnum(UCH(c)) || strchr("-._~", UCH(c)) != 0))
#define RFC_3986_GEN_DELIMS(c) ((c) != 0 && strchr(":/?#[]@", UCH(c)) != 0)
#define RFC_3986_SUB_DELIMS(c) ((c) != 0 && strchr("!$&'()*+,;=", UCH(c)) != 0)
39 |
+ |
40 |
/*
 * Scan the start of "host" for a user/password prefix.
 *
 * Returns a pointer to the "@" which ends that prefix, or NULL if the scan
 * stops for any other reason:
 *   - the string ends before an "@" is seen,
 *   - a second ":" is seen,
 *   - the "@" is the first character or directly follows a ":",
 *   - a gen-delim other than ":" or "@" is not followed by another gen-delim,
 *   - a "%" is not followed by two hexadecimal digits,
 *   - a character is neither unreserved nor a sub-delim.
 */
static char *skip_user_passwd(char *host)
{
    char *cursor = host;
    int colons = 0;		/* number of ":" accepted so far */
    int prev = -1;		/* previously accepted character, -1 at start */
    int ch;

    for (; (ch = UCH(*cursor)) != '\0'; prev = ch, ++cursor) {
	if (ch == '@') {
	    /* an empty prefix, or one ending in ":", does not count */
	    return (cursor != host && prev != ':') ? cursor : NULL;
	}
	if (ch == ':') {
	    if (colons++)
		return NULL;
	    continue;
	}
	if (RFC_3986_GEN_DELIMS(ch)) {
	    if (!RFC_3986_GEN_DELIMS(cursor[1]))
		return NULL;
	    continue;
	}
	if (ch == '%') {
	    /* the second digit is inspected only when the first one is hex */
	    if (!isxdigit(UCH(cursor[1])) || !isxdigit(UCH(cursor[2])))
		return NULL;
	    continue;
	}
	if (!RFC_3986_UNRESERVED(ch) && !RFC_3986_SUB_DELIMS(ch))
	    return NULL;
    }
    return NULL;
}
73 |
+ |
74 |
/*
 * Check whether the user/password text could be mistaken for a hostname.
 *
 * Works on a private copy of "auth", cut off at the first ":".  If what
 * remains contains a ".", that copy is returned and the caller must release
 * it.  Otherwise the copy is released with FREE() and the pointer is returned
 * as FREE() left it (presumably NULL, since the caller tests for NULL --
 * confirm against the definition of FREE).
 */
static char *fake_hostname(char *auth)
{
    char *copy = NULL;
    char *port;

    StrAllocCopy(copy, auth);

    port = strchr(copy, ':');
    if (port != NULL)
	*port = '\0';

    if (strchr(copy, '.') == NULL) {
	FREE(copy);
    }
    return copy;
}
86 |
+ |
87 |
+/* |
88 |
* Strip any username from the given string so we retain only the host. |
89 |
*/ |
90 |
static void strip_userid(char *host) |
91 |
{ |
92 |
char *p1 = host; |
93 |
- char *p2 = StrChr(host, '@'); |
94 |
- char *fake; |
95 |
+ char *p2 = skip_user_passwd(host); |
96 |
|
97 |
if (p2 != 0) { |
98 |
+ char *msg = NULL; |
99 |
+ char *auth = NULL; |
100 |
+ char *save = NULL; |
101 |
+ char *fake = NULL; |
102 |
+ char *p3 = p2; |
103 |
+ int gen_delims = 0; |
104 |
+ int sub_delims = 0; |
105 |
+ int my_delimit = UCH(*p2); |
106 |
+ int do_trimming = (my_delimit == '@'); |
107 |
+ |
108 |
*p2++ = '\0'; |
109 |
- if ((fake = HTParse(host, "", PARSE_HOST)) != NULL) { |
110 |
- char *msg = NULL; |
111 |
+ StrAllocCopy(auth, host); |
112 |
|
113 |
- CTRACE((tfp, "parsed:%s\n", fake)); |
114 |
- HTSprintf0(&msg, gettext("Address contains a username: %s"), host); |
115 |
- HTAlert(msg); |
116 |
- FREE(msg); |
117 |
+ /* |
118 |
+ * Trailing "gen-delims" demonstrates that there is no user/password. |
119 |
+ */ |
120 |
+ while ((p3 != host) && RFC_3986_GEN_DELIMS(p3[-1])) { |
121 |
+ ++gen_delims; |
122 |
+ *(--p3) = '\0'; |
123 |
} |
124 |
- while ((*p1++ = *p2++) != '\0') { |
125 |
- ; |
126 |
+ /* |
127 |
+ * While legal, punctuation-only user/password is questionable. |
128 |
+ */ |
129 |
+ while ((p3 != host) && RFC_3986_SUB_DELIMS(p3[-1])) { |
130 |
+ ++sub_delims; |
131 |
+ *(--p3) = '\0'; |
132 |
+ } |
133 |
+ CTRACE((tfp, "trimmed:%s\n", host)); |
134 |
+ StrAllocCopy(save, host); |
135 |
+ |
136 |
+ if (gen_delims || strcmp(save, auth)) { |
137 |
+ HTSprintf0(&msg, |
138 |
+ gettext("User/password may appear to be a hostname: '%s' (e.g, '%s')"), |
139 |
+ auth, save); |
140 |
+ do_trimming = !gen_delims; |
141 |
+ } else if (*host == '\0' && sub_delims) { |
142 |
+ HTSprintf0(&msg, |
143 |
+ gettext("User/password contains only punctuation: %s"), |
144 |
+ auth); |
145 |
+ } else if ((fake = fake_hostname(host)) != NULL) { |
146 |
+ HTSprintf0(&msg, |
147 |
+ gettext("User/password may be confused with hostname: '%s' (e.g, '%s')"), |
148 |
+ auth, fake); |
149 |
+ } |
150 |
+ if (msg != 0) |
151 |
+ HTAlert(msg); |
152 |
+ if (do_trimming) { |
153 |
+ while ((*p1++ = *p2++) != '\0') { |
154 |
+ ; |
155 |
+ } |
156 |
} |
157 |
+ FREE(fake); |
158 |
+ FREE(save); |
159 |
+ FREE(auth); |
160 |
+ FREE(msg); |
161 |
} |
162 |
} |
163 |
|