WvStreams
|
00001 /* 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * WvUrl is a simple URL-parsing class with built-in (though still somewhat 00006 * inconvenient) DNS resolution. 00007 * 00008 * See wvurl.h. 00009 */ 00010 #include "wvurl.h" 00011 #include "strutils.h" 00012 00013 // A static list of the default ports for each protocol. 00014 struct DefaultPort 00015 { 00016 const char *proto; 00017 int port; 00018 bool uses_slashes; 00019 }; 00020 00021 // The protocols must be arranged from longest to shortest because they're 00022 // compared with strncmp, so "https://" will also match http. 00023 static DefaultPort portmap[] = { 00024 { "exchangeits", 7070, false }, 00025 { "exchangeit", 6969, false }, 00026 { "https", 443, true }, 00027 { "http", 80, true }, 00028 { "file", 0, true }, 00029 { "sip", 5060, false }, 00030 { "ftp", 21, true }, 00031 { "ldaps", 636, false }, 00032 { "ldap", 389, false }, 00033 { NULL, 0 } 00034 }; 00035 00036 // Look up the protocol and return the default port. 00037 static int get_default_port(WvString proto) 00038 { 00039 DefaultPort *p = portmap; 00040 for (p = portmap; p->proto != NULL; p++) 00041 { 00042 if (strncmp(p->proto, proto, strlen(p->proto)) == 0) 00043 return p->port; 00044 } 00045 return -1; 00046 } 00047 00048 // Look up the protocol and decide whether it uses slashes (http) or not (sip) 00049 // A check of rfc2396 shows that the URI standard actually distinguishes 00050 // these: 'hierarchical' vs. 'opaque'. 00051 static bool protocol_uses_slashes(WvString proto) 00052 { 00053 DefaultPort *p = portmap; 00054 for (p = portmap; p->proto != NULL; p++) 00055 { 00056 if (strncmp(p->proto, proto, strlen(p->proto)) == 0) 00057 return p->uses_slashes; 00058 } 00059 return false; 00060 } 00061 00062 // Split up the URL into a hostname, a port, and the rest of it. 00063 WvUrl::WvUrl(WvStringParm url) : err("No error") 00064 { 00065 WvString work(url); 00066 char *cptr, *wptr = work.edit(); 00067 00068 port = 0; // error condition by default 00069 addr = NULL; 00070 resolving = true; 00071 00072 // deal with extra whitespace. 00073 wptr = trim_string(wptr); 00074 cptr = wptr + strcspn(wptr, " \t\r\n"); 00075 *cptr = 0; 00076 00077 // if it's not one of these easy prefixes, give up. Our URL parser is 00078 // pretty dumb. 00079 if (get_default_port(wptr) < 0) 00080 { 00081 err = "WvUrl cannot handle the given protocol."; 00082 return; 00083 } 00084 00085 cptr = strchr(wptr, ':'); 00086 if (!cptr) 00087 { 00088 err = "No colon after the protocol."; 00089 return; 00090 } 00091 *cptr = 0; 00092 proto = wptr; 00093 00094 bool use_slashes = protocol_uses_slashes(proto); 00095 wptr = cptr + (use_slashes ? 3 : 1); 00096 00097 cptr = strchr(wptr, '@'); 00098 if (!cptr) // no user given 00099 { 00100 user = ""; 00101 password = ""; 00102 } 00103 else 00104 { 00105 *cptr = 0; 00106 char *cptr2 = strchr(wptr, ':'); 00107 if (cptr2 && (*(cptr2+1) != 0)) 00108 { 00109 *cptr2 = 0; 00110 password = cptr2 + 1; 00111 } 00112 else 00113 password = ""; 00114 user = wptr; 00115 wptr = cptr + 1; 00116 } 00117 00118 cptr = strchr(wptr, '/'); 00119 if (!cptr) // no path given 00120 file = use_slashes ? "/" : ""; 00121 else 00122 { 00123 file = cptr; 00124 *cptr = 0; 00125 } 00126 00127 cptr = strchr(wptr, ':'); 00128 if (!cptr) 00129 port = get_default_port(proto); 00130 else 00131 { 00132 port = atoi(cptr+1); 00133 *cptr = 0; 00134 } 00135 00136 hostname = wptr; 00137 00138 resolve(); 00139 } 00140 00141 00142 WvUrl::WvUrl(const WvUrl &url) : err("No error") 00143 { 00144 addr = NULL; 00145 resolving = true; 00146 00147 proto = url.proto; 00148 user = url.user; 00149 password = url.password; 00150 hostname = url.hostname; 00151 file = url.file; 00152 port = url.port; 00153 00154 resolve(); 00155 } 00156 00157 00158 WvUrl::~WvUrl() 00159 { 00160 if (addr) delete addr; 00161 } 00162 00163 00164 bool WvUrl::resolve() 00165 { 00166 const WvIPAddr *ip; 00167 int numaddrs; 00168 00169 numaddrs = dns.findaddr(0, hostname, &ip); 00170 if (!numaddrs) // error condition 00171 { 00172 err = WvString("Host '%s' could not be found.", hostname); 00173 resolving = false; 00174 return false; 00175 } 00176 else if (numaddrs < 0) // still waiting 00177 { 00178 resolving = true; 00179 return false; 00180 } 00181 else // got at least one address 00182 { 00183 resolving = false; 00184 if (addr) delete addr; 00185 addr = new WvIPPortAddr(*ip, port); 00186 return true; 00187 } 00188 } 00189 00190 00191 // Print out the URL, using the port name (if it's not 80), and either the 00192 // hostname (if we know it) or the address (if we know that instead.) 00193 WvUrl::operator WvString () const 00194 { 00195 if (!isok()) 00196 return WvString("(Invalid URL: %s)", err); 00197 00198 WvString protostr; 00199 if (protocol_uses_slashes(proto)) 00200 protostr = WvString("%s://", proto); 00201 else 00202 protostr = WvString("%s:", proto); 00203 WvString userstr(""); 00204 if (user && user.len() != 0) 00205 { 00206 userstr = WvString("%s", user); 00207 if (password && password.len() != 0) 00208 userstr.append(WvString(":%s@", password)); 00209 else 00210 userstr.append("@"); 00211 } 00212 WvString portstr(""); 00213 if (port && port != get_default_port(proto)) 00214 portstr = WvString(":%s", port); 00215 if (hostname) 00216 return WvString("%s%s%s%s%s", protostr, userstr, hostname, portstr, file); 00217 else if (addr) 00218 return WvString("%s%s%s%s%s", protostr, userstr, *addr, portstr, file); 00219 else 00220 { 00221 assert(0); 00222 return WvString("(Invalid URL)"); 00223 } 00224 } 00225 00226