224 lines
4.7 KiB
C
224 lines
4.7 KiB
C
/* ----------------------------------------------------------------------- *
|
|
*
|
|
* Copyright 2009-2011 Intel Corporation; author: H. Peter Anvin
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use,
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or
|
|
* sell copies of the Software, and to permit persons to whom
|
|
* the Software is furnished to do so, subject to the following
|
|
* conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall
|
|
* be included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* ----------------------------------------------------------------------- */
|
|
|
|
/*
|
|
* urlparse.c
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include "url.h"
|
|
|
|
/*
|
|
* Return the type of a URL without modifying the string
|
|
*/
|
|
enum url_type url_type(const char *url)
|
|
{
|
|
const char *q;
|
|
|
|
q = strchr(url, ':');
|
|
if (!q)
|
|
return URL_SUFFIX;
|
|
|
|
if (q[1] == '/' && q[2] == '/')
|
|
return URL_NORMAL;
|
|
|
|
if (q[1] == ':')
|
|
return URL_OLD_TFTP;
|
|
|
|
return URL_SUFFIX;
|
|
}
|
|
|
|
/*
|
|
* Decompose a URL into its components. This is done in-place;
|
|
* this routine does not allocate any additional storage. Freeing the
|
|
* original buffer frees all storage used.
|
|
*/
|
|
void parse_url(struct url_info *ui, char *url)
|
|
{
|
|
char *p = url;
|
|
char *q, *r, *s;
|
|
int c;
|
|
|
|
memset(ui, 0, sizeof *ui);
|
|
|
|
q = strchr(p, ':');
|
|
if (q && (q[1] == '/' && q[2] == '/')) {
|
|
ui->type = URL_NORMAL;
|
|
|
|
ui->scheme = p;
|
|
*q = '\0';
|
|
p = q+3;
|
|
|
|
q = strchr(p, '/');
|
|
if (q) {
|
|
*q = '\0';
|
|
ui->path = q+1;
|
|
q = strchr(q+1, '#');
|
|
if (q)
|
|
*q = '\0';
|
|
} else {
|
|
ui->path = "";
|
|
}
|
|
|
|
r = strchr(p, '@');
|
|
if (r) {
|
|
ui->user = p;
|
|
*r = '\0';
|
|
s = strchr(p, ':');
|
|
if (s) {
|
|
*s = '\0';
|
|
ui->passwd = s+1;
|
|
}
|
|
p = r+1;
|
|
}
|
|
|
|
ui->host = p;
|
|
r = strchr(p, ':');
|
|
if (r) {
|
|
*r++ = '\0';
|
|
ui->port = 0;
|
|
while ((c = *r++)) {
|
|
c -= '0';
|
|
if (c > 9)
|
|
break;
|
|
ui->port = ui->port * 10 + c;
|
|
}
|
|
}
|
|
} else if (q && q[1] == ':') {
|
|
*q = '\0';
|
|
ui->scheme = "tftp";
|
|
ui->host = p;
|
|
ui->path = q+2;
|
|
ui->type = URL_OLD_TFTP;
|
|
} else {
|
|
ui->path = p;
|
|
ui->type = URL_SUFFIX;
|
|
}
|
|
}
|
|
|
|
/*
 * Escapes unsafe characters in a URL.
 * This does *not* escape things like query characters!
 *
 * Every byte <= ' ' or > '~' is written as '%' followed by two
 * uppercase hex digits; every other byte is copied unchanged.
 *
 * output  - destination buffer
 * input   - NUL-terminated source string
 * bufsize - size of the destination buffer in bytes
 *
 * At most bufsize-1 characters are stored, followed by a terminating
 * NUL.  If bufsize is 0 nothing at all is written to output.  When the
 * output is truncated the cut can fall in the middle of a %XX sequence.
 *
 * Returns the number of characters in the total output, i.e. the length
 * of the complete escaped string (not counting the NUL) whether or not
 * it fit.  A return value >= bufsize means the output was truncated.
 */
size_t url_escape_unsafe(char *output, const char *input, size_t bufsize)
{
    static const char uchexchar[] = "0123456789ABCDEF";
    const char *p;
    unsigned char c;
    char *q = output;
    size_t n = 0;

    for (p = input; (c = (unsigned char)*p) != '\0'; p++) {
        if (c <= ' ' || c > '~') {
            if (++n < bufsize)
                *q++ = '%';
            if (++n < bufsize)
                *q++ = uchexchar[c >> 4];
            if (++n < bufsize)
                *q++ = uchexchar[c & 15];
        } else {
            if (++n < bufsize)
                *q++ = (char)c;
        }
    }

    /* A zero-sized buffer has no room even for the terminator. */
    if (bufsize)
        *q = '\0';

    return n;
}
|
|
|
|
/*
 * Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
 * to its value 0..15.  Returns -1 for any other character.
 */
static int hexdigit(char c)
{
    char folded;

    if ('0' <= c && c <= '9')
        return c - '0';

    /* Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' */
    folded = c | 0x20;
    if (folded < 'a' || folded > 'f')
        return -1;

    return folded - 'a' + 10;
}
|
|
|
|
/*
 * Unescapes a buffer, optionally ending at an *unescaped* terminator
 * (like ; for TFTP).  The unescaping is done in-place.
 *
 * buffer     - NUL-terminated, writable string.  Each "%XX" with two
 *              hex digits is replaced by the byte it encodes; a '%' not
 *              followed by two hex digits is copied through unchanged.
 *              Note that "%00" stores a NUL byte in the output.
 * terminator - character that ends processing when it appears literally
 *              (an escaped occurrence does not count).  Pass '\0' to
 *              process the whole string.
 *
 * The unescaped text is always NUL-terminated.
 *
 * If a terminator is reached, return a pointer to the first character
 * after the terminator; the text from there on is left untouched.
 * Returns NULL if the end of the string is reached first.
 */
char *url_unescape(char *buffer, char terminator)
{
    /* Compare as unsigned char on both sides so high-bit bytes match. */
    const unsigned char stop = (unsigned char)terminator;
    char *p = buffer;           /* read position */
    char *q = buffer;           /* write position, never ahead of p */
    unsigned char c;
    int x, y;

    while ((c = (unsigned char)*p++) != '\0') {
        if (c == stop) {
            /*
             * p has already moved past the terminator, so the '\0'
             * stored here (possibly on the terminator itself) cannot
             * hide the remainder from the caller.
             */
            *q = '\0';
            return p;
        }
        if (c == '%') {
            x = hexdigit(p[0]);
            if (x >= 0) {
                /* p[0] is a hex digit, hence not NUL: p[1] is readable */
                y = hexdigit(p[1]);
                if (y >= 0) {
                    *q++ = (char)((x << 4) + y);
                    p += 2;
                    continue;
                }
            }
        }
        *q++ = (char)c;
    }
    *q = '\0';
    return NULL;
}
|
|
|
|
#ifdef URL_TEST
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
int i;
|
|
struct url_info url;
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
parse_url(&url, argv[i]);
|
|
printf("scheme: %s\n"
|
|
"user: %s\n"
|
|
"passwd: %s\n"
|
|
"host: %s\n"
|
|
"port: %d\n"
|
|
"path: %s\n"
|
|
"type: %d\n",
|
|
url.scheme, url.user, url.passwd, url.host, url.port,
|
|
url.path, url.type);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif
|