472 lines
11 KiB
C
472 lines
11 KiB
C
/* ----------------------------------------------------------------------- *
|
|
*
|
|
* Copyright 2011 Intel Corporation; author: H. Peter Anvin
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston MA 02110-1301, USA; either version 2 of the License, or
|
|
* (at your option) any later version; incorporated herein by reference.
|
|
*
|
|
* ----------------------------------------------------------------------- */
|
|
|
|
#include <inttypes.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include <dprintf.h>
|
|
#include "pxe.h"
|
|
|
|
/*
 * States of the HTML scanner that looks for <a href="..."> tags.
 *
 * The numeric values are used directly as indices into statemachine[],
 * so they are spelled out explicitly and must stay dense and in order.
 */
enum http_readdir_state {
    st_start     = 0,   /* Outside of any tag (initial state) */
    st_open      = 1,   /* Seen "<" */
    st_a         = 2,   /* Seen "<a" */
    st_attribute = 3,   /* Seen "<a " -- at the start of an attribute */
    st_h         = 4,   /* Seen "<a h" */
    st_hr        = 5,   /* Seen "<a hr" */
    st_hre       = 6,   /* Seen "<a hre" */
    st_href      = 7,   /* Seen "<a href" */
    st_hrefeq    = 8,   /* Seen "<a href=" -- unquoted value follows */
    st_hrefqu    = 9,   /* Inside a quoted href value */
    st_badtag    = 10,  /* Inside a tag that is not <a> */
    st_badtagqu  = 11,  /* Inside a quoted string of such a tag */
    st_badattr   = 12,  /* Inside an <a> attribute that is not href */
    st_badattrqu = 13,  /* Inside a quoted string of such an attribute */
};
|
|
|
|
/*
 * One row of the scanner's transition table: for a given current state,
 * the next state for each class of input character.  All "st_*" members
 * hold enum http_readdir_state values stored as bytes.
 */
struct machine {
    char    xchar;      /* State-specific character, compared in lower case */
    uint8_t st_xchar;   /* Next state when the input matches xchar */
    uint8_t st_left;    /* Next state on '<' */
    uint8_t st_right;   /* Next state on '>' */
    uint8_t st_space;   /* Next state on whitespace */
    uint8_t st_other;   /* Next state on anything else */
};
|
|
|
|
static const struct machine statemachine[] = {
|
|
/* xchar st_xchar st_left st_right st_space st_other */
|
|
{ 0, 0, st_open, st_start, st_start, st_start },
|
|
{ 'a', st_a, st_badtag, st_start, st_open, st_badtag },
|
|
{ 0, 0, st_open, st_open, st_attribute, st_badtag },
|
|
{ 'h', st_h, st_open, st_start, st_attribute, st_badattr },
|
|
{ 'r', st_hr, st_open, st_start, st_attribute, st_badattr },
|
|
{ 'e', st_hre, st_open, st_start, st_attribute, st_badattr },
|
|
{ 'f', st_href, st_open, st_start, st_attribute, st_badattr },
|
|
{ '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr },
|
|
{ '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq },
|
|
{ '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu },
|
|
{ '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag },
|
|
{ '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu },
|
|
{ '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr },
|
|
{ '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu },
|
|
};
|
|
|
|
/*
 * Mapping from a named HTML character entity to its code point.
 */
struct html_entity {
    uint16_t   ucs;         /* Code point the entity stands for */
    const char entity[9];   /* Entity name without '&' and ';', NUL-terminated */
};
|
|
|
|
/*
 * Named character entities recognised inside href values, in ascending
 * code point order.  Only the four markup-significant entities are always
 * present; the rest are compiled in when HTTP_ALL_ENTITIES is defined.
 *
 * The table ends with a sentinel entry whose ucs is 0.
 */
static const struct html_entity entities[] = {
    /* Always available */
    { 34, "quot" }, { 38, "amp" }, { 60, "lt" }, { 62, "gt" },
#ifdef HTTP_ALL_ENTITIES
    /* 160..191: symbols and punctuation */
    { 160, "nbsp" },   { 161, "iexcl" },  { 162, "cent" },   { 163, "pound" },
    { 164, "curren" }, { 165, "yen" },    { 166, "brvbar" }, { 167, "sect" },
    { 168, "uml" },    { 169, "copy" },   { 170, "ordf" },   { 171, "laquo" },
    { 172, "not" },    { 173, "shy" },    { 174, "reg" },    { 175, "macr" },
    { 176, "deg" },    { 177, "plusmn" }, { 178, "sup2" },   { 179, "sup3" },
    { 180, "acute" },  { 181, "micro" },  { 182, "para" },   { 183, "middot" },
    { 184, "cedil" },  { 185, "sup1" },   { 186, "ordm" },   { 187, "raquo" },
    { 188, "frac14" }, { 189, "frac12" }, { 190, "frac34" }, { 191, "iquest" },

    /* 192..223: accented capital letters */
    { 192, "Agrave" }, { 193, "Aacute" }, { 194, "Acirc" },  { 195, "Atilde" },
    { 196, "Auml" },   { 197, "Aring" },  { 198, "AElig" },  { 199, "Ccedil" },
    { 200, "Egrave" }, { 201, "Eacute" }, { 202, "Ecirc" },  { 203, "Euml" },
    { 204, "Igrave" }, { 205, "Iacute" }, { 206, "Icirc" },  { 207, "Iuml" },
    { 208, "ETH" },    { 209, "Ntilde" }, { 210, "Ograve" }, { 211, "Oacute" },
    { 212, "Ocirc" },  { 213, "Otilde" }, { 214, "Ouml" },   { 215, "times" },
    { 216, "Oslash" }, { 217, "Ugrave" }, { 218, "Uacute" }, { 219, "Ucirc" },
    { 220, "Uuml" },   { 221, "Yacute" }, { 222, "THORN" },  { 223, "szlig" },

    /* 224..255: accented small letters */
    { 224, "agrave" }, { 225, "aacute" }, { 226, "acirc" },  { 227, "atilde" },
    { 228, "auml" },   { 229, "aring" },  { 230, "aelig" },  { 231, "ccedil" },
    { 232, "egrave" }, { 233, "eacute" }, { 234, "ecirc" },  { 235, "euml" },
    { 236, "igrave" }, { 237, "iacute" }, { 238, "icirc" },  { 239, "iuml" },
    { 240, "eth" },    { 241, "ntilde" }, { 242, "ograve" }, { 243, "oacute" },
    { 244, "ocirc" },  { 245, "otilde" }, { 246, "ouml" },   { 247, "divide" },
    { 248, "oslash" }, { 249, "ugrave" }, { 250, "uacute" }, { 251, "ucirc" },
    { 252, "uuml" },   { 253, "yacute" }, { 254, "thorn" },  { 255, "yuml" },

    /* 338..732: additional letters and modifiers */
    { 338, "OElig" },  { 339, "oelig" },  { 352, "Scaron" }, { 353, "scaron" },
    { 376, "Yuml" },   { 402, "fnof" },   { 710, "circ" },   { 732, "tilde" },

    /* 913..937: Greek capital letters */
    { 913, "Alpha" },  { 914, "Beta" },   { 915, "Gamma" },  { 916, "Delta" },
    { 917, "Epsilon" }, { 918, "Zeta" },  { 919, "Eta" },    { 920, "Theta" },
    { 921, "Iota" },   { 922, "Kappa" },  { 923, "Lambda" }, { 924, "Mu" },
    { 925, "Nu" },     { 926, "Xi" },     { 927, "Omicron" }, { 928, "Pi" },
    { 929, "Rho" },    { 931, "Sigma" },  { 932, "Tau" },    { 933, "Upsilon" },
    { 934, "Phi" },    { 935, "Chi" },    { 936, "Psi" },    { 937, "Omega" },

    /* 945..982: Greek small letters and variant forms */
    { 945, "alpha" },  { 946, "beta" },   { 947, "gamma" },  { 948, "delta" },
    { 949, "epsilon" }, { 950, "zeta" },  { 951, "eta" },    { 952, "theta" },
    { 953, "iota" },   { 954, "kappa" },  { 955, "lambda" }, { 956, "mu" },
    { 957, "nu" },     { 958, "xi" },     { 959, "omicron" }, { 960, "pi" },
    { 961, "rho" },    { 962, "sigmaf" }, { 963, "sigma" },  { 964, "tau" },
    { 965, "upsilon" }, { 966, "phi" },   { 967, "chi" },    { 968, "psi" },
    { 969, "omega" },  { 977, "thetasym" }, { 978, "upsih" }, { 982, "piv" },

    /* 8194..8364: spaces, joiners, dashes, quotes and other punctuation */
    { 8194, "ensp" },   { 8195, "emsp" },   { 8201, "thinsp" }, { 8204, "zwnj" },
    { 8205, "zwj" },    { 8206, "lrm" },    { 8207, "rlm" },    { 8211, "ndash" },
    { 8212, "mdash" },  { 8216, "lsquo" },  { 8217, "rsquo" },  { 8218, "sbquo" },
    { 8220, "ldquo" },  { 8221, "rdquo" },  { 8222, "bdquo" },  { 8224, "dagger" },
    { 8225, "Dagger" }, { 8226, "bull" },   { 8230, "hellip" }, { 8240, "permil" },
    { 8242, "prime" },  { 8243, "Prime" },  { 8249, "lsaquo" }, { 8250, "rsaquo" },
    { 8254, "oline" },  { 8260, "frasl" },  { 8364, "euro" },

    /* 8465..8501: letter-like symbols */
    { 8465, "image" },  { 8472, "weierp" }, { 8476, "real" },   { 8482, "trade" },
    { 8501, "alefsym" },

    /* 8592..8660: arrows */
    { 8592, "larr" },   { 8593, "uarr" },   { 8594, "rarr" },   { 8595, "darr" },
    { 8596, "harr" },   { 8629, "crarr" },  { 8656, "lArr" },   { 8657, "uArr" },
    { 8658, "rArr" },   { 8659, "dArr" },   { 8660, "hArr" },

    /* 8704..8901: mathematical operators */
    { 8704, "forall" }, { 8706, "part" },   { 8707, "exist" },  { 8709, "empty" },
    { 8711, "nabla" },  { 8712, "isin" },   { 8713, "notin" },  { 8715, "ni" },
    { 8719, "prod" },   { 8721, "sum" },    { 8722, "minus" },  { 8727, "lowast" },
    { 8730, "radic" },  { 8733, "prop" },   { 8734, "infin" },  { 8736, "ang" },
    { 8743, "and" },    { 8744, "or" },     { 8745, "cap" },    { 8746, "cup" },
    { 8747, "int" },    { 8756, "there4" }, { 8764, "sim" },    { 8773, "cong" },
    { 8776, "asymp" },  { 8800, "ne" },     { 8801, "equiv" },  { 8804, "le" },
    { 8805, "ge" },     { 8834, "sub" },    { 8835, "sup" },    { 8836, "nsub" },
    { 8838, "sube" },   { 8839, "supe" },   { 8853, "oplus" },  { 8855, "otimes" },
    { 8869, "perp" },   { 8901, "sdot" },

    /* 8968..9830: brackets, lozenge and card suits */
    { 8968, "lceil" },  { 8969, "rceil" },  { 8970, "lfloor" }, { 8971, "rfloor" },
    { 9001, "lang" },   { 9002, "rang" },   { 9674, "loz" },    { 9824, "spades" },
    { 9827, "clubs" },  { 9829, "hearts" }, { 9830, "diams" },
#endif /* HTTP_ALL_ENTITIES */
    { 0, "" }   /* Sentinel: ucs == 0 ends the table */
};
|
|
|
|
/*
 * Progress of decoding a single "&...;" character reference.
 */
struct entity_state {
    char  entity_buf[16];   /* Characters collected after the '&' */
    char *ep;               /* Next free slot in entity_buf; NULL when not inside an entity */
};
|
|
|
|
static char *emit(char *p, int c, struct entity_state *st)
|
|
{
|
|
const struct html_entity *ent;
|
|
unsigned int ucs;
|
|
|
|
if (!st->ep) {
|
|
if (c == '&') {
|
|
/* Entity open */
|
|
st->ep = st->entity_buf;
|
|
} else {
|
|
*p++ = c;
|
|
}
|
|
} else {
|
|
if (c == ';') {
|
|
st->ep = NULL;
|
|
*p = '\0';
|
|
if (st->entity_buf[0] == '#') {
|
|
if ((st->entity_buf[1] | 0x20)== 'x') {
|
|
ucs = strtoul(st->entity_buf + 2, NULL, 16);
|
|
} else {
|
|
ucs = strtoul(st->entity_buf + 1, NULL, 10);
|
|
}
|
|
} else {
|
|
for (ent = entities; ent->ucs; ent++) {
|
|
if (!strcmp(st->entity_buf, ent->entity))
|
|
break;
|
|
}
|
|
ucs = ent->ucs;
|
|
}
|
|
if (ucs < 32 || ucs >= 0x10ffff)
|
|
return p; /* Bogus */
|
|
if (ucs >= 0x10000) {
|
|
*p++ = 0xf0 + (ucs >> 18);
|
|
*p++ = 0x80 + ((ucs >> 12) & 0x3f);
|
|
*p++ = 0x80 + ((ucs >> 6) & 0x3f);
|
|
*p++ = 0x80 + (ucs & 0x3f);
|
|
} else if (ucs >= 0x800) {
|
|
*p++ = 0xe0 + (ucs >> 12);
|
|
*p++ = 0x80 + ((ucs >> 6) & 0x3f);
|
|
*p++ = 0x80 + (ucs & 0x3f);
|
|
} else if (ucs >= 0x80) {
|
|
*p++ = 0xc0 + (ucs >> 6);
|
|
*p++ = 0x80 + (ucs & 0x3f);
|
|
} else {
|
|
*p++ = ucs;
|
|
}
|
|
} else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) {
|
|
*st->ep++ = c;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
static const char *http_get_filename(struct inode *inode, char *buf)
|
|
{
|
|
int c, lc;
|
|
char *p;
|
|
const struct machine *sm;
|
|
struct entity_state es;
|
|
enum http_readdir_state state = st_start;
|
|
enum http_readdir_state pstate = st_start;
|
|
|
|
memset(&es, 0, sizeof es);
|
|
|
|
p = buf;
|
|
for (;;) {
|
|
c = pxe_getc(inode);
|
|
if (c == -1)
|
|
return NULL;
|
|
|
|
lc = tolower(c);
|
|
|
|
sm = &statemachine[state];
|
|
|
|
if (lc == sm->xchar)
|
|
state = sm->st_xchar;
|
|
else if (c == '<')
|
|
state = sm->st_left;
|
|
else if (c == '>')
|
|
state = sm->st_right;
|
|
else if (isspace(c))
|
|
state = sm->st_space;
|
|
else
|
|
state = sm->st_other;
|
|
|
|
if (state == st_hrefeq || state == st_hrefqu) {
|
|
if (state != pstate)
|
|
p = buf;
|
|
else if (p < buf + FILENAME_MAX)
|
|
p = emit(p, c, &es);
|
|
pstate = state;
|
|
} else {
|
|
if (pstate != st_start)
|
|
pstate = st_start;
|
|
if (p != buf && state == st_start) {
|
|
*p = '\0';
|
|
return buf;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int http_readdir(struct inode *inode, struct dirent *dirent)
|
|
{
|
|
char buf[FILENAME_MAX + 6];
|
|
const char *fn, *sp;
|
|
|
|
for (;;) {
|
|
fn = http_get_filename(inode, buf);
|
|
|
|
if (!fn)
|
|
return -1; /* End of directory */
|
|
|
|
/* Ignore entries with http special characters */
|
|
if (strchr(fn, '#'))
|
|
continue;
|
|
if (strchr(fn, '?'))
|
|
continue;
|
|
|
|
/* A slash if present has to be the last character, and not the first */
|
|
sp = strchr(fn, '/');
|
|
if (sp) {
|
|
if (sp == fn || sp[1])
|
|
continue;
|
|
} else {
|
|
sp = strchr(fn, '\0');
|
|
}
|
|
|
|
if (sp > fn + NAME_MAX)
|
|
continue;
|
|
|
|
dirent->d_ino = 0; /* Not applicable */
|
|
dirent->d_off = 0; /* Not applicable */
|
|
dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1;
|
|
dirent->d_type = *sp == '/' ? DT_DIR : DT_REG;
|
|
memcpy(dirent->d_name, fn, sp-fn);
|
|
dirent->d_name[sp-fn] = '\0';
|
|
return 0;
|
|
}
|
|
}
|