Improve redbean method=get parameter handling

This commit is contained in:
Justine Tunney 2022-09-19 19:23:24 -07:00
parent 6e582d245b
commit 2cc1d5ac4c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
9 changed files with 175 additions and 146 deletions

View file

@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
struct Url url;
char *host, *port;
bool usessl = false;
_gc(ParseUrl(urlarg, -1, &url));
_gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
_gc(url.params.p);
if (url.scheme.n) {
if (url.scheme.n == 5 && !memcasecmp(url.scheme.p, "https", 5)) {

View file

@ -28,8 +28,7 @@
struct UrlParser {
char *p, *q;
const char *s;
unsigned c, i, n;
char isform, islatin1, isopaque;
unsigned c, i, n, f;
};
static void EmitLatin1(char **p, int c) {
@ -99,7 +98,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
return false;
}
} else {
u->isopaque = true;
u->f |= kUrlOpaque;
return false;
}
} else if (u->c == '#' || u->c == '?') {
@ -110,7 +109,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
} else if (u->c == '%') {
ParseEscape(u);
return false;
} else if (u->c >= 0200 && u->islatin1) {
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
EmitLatin1(&u->p, u->c);
return false;
} else {
@ -161,7 +160,7 @@ static void ParseAuthority(struct UrlParser *u, struct Url *h) {
u->q = u->p;
} else if (u->c == '%') {
ParseEscape(u);
} else if (u->c >= 0200 && u->islatin1) {
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
EmitLatin1(&u->p, u->c);
} else {
*u->p++ = u->c;
@ -188,11 +187,11 @@ static void ParsePath(struct UrlParser *u, struct UrlView *h) {
u->c = u->s[u->i++] & 255;
if (u->c == '#') {
break;
} else if (u->c == '?' && !u->isopaque) {
} else if (u->c == '?' && !(u->f & kUrlOpaque)) {
break;
} else if (u->c == '%') {
ParseEscape(u);
} else if (u->c >= 0200 && u->islatin1) {
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
EmitLatin1(&u->p, u->c);
} else {
*u->p++ = u->c;
@ -213,7 +212,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
} else if (u->c == '%') {
ParseEscape(u);
} else if (u->c == '+') {
*u->p++ = u->isform ? ' ' : '+';
*u->p++ = (u->f & kUrlPlus) ? ' ' : '+';
} else if (u->c == '&') {
EmitVal(u, h, t);
t = false;
@ -223,7 +222,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
} else {
*u->p++ = '=';
}
} else if (u->c >= 0200 && u->islatin1) {
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
EmitLatin1(&u->p, u->c);
} else {
*u->p++ = u->c;
@ -237,7 +236,7 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
u->c = u->s[u->i++] & 255;
if (u->c == '%') {
ParseEscape(u);
} else if (u->c >= 0200 && u->islatin1) {
} else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
EmitLatin1(&u->p, u->c);
} else {
*u->p++ = u->c;
@ -248,28 +247,6 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
u->q = u->p;
}
static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
char *m;
struct UrlParser u;
if (n == -1) n = s ? strlen(s) : 0;
u.i = 0;
u.c = 0;
u.s = s;
u.n = n;
u.isform = false;
u.isopaque = false;
u.islatin1 = latin1;
bzero(h, sizeof(*h));
if ((m = malloc(latin1 ? u.n * 2 : u.n))) {
u.q = u.p = m;
if (ParseScheme(&u, h)) ParseAuthority(&u, h);
if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
if (u.c == '?') ParseQuery(&u, &h->params);
if (u.c == '#') ParseFragment(&u, &h->fragment);
}
return m;
}
/**
* Parses URL.
*
@ -298,43 +275,39 @@ static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
* @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
* @param n is byte length and -1 implies strlen
* @param h is assumed to be uninitialized
* @param f is flags which may have:
* - `kUrlPlus` to turn `+` into space in query params
* - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
* @return memory backing UrlView needing free (and h.params.p too)
* @see URI Generic Syntax RFC3986 RFC2396
* @see EncodeUrl()
*/
char *ParseUrl(const char *s, size_t n, struct Url *h) {
return ParseUrlImpl(s, n, h, false);
}
/**
* Parses HTTP Request-URI.
*
* The input is ISO-8859-1 which is transcoded to UTF-8. Therefore we
* assume percent-encoded bytes are expressed as UTF-8. Returned values
* might contain things like NUL characters, C0, and C1 control codes.
* UTF-8 isn't checked for validity and may contain overlong values.
* Absent can be discerned from empty by checking if the pointer is set.
*
* There's no failure condition for this routine. This is a permissive
* parser that doesn't impose character restrictions beyond what is
* necessary for parsing. This doesn't normalize path segments like `.`
* or `..`. Use IsAcceptablePath() to check for those.
*
* @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
* @param n is byte length and -1 implies strlen
* @param h is assumed to be uninitialized
* @return memory backing UrlView needing free (and h.params.p too)
*/
char *ParseRequestUri(const char *s, size_t n, struct Url *h) {
return ParseUrlImpl(s, n, h, true);
char *ParseUrl(const char *s, size_t n, struct Url *h, int f) {
  char *m;
  size_t need;
  struct UrlParser u;
  /* n == -1 asks us to measure s ourselves; a NULL s counts as empty */
  if (n == (size_t)-1) n = s ? strlen(s) : 0;
  u.i = 0;
  u.c = 0;
  u.s = s;
  u.n = n;
  u.f = f;
  /* zero all views up front so absent components read back as NULL/0 */
  bzero(h, sizeof(*h));
  /*
   * Output never outgrows the input, except when transcoding Latin-1,
   * where one input byte may become two output bytes. The doubled size
   * is computed in size_t rather than as `u.n * 2`, because u.n is
   * unsigned and that product would wrap for inputs of 2 GiB or more,
   * leaving the parser to write past an undersized allocation.
   */
  need = u.n;
  if (f & kUrlLatin1) {
    if (need > (size_t)-1 / 2) return 0; /* same result as failed malloc */
    need *= 2;
  }
  if ((m = malloc(need))) {
    u.q = u.p = m;
    /* authority is only parsed when ParseScheme reports one follows */
    if (ParseScheme(&u, h)) ParseAuthority(&u, h);
    /* u.c holds the delimiter that ended the previous component */
    if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
    if (u.c == '?') ParseQuery(&u, &h->params);
    if (u.c == '#') ParseFragment(&u, &h->fragment);
  }
  /* NULL on allocation failure, in which case h stays zeroed */
  return m;
}
/**
* Parses HTTP POST key-value params.
*
* These are similar to the parameters found in a Request-URI. The main
* difference is that `+` is translated into space here. The mime type
* for this is application/x-www-form-urlencoded.
* These are similar to the parameters found in a Request-URI, except
* usually submitted via an HTTP POST request. We translate `+` into
* space. The mime type is application/x-www-form-urlencoded.
*
* This parser is charset agnostic. Returned values might contain things
* like NUL characters, spaces, control codes, and non-canonical encodings.
@ -357,9 +330,7 @@ char *ParseParams(const char *s, size_t n, struct UrlParams *h) {
u.s = s;
u.n = n;
u.c = '?';
u.isform = true;
u.islatin1 = false;
u.isopaque = false;
u.f = kUrlPlus;
if ((m = malloc(u.n))) {
u.q = u.p = m;
ParseQuery(&u, h);
@ -399,9 +370,7 @@ char *ParseHost(const char *s, size_t n, struct Url *h) {
u.c = 0;
u.s = s;
u.n = n;
u.isform = false;
u.islatin1 = true;
u.isopaque = false;
u.f = kUrlLatin1;
if ((m = malloc(u.n * 2))) {
u.q = u.p = m;
ParseAuthority(&u, h);

View file

@ -1,5 +1,10 @@
#ifndef COSMOPOLITAN_NET_HTTP_URL_H_
#define COSMOPOLITAN_NET_HTTP_URL_H_
#define kUrlPlus 1
#define kUrlLatin1 2
#define kUrlOpaque 4
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -28,9 +33,8 @@ struct Url {
};
char *EncodeUrl(struct Url *, size_t *);
char *ParseUrl(const char *, size_t, struct Url *);
char *ParseUrl(const char *, size_t, struct Url *, int);
char *ParseParams(const char *, size_t, struct UrlParams *);
char *ParseRequestUri(const char *, size_t, struct Url *);
char *ParseHost(const char *, size_t, struct Url *);
char *EscapeUrlView(char *, struct UrlView *, const char[256]);

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/fmt/internal.h"
#include "libc/limits.h"
#include "libc/mem/gc.internal.h"
#include "libc/mem/mem.h"
@ -29,7 +30,7 @@
TEST(ParseUrl, testEmpty) {
struct Url h;
gc(ParseUrl(0, 0, &h));
gc(ParseUrl(0, 0, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.params.n);
ASSERT_STREQ("", gc(EncodeUrl(&h, 0)));
@ -37,7 +38,7 @@ TEST(ParseUrl, testEmpty) {
TEST(ParseUrl, testFragment) {
struct Url h;
gc(ParseUrl("#x", -1, &h));
gc(ParseUrl("#x", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.path.n);
ASSERT_EQ(1, h.fragment.n);
@ -47,7 +48,7 @@ TEST(ParseUrl, testFragment) {
TEST(ParseUrl, testFragmentAbsent_isNull) {
struct Url h;
gc(ParseUrl("", -1, &h));
gc(ParseUrl("", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.fragment.p);
ASSERT_EQ(0, h.fragment.n);
@ -56,7 +57,7 @@ TEST(ParseUrl, testFragmentAbsent_isNull) {
TEST(ParseUrl, testFragmentEmpty_isNonNull) {
struct Url h;
gc(ParseUrl("#", -1, &h)); /* python's uri parser is wrong here */
gc(ParseUrl("#", -1, &h, 0)); /* python's uri parser is wrong here */
gc(h.params.p);
ASSERT_NE(0, h.fragment.p);
ASSERT_EQ(0, h.fragment.n);
@ -65,7 +66,7 @@ TEST(ParseUrl, testFragmentEmpty_isNonNull) {
TEST(ParseUrl, testPathFragment) {
struct Url h;
gc(ParseUrl("x#y", -1, &h));
gc(ParseUrl("x#y", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ('x', h.path.p[0]);
@ -76,7 +77,7 @@ TEST(ParseUrl, testPathFragment) {
TEST(ParseUrl, testAbsolutePath) {
struct Url h;
gc(ParseUrl("/x/y", -1, &h));
gc(ParseUrl("/x/y", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(4, h.path.n);
ASSERT_BINEQ(u"/x/y", h.path.p);
@ -85,7 +86,7 @@ TEST(ParseUrl, testAbsolutePath) {
TEST(ParseUrl, testRelativePath1) {
struct Url h;
gc(ParseUrl("x", -1, &h));
gc(ParseUrl("x", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ('x', h.path.p[0]);
@ -94,7 +95,7 @@ TEST(ParseUrl, testRelativePath1) {
TEST(ParseUrl, testOptions) {
struct Url h;
gc(ParseUrl("*", -1, &h));
gc(ParseUrl("*", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ('*', h.path.p[0]);
@ -103,7 +104,7 @@ TEST(ParseUrl, testOptions) {
TEST(ParseUrl, testRelativePath2) {
struct Url h;
gc(ParseUrl("x/y", -1, &h));
gc(ParseUrl("x/y", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(3, h.path.n);
ASSERT_BINEQ(u"x/y", h.path.p);
@ -112,7 +113,7 @@ TEST(ParseUrl, testRelativePath2) {
TEST(ParseUrl, testRoot) {
struct Url h;
gc(ParseUrl("/", -1, &h));
gc(ParseUrl("/", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ('/', h.path.p[0]);
@ -121,7 +122,7 @@ TEST(ParseUrl, testRoot) {
TEST(ParseUrl, testSchemePath) {
struct Url h;
gc(ParseUrl("x:y", -1, &h));
gc(ParseUrl("x:y", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.scheme.n);
ASSERT_BINEQ(u"x", h.scheme.p);
@ -132,7 +133,7 @@ TEST(ParseUrl, testSchemePath) {
TEST(ParseUrl, testSchemeAuthority) {
struct Url h;
gc(ParseUrl("x://y", -1, &h));
gc(ParseUrl("x://y", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.scheme.n);
ASSERT_EQ('x', h.scheme.p[0]);
@ -141,9 +142,37 @@ TEST(ParseUrl, testSchemeAuthority) {
ASSERT_STREQ("x://y", gc(EncodeUrl(&h, 0)));
}
TEST(ParseUrl, testParamsPlus_maybeYes) {
struct Url h;
gc(ParseUrl("x?q=hi+there", -1, &h, kUrlPlus));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_BINEQ(u"x", h.path.p);
ASSERT_EQ(1, h.params.n);
ASSERT_EQ(1, h.params.p[0].key.n);
ASSERT_EQ(8, h.params.p[0].val.n);
ASSERT_BINEQ(u"q", h.params.p[0].key.p);
ASSERT_BINEQ(u"hi there", h.params.p[0].val.p);
ASSERT_STREQ("x?q=hi%20there", gc(EncodeUrl(&h, 0)));
}
TEST(ParseUrl, testParamsPlus_maybeNot) {
struct Url h;
gc(ParseUrl("x?q=hi+there", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_BINEQ(u"x", h.path.p);
ASSERT_EQ(1, h.params.n);
ASSERT_EQ(1, h.params.p[0].key.n);
ASSERT_EQ(8, h.params.p[0].val.n);
ASSERT_BINEQ(u"q", h.params.p[0].key.p);
ASSERT_BINEQ(u"hi+there", h.params.p[0].val.p);
ASSERT_STREQ("x?q=hi%2Bthere", gc(EncodeUrl(&h, 0)));
}
TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {
struct Url h;
gc(ParseUrl("x?+", -1, &h));
gc(ParseUrl("x?+", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_BINEQ(u"x", h.path.p);
@ -155,7 +184,7 @@ TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {
TEST(ParseUrl, testUrl) {
struct Url h;
gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h));
gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.scheme.n);
ASSERT_EQ('a', h.scheme.p[0]);
@ -180,7 +209,7 @@ TEST(ParseUrl, testUrl) {
TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {
struct Url h;
gc(ParseUrl("?=", -1, &h));
gc(ParseUrl("?=", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.params.n);
ASSERT_EQ(0, h.params.p[0].key.n);
@ -192,7 +221,7 @@ TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {
TEST(ParseUrl, testMultipleEquals_goesIntoValue) {
struct Url h;
gc(ParseUrl("?==", -1, &h));
gc(ParseUrl("?==", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.params.n);
ASSERT_EQ(0, h.params.p[0].key.n);
@ -204,7 +233,7 @@ TEST(ParseUrl, testMultipleEquals_goesIntoValue) {
TEST(ParseUrl, testUrlWithoutScheme) {
struct Url h;
gc(ParseUrl("//b@c/d?e#f", -1, &h));
gc(ParseUrl("//b@c/d?e#f", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.scheme.n);
ASSERT_EQ(1, h.user.n);
@ -225,7 +254,7 @@ TEST(ParseUrl, testUrlWithoutScheme) {
TEST(ParseUrl, testUrlWithoutUser) {
struct Url h;
gc(ParseUrl("a://c/d?e#f", -1, &h));
gc(ParseUrl("a://c/d?e#f", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.scheme.n);
ASSERT_EQ('a', h.scheme.p[0]);
@ -248,11 +277,11 @@ TEST(ParseUrl, testUrlWithoutUser) {
TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {
struct Url h;
gc(ParseUrl("", -1, &h));
gc(ParseUrl("", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.params.n);
ASSERT_EQ(NULL, h.params.p);
gc(ParseUrl("?", -1, &h)); /* python's uri parser is wrong here */
gc(ParseUrl("?", -1, &h, 0)); /* python's uri parser is wrong here */
gc(h.params.p);
ASSERT_EQ(0, h.params.n);
ASSERT_NE(NULL, h.params.p);
@ -260,7 +289,7 @@ TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {
TEST(ParseUrl, testWeirdAmps_areReproducible) {
struct Url h;
gc(ParseUrl("?&&", -1, &h));
gc(ParseUrl("?&&", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(3, h.params.n);
ASSERT_EQ(0, h.params.p[0].key.n);
@ -280,7 +309,7 @@ TEST(ParseUrl, testWeirdAmps_areReproducible) {
TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {
struct Url h; /* python's uri parser is wrong here */
gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(26, h.path.n);
ASSERT_EQ(0, memcmp(h.path.p, "o!$%&'()*+,-./09:;=?@AZ_az", 26));
@ -292,7 +321,7 @@ TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {
TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {
struct Url h;
gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(20, h.path.n);
ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=", 20));
@ -303,7 +332,7 @@ TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {
TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {
struct Url h;
gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(27, h.path.n);
ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=?@AZ_az", 27));
@ -314,7 +343,7 @@ TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {
TEST(ParseUrl, testTelephone) {
struct Url h;
gc(ParseUrl("tel:+1-212-867-5309", -1, &h));
gc(ParseUrl("tel:+1-212-867-5309", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(15, h.path.n);
ASSERT_BINEQ(u"+1-212-867-5309", h.path.p);
@ -323,7 +352,7 @@ TEST(ParseUrl, testTelephone) {
TEST(ParseUrl, testLolv6) {
struct Url h;
gc(ParseUrl("//[::1]:31337", -1, &h));
gc(ParseUrl("//[::1]:31337", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(3, h.host.n);
ASSERT_BINEQ(u"::1", h.host.p);
@ -334,14 +363,14 @@ TEST(ParseUrl, testLolv6) {
TEST(ParseUrl, testLolV6_withoutPort) {
struct Url h;
gc(ParseUrl("//[::1]", -1, &h));
gc(ParseUrl("//[::1]", -1, &h, 0));
gc(h.params.p);
ASSERT_STREQ("//[::1]", gc(EncodeUrl(&h, 0)));
}
TEST(ParseUrl, testLolv7) {
struct Url h;
gc(ParseUrl("//[vf.::1]", -1, &h));
gc(ParseUrl("//[vf.::1]", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(6, h.host.n);
ASSERT_BINEQ(u"vf.::1", h.host.p);
@ -352,14 +381,14 @@ TEST(ParseUrl, testLolv7) {
TEST(ParseUrl, testLolv7WithoutColon_weCantProduceLegalEncodingSadly) {
struct Url h;
gc(ParseUrl("//[v7.7.7.7]", -1, &h));
gc(ParseUrl("//[v7.7.7.7]", -1, &h, 0));
gc(h.params.p);
ASSERT_STREQ("//v7.7.7.7", gc(EncodeUrl(&h, 0)));
}
TEST(ParseUrl, testObviouslyIllegalIpLiteral_getsTreatedAsRegName) {
struct Url h;
gc(ParseUrl("//[vf.::1%00]", -1, &h));
gc(ParseUrl("//[vf.::1%00]", -1, &h, 0));
gc(h.params.p);
ASSERT_STREQ("//vf.%3A%3A1%00", gc(EncodeUrl(&h, 0)));
}
@ -411,7 +440,7 @@ TEST(EncodeUrl, testHostPortPlacedInHostField_ungoodIdea) {
TEST(ParseUrl, testUrlWithoutParams) {
struct Url h;
gc(ParseUrl("a://b@c/d#f", -1, &h));
gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.scheme.n);
ASSERT_EQ('a', h.scheme.p[0]);
@ -430,7 +459,7 @@ TEST(ParseUrl, testUrlWithoutParams) {
TEST(ParseUrl, testLatin1_doesNothing) {
struct Url h;
const char b[1] = {0377};
gc(ParseUrl(b, 1, &h));
gc(ParseUrl(b, 1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
@ -440,7 +469,7 @@ TEST(ParseUrl, testLatin1_doesNothing) {
TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {
struct Url h;
const char b[1] = {0377};
gc(ParseRequestUri(b, 1, &h));
gc(ParseUrl(b, 1, &h, kUrlPlus | kUrlLatin1));
gc(h.params.p);
ASSERT_EQ(2, h.path.n);
ASSERT_EQ(0, memcmp("\303\277", h.path.p, 2));
@ -448,7 +477,7 @@ TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {
TEST(ParseUrl, testPercentShrinkingMemory) {
struct Url h;
gc(ParseUrl("%Ff", 3, &h));
gc(ParseUrl("%Ff", 3, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
@ -458,7 +487,7 @@ TEST(ParseUrl, testPercentShrinkingMemory) {
TEST(ParseUrl, testEscapingWontOverrun) {
struct Url h;
char b[1] = {'%'};
gc(ParseUrl(b, 1, &h));
gc(ParseUrl(b, 1, &h, 0));
gc(h.params.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ(0, memcmp("%", h.path.p, 1));
@ -467,7 +496,7 @@ TEST(ParseUrl, testEscapingWontOverrun) {
TEST(ParseUrl, testBadPercent_getsIgnored) {
struct Url h;
gc(ParseUrl("%FZ", 3, &h));
gc(ParseUrl("%FZ", 3, &h, 0));
gc(h.params.p);
ASSERT_EQ(3, h.path.n);
ASSERT_EQ(0, memcmp("%FZ", h.path.p, 3));
@ -475,7 +504,7 @@ TEST(ParseUrl, testBadPercent_getsIgnored) {
TEST(ParseUrl, testFileUrl) {
struct Url h;
gc(ParseUrl("file:///etc/passwd", -1, &h));
gc(ParseUrl("file:///etc/passwd", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(4, h.scheme.n);
ASSERT_BINEQ(u"file", h.scheme.p);
@ -491,7 +520,7 @@ TEST(ParseUrl, testFileUrl) {
TEST(EncodeUrl, testModifyingParseResultAndReencoding_addsStructure) {
size_t n;
struct Url h;
gc(ParseUrl("rel", -1, &h));
gc(ParseUrl("rel", -1, &h, 0));
gc(h.params.p);
h.host.n = 7;
h.host.p = "justine";
@ -580,14 +609,14 @@ TEST(EncodeUrl, testEmptyRegName_isLegal) {
TEST(ParseUrl, testEmptyScheme_isNotPossible) {
struct Url h;
gc(ParseUrl(":", -1, &h));
gc(ParseUrl(":", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.scheme.n);
ASSERT_EQ(0, h.scheme.p);
ASSERT_EQ(1, h.path.n);
ASSERT_EQ(':', h.path.p[0]);
ASSERT_STREQ(":", gc(EncodeUrl(&h, 0)));
gc(ParseUrl("://hi", -1, &h));
gc(ParseUrl("://hi", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.scheme.n);
ASSERT_EQ(0, h.scheme.p);
@ -598,7 +627,7 @@ TEST(ParseUrl, testEmptyScheme_isNotPossible) {
TEST(ParseUrl, testDataUri) {
struct Url h;
gc(ParseUrl("", -1, &h));
gc(ParseUrl("", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.host.n);
ASSERT_EQ(0, h.host.p);
@ -611,7 +640,7 @@ TEST(ParseUrl, testDataUri) {
TEST(ParseUrl, testBadSchemeCharacter_parserAssumesItsPath) {
struct Url h;
gc(ParseUrl("fil\e://hi", -1, &h));
gc(ParseUrl("fil\e://hi", -1, &h, 0));
gc(h.params.p);
ASSERT_EQ(0, h.scheme.n);
ASSERT_EQ(0, h.scheme.p);
@ -673,7 +702,7 @@ TEST(ParseRequestUri, fuzz) {
for (j = 0; j < sizeof(B); ++j) {
B[j] = C[rand() % sizeof(C)];
}
free(ParseRequestUri(B, 8, &h));
free(ParseUrl(B, 8, &h, kUrlPlus | kUrlLatin1));
free(h.params.p);
}
}
@ -687,11 +716,11 @@ void A(void) {
BENCH(ParseUrl, bench) {
struct Url h;
EZBENCH2("ParseParams hyperion", donothing, A());
EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h)));
EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h, false)));
EZBENCH2("ParseUrl a://b@c/d#f", donothing,
free(ParseUrl("a://b@c/d#f", -1, &h)));
free(ParseUrl("a://b@c/d#f", -1, &h, false)));
EZBENCH2("ParseUrl a://b@c/d?z#f", donothing, ({
free(ParseUrl("a://b@c/?zd#f", -1, &h));
free(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
free(h.params.p);
}));
EZBENCH2("ParseHost", donothing, free(ParseHost("127.0.0.1:34832", 15, &h)));
@ -700,14 +729,14 @@ BENCH(ParseUrl, bench) {
BENCH(EncodeUrl, bench) {
struct Url h;
gc(ParseUrl("a", -1, &h));
gc(ParseUrl("a", -1, &h, 0));
EZBENCH2("EncodeUrl a", donothing, free(EncodeUrl(&h, 0)));
gc(ParseUrl("a://b@c/d#f", -1, &h));
gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
EZBENCH2("EncodeUrl a://b@c/d#f", donothing, free(EncodeUrl(&h, 0)));
gc(ParseUrl("a://b@c/?zd#f", -1, &h));
gc(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
gc(h.params.p);
EZBENCH2("EncodeUrl a://b@c/d?z#f", donothing, free(EncodeUrl(&h, 0)));
gc(ParseUrl(kHyperion, kHyperionSize, &h));
gc(ParseUrl(kHyperion, kHyperionSize, &h, 0));
gc(h.params.p);
EZBENCH2("EncodeUrl hyperion", donothing, free(EncodeUrl(&h, 0)));
}

View file

@ -36,12 +36,14 @@ static void LuaSetUrlView(lua_State *L, struct UrlView *v, const char *k) {
}
int LuaParseUrl(lua_State *L) {
int f;
void *m;
size_t n;
struct Url h;
const char *p;
p = luaL_checklstring(L, 1, &n);
m = ParseUrl(p, n, &h);
f = luaL_optinteger(L, 2, 0);
m = ParseUrl(p, n, &h, f);
lua_newtable(L);
LuaSetUrlView(L, &h.scheme, "scheme");
LuaSetUrlView(L, &h.user, "user");

View file

@ -107,7 +107,7 @@ static int LuaFetch(lua_State *L) {
/*
* Parse URL.
*/
_gc(ParseUrl(urlarg, urlarglen, &url));
_gc(ParseUrl(urlarg, urlarglen, &url, true));
_gc(url.params.p);
usingssl = false;
if (url.scheme.n) {

View file

@ -1335,28 +1335,50 @@ FUNCTIONS
Converts RFC1123 string that looks like this: Mon, 29 Mar 2021
15:37:13 GMT to a UNIX timestamp. See parsehttpdatetime.c.
ParseUrl(str) → URL
Parses URL, returning object having the following fields: scheme,
user, pass, host, port, path, params, fragment. This parser is
charset agnostic. Percent encoded bytes are decoded for all
fields. Returned values might contain things like NUL characters,
spaces, control codes, and non-canonical encodings. Absent can be
discerned from empty by checking if the pointer is set. There's no
failure condition for this routine. This is a permissive parser.
This doesn't normalize path segments like `.` or `..` so use
IsAcceptablePath() to check for those. No restrictions are imposed
beyond that which is strictly necessary for parsing. All the data
that is provided will be consumed to the one of the fields. Strict
conformance is enforced on some fields more than others, like
scheme, since it's the most non-deterministically defined field of
them all. Please note this is a URL parser, not a URI parser.
Which means we support everything everything the URI spec says we
should do except for the things we won't do, like tokenizing path
segments into an array and then nesting another array beneath each
of those for storing semicolon parameters. So this parser won't
make SIP easy. What it can do is parse HTTP URLs and most URIs
like data:opaque, better in fact than most things which claim to
be URI parsers.
ParseUrl(url:str[, flags:int]) → URL
Parses URL.
An object containing the following fields is returned:
- `scheme` is a string, e.g. `"http"`
- `user` is the username string, or nil if absent
- `pass` is the password string, or nil if absent
- `host` is the hostname string, or nil if `url` was a path
- `port` is the port string, or nil if absent
- `path` is the path string, or nil if absent
- `params` is the URL parameters, e.g. `/?a=b&c` would be
represented as the data structure `{{"a", "b"}, {"c"}, ...}`
- `fragment` is the stuff after the `#` character
`flags` may have:
- `kUrlPlus` to turn `+` into space
- `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
This parser is charset agnostic. Percent encoded bytes are
decoded for all fields. Returned values might contain things
like NUL characters, spaces, control codes, and non-canonical
encodings. Absent can be discerned from empty by checking if
the pointer is set.
There's no failure condition for this routine. This is a
permissive parser. This doesn't normalize path segments like
`.` or `..` so use IsAcceptablePath() to check for those. No
restrictions are imposed beyond that which is strictly
necessary for parsing. All the data that is provided will be
consumed into one of the fields. Strict conformance is
enforced on some fields more than others, like scheme, since
it's the most non-deterministically defined field of them all.
Please note this is a URL parser, not a URI parser. Which
means we support everything the URI spec says we
should do except for the things we won't do, like tokenizing
path segments into an array and then nesting another array
beneath each of those for storing semicolon parameters. So
this parser won't make SIP easy. What it can do is parse HTTP
URLs and most URIs like data:opaque, better in fact than most
things which claim to be URI parsers.
IsAcceptablePath(str) → bool
Returns true if path doesn't contain ".", ".." or "//" segments

View file

@ -99,6 +99,7 @@
#include "net/http/escape.h"
#include "net/http/http.h"
#include "net/http/ip.h"
#include "net/http/url.h"
#include "net/https/https.h"
#include "third_party/getopt/getopt.h"
#include "third_party/lua/cosmo.h"
@ -5123,6 +5124,8 @@ static void LuaStart(void) {
LuaSetConstant(L, "kLogWarn", kLogWarn);
LuaSetConstant(L, "kLogError", kLogError);
LuaSetConstant(L, "kLogFatal", kLogFatal);
LuaSetConstant(L, "kUrlPlus", kUrlPlus);
LuaSetConstant(L, "kUrlLatin1", kUrlLatin1);
// create a list of custom content types
lua_pushlightuserdata(L, (void *)&ctIdx); // push address as unique key
lua_newtable(L);
@ -5673,8 +5676,8 @@ static char *SynchronizeStream(void) {
static void ParseRequestParameters(void) {
uint32_t ip;
FreeLater(ParseRequestUri(inbuf.p + cpm.msg.uri.a,
cpm.msg.uri.b - cpm.msg.uri.a, &url));
FreeLater(ParseUrl(inbuf.p + cpm.msg.uri.a, cpm.msg.uri.b - cpm.msg.uri.a,
&url, kUrlPlus | kUrlLatin1));
if (!url.host.p) {
if (HasHeader(kHttpXForwardedHost) && //
!GetRemoteAddr(&ip, 0) && IsTrustedProxy(ip)) {

View file

@ -401,7 +401,7 @@ int main(int argc, char *argv[]) {
/*
* Parse URL.
*/
_gc(ParseUrl(urlarg, -1, &url));
_gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
_gc(url.params.p);
usessl = false;
if (url.scheme.n) {