Improve redbean method=get parameter handling

2024-05-18 03:22:40 +00:00 · 2022-09-19 19:23:24 -07:00 · 2022-09-19 19:23:24 -07:00 · 2cc1d5ac4c
parent 6e582d245b
commit 2cc1d5ac4c
9 changed files with 175 additions and 146 deletions
--- a/examples/curl.c
+++ b/examples/curl.c
@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
  struct Url url;
  char *host, *port;
  bool usessl = false;
-  _gc(ParseUrl(urlarg, -1, &url));
+  _gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
  _gc(url.params.p);
  if (url.scheme.n) {
    if (url.scheme.n == 5 && !memcasecmp(url.scheme.p, "https", 5)) {
--- a/net/http/parseurl.c
+++ b/net/http/parseurl.c
@ -28,8 +28,7 @@
 struct UrlParser {
  char *p, *q;
  const char *s;
-  unsigned c, i, n;
-  char isform, islatin1, isopaque;
+  unsigned c, i, n, f;
 };

 static void EmitLatin1(char **p, int c) {
@ -99,7 +98,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
          return false;
        }
      } else {
-        u->isopaque = true;
+        u->f |= kUrlOpaque;
        return false;
      }
    } else if (u->c == '#' || u->c == '?') {
@ -110,7 +109,7 @@ static bool ParseScheme(struct UrlParser *u, struct Url *h) {
    } else if (u->c == '%') {
      ParseEscape(u);
      return false;
-    } else if (u->c >= 0200 && u->islatin1) {
+    } else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
      EmitLatin1(&u->p, u->c);
      return false;
    } else {
@ -161,7 +160,7 @@ static void ParseAuthority(struct UrlParser *u, struct Url *h) {
      u->q = u->p;
    } else if (u->c == '%') {
      ParseEscape(u);
-    } else if (u->c >= 0200 && u->islatin1) {
+    } else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
      EmitLatin1(&u->p, u->c);
    } else {
      *u->p++ = u->c;
@ -188,11 +187,11 @@ static void ParsePath(struct UrlParser *u, struct UrlView *h) {
    u->c = u->s[u->i++] & 255;
    if (u->c == '#') {
      break;
-    } else if (u->c == '?' && !u->isopaque) {
+    } else if (u->c == '?' && !(u->f & kUrlOpaque)) {
      break;
    } else if (u->c == '%') {
      ParseEscape(u);
-    } else if (u->c >= 0200 && u->islatin1) {
+    } else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
      EmitLatin1(&u->p, u->c);
    } else {
      *u->p++ = u->c;
@ -213,7 +212,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
    } else if (u->c == '%') {
      ParseEscape(u);
    } else if (u->c == '+') {
-      *u->p++ = u->isform ? ' ' : '+';
+      *u->p++ = (u->f & kUrlPlus) ? ' ' : '+';
    } else if (u->c == '&') {
      EmitVal(u, h, t);
      t = false;
@ -223,7 +222,7 @@ static void ParseQuery(struct UrlParser *u, struct UrlParams *h) {
      } else {
        *u->p++ = '=';
      }
-    } else if (u->c >= 0200 && u->islatin1) {
+    } else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
      EmitLatin1(&u->p, u->c);
    } else {
      *u->p++ = u->c;
@ -237,7 +236,7 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
    u->c = u->s[u->i++] & 255;
    if (u->c == '%') {
      ParseEscape(u);
-    } else if (u->c >= 0200 && u->islatin1) {
+    } else if (u->c >= 0200 && (u->f & kUrlLatin1)) {
      EmitLatin1(&u->p, u->c);
    } else {
      *u->p++ = u->c;
@ -248,28 +247,6 @@ static void ParseFragment(struct UrlParser *u, struct UrlView *h) {
  u->q = u->p;
 }

-static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
-  char *m;
-  struct UrlParser u;
-  if (n == -1) n = s ? strlen(s) : 0;
-  u.i = 0;
-  u.c = 0;
-  u.s = s;
-  u.n = n;
-  u.isform = false;
-  u.isopaque = false;
-  u.islatin1 = latin1;
-  bzero(h, sizeof(*h));
-  if ((m = malloc(latin1 ? u.n * 2 : u.n))) {
-    u.q = u.p = m;
-    if (ParseScheme(&u, h)) ParseAuthority(&u, h);
-    if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
-    if (u.c == '?') ParseQuery(&u, &h->params);
-    if (u.c == '#') ParseFragment(&u, &h->fragment);
-  }
-  return m;
-}
-
 /**
 * Parses URL.
 *
@ -298,43 +275,39 @@ static char *ParseUrlImpl(const char *s, size_t n, struct Url *h, bool latin1) {
 * @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
 * @param n is byte length and -1 implies strlen
 * @param h is assumed to be uninitialized
+ * @param f is flags which may have:
+ *     - `kUrlPlus` to turn `+` into space in query params
+ *     - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
 * @return memory backing UrlView needing free (and h.params.p too)
 * @see URI Generic Syntax RFC3986 RFC2396
 * @see EncodeUrl()
 */
-char *ParseUrl(const char *s, size_t n, struct Url *h) {
-  return ParseUrlImpl(s, n, h, false);
-}
-
-/**
- * Parses HTTP Request-URI.
- *
- * The input is ISO-8859-1 which is transcoded to UTF-8. Therefore we
- * assume percent-encoded bytes are expressed as UTF-8. Returned values
- * might contain things like NUL characters, C0, and C1 control codes.
- * UTF-8 isn't checked for validity and may contain overlong values.
- * Absent can be discerned from empty by checking if the pointer is set.
- *
- * There's no failure condition for this routine. This is a permissive
- * parser that doesn't impose character restrictions beyond what is
- * necessary for parsing. This doesn't normalize path segments like `.`
- * or `..`. Use IsAcceptablePath() to check for those.
- *
- * @param s is value like `/hi?x=y&z` or `http://a.example/hi#x`
- * @param n is byte length and -1 implies strlen
- * @param h is assumed to be uninitialized
- * @return memory backing UrlView needing free (and h.params.p too)
- */
-char *ParseRequestUri(const char *s, size_t n, struct Url *h) {
-  return ParseUrlImpl(s, n, h, true);
+char *ParseUrl(const char *s, size_t n, struct Url *h, int f) {
+  char *m;
+  struct UrlParser u;
+  if (n == -1) n = s ? strlen(s) : 0;
+  u.i = 0;
+  u.c = 0;
+  u.s = s;
+  u.n = n;
+  u.f = f;
+  bzero(h, sizeof(*h));
+  if ((m = malloc((f & kUrlLatin1) ? u.n * 2 : u.n))) {
+    u.q = u.p = m;
+    if (ParseScheme(&u, h)) ParseAuthority(&u, h);
+    if (u.c != '#' && u.c != '?') ParsePath(&u, &h->path);
+    if (u.c == '?') ParseQuery(&u, &h->params);
+    if (u.c == '#') ParseFragment(&u, &h->fragment);
+  }
+  return m;
 }

 /**
 * Parses HTTP POST key-value params.
 *
- * These are similar to the parameters found in a Request-URI. The main
- * difference is that `+` is translated into space here. The mime type
- * for this is application/x-www-form-urlencoded.
+ * These are similar to the parameters found in a Request-URI, except
+ * usually submitted via an HTTP POST request. We translate `+` into
+ * space. The mime type is application/x-www-form-urlencoded.
 *
 * This parser is charset agnostic. Returned values might contain things
 * like NUL characters, NUL, control codes, and non-canonical encodings.
@ -357,9 +330,7 @@ char *ParseParams(const char *s, size_t n, struct UrlParams *h) {
  u.s = s;
  u.n = n;
  u.c = '?';
-  u.isform = true;
-  u.islatin1 = false;
-  u.isopaque = false;
+  u.f = kUrlPlus;
  if ((m = malloc(u.n))) {
    u.q = u.p = m;
    ParseQuery(&u, h);
@ -399,9 +370,7 @@ char *ParseHost(const char *s, size_t n, struct Url *h) {
  u.c = 0;
  u.s = s;
  u.n = n;
-  u.isform = false;
-  u.islatin1 = true;
-  u.isopaque = false;
+  u.f = kUrlLatin1;
  if ((m = malloc(u.n * 2))) {
    u.q = u.p = m;
    ParseAuthority(&u, h);
--- a/net/http/url.h
+++ b/net/http/url.h
@ -1,5 +1,10 @@
 #ifndef COSMOPOLITAN_NET_HTTP_URL_H_
 #define COSMOPOLITAN_NET_HTTP_URL_H_
+
+#define kUrlPlus   1
+#define kUrlLatin1 2
+#define kUrlOpaque 4
+
 #if !(__ASSEMBLER__ + __LINKER__ + 0)
 COSMOPOLITAN_C_START_

@ -28,9 +33,8 @@ struct Url {
 };

 char *EncodeUrl(struct Url *, size_t *);
-char *ParseUrl(const char *, size_t, struct Url *);
+char *ParseUrl(const char *, size_t, struct Url *, int);
 char *ParseParams(const char *, size_t, struct UrlParams *);
-char *ParseRequestUri(const char *, size_t, struct Url *);
 char *ParseHost(const char *, size_t, struct Url *);
 char *EscapeUrlView(char *, struct UrlView *, const char[256]);

--- a/test/net/http/parseurl_test.c
+++ b/test/net/http/parseurl_test.c
@ -16,6 +16,7 @@
 │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/fmt/internal.h"
 #include "libc/limits.h"
 #include "libc/mem/gc.internal.h"
 #include "libc/mem/mem.h"
@ -29,7 +30,7 @@

 TEST(ParseUrl, testEmpty) {
  struct Url h;
-  gc(ParseUrl(0, 0, &h));
+  gc(ParseUrl(0, 0, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.params.n);
  ASSERT_STREQ("", gc(EncodeUrl(&h, 0)));
@ -37,7 +38,7 @@ TEST(ParseUrl, testEmpty) {

 TEST(ParseUrl, testFragment) {
  struct Url h;
-  gc(ParseUrl("#x", -1, &h));
+  gc(ParseUrl("#x", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.path.n);
  ASSERT_EQ(1, h.fragment.n);
@ -47,7 +48,7 @@ TEST(ParseUrl, testFragment) {

 TEST(ParseUrl, testFragmentAbsent_isNull) {
  struct Url h;
-  gc(ParseUrl("", -1, &h));
+  gc(ParseUrl("", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.fragment.p);
  ASSERT_EQ(0, h.fragment.n);
@ -56,7 +57,7 @@ TEST(ParseUrl, testFragmentAbsent_isNull) {

 TEST(ParseUrl, testFragmentEmpty_isNonNull) {
  struct Url h;
-  gc(ParseUrl("#", -1, &h)); /* python's uri parser is wrong here */
+  gc(ParseUrl("#", -1, &h, 0)); /* python's uri parser is wrong here */
  gc(h.params.p);
  ASSERT_NE(0, h.fragment.p);
  ASSERT_EQ(0, h.fragment.n);
@ -65,7 +66,7 @@ TEST(ParseUrl, testFragmentEmpty_isNonNull) {

 TEST(ParseUrl, testPathFragment) {
  struct Url h;
-  gc(ParseUrl("x#y", -1, &h));
+  gc(ParseUrl("x#y", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ('x', h.path.p[0]);
@ -76,7 +77,7 @@ TEST(ParseUrl, testPathFragment) {

 TEST(ParseUrl, testAbsolutePath) {
  struct Url h;
-  gc(ParseUrl("/x/y", -1, &h));
+  gc(ParseUrl("/x/y", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(4, h.path.n);
  ASSERT_BINEQ(u"/x/y", h.path.p);
@ -85,7 +86,7 @@ TEST(ParseUrl, testAbsolutePath) {

 TEST(ParseUrl, testRelativePath1) {
  struct Url h;
-  gc(ParseUrl("x", -1, &h));
+  gc(ParseUrl("x", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ('x', h.path.p[0]);
@ -94,7 +95,7 @@ TEST(ParseUrl, testRelativePath1) {

 TEST(ParseUrl, testOptions) {
  struct Url h;
-  gc(ParseUrl("*", -1, &h));
+  gc(ParseUrl("*", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ('*', h.path.p[0]);
@ -103,7 +104,7 @@ TEST(ParseUrl, testOptions) {

 TEST(ParseUrl, testRelativePath2) {
  struct Url h;
-  gc(ParseUrl("x/y", -1, &h));
+  gc(ParseUrl("x/y", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(3, h.path.n);
  ASSERT_BINEQ(u"x/y", h.path.p);
@ -112,7 +113,7 @@ TEST(ParseUrl, testRelativePath2) {

 TEST(ParseUrl, testRoot) {
  struct Url h;
-  gc(ParseUrl("/", -1, &h));
+  gc(ParseUrl("/", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ('/', h.path.p[0]);
@ -121,7 +122,7 @@ TEST(ParseUrl, testRoot) {

 TEST(ParseUrl, testSchemePath) {
  struct Url h;
-  gc(ParseUrl("x:y", -1, &h));
+  gc(ParseUrl("x:y", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.scheme.n);
  ASSERT_BINEQ(u"x", h.scheme.p);
@ -132,7 +133,7 @@ TEST(ParseUrl, testSchemePath) {

 TEST(ParseUrl, testSchemeAuthority) {
  struct Url h;
-  gc(ParseUrl("x://y", -1, &h));
+  gc(ParseUrl("x://y", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.scheme.n);
  ASSERT_EQ('x', h.scheme.p[0]);
@ -141,9 +142,37 @@ TEST(ParseUrl, testSchemeAuthority) {
  ASSERT_STREQ("x://y", gc(EncodeUrl(&h, 0)));
 }

+TEST(ParseUrl, testParamsPlus_maybeYes) {
+  struct Url h;
+  gc(ParseUrl("x?q=hi+there", -1, &h, kUrlPlus));
+  gc(h.params.p);
+  ASSERT_EQ(1, h.path.n);
+  ASSERT_BINEQ(u"x", h.path.p);
+  ASSERT_EQ(1, h.params.n);
+  ASSERT_EQ(1, h.params.p[0].key.n);
+  ASSERT_EQ(8, h.params.p[0].val.n);
+  ASSERT_BINEQ(u"q", h.params.p[0].key.p);
+  ASSERT_BINEQ(u"hi there", h.params.p[0].val.p);
+  ASSERT_STREQ("x?q=hi%20there", gc(EncodeUrl(&h, 0)));
+}
+
+TEST(ParseUrl, testParamsPlus_maybeNot) {
+  struct Url h;
+  gc(ParseUrl("x?q=hi+there", -1, &h, 0));
+  gc(h.params.p);
+  ASSERT_EQ(1, h.path.n);
+  ASSERT_BINEQ(u"x", h.path.p);
+  ASSERT_EQ(1, h.params.n);
+  ASSERT_EQ(1, h.params.p[0].key.n);
+  ASSERT_EQ(8, h.params.p[0].val.n);
+  ASSERT_BINEQ(u"q", h.params.p[0].key.p);
+  ASSERT_BINEQ(u"hi+there", h.params.p[0].val.p);
+  ASSERT_STREQ("x?q=hi%2Bthere", gc(EncodeUrl(&h, 0)));
+}
+
 TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {
  struct Url h;
-  gc(ParseUrl("x?+", -1, &h));
+  gc(ParseUrl("x?+", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_BINEQ(u"x", h.path.p);
@ -155,7 +184,7 @@ TEST(ParseUrl, testParamsQuestion_doesntTurnIntoSpace) {

 TEST(ParseUrl, testUrl) {
  struct Url h;
-  gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h));
+  gc(ParseUrl("a://b:B@c:C/d?e#f", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.scheme.n);
  ASSERT_EQ('a', h.scheme.p[0]);
@ -180,7 +209,7 @@ TEST(ParseUrl, testUrl) {

 TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {
  struct Url h;
-  gc(ParseUrl("?=", -1, &h));
+  gc(ParseUrl("?=", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.params.n);
  ASSERT_EQ(0, h.params.p[0].key.n);
@ -192,7 +221,7 @@ TEST(ParseUrl, testEmptyQueryKeyVal_decodesToEmptyStrings) {

 TEST(ParseUrl, testMultipleEquals_goesIntoValue) {
  struct Url h;
-  gc(ParseUrl("?==", -1, &h));
+  gc(ParseUrl("?==", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.params.n);
  ASSERT_EQ(0, h.params.p[0].key.n);
@ -204,7 +233,7 @@ TEST(ParseUrl, testMultipleEquals_goesIntoValue) {

 TEST(ParseUrl, testUrlWithoutScheme) {
  struct Url h;
-  gc(ParseUrl("//b@c/d?e#f", -1, &h));
+  gc(ParseUrl("//b@c/d?e#f", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.scheme.n);
  ASSERT_EQ(1, h.user.n);
@ -225,7 +254,7 @@ TEST(ParseUrl, testUrlWithoutScheme) {

 TEST(ParseUrl, testUrlWithoutUser) {
  struct Url h;
-  gc(ParseUrl("a://c/d?e#f", -1, &h));
+  gc(ParseUrl("a://c/d?e#f", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.scheme.n);
  ASSERT_EQ('a', h.scheme.p[0]);
@ -248,11 +277,11 @@ TEST(ParseUrl, testUrlWithoutUser) {

 TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {
  struct Url h;
-  gc(ParseUrl("", -1, &h));
+  gc(ParseUrl("", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.params.n);
  ASSERT_EQ(NULL, h.params.p);
-  gc(ParseUrl("?", -1, &h)); /* python's uri parser is wrong here */
+  gc(ParseUrl("?", -1, &h, 0)); /* python's uri parser is wrong here */
  gc(h.params.p);
  ASSERT_EQ(0, h.params.n);
  ASSERT_NE(NULL, h.params.p);
@ -260,7 +289,7 @@ TEST(ParseUrl, testEmptyParams_absentCanBeDiscerned) {

 TEST(ParseUrl, testWeirdAmps_areReproducible) {
  struct Url h;
-  gc(ParseUrl("?&&", -1, &h));
+  gc(ParseUrl("?&&", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(3, h.params.n);
  ASSERT_EQ(0, h.params.p[0].key.n);
@ -280,7 +309,7 @@ TEST(ParseUrl, testWeirdAmps_areReproducible) {

 TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {
  struct Url h; /* python's uri parser is wrong here */
-  gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
+  gc(ParseUrl("s:o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(26, h.path.n);
  ASSERT_EQ(0, memcmp(h.path.p, "o!$%&'()*+,-./09:;=?@AZ_az", 26));
@ -292,7 +321,7 @@ TEST(ParseUrl, testOpaquePart_canLetQuestionMarkGoInPath) {

 TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {
  struct Url h;
-  gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
+  gc(ParseUrl("s:/o!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(20, h.path.n);
  ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=", 20));
@ -303,7 +332,7 @@ TEST(ParseUrl, testSchemePathWithoutAuthority_paramsAreAllowed) {

 TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {
  struct Url h;
-  gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h));
+  gc(ParseUrl("s:%2Fo!$%&'()*+,-./09:;=?@AZ_az#fragged", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(27, h.path.n);
  ASSERT_EQ(0, memcmp(h.path.p, "/o!$%&'()*+,-./09:;=?@AZ_az", 27));
@ -314,7 +343,7 @@ TEST(ParseUrl, testOpaquePart_permitsPercentEncoding) {

 TEST(ParseUrl, testTelephone) {
  struct Url h;
-  gc(ParseUrl("tel:+1-212-867-5309", -1, &h));
+  gc(ParseUrl("tel:+1-212-867-5309", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(15, h.path.n);
  ASSERT_BINEQ(u"+1-212-867-5309", h.path.p);
@ -323,7 +352,7 @@ TEST(ParseUrl, testTelephone) {

 TEST(ParseUrl, testLolv6) {
  struct Url h;
-  gc(ParseUrl("//[::1]:31337", -1, &h));
+  gc(ParseUrl("//[::1]:31337", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(3, h.host.n);
  ASSERT_BINEQ(u"::1", h.host.p);
@ -334,14 +363,14 @@ TEST(ParseUrl, testLolv6) {

 TEST(ParseUrl, testLolV6_withoutPort) {
  struct Url h;
-  gc(ParseUrl("//[::1]", -1, &h));
+  gc(ParseUrl("//[::1]", -1, &h, 0));
  gc(h.params.p);
  ASSERT_STREQ("//[::1]", gc(EncodeUrl(&h, 0)));
 }

 TEST(ParseUrl, testLolv7) {
  struct Url h;
-  gc(ParseUrl("//[vf.::1]", -1, &h));
+  gc(ParseUrl("//[vf.::1]", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(6, h.host.n);
  ASSERT_BINEQ(u"vf.::1", h.host.p);
@ -352,14 +381,14 @@ TEST(ParseUrl, testLolv7) {

 TEST(ParseUrl, testLolv7WithoutColon_weCantProduceLegalEncodingSadly) {
  struct Url h;
-  gc(ParseUrl("//[v7.7.7.7]", -1, &h));
+  gc(ParseUrl("//[v7.7.7.7]", -1, &h, 0));
  gc(h.params.p);
  ASSERT_STREQ("//v7.7.7.7", gc(EncodeUrl(&h, 0)));
 }

 TEST(ParseUrl, testObviouslyIllegalIpLiteral_getsTreatedAsRegName) {
  struct Url h;
-  gc(ParseUrl("//[vf.::1%00]", -1, &h));
+  gc(ParseUrl("//[vf.::1%00]", -1, &h, 0));
  gc(h.params.p);
  ASSERT_STREQ("//vf.%3A%3A1%00", gc(EncodeUrl(&h, 0)));
 }
@ -411,7 +440,7 @@ TEST(EncodeUrl, testHostPortPlacedInHostField_ungoodIdea) {

 TEST(ParseUrl, testUrlWithoutParams) {
  struct Url h;
-  gc(ParseUrl("a://b@c/d#f", -1, &h));
+  gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.scheme.n);
  ASSERT_EQ('a', h.scheme.p[0]);
@ -430,7 +459,7 @@ TEST(ParseUrl, testUrlWithoutParams) {
 TEST(ParseUrl, testLatin1_doesNothing) {
  struct Url h;
  const char b[1] = {0377};
-  gc(ParseUrl(b, 1, &h));
+  gc(ParseUrl(b, 1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
@ -440,7 +469,7 @@ TEST(ParseUrl, testLatin1_doesNothing) {
 TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {
  struct Url h;
  const char b[1] = {0377};
-  gc(ParseRequestUri(b, 1, &h));
+  gc(ParseUrl(b, 1, &h, kUrlPlus | kUrlLatin1));
  gc(h.params.p);
  ASSERT_EQ(2, h.path.n);
  ASSERT_EQ(0, memcmp("\303\277", h.path.p, 2));
@ -448,7 +477,7 @@ TEST(ParseRequestUri, testLatin1_expandsMemoryToUtf8) {

 TEST(ParseUrl, testPercentShrinkingMemory) {
  struct Url h;
-  gc(ParseUrl("%Ff", 3, &h));
+  gc(ParseUrl("%Ff", 3, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ(0, memcmp("\377", h.path.p, 1));
@ -458,7 +487,7 @@ TEST(ParseUrl, testPercentShrinkingMemory) {
 TEST(ParseUrl, testEscapingWontOverrun) {
  struct Url h;
  char b[1] = {'%'};
-  gc(ParseUrl(b, 1, &h));
+  gc(ParseUrl(b, 1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ(0, memcmp("%", h.path.p, 1));
@ -467,7 +496,7 @@ TEST(ParseUrl, testEscapingWontOverrun) {

 TEST(ParseUrl, testBadPercent_getsIgnored) {
  struct Url h;
-  gc(ParseUrl("%FZ", 3, &h));
+  gc(ParseUrl("%FZ", 3, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(3, h.path.n);
  ASSERT_EQ(0, memcmp("%FZ", h.path.p, 3));
@ -475,7 +504,7 @@ TEST(ParseUrl, testBadPercent_getsIgnored) {

 TEST(ParseUrl, testFileUrl) {
  struct Url h;
-  gc(ParseUrl("file:///etc/passwd", -1, &h));
+  gc(ParseUrl("file:///etc/passwd", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(4, h.scheme.n);
  ASSERT_BINEQ(u"file", h.scheme.p);
@ -491,7 +520,7 @@ TEST(ParseUrl, testFileUrl) {
 TEST(EncodeUrl, testModifyingParseResultAndReencoding_addsStructure) {
  size_t n;
  struct Url h;
-  gc(ParseUrl("rel", -1, &h));
+  gc(ParseUrl("rel", -1, &h, 0));
  gc(h.params.p);
  h.host.n = 7;
  h.host.p = "justine";
@ -580,14 +609,14 @@ TEST(EncodeUrl, testEmptyRegName_isLegal) {

 TEST(ParseUrl, testEmptyScheme_isNotPossible) {
  struct Url h;
-  gc(ParseUrl(":", -1, &h));
+  gc(ParseUrl(":", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.scheme.n);
  ASSERT_EQ(0, h.scheme.p);
  ASSERT_EQ(1, h.path.n);
  ASSERT_EQ(':', h.path.p[0]);
  ASSERT_STREQ(":", gc(EncodeUrl(&h, 0)));
-  gc(ParseUrl("://hi", -1, &h));
+  gc(ParseUrl("://hi", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.scheme.n);
  ASSERT_EQ(0, h.scheme.p);
@ -598,7 +627,7 @@ TEST(ParseUrl, testEmptyScheme_isNotPossible) {

 TEST(ParseUrl, testDataUri) {
  struct Url h;
-  gc(ParseUrl("data:image/png;base64,09AZaz+/==", -1, &h));
+  gc(ParseUrl("data:image/png;base64,09AZaz+/==", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.host.n);
  ASSERT_EQ(0, h.host.p);
@ -611,7 +640,7 @@ TEST(ParseUrl, testDataUri) {

 TEST(ParseUrl, testBadSchemeCharacter_parserAssumesItsPath) {
  struct Url h;
-  gc(ParseUrl("fil\e://hi", -1, &h));
+  gc(ParseUrl("fil\e://hi", -1, &h, 0));
  gc(h.params.p);
  ASSERT_EQ(0, h.scheme.n);
  ASSERT_EQ(0, h.scheme.p);
@ -673,7 +702,7 @@ TEST(ParseRequestUri, fuzz) {
    for (j = 0; j < sizeof(B); ++j) {
      B[j] = C[rand() % sizeof(C)];
    }
-    free(ParseRequestUri(B, 8, &h));
+    free(ParseUrl(B, 8, &h, kUrlPlus | kUrlLatin1));
    free(h.params.p);
  }
 }
@ -687,11 +716,11 @@ void A(void) {
 BENCH(ParseUrl, bench) {
  struct Url h;
  EZBENCH2("ParseParams hyperion", donothing, A());
-  EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h)));
+  EZBENCH2("ParseUrl a", donothing, free(ParseUrl("a", -1, &h, 0)));
  EZBENCH2("ParseUrl a://b@c/d#f", donothing,
-           free(ParseUrl("a://b@c/d#f", -1, &h)));
+           free(ParseUrl("a://b@c/d#f", -1, &h, 0)));
  EZBENCH2("ParseUrl a://b@c/d?z#f", donothing, ({
-             free(ParseUrl("a://b@c/?zd#f", -1, &h));
+             free(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
             free(h.params.p);
           }));
  EZBENCH2("ParseHost", donothing, free(ParseHost("127.0.0.1:34832", 15, &h)));
@ -700,14 +729,14 @@ BENCH(ParseUrl, bench) {

 BENCH(EncodeUrl, bench) {
  struct Url h;
-  gc(ParseUrl("a", -1, &h));
+  gc(ParseUrl("a", -1, &h, 0));
  EZBENCH2("EncodeUrl a", donothing, free(EncodeUrl(&h, 0)));
-  gc(ParseUrl("a://b@c/d#f", -1, &h));
+  gc(ParseUrl("a://b@c/d#f", -1, &h, 0));
  EZBENCH2("EncodeUrl a://b@c/d#f", donothing, free(EncodeUrl(&h, 0)));
-  gc(ParseUrl("a://b@c/?zd#f", -1, &h));
+  gc(ParseUrl("a://b@c/?zd#f", -1, &h, 0));
  gc(h.params.p);
  EZBENCH2("EncodeUrl a://b@c/d?z#f", donothing, free(EncodeUrl(&h, 0)));
-  gc(ParseUrl(kHyperion, kHyperionSize, &h));
+  gc(ParseUrl(kHyperion, kHyperionSize, &h, 0));
  gc(h.params.p);
  EZBENCH2("EncodeUrl hyperion", donothing, free(EncodeUrl(&h, 0)));
 }
--- a/third_party/lua/luaparseurl.c
+++ b/third_party/lua/luaparseurl.c
@ -36,12 +36,14 @@ static void LuaSetUrlView(lua_State *L, struct UrlView *v, const char *k) {
 }

 int LuaParseUrl(lua_State *L) {
+  int f;
  void *m;
  size_t n;
  struct Url h;
  const char *p;
  p = luaL_checklstring(L, 1, &n);
-  m = ParseUrl(p, n, &h);
+  f = luaL_optinteger(L, 2, 0);
+  m = ParseUrl(p, n, &h, f);
  lua_newtable(L);
  LuaSetUrlView(L, &h.scheme, "scheme");
  LuaSetUrlView(L, &h.user, "user");
--- a/tool/net/fetch.inc
+++ b/tool/net/fetch.inc
@ -107,7 +107,7 @@ static int LuaFetch(lua_State *L) {
  /*
   * Parse URL.
   */
-  _gc(ParseUrl(urlarg, urlarglen, &url));
+  _gc(ParseUrl(urlarg, urlarglen, &url, kUrlPlus));
  _gc(url.params.p);
  usingssl = false;
  if (url.scheme.n) {
--- a/tool/net/help.txt
+++ b/tool/net/help.txt
@ -1335,28 +1335,50 @@ FUNCTIONS
          Converts RFC1123 string that looks like this: Mon, 29 Mar 2021
          15:37:13 GMT to a UNIX timestamp. See parsehttpdatetime.c.

-  ParseUrl(str) → URL
-          Parses URL, returning object having the following fields: scheme,
-          user, pass, host, port, path, params, fragment. This parser is
-          charset agnostic. Percent encoded bytes are decoded for all
-          fields. Returned values might contain things like NUL characters,
-          spaces, control codes, and non-canonical encodings. Absent can be
-          discerned from empty by checking if the pointer is set. There's no
-          failure condition for this routine. This is a permissive parser.
-          This doesn't normalize path segments like `.` or `..` so use
-          IsAcceptablePath() to check for those. No restrictions are imposed
-          beyond that which is strictly necessary for parsing. All the data
-          that is provided will be consumed to the one of the fields. Strict
-          conformance is enforced on some fields more than others, like
-          scheme, since it's the most non-deterministically defined field of
-          them all. Please note this is a URL parser, not a URI parser.
-          Which means we support everything everything the URI spec says we
-          should do except for the things we won't do, like tokenizing path
-          segments into an array and then nesting another array beneath each
-          of those for storing semicolon parameters. So this parser won't
-          make SIP easy. What it can do is parse HTTP URLs and most URIs
-          like data:opaque, better in fact than most things which claim to
-          be URI parsers.
+  ParseUrl(url:str[, flags:int]) → URL
+
+          Parses URL.
+
+          An object containing the following fields is returned:
+
+          - `scheme` is a string, e.g. `"http"`
+          - `user` is the username string, or nil if absent
+          - `pass` is the password string, or nil if absent
+          - `host` is the hostname string, or nil if `url` was a path
+          - `port` is the port string, or nil if absent
+          - `path` is the path string, or nil if absent
+          - `params` is the URL parameters, e.g. `/?a=b&c` would be
+            represented as the data structure `{{"a", "b"}, {"c"}, ...}`
+          - `fragment` is the stuff after the `#` character
+
+          `flags` may have:
+
+          - `kUrlPlus` to turn `+` into space
+          - `kUrlLatin1` to transcode ISO-8859-1 input into UTF-8
+
+          This parser is charset agnostic. Percent encoded bytes are
+          decoded for all fields. Returned values might contain things
+          like NUL characters, spaces, control codes, and non-canonical
+          encodings. Absent can be discerned from empty by checking if
+          the field is nil.
+
+          There's no failure condition for this routine. This is a
+          permissive parser. This doesn't normalize path segments like
+          `.` or `..` so use IsAcceptablePath() to check for those. No
+          restrictions are imposed beyond that which is strictly
+          necessary for parsing. All the data that is provided will be
+          consumed into one of the fields. Strict conformance is
+          enforced on some fields more than others, like scheme, since
+          it's the most non-deterministically defined field of them all.
+
+          Please note this is a URL parser, not a URI parser. Which
+          means we support everything the URI spec says we
+          should do except for the things we won't do, like tokenizing
+          path segments into an array and then nesting another array
+          beneath each of those for storing semicolon parameters. So
+          this parser won't make SIP easy. What it can do is parse HTTP
+          URLs and most URIs like data:opaque, better in fact than most
+          things which claim to be URI parsers.

  IsAcceptablePath(str) → bool
          Returns true if path doesn't contain ".", ".." or "//" segments
--- a/tool/net/redbean.c
+++ b/tool/net/redbean.c
@ -99,6 +99,7 @@
 #include "net/http/escape.h"
 #include "net/http/http.h"
 #include "net/http/ip.h"
+#include "net/http/url.h"
 #include "net/https/https.h"
 #include "third_party/getopt/getopt.h"
 #include "third_party/lua/cosmo.h"
@ -5123,6 +5124,8 @@ static void LuaStart(void) {
  LuaSetConstant(L, "kLogWarn", kLogWarn);
  LuaSetConstant(L, "kLogError", kLogError);
  LuaSetConstant(L, "kLogFatal", kLogFatal);
+  LuaSetConstant(L, "kUrlPlus", kUrlPlus);
+  LuaSetConstant(L, "kUrlLatin1", kUrlLatin1);
  // create a list of custom content types
  lua_pushlightuserdata(L, (void *)&ctIdx);  // push address as unique key
  lua_newtable(L);
@ -5673,8 +5676,8 @@ static char *SynchronizeStream(void) {

 static void ParseRequestParameters(void) {
  uint32_t ip;
-  FreeLater(ParseRequestUri(inbuf.p + cpm.msg.uri.a,
-                            cpm.msg.uri.b - cpm.msg.uri.a, &url));
+  FreeLater(ParseUrl(inbuf.p + cpm.msg.uri.a, cpm.msg.uri.b - cpm.msg.uri.a,
+                     &url, kUrlPlus | kUrlLatin1));
  if (!url.host.p) {
    if (HasHeader(kHttpXForwardedHost) &&  //
        !GetRemoteAddr(&ip, 0) && IsTrustedProxy(ip)) {
--- a/tool/net/wb.c
+++ b/tool/net/wb.c
@ -401,7 +401,7 @@ int main(int argc, char *argv[]) {
  /*
   * Parse URL.
   */
-  _gc(ParseUrl(urlarg, -1, &url));
+  _gc(ParseUrl(urlarg, -1, &url, kUrlPlus));
  _gc(url.params.p);
  usessl = false;
  if (url.scheme.n) {