diff --git a/lib/libwebsockets.c b/lib/libwebsockets.c index d0e6720ed9978a217bea79b4baeab727d9660699..a42d2740bef2b765a8ff43d5dd1b941fc0e0e92b 100644 --- a/lib/libwebsockets.c +++ b/lib/libwebsockets.c @@ -2587,6 +2587,215 @@ lws_snprintf(char *str, size_t size, const char *format, ...) } +typedef enum { + LWS_TOKZS_LEADING_WHITESPACE, + LWS_TOKZS_QUOTED_STRING, + LWS_TOKZS_TOKEN, + LWS_TOKZS_TOKEN_POST_TERMINAL +} lws_tokenize_state; + +int +lws_tokenize(struct lws_tokenize *ts) +{ + lws_tokenize_state state = LWS_TOKZS_LEADING_WHITESPACE; + char c, num = -1, flo = 0; + + ts->token = NULL; + ts->token_len = 0; + + while (ts->len) { + c = *ts->start++; + ts->len--; + + lwsl_debug("%s: %c (%d) %d\n", __func__, c, state, (int)ts->len); + + if (!c) + break; + + /* whitespace */ + + if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || + c == '\f') { + switch (state) { + case LWS_TOKZS_LEADING_WHITESPACE: + case LWS_TOKZS_TOKEN_POST_TERMINAL: + continue; + case LWS_TOKZS_QUOTED_STRING: + ts->token_len++; + continue; + case LWS_TOKZS_TOKEN: + /* we want to scan forward to look for = */ + + state = LWS_TOKZS_TOKEN_POST_TERMINAL; + continue; + } + } + + /* quoted string */ + + if (c == '\"') { + if (state == LWS_TOKZS_QUOTED_STRING) + return LWS_TOKZE_QUOTED_STRING; + + /* starting a quoted string */ + + if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) { + if (ts->delim == LWSTZ_DT_NEED_DELIM) + return LWS_TOKZE_ERR_COMMA_LIST; + ts->delim = LWSTZ_DT_NEED_DELIM; + } + + state = LWS_TOKZS_QUOTED_STRING; + ts->token = ts->start; + ts->token_len = 0; + + continue; + } + + /* token= aggregation */ + + if (c == '=' && (state == LWS_TOKZS_TOKEN_POST_TERMINAL || + state == LWS_TOKZS_TOKEN)) { + if (num == 1) + return LWS_TOKZE_ERR_NUM_ON_LHS; + /* swallow the = */ + return LWS_TOKZE_TOKEN_NAME_EQUALS; + } + + /* optional token: aggregation */ + + if ((ts->flags & LWS_TOKENIZE_F_AGG_COLON) && c == ':' && + (state == LWS_TOKZS_TOKEN_POST_TERMINAL || + state == LWS_TOKZS_TOKEN)) + /* swallow the : */ + return LWS_TOKZE_TOKEN_NAME_COLON; + + /* aggregate . in a number as a float */ + + if (c == '.' && state == LWS_TOKZS_TOKEN && num == 1) { + if (flo) + return LWS_TOKZE_ERR_MALFORMED_FLOAT; + flo = 1; + ts->token_len++; + continue; + } + + /* delimiter */ + + if (strchr("(),/:;<=>?@[\\]{}", c) || + (!(ts->flags & LWS_TOKENIZE_F_MINUS_NONTERM) && c == '-')) { + switch (state) { + case LWS_TOKZS_LEADING_WHITESPACE: + + if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) { + if (c != ',' || + ts->delim != LWSTZ_DT_NEED_DELIM) + return LWS_TOKZE_ERR_COMMA_LIST; + ts->delim = LWSTZ_DT_NEED_NEXT_CONTENT; + } + + ts->token = ts->start - 1; + ts->token_len = 1; + return LWS_TOKZE_DELIMITER; + + case LWS_TOKZS_QUOTED_STRING: + ts->token_len++; + continue; + + case LWS_TOKZS_TOKEN_POST_TERMINAL: + case LWS_TOKZS_TOKEN: + /* report the delimiter next time */ + ts->start--; + ts->len++; + goto token_or_numeric; + } + } + + /* anything that's not whitespace or delimiter is payload */ + + switch (state) { + case LWS_TOKZS_LEADING_WHITESPACE: + + if (ts->flags & LWS_TOKENIZE_F_COMMA_SEP_LIST) { + if (ts->delim == LWSTZ_DT_NEED_DELIM) + return LWS_TOKZE_ERR_COMMA_LIST; + ts->delim = LWSTZ_DT_NEED_DELIM; + } + + state = LWS_TOKZS_TOKEN; + ts->token = ts->start - 1; + ts->token_len = 1; + if (c < '0' || c > '9') + num = 0; + else + if (num < 0) + num = 1; + continue; + case LWS_TOKZS_QUOTED_STRING: + case LWS_TOKZS_TOKEN: + if (c < '0' || c > '9') + num = 0; + else + if (num < 0) + num = 1; + ts->token_len++; + continue; + case LWS_TOKZS_TOKEN_POST_TERMINAL: + /* report the new token next time */ + ts->start--; + ts->len++; + goto token_or_numeric; + } + } + + /* we ran out of content */ + + if (state == LWS_TOKZS_QUOTED_STRING) + return LWS_TOKZE_ERR_UNTERM_STRING; + + if (state != LWS_TOKZS_TOKEN_POST_TERMINAL && + state != LWS_TOKZS_TOKEN) { + if (ts->delim == LWSTZ_DT_NEED_NEXT_CONTENT) + return LWS_TOKZE_ERR_COMMA_LIST; + + return LWS_TOKZE_ENDED; + } + + /* report the pending token */ + +token_or_numeric: + + if (num != 1) + return LWS_TOKZE_TOKEN; + if (flo) + return LWS_TOKZE_FLOAT; + + return LWS_TOKZE_INTEGER; +} + + +LWS_VISIBLE LWS_EXTERN int +lws_tokenize_cstr(struct lws_tokenize *ts, char *str, int max) +{ + if (ts->token_len + 1 >= max) + return 1; + + memcpy(str, ts->token, ts->token_len); + str[ts->token_len] = '\0'; + + return 0; +} + +LWS_VISIBLE LWS_EXTERN void +lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags) +{ + ts->start = start; + ts->len = 0x7fffffff; + ts->flags = flags; + ts->delim = LWSTZ_DT_NEED_FIRST_CONTENT; +} + + LWS_VISIBLE LWS_EXTERN int lws_is_cgi(struct lws *wsi) { #ifdef LWS_WITH_CGI diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h index 535fa0be5795b2851d3cf1f55c7d5343022df423..ea7ce00ec4a419b1381830742da8f1285d87f038 100644 --- a/lib/private-libwebsockets.h +++ b/lib/private-libwebsockets.h @@ -2610,6 +2610,109 @@ lws_peer_add_wsi(struct lws_context *context, struct lws_peer *peer, struct lws *wsi); #endif + +/* Do not treat - as a terminal character */ +#define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0) +/* Separately report aggregate colon-delimited tokens */ +#define LWS_TOKENIZE_F_AGG_COLON (1 << 1) +/* Enforce sequencing for a simple token , token , token ... list */ +#define LWS_TOKENIZE_F_COMMA_SEP_LIST (1 << 2) + +typedef enum { + LWS_TOKZE_ERR_UNTERM_STRING = -4, /* ended while we were in "" */ + LWS_TOKZE_ERR_MALFORMED_FLOAT = -3, /* like 0..1 or 0.1.1 */ + LWS_TOKZE_ERR_NUM_ON_LHS = -2, /* like 123= or 0.1= */ + LWS_TOKZE_ERR_COMMA_LIST = -1, /* like ",tok", or, "tok,," */ + + LWS_TOKZE_ENDED = 0, /* no more content */ + + /* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */ + + LWS_TOKZE_DELIMITER, /* a delimiter appeared */ + LWS_TOKZE_TOKEN, /* a token appeared */ + LWS_TOKZE_INTEGER, /* an integer appeared */ + LWS_TOKZE_FLOAT, /* a float appeared */ + LWS_TOKZE_TOKEN_NAME_EQUALS, /* token [whitespace] = */ + LWS_TOKZE_TOKEN_NAME_COLON, /* token [whitespace] : (only with + LWS_TOKENIZE_F_AGG_COLON flag) */ + LWS_TOKZE_QUOTED_STRING, /* "*", where * may have any char */ + +} lws_tokenize_elem; + +/* + * helper enums to allow caller to enforce legal delimiter sequencing, eg + * disallow "token,,token", "token,", and ",token" + */ + +enum lws_tokenize_delimiter_tracking { + LWSTZ_DT_NEED_FIRST_CONTENT, + LWSTZ_DT_NEED_DELIM, + LWSTZ_DT_NEED_NEXT_CONTENT, +}; + +struct lws_tokenize { + const char *start; /**< set to the start of the string to tokenize */ + const char *token; /**< the start of an identified token or delimiter */ + int len; /**< set to the length of the string to tokenize */ + int token_len; /**< the length of the identied token or delimiter */ + + int flags; /**< optional LWS_TOKENIZE_F_ flags, or 0 */ + int delim; +}; + +/** + * lws_tokenize() - breaks down a string into tokens and delimiters in-place + * + * \param ts: the lws_tokenize struct to init + * \param start: the string to tokenize + * \param flags: LWS_TOKENIZE_F_ option flags + * + * This initializes the tokenize struct to point to the given string, and + * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can + * override this requirement by setting ts.len yourself before using it. + * + * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT. + */ + +LWS_VISIBLE LWS_EXTERN void +lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags); + +/** + * lws_tokenize() - breaks down a string into tokens and delimiters in-place + * + * \param ts: the lws_tokenize struct with information and state on what to do + * + * The \p ts struct should have its start, len and flags members initialized to + * reflect the string to be tokenized and any options. + * + * Then `lws_tokenize()` may be called repeatedly on the struct, returning one + * of `lws_tokenize_elem` each time, and with the struct's `token` and + * `token_len` members set to describe the content of the delimiter or token + * payload each time. + * + * There are no allocations during the process. + * + * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached + * the end of the string). + */ + +LWS_VISIBLE LWS_EXTERN lws_tokenize_elem +lws_tokenize(struct lws_tokenize *ts); + +/** + * lws_tokenize_cstr() - copy token string to NUL-terminated buffer + * + * \param ts: pointer to lws_tokenize struct to operate on + * \param str: destination buffer + * \pparam max: bytes in destination buffer + * + * returns 0 if OK or nonzero if the string + NUL won't fit. + */ + +LWS_VISIBLE LWS_EXTERN int +lws_tokenize_cstr(struct lws_tokenize *ts, char *str, int max); + + #ifdef __cplusplus }; #endif diff --git a/lib/server/parsers.c b/lib/server/parsers.c index fb345ab04c4ea6ec7fd34afc10488dfd70d90265..3e801c52faf9b1e69018f2ff31d8d8c2af83e401 100644 --- a/lib/server/parsers.c +++ b/lib/server/parsers.c @@ -559,6 +559,10 @@ LWS_VISIBLE int lws_hdr_copy(struct lws *wsi, char *dst, int len, int toklen = lws_hdr_total_length(wsi, h); int n; + *dst = '\0'; + if (!toklen) + return 0; + if (toklen >= len) return -1; diff --git a/lib/server/server.c b/lib/server/server.c index 99da75b4dc1e367daf4874c006b306cc92098b1e..b14d34b4a6cf3d4b424a744e3d796aefc8d72a8b 100644 --- a/lib/server/server.c +++ b/lib/server/server.c @@ -1330,16 +1330,18 @@ lws_server_init_wsi_for_ws(struct lws *wsi) int lws_handshake_server(struct lws *wsi, unsigned char **buf, size_t len) { - int protocol_len, n = 0, hit, non_space_char_found = 0, m; + int n = 0, m, connection_upgrade = 0; struct lws_context *context = lws_get_context(wsi); struct lws_context_per_thread *pt = &context->pt[(int)wsi->tsi]; struct _lws_header_related hdr; struct allocated_headers *ah; unsigned char *obuf = *buf; - char protocol_list[128]; - char protocol_name[64]; size_t olen = len; - char *p; + const struct lws_protocols *pcol = NULL; + char list[128], name[64]; + struct lws_tokenize ts; + lws_tokenize_elem e; + if (len >= 10000000) { lwsl_err("%s: assert: len %ld\n", __func__, (long)len); @@ -1584,81 +1586,128 @@ upgrade_ws: lwsl_err("NULL protocol at lws_read\n"); /* - * It's websocket - * - * Select the first protocol we support from the list - * the client sent us. + * It's either websocket or h2->websocket * - * Copy it to remove header fragmentation + * If we are on h1, confirm we got the required "connection: upgrade" + * header. h2 / ws-over-h2 does not have this. */ - if (lws_hdr_copy(wsi, protocol_list, sizeof(protocol_list) - 1, - WSI_TOKEN_PROTOCOL) < 0) { - lwsl_err("protocol list too long"); +#if defined(LWS_WITH_HTTP2) + if (wsi->http2_substream) + goto check_protocol; +#endif + + lws_tokenize_init(&ts, list, LWS_TOKENIZE_F_COMMA_SEP_LIST | + LWS_TOKENIZE_F_MINUS_NONTERM); + ts.len = lws_hdr_copy(wsi, list, sizeof(list) - 1, WSI_TOKEN_CONNECTION); + if (ts.len > 0) + { + do { + e = lws_tokenize(&ts); + switch (e) { + case LWS_TOKZE_TOKEN: + if (!strcasecmp(ts.token, "upgrade")) { + connection_upgrade = 1; + e = LWS_TOKZE_ENDED; + } + break; + + case LWS_TOKZE_DELIMITER: + case LWS_TOKZE_ENDED: + break; + + default: + lwsl_err("%s: malformed connection hdr\n", __func__); + + goto bail_nuke_ah; + } + } while (e != LWS_TOKZE_ENDED); + } + if (!connection_upgrade) { + lwsl_notice("No connection hdr\n"); + goto bail_nuke_ah; } - protocol_len = lws_hdr_total_length(wsi, WSI_TOKEN_PROTOCOL); - protocol_list[protocol_len] = '\0'; - p = protocol_list; - hit = 0; - - while (*p && !hit) { - n = 0; - non_space_char_found = 0; - while (n < sizeof(protocol_name) - 1 && - *p && *p != ',') { - /* ignore leading spaces */ - if (!non_space_char_found && *p == ' ') { - p++; - continue; - } - non_space_char_found = 1; - protocol_name[n++] = *p++; - } - protocol_name[n] = '\0'; - if (*p) - p++; - lwsl_info("checking %s\n", protocol_name); + /* Check if 'host' header exits. + * Its content is not validated. If required shall be checked in user's callback. + */ + if (!lws_hdr_simple_ptr(wsi, WSI_TOKEN_HOST)) { + lwsl_info("No host hdr\n"); + goto bail_nuke_ah; + } - n = 0; - while (wsi->vhost->protocols[n].callback) { - lwsl_info("try %s\n", - wsi->vhost->protocols[n].name); +#if defined(LWS_WITH_HTTP2) +check_protocol: +#endif - if (wsi->vhost->protocols[n].name && - !strcmp(wsi->vhost->protocols[n].name, - protocol_name)) { - wsi->protocol = &wsi->vhost->protocols[n]; - hit = 1; - break; - } + /* + * Select the first protocol we support from the list + * the client sent us. + */ - n++; - } - } + lws_tokenize_init(&ts, list, LWS_TOKENIZE_F_COMMA_SEP_LIST | + LWS_TOKENIZE_F_MINUS_NONTERM); + ts.len = lws_hdr_copy(wsi, list, sizeof(list) - 1, WSI_TOKEN_PROTOCOL); + if (ts.len < 0) { + lwsl_err("%s: protocol list too long\n", __func__); + goto bail_nuke_ah; + } + if (!ts.len) { + int n = wsi->vhost->default_protocol_index; + /* + * some clients only have one protocol and do not send the + * protocol list header... allow it and match to the vhost's + * default protocol (which itself defaults to zero) + */ + lwsl_info("%s: defaulting to prot handler %d\n", __func__, n); - /* we didn't find a protocol he wanted? */ + wsi->protocol = &wsi->vhost->protocols[n]; + + goto alloc_ws; + } + + /* otherwise go through the user-provided protocol list */ + + do { + e = lws_tokenize(&ts); + switch (e) { + case LWS_TOKZE_TOKEN: + + if (lws_tokenize_cstr(&ts, name, sizeof(name))) { + lwsl_err("%s: pcol name too long\n", __func__); - if (!hit) { - if (lws_hdr_simple_ptr(wsi, WSI_TOKEN_PROTOCOL)) { - lwsl_info("No protocol from \"%s\" supported\n", - protocol_list); goto bail_nuke_ah; } - /* - * some clients only have one protocol and - * do not send the protocol list header... - * allow it and match to the vhost's default - * protocol (which itself defaults to zero) - */ - lwsl_info("defaulting to prot handler %d\n", - wsi->vhost->default_protocol_index); - n = wsi->vhost->default_protocol_index; - wsi->protocol = &wsi->vhost->protocols[ - (int)wsi->vhost->default_protocol_index]; + lwsl_debug("checking %s\n", name); + pcol = lws_vhost_name_to_protocol(wsi->vhost, name); + if (pcol) { + wsi->protocol = pcol; + e = LWS_TOKZE_ENDED; + } + break; + + case LWS_TOKZE_DELIMITER: + case LWS_TOKZE_ENDED: + break; + + default: + lwsl_err("%s: malformatted protocol list", __func__); + + goto bail_nuke_ah; } + } while (e != LWS_TOKZE_ENDED); + + /* we didn't find a protocol he wanted? */ + + if (!pcol) { + lwsl_notice("No supported protocol \"%s\"\n", list); + + goto bail_nuke_ah; + } + +alloc_ws: /* allocate wsi->user storage */ if (lws_ensure_user_space(wsi))