diff --git a/lib/parsers.c b/lib/parsers.c
index fab799ef1528d54305aee47d8218fb11ab7d05dc..8e1312e8d70a11754b59531a80ae06e38dd12226 100644
--- a/lib/parsers.c
+++ b/lib/parsers.c
@@ -1455,3 +1455,63 @@ lws_remaining_packet_payload(struct lws *wsi)
 {
 	return wsi->u.ws.rx_packet_length;
 }
+
+/*
+ * Bulk fast path for LWS_RXPS_PAYLOAD_UNTIL_LENGTH_EXHAUSTED: unmask and copy
+ * payload from *buf into rx_ubuf, leaving the last usable byte for the
+ * per-byte parser.  *buf, *len, rx_ubuf_head, rx_packet_length and (when
+ * masked) mask_idx all move by the count consumed, which may be zero.
+ */
+void
+lws_payload_until_length_exhausted(struct lws *wsi, unsigned char **buf,
+				   size_t *len)
+{
+	unsigned char *buffer = *buf, mask[4];
+	int buffer_size, avail, n;
+	char *rx_ubuf;
+
+	if (wsi->protocol->rx_buffer_size)
+		buffer_size = wsi->protocol->rx_buffer_size;
+	else
+		buffer_size = LWS_MAX_SOCKET_IO_BUF;
+	avail = buffer_size - wsi->u.ws.rx_ubuf_head;
+	/* reject avail < 0 here: the size_t compare below would mask it */
+	if (avail <= 1)
+		return;
+	/* do not consume more than we should */
+	if (avail > wsi->u.ws.rx_packet_length)
+		avail = (int)wsi->u.ws.rx_packet_length;
+	/* do not consume more than what is in the buffer */
+	if ((size_t)avail > *len)
+		avail = (int)*len;
+	/* we want to leave 1 byte for the parser to handle properly */
+	if (avail <= 1)
+		return;
+
+	avail--;
+	rx_ubuf = wsi->u.ws.rx_ubuf + LWS_PRE + wsi->u.ws.rx_ubuf_head;
+	if (wsi->u.ws.all_zero_nonce)
+		memcpy(rx_ubuf, buffer, avail);
+	else {
+		/* rotate the mask so mask[0] pairs with the next input byte */
+		for (n = 0; n < 4; n++)
+			mask[n] = wsi->u.ws.mask[(wsi->u.ws.mask_idx + n) & 3];
+		/* deal with 4-byte chunks using an unrolled loop */
+		n = avail >> 2;
+		while (n--) {
+			*(rx_ubuf++) = *(buffer++) ^ mask[0];
+			*(rx_ubuf++) = *(buffer++) ^ mask[1];
+			*(rx_ubuf++) = *(buffer++) ^ mask[2];
+			*(rx_ubuf++) = *(buffer++) ^ mask[3];
+		}
+		/* and the remaining bytes bytewise */
+		for (n = 0; n < (avail & 3); n++)
+			*(rx_ubuf++) = *(buffer++) ^ mask[n];
+		wsi->u.ws.mask_idx = (wsi->u.ws.mask_idx + avail) & 3;
+	}
+
+	(*buf) += avail;
+	wsi->u.ws.rx_ubuf_head += avail;
+	wsi->u.ws.rx_packet_length -= avail;
+	*len -= avail;
+}
diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h
index 0874e9fa53017f176e17c469780ce6d48097fc1c..a83ab8bcd3c8d2384ec355fa43ee2485747536cd 100644
--- a/lib/private-libwebsockets.h
+++ b/lib/private-libwebsockets.h
@@ -1289,6 +1289,9 @@ lws_client_interpret_server_handshake(struct lws *wsi);
 LWS_EXTERN int LWS_WARN_UNUSED_RESULT
 lws_rx_sm(struct lws *wsi, unsigned char c);
 
+LWS_EXTERN void /* bulk rx payload copy: advances *buf, reduces *len */
+lws_payload_until_length_exhausted(struct lws *wsi, unsigned char **buf, size_t *len);
+
 LWS_EXTERN int LWS_WARN_UNUSED_RESULT
 lws_issue_raw_ext_access(struct lws *wsi, unsigned char *buf, size_t len);
 
diff --git a/lib/server.c b/lib/server.c
index 2fac4aba1862665be3c970f35ac446156ba8b55e..32ef04102023d1d79ea8715681f1d4ef69faced0 100644
--- a/lib/server.c
+++ b/lib/server.c
@@ -1235,6 +1235,11 @@ lws_interpret_incoming_packet(struct lws *wsi, unsigned char **buf, size_t len)
 		if (wsi->rxflow_buffer)
 			wsi->rxflow_pos++;
 
+		/* consume payload bytes efficiently */
+		if (wsi->lws_rx_parse_state ==
+		    LWS_RXPS_PAYLOAD_UNTIL_LENGTH_EXHAUSTED)
+			lws_payload_until_length_exhausted(wsi, buf, &len);
+
 		/* process the byte */
 		m = lws_rx_sm(wsi, *(*buf)++);
 		if (m < 0)