diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35acd58da004c84444d270c58376862fa0eef9e6..c5f32e10b45bc31a226703b95d0e8eaa4a2b0daa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -447,12 +447,10 @@ endif()
 
 if (NOT LWS_WITHOUT_EXTENSIONS)
 	list(APPEND HDR_PRIVATE
-		lib/extension-deflate-frame.h
-		lib/extension-deflate-stream.h)
+		lib/extension-permessage-deflate.h)
 	list(APPEND SOURCES
 		lib/extension.c
-		lib/extension-deflate-frame.c
-		lib/extension-deflate-stream.c)
+		lib/extension-permessage-deflate.c)
 endif()
 
 if (LWS_WITH_LIBEV)
@@ -699,7 +697,7 @@ if (NOT LWS_WITHOUT_TESTAPPS)
 	#
 	# Helper function for adding a test app.
 	#
-	macro(create_test_app TEST_NAME MAIN_SRC S2 S3 S4)
+	macro(create_test_app TEST_NAME MAIN_SRC S2 S3 S4 S5)
 
 		set(TEST_SRCS ${MAIN_SRC})
 		set(TEST_HDR)
@@ -715,6 +713,10 @@ if (NOT LWS_WITHOUT_TESTAPPS)
 		else()
 			list(APPEND TEST_SRCS ${S4})
 		endif()
+		# S5 is optional; callers pass "" when there is no fifth source file.
+		if (NOT "${S5}" STREQUAL "")
+			list(APPEND TEST_SRCS ${S5})
+		endif()
 		if (WIN32)
 			list(APPEND TEST_SRCS
 				${WIN32_HELPERS_PATH}/getopt.c
@@ -772,13 +774,27 @@ if (NOT LWS_WITHOUT_TESTAPPS)
 		# test-server
 		#
 		if (NOT LWS_WITHOUT_TEST_SERVER)
-			create_test_app(test-server "test-server/test-server.c" "test-server/test-server-http.c" "test-server/test-server-dumb-increment.c" "test-server/test-server-mirror.c")
+			create_test_app(test-server "test-server/test-server.c"
+				"test-server/test-server-http.c"
+				"test-server/test-server-dumb-increment.c"
+				"test-server/test-server-mirror.c"
+				"test-server/test-server-echogen.c")
 			if (UNIX AND NOT ((CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")))
-				create_test_app(test-server-pthreads "test-server/test-server-pthreads.c" "test-server/test-server-http.c" "test-server/test-server-dumb-increment.c" "test-server/test-server-mirror.c")
+				create_test_app(test-server-pthreads
+					"test-server/test-server-pthreads.c"
+					"test-server/test-server-http.c"
+					"test-server/test-server-dumb-increment.c"
+					"test-server/test-server-mirror.c"
+					"test-server/test-server-echogen.c")
 			endif()
 			if (UNIX AND NOT ((CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
 				AND LWS_WITH_LIBEV)
-				create_test_app(test-server-libev "test-server/test-server-libev.c" "test-server/test-server-http.c" "test-server/test-server-dumb-increment.c" "test-server/test-server-mirror.c")
+				create_test_app(test-server-libev
+					"test-server/test-server-libev.c"
+					"test-server/test-server-http.c"
+					"test-server/test-server-dumb-increment.c"
+					"test-server/test-server-mirror.c"
+					"test-server/test-server-echogen.c")
 			endif()
 		endif()
 
@@ -786,7 +802,11 @@ if (NOT LWS_WITHOUT_TESTAPPS)
 		# test-server-extpoll
 		#
 		if (NOT LWS_WITHOUT_TEST_SERVER_EXTPOLL)
-			create_test_app(test-server-extpoll "test-server/test-server.c" "test-server/test-server-http.c" "test-server/test-server-dumb-increment.c" "test-server/test-server-mirror.c")
+			create_test_app(test-server-extpoll "test-server/test-server.c"
+				"test-server/test-server-http.c"
+				"test-server/test-server-dumb-increment.c"
+				"test-server/test-server-mirror.c"
+				"test-server/test-server-echogen.c")
 			# Set defines for this executable only.
 			set_property(
 				TARGET test-server-extpoll
@@ -891,27 +911,27 @@ if (NOT LWS_WITHOUT_TESTAPPS)
 		# test-client
 		#
 		if (NOT LWS_WITHOUT_TEST_CLIENT)
-			create_test_app(test-client "test-server/test-client.c" "" "" "")
+			create_test_app(test-client "test-server/test-client.c" "" "" "" "")
 		endif()
 
 		#
 		# test-fraggle
 		#
 		if (NOT LWS_WITHOUT_TEST_FRAGGLE)
-			create_test_app(test-fraggle "test-server/test-fraggle.c" "" "" "")
+			create_test_app(test-fraggle "test-server/test-fraggle.c" "" "" "" "")
 		endif()
 
 		#
 		# test-ping
 		#
 		if (NOT LWS_WITHOUT_TEST_PING)
-			create_test_app(test-ping "test-server/test-ping.c" "" "" "")
+			create_test_app(test-ping "test-server/test-ping.c" "" "" "" "")
 		endif()
 		#
 		# test-echo
 		#
 		if (NOT WITHOUT_TEST_ECHO)
-			create_test_app(test-echo "test-server/test-echo.c" "" "" "")
+			create_test_app(test-echo "test-server/test-echo.c" "" "" "" "")
 		endif()
 
 	endif(NOT LWS_WITHOUT_CLIENT)
diff --git a/README.coding.md b/README.coding.md
index 6026318e5d01dc09d7501e7d6f07eb04888f1e59..c1a703ec80e77db87f330dbdb6f255c7046a1653 100644
--- a/README.coding.md
+++ b/README.coding.md
@@ -261,8 +261,8 @@ if left `NULL`, then the "DEFAULT" set of ciphers are all possible to select.
 Async nature of client connections
 ----------------------------------
 
-When you call `lws_client_connect(..)` and get a `wsi` back, it does not
-mean your connection is active.  It just mean it started trying to connect.
+When you call `lws_client_connect_info(..)` and get a `wsi` back, it does not
+mean your connection is active.  It just means it started trying to connect.
 
 Your client connection is actually active only when you receive
 `LWS_CALLBACK_CLIENT_ESTABLISHED` for it.
diff --git a/README.test-apps.md b/README.test-apps.md
index 203a11dddfe3447c6029d3c2964a091d11459e6a..60308daeafc825dc467945547d13d1c899e04362 100644
--- a/README.test-apps.md
+++ b/README.test-apps.md
@@ -335,7 +335,5 @@ The autobahn test itself admits this is not part of the standard, just someone's
 random opinion about how they think a ws server should act.  So we will fail
 this by design and it is no problem about RFC6455 compliance.
 
- - Test 6.x: detect we were sent invalid UTF-8.  Validity of encoding is left
- to the user code in libwebsockets until now.
  
  
diff --git a/changelog b/changelog
index bd4839509ef00fb73fbcf2a8c89c883382ade352..e30e46f722f7b9bf48beaf774be0567cf8fbe7ea 100644
--- a/changelog
+++ b/changelog
@@ -1,6 +1,50 @@
 Changelog
 ---------
 
+Extension Changes
+-----------------
+
+1) There is now a "permessage-deflate" / RFC7692 implementation.  It's very
+similar to "deflate-frame" we have offered for a long while; deflate-frame is
+now provided as an alias of permessage-deflate.
+
+The main differences are that the new permessage-deflate implementation:
+
+ - properly performs streaming respecting input and output buffer limits.  The
+   old deflate-frame implementation could only work on complete deflate input
+   and produce complete inflate output for each frame.  The new implementation
+   only mallocs buffers at initialization.
+
+ - goes around the event loop after each input package is processed allowing
+   interleaved output processing.  The RX flow control api can be used to
+   force compressed input processing to match the rate of compressed output
+   processing (test-echo shows an example of how to do this).
+
+ - when being "deflate-frame" for compatibility it uses the same default zlib
+   settings as the old "deflate-frame", but instead of exponentially increasing
+   malloc allocations until the whole output will fit, it observes the default
+   input and output chunking buffer sizes of "permessage-deflate", that's
+   1024 in and 1024 out at a time.
+
+2) deflate-stream has been disabled for many versions (for over a year) and is
+now removed.  Browsers are now standardizing on "permessage-deflate" / RFC7692.
+
+3) struct lws_extension is simplified, and lws extensions now have a public
+api (their callback) for use in user code to compose extensions and options
+the user code wants.  lws_get_internal_exts() is deprecated but kept around
+as a NOP.  The changes allow one extension implementation to go by different
+names and allow the user client code to control option offers per-ext.
+
+The test client and server are updated to use the new way.  If you use
+the old way it should still work, but extensions will be disabled until you
+update your code.
+
+Extensions are now responsible for allocating any per-instance private struct
+at instance construction time and freeing it when the instance is destroyed.
+Not needing to know the size means the extension's struct can be opaque
+to user code.
+
+
 User api additions
 ------------------
 
@@ -47,7 +91,8 @@ LWS_CALLBACK_WS_PEER_INITIATED_CLOSE:
              defined in the standard, and may be a string or non-human-
              readble data.
              If you return 0 lws will echo the close and then close the
-             connection.  If you return nonzero lws will just close the             connection.
+             connection.  If you return nonzero lws will just close the
+             connection.
 
 As usual not handling it does the right thing, if you're not interested in it
 just ignore it.
@@ -134,6 +179,15 @@ anyway.
 5) Pings and close used to be limited to 124 bytes, the correct limit is 125
 so that is now also allowed.
 
+6) LWS_PRE is provided as a synonym for LWS_SEND_BUFFER_PRE_PADDING, either is
+valid to use now.
+
+7) There's generic support for RFC7692 style extension options built into the
+library now.  As a consequence, a field "options" is added to lws_extension.
+It can be NULL if there are no options on the extension.  Extension internal
+info is part of the public abi because extensions may be implemented outside
+the library.
+
 
 v1.6.0-chrome48-firefox42
 =======================
diff --git a/lib/client-handshake.c b/lib/client-handshake.c
index 77915ce5805562913ab96ca033dfa458ed8d74f9..eca548304bcb5cfac34dfdf45806f39c2f0a3d9f 100644
--- a/lib/client-handshake.c
+++ b/lib/client-handshake.c
@@ -335,35 +335,33 @@ failed:
  */
 
 LWS_VISIBLE struct lws *
-lws_client_connect(struct lws_context *context, const char *address,
-		   int port, int ssl_connection, const char *path,
-		   const char *host, const char *origin,
-		   const char *protocol, int ietf_version_or_minus_one)
+lws_client_connect_info(struct lws_client_connect_info *i)
 {
 	struct lws *wsi;
+	int v = SPEC_LATEST_SUPPORTED;
 
 	wsi = lws_zalloc(sizeof(struct lws));
 	if (wsi == NULL)
 		goto bail;
 
-	wsi->context = context;
+	wsi->context = i->context;
 	wsi->sock = LWS_SOCK_INVALID;
 
 	/* -1 means just use latest supported */
 
-	if (ietf_version_or_minus_one == -1)
-		ietf_version_or_minus_one = SPEC_LATEST_SUPPORTED;
+	if (i->ietf_version_or_minus_one != -1 && i->ietf_version_or_minus_one)
+		v = i->ietf_version_or_minus_one;
 
-	wsi->ietf_spec_revision = ietf_version_or_minus_one;
+	wsi->ietf_spec_revision = v;
 	wsi->user_space = NULL;
 	wsi->state = LWSS_CLIENT_UNCONNECTED;
 	wsi->protocol = NULL;
 	wsi->pending_timeout = NO_PENDING_TIMEOUT;
 
 #ifdef LWS_OPENSSL_SUPPORT
-	wsi->use_ssl = ssl_connection;
+	wsi->use_ssl = i->ssl_connection;
 #else
-	if (ssl_connection) {
+	if (i->ssl_connection) {
 		lwsl_err("libwebsockets not configured for ssl\n");
 		goto bail;
 	}
@@ -376,31 +374,35 @@ lws_client_connect(struct lws_context *context, const char *address,
 	 * we're not necessarily in a position to action these right away,
 	 * stash them... we only need during connect phase so u.hdr is fine
 	 */
-	wsi->u.hdr.ah->c_port = port;
-	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_PEER_ADDRESS, address))
+	wsi->u.hdr.ah->c_port = i->port;
+	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_PEER_ADDRESS, i->address))
 		goto bail1;
 
 	/* these only need u.hdr lifetime as well */
 
-	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_URI, path))
+	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_URI, i->path))
 		goto bail1;
 
-	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_HOST, host))
+	if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_HOST, i->host))
 		goto bail1;
 
-	if (origin)
-		if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_ORIGIN, origin))
+	if (i->origin)
+		if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_ORIGIN, i->origin))
 			goto bail1;
 	/*
 	 * this is a list of protocols we tell the server we're okay with
 	 * stash it for later when we compare server response with it
 	 */
-	if (protocol)
+	if (i->protocol)
 		if (lws_hdr_simple_create(wsi, _WSI_TOKEN_CLIENT_SENT_PROTOCOLS,
-					  protocol))
+					  i->protocol))
 			goto bail1;
 
-	wsi->protocol = &context->protocols[0];
+	wsi->protocol = &i->context->protocols[0];
+	if (wsi && !wsi->user_space && i->userdata) {
+		wsi->user_space_externally_allocated = 1;
+		wsi->user_space = i->userdata;
+	}
 
 	/*
 	 * Check with each extension if it is able to route and proxy this
@@ -409,9 +411,9 @@ lws_client_connect(struct lws_context *context, const char *address,
 	 * connection.
 	 */
 
-	if (lws_ext_cb_all_exts(context, wsi,
-			LWS_EXT_CALLBACK_CAN_PROXY_CLIENT_CONNECTION,
-						     (void *)address, port) > 0) {
+	if (lws_ext_cb_all_exts(i->context, wsi,
+			LWS_EXT_CB_CAN_PROXY_CLIENT_CONNECTION,
+						     (void *)i->address, i->port) > 0) {
 		lwsl_client("lws_client_connect: ext handling conn\n");
 
 		lws_set_timeout(wsi,
@@ -436,6 +438,7 @@ bail:
 
 /**
  * lws_client_connect_extended() - Connect to another websocket server
+ * 				DEPRECATED use lws_client_connect_info
  * @context:	Websocket context
  * @address:	Remote server address, eg, "myserver.com"
  * @port:	Port to connect to on the remote server, eg, 80
@@ -461,16 +464,33 @@ lws_client_connect_extended(struct lws_context *context, const char *address,
 			    const char *protocol, int ietf_version_or_minus_one,
 			    void *userdata)
 {
-	struct lws *wsi;
+	struct lws_client_connect_info i;
+
+	memset(&i, 0, sizeof(i));
+
+	i.context = context;
+	i.address = address;
+	i.port = port;
+	i.ssl_connection = ssl_connection;
+	i.path = path;
+	i.host = host;
+	i.origin = origin;
+	i.protocol = protocol;
+	i.ietf_version_or_minus_one = ietf_version_or_minus_one;
+	i.userdata = userdata;
+
+	return lws_client_connect_info(&i);
+}
 
-	wsi = lws_client_connect(context, address, port, ssl_connection, path,
+/* deprecated use lws_client_connect_info */
+LWS_VISIBLE struct lws *
+lws_client_connect(struct lws_context *context, const char *address,
+			    int port, int ssl_connection, const char *path,
+			    const char *host, const char *origin,
+			    const char *protocol, int ietf_version_or_minus_one)
+{
+	return lws_client_connect_extended(context, address, port, ssl_connection, path,
 				 host, origin, protocol,
-				 ietf_version_or_minus_one);
-
-	if (wsi && !wsi->user_space && userdata) {
-		wsi->user_space_externally_allocated = 1;
-		wsi->user_space = userdata ;
-	}
-
-	return wsi;
+				 ietf_version_or_minus_one, NULL);
 }
+
diff --git a/lib/client-parser.c b/lib/client-parser.c
index 935028193a713d10a4fb0ed01ead8ce461b3f82e..ad8d58d61f8a34738d76fa5acbc1df48b7adf29e 100644
--- a/lib/client-parser.c
+++ b/lib/client-parser.c
@@ -27,7 +27,29 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 	unsigned short close_code;
 	unsigned char *pp;
 	struct lws_tokens eff_buf;
-	int handled, m;
+	int handled, n, m, rx_draining_ext = 0;
+
+
+	if (wsi->u.ws.rx_draining_ext) {
+		struct lws **w = &wsi->context->rx_draining_ext_list;
+		lwsl_ext("%s: RX EXT DRAINING: Removing from list\n", __func__);
+		assert(!c); /* drain re-entry is driven with a zero byte, not rx data */
+		eff_buf.token = NULL;
+		eff_buf.token_len = 0;
+		wsi->u.ws.rx_draining_ext = 0;
+		/* unlink this wsi from the context's singly-linked draining list */
+		while (*w) {
+			if (*w == wsi) {
+				*w = wsi->u.ws.rx_draining_ext_list;
+				break;
+			}
+			w = &((*w)->u.ws.rx_draining_ext_list);
+		}
+		wsi->u.ws.rx_draining_ext_list = NULL;
+		rx_draining_ext = 1; /* lets drain_extension end quietly on an empty result */
+
+		goto drain_extension;
+	}
 
 	switch (wsi->lws_rx_parse_state) {
 	case LWS_RXPS_NEW:
@@ -40,6 +62,7 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 			/* revisit if an extension wants them... */
 			switch (wsi->u.ws.opcode) {
 			case LWSWSOPC_TEXT_FRAME:
+				wsi->u.ws.rsv_first_msg = (c & 0x70);
 				wsi->u.ws.continuation_possible = 1;
 				wsi->u.ws.check_utf8 =
 					!!(wsi->context->options &
@@ -47,6 +70,7 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 				wsi->u.ws.utf8 = 0;
 				break;
 			case LWSWSOPC_BINARY_FRAME:
+				wsi->u.ws.rsv_first_msg = (c & 0x70);
 				wsi->u.ws.check_utf8 = 0;
 				wsi->u.ws.continuation_possible = 1;
 				break;
@@ -80,13 +104,14 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 			/* revisit if an extension wants them... */
 			if (
 #ifndef LWS_NO_EXTENSIONS
-				!wsi->count_active_extensions &&
+				!wsi->count_act_ext &&
 #endif
 				wsi->u.ws.rsv) {
 				lwsl_info("illegal rsv bits set\n");
 				return -1;
 			}
 			wsi->u.ws.final = !!((c >> 7) & 1);
+			lwsl_ext("%s:    This RX frame Final %d\n", __func__, wsi->u.ws.final);
 
 			if (wsi->u.ws.owed_a_fin &&
 			    (wsi->u.ws.opcode == LWSWSOPC_TEXT_FRAME ||
@@ -165,6 +190,7 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 
 	case LWS_RXPS_04_FRAME_HDR_LEN16_1:
 		wsi->u.ws.rx_packet_length |= c;
+		//lwsl_err("&&&&& packet length %d\n", wsi->u.ws.rx_packet_length);
 		if (wsi->u.ws.this_frame_masked)
 			wsi->lws_rx_parse_state =
 					LWS_RXPS_07_COLLECT_FRAME_KEY_1;
@@ -246,28 +272,28 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_1:
-		wsi->u.ws.mask_nonce[0] = c;
+		wsi->u.ws.mask[0] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_2;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_2:
-		wsi->u.ws.mask_nonce[1] = c;
+		wsi->u.ws.mask[1] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_3;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_3:
-		wsi->u.ws.mask_nonce[2] = c;
+		wsi->u.ws.mask[2] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_4;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_4:
-		wsi->u.ws.mask_nonce[3] = c;
+		wsi->u.ws.mask[3] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 
@@ -282,17 +308,12 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 
 	case LWS_RXPS_PAYLOAD_UNTIL_LENGTH_EXHAUSTED:
 
-		if (!wsi->u.ws.rx_user_buffer) {
-			lwsl_err("NULL client rx_user_buffer\n");
-			return 1;
-		}
+		assert(wsi->u.ws.rx_ubuf);
 
 		if (wsi->u.ws.this_frame_masked && !wsi->u.ws.all_zero_nonce)
-			c ^= wsi->u.ws.mask_nonce[
-					    (wsi->u.ws.frame_mask_index++) & 3];
+			c ^= wsi->u.ws.mask[(wsi->u.ws.mask_idx++) & 3];
 
-		wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
-					 (wsi->u.ws.rx_user_buffer_head++)] = c;
+		wsi->u.ws.rx_ubuf[LWS_PRE + (wsi->u.ws.rx_ubuf_head++)] = c;
 
 		if (--wsi->u.ws.rx_packet_length == 0) {
 			/* spill because we have the whole frame */
@@ -304,13 +325,12 @@ int lws_client_rx_sm(struct lws *wsi, unsigned char c)
 		 * if there's no protocol max frame size given, we are
 		 * supposed to default to LWS_MAX_SOCKET_IO_BUF
 		 */
-
 		if (!wsi->protocol->rx_buffer_size &&
-		    wsi->u.ws.rx_user_buffer_head != LWS_MAX_SOCKET_IO_BUF)
+		    wsi->u.ws.rx_ubuf_head != LWS_MAX_SOCKET_IO_BUF)
 			break;
 
 		if (wsi->protocol->rx_buffer_size &&
-		    wsi->u.ws.rx_user_buffer_head != wsi->protocol->rx_buffer_size)
+		    wsi->u.ws.rx_ubuf_head != wsi->protocol->rx_buffer_size)
 			break;
 
 		/* spill because we filled our rx buffer */
@@ -325,12 +345,12 @@ spill:
 
 		switch (wsi->u.ws.opcode) {
 		case LWSWSOPC_CLOSE:
-			pp = (unsigned char *)&wsi->u.ws.rx_user_buffer[
-						LWS_SEND_BUFFER_PRE_PADDING];
+			pp = (unsigned char *)&wsi->u.ws.rx_ubuf[
+						LWS_PRE];
 			if (wsi->context->options & LWS_SERVER_OPTION_VALIDATE_UTF8 &&
-			    wsi->u.ws.rx_user_buffer_head > 2 &&
+			    wsi->u.ws.rx_ubuf_head > 2 &&
 			    lws_check_utf8(&wsi->u.ws.utf8, pp + 2,
-					   wsi->u.ws.rx_user_buffer_head - 2))
+					   wsi->u.ws.rx_ubuf_head - 2))
 				goto utf8_fail;
 
 			/* is this an acknowledgement of our close? */
@@ -344,8 +364,8 @@ spill:
 			}
 
 			lwsl_parser("client sees server close len = %d\n",
-						 wsi->u.ws.rx_user_buffer_head);
-			if (wsi->u.ws.rx_user_buffer_head >= 2) {
+						 wsi->u.ws.rx_ubuf_head);
+			if (wsi->u.ws.rx_ubuf_head >= 2) {
 				close_code = (pp[0] << 8) | pp[1];
 				if (close_code < 1000 || close_code == 1004 ||
 				    close_code == 1005 || close_code == 1006 ||
@@ -361,7 +381,7 @@ spill:
 					wsi->protocol->callback, wsi,
 					LWS_CALLBACK_WS_PEER_INITIATED_CLOSE,
 					wsi->user_space, pp,
-					wsi->u.ws.rx_user_buffer_head))
+					wsi->u.ws.rx_ubuf_head))
 				return -1;
 			/*
 			 * parrot the close packet payload back
@@ -369,16 +389,16 @@ spill:
 			 * immediately afterwards
 			 */
 			lws_write(wsi, (unsigned char *)
-			   &wsi->u.ws.rx_user_buffer[
-				LWS_SEND_BUFFER_PRE_PADDING],
-				wsi->u.ws.rx_user_buffer_head, LWS_WRITE_CLOSE);
+			   &wsi->u.ws.rx_ubuf[
+				LWS_PRE],
+				wsi->u.ws.rx_ubuf_head, LWS_WRITE_CLOSE);
 			wsi->state = LWSS_RETURNED_CLOSE_ALREADY;
 			/* close the connection */
 			return -1;
 
 		case LWSWSOPC_PING:
 			lwsl_info("received %d byte ping, sending pong\n",
-				  wsi->u.ws.rx_user_buffer_head);
+				  wsi->u.ws.rx_ubuf_head);
 
 			/* he set a close reason on this guy, ignore PING */
 			if (wsi->u.ws.close_in_ping_buffer_len)
@@ -394,30 +414,30 @@ spill:
 			}
 
 			/* control packets can only be < 128 bytes long */
-			if (wsi->u.ws.rx_user_buffer_head > 128 - 3) {
+			if (wsi->u.ws.rx_ubuf_head > 128 - 3) {
 				lwsl_parser("DROP PING payload too large\n");
 				goto ping_drop;
 			}
 
 			/* stash the pong payload */
-			memcpy(wsi->u.ws.ping_payload_buf + LWS_SEND_BUFFER_PRE_PADDING,
-			       &wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING],
-				wsi->u.ws.rx_user_buffer_head);
+			memcpy(wsi->u.ws.ping_payload_buf + LWS_PRE,
+			       &wsi->u.ws.rx_ubuf[LWS_PRE],
+				wsi->u.ws.rx_ubuf_head);
 
-			wsi->u.ws.ping_payload_len = wsi->u.ws.rx_user_buffer_head;
+			wsi->u.ws.ping_payload_len = wsi->u.ws.rx_ubuf_head;
 			wsi->u.ws.ping_pending_flag = 1;
 
 			/* get it sent as soon as possible */
 			lws_callback_on_writable(wsi);
 ping_drop:
-			wsi->u.ws.rx_user_buffer_head = 0;
+			wsi->u.ws.rx_ubuf_head = 0;
 			handled = 1;
 			break;
 
 		case LWSWSOPC_PONG:
 			lwsl_info("client receied pong\n");
-			lwsl_hexdump(&wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING],
-				     wsi->u.ws.rx_user_buffer_head);
+			lwsl_hexdump(&wsi->u.ws.rx_ubuf[LWS_PRE],
+				     wsi->u.ws.rx_ubuf_head);
 
 			/* issue it */
 			callback_action = LWS_CALLBACK_CLIENT_RECEIVE_PONG;
@@ -438,17 +458,17 @@ ping_drop:
 			 * state machine.
 			 */
 
-			eff_buf.token = &wsi->u.ws.rx_user_buffer[
-						   LWS_SEND_BUFFER_PRE_PADDING];
-			eff_buf.token_len = wsi->u.ws.rx_user_buffer_head;
+			eff_buf.token = &wsi->u.ws.rx_ubuf[
+						   LWS_PRE];
+			eff_buf.token_len = wsi->u.ws.rx_ubuf_head;
 
-			if (lws_ext_cb_wsi_active_exts(wsi,
-				LWS_EXT_CALLBACK_EXTENDED_PAYLOAD_RX,
+			if (lws_ext_cb_active(wsi,
+				LWS_EXT_CB_EXTENDED_PAYLOAD_RX,
 					&eff_buf, 0) <= 0) { /* not handle or fail */
 
 				lwsl_ext("Unhandled ext opc 0x%x\n",
 					 wsi->u.ws.opcode);
-				wsi->u.ws.rx_user_buffer_head = 0;
+				wsi->u.ws.rx_ubuf_head = 0;
 
 				return 0;
 			}
@@ -464,14 +484,22 @@ ping_drop:
 		if (handled)
 			goto already_done;
 
-		eff_buf.token = &wsi->u.ws.rx_user_buffer[
-						LWS_SEND_BUFFER_PRE_PADDING];
-		eff_buf.token_len = wsi->u.ws.rx_user_buffer_head;
+		eff_buf.token = &wsi->u.ws.rx_ubuf[LWS_PRE];
+		eff_buf.token_len = wsi->u.ws.rx_ubuf_head;
 
-		if (lws_ext_cb_wsi_active_exts(wsi, LWS_EXT_CALLBACK_PAYLOAD_RX,
-					       &eff_buf, 0) < 0) /* fail */
+drain_extension:
+		n = lws_ext_cb_active(wsi, LWS_EXT_CB_PAYLOAD_RX, &eff_buf, 0);
+		lwsl_ext("Ext RX returned %d\n", n);
+		if (n < 0) /* fail */
 			return -1;
 
+		lwsl_ext("post inflate eff_buf len %d\n", eff_buf.token_len);
+
+		if (rx_draining_ext && !eff_buf.token_len) {
+			lwsl_err("   --- ignoring zero drain result, ending drain\n");
+			goto already_done;
+		}
+
 		if (wsi->u.ws.check_utf8 && !wsi->u.ws.defeat_check_utf8) {
 			if (lws_check_utf8(&wsi->u.ws.utf8,
 					   (unsigned char *)eff_buf.token,
@@ -479,7 +507,8 @@ ping_drop:
 				goto utf8_fail;
 
 			/* we are ending partway through utf-8 character? */
-			if (wsi->u.ws.final && wsi->u.ws.utf8) {
+			if (!wsi->u.ws.rx_packet_length && wsi->u.ws.final && wsi->u.ws.utf8 && !n) {
+				lwsl_info("FINAL utf8 error\n");
 utf8_fail:			lwsl_info("utf8 error\n");
 				return -1;
 			}
@@ -489,8 +518,10 @@ utf8_fail:			lwsl_info("utf8 error\n");
 		    callback_action != LWS_CALLBACK_CLIENT_RECEIVE_PONG)
 			goto already_done;
 
-		if (eff_buf.token)
-			eff_buf.token[eff_buf.token_len] = '\0';
+		if (!eff_buf.token)
+			goto already_done;
+
+		eff_buf.token[eff_buf.token_len] = '\0';
 
 		if (!wsi->protocol->callback)
 			goto already_done;
@@ -498,6 +529,21 @@ utf8_fail:			lwsl_info("utf8 error\n");
 		if (callback_action == LWS_CALLBACK_CLIENT_RECEIVE_PONG)
 			lwsl_info("Client doing pong callback\n");
 
+		if (n && eff_buf.token_len) {
+			/* extension had more... main loop will come back
+			 * we want callback to be done with this set, if so,
+			 * because lws_is_final() hides it was final until the
+			 * last chunk
+			 */
+			wsi->u.ws.rx_draining_ext = 1;
+			wsi->u.ws.rx_draining_ext_list = wsi->context->rx_draining_ext_list;
+			wsi->context->rx_draining_ext_list = wsi;
+			lwsl_ext("%s: RX EXT DRAINING: Adding to list\n", __func__);
+		}
+		if (wsi->state == LWSS_RETURNED_CLOSE_ALREADY ||
+		    wsi->state == LWSS_AWAITING_CLOSE_ACK)
+			goto already_done;
+
 		m = wsi->protocol->callback(wsi,
 			(enum lws_callback_reasons)callback_action,
 			wsi->user_space, eff_buf.token, eff_buf.token_len);
@@ -507,7 +553,7 @@ utf8_fail:			lwsl_info("utf8 error\n");
 			return 1;
 
 already_done:
-		wsi->u.ws.rx_user_buffer_head = 0;
+		wsi->u.ws.rx_ubuf_head = 0;
 		break;
 	default:
 		lwsl_err("client rx illegal state\n");
diff --git a/lib/client.c b/lib/client.c
index dbb2c3672b00350973be93119cfbd0d114d9969a..35d3f7a91e431b1d5a0233b8a5aaa4365410aaa2 100644
--- a/lib/client.c
+++ b/lib/client.c
@@ -23,9 +23,7 @@
 
 int lws_handshake_client(struct lws *wsi, unsigned char **buf, size_t len)
 {
-	unsigned int n;
-
-	lwsl_debug("%s: len %u\n", __func__, len);
+	int m;
 
 	switch (wsi->mode) {
 	case LWSCM_WSCL_WAITING_PROXY_REPLY:
@@ -33,24 +31,30 @@ int lws_handshake_client(struct lws *wsi, unsigned char **buf, size_t len)
 	case LWSCM_WSCL_WAITING_SERVER_REPLY:
 	case LWSCM_WSCL_WAITING_EXTENSION_CONNECT:
 	case LWSCM_WS_CLIENT:
-		for (n = 0; n < len; n++) {
+		while (len) {
 			/*
 			 * we were accepting input but now we stopped doing so
 			 */
 			if (!(wsi->rxflow_change_to & LWS_RXFLOW_ALLOW)) {
-				lwsl_debug("%s: caching %d\n", __func__, len - n);
-				lws_rxflow_cache(wsi, *buf, 0, len - n);
+				lwsl_debug("%s: caching %d\n", __func__, len);
+				lws_rxflow_cache(wsi, *buf, 0, len);
 				return 0;
 			}
-
+			if (wsi->u.ws.rx_draining_ext) { /* ext has pending rx: consume no input */
+				m = lws_client_rx_sm(wsi, 0); /* client parser owns the drain entry */
+				if (m < 0)
+					return -1;
+				continue;
+			}
 			/* account for what we're using in rxflow buffer */
 			if (wsi->rxflow_buffer)
 				wsi->rxflow_pos++;
 
 			if (lws_client_rx_sm(wsi, *(*buf)++)) {
-				lwsl_debug("client_rx_sm failed\n");
-				return 1;
+				lwsl_debug("client_rx_sm exited\n");
+				return -1;
 			}
+			len--;
 		}
 		lwsl_debug("%s: finished with %d\n", __func__, len);
 		return 0;
@@ -58,8 +62,6 @@ int lws_handshake_client(struct lws *wsi, unsigned char **buf, size_t len)
 		break;
 	}
 
-	lwsl_debug("%s: did nothing\n", __func__);
-
 	return 0;
 }
 
@@ -79,7 +81,7 @@ int lws_client_socket_service(struct lws_context *context,
 		 * timeout protection set in client-handshake.c
 		 */
 
-               if (lws_client_connect_2(wsi) == NULL) {
+		if (!lws_client_connect_2(wsi)) {
 			/* closed */
 			lwsl_client("closed\n");
 			return -1;
@@ -512,8 +514,9 @@ lws_client_interpret_server_handshake(struct lws *wsi)
 	char *p;
 #ifndef LWS_NO_EXTENSIONS
 	const struct lws_extension *ext;
+	const struct lws_ext_options *opts;
 	char ext_name[128];
-	const char *c;
+	const char *c, *a;
 	char ignore;
 	int more = 1;
 	void *v;
@@ -650,15 +653,20 @@ check_extensions:
 	c = (char *)context->serv_buf;
 	n = 0;
 	ignore = 0;
+	a = NULL;
 	while (more) {
 
-		if (*c && (*c != ',' && *c != ' ' && *c != '\t')) {
-			if (*c == ';')
+		if (*c && (*c != ',' && *c != '\t')) {
+			if (*c == ';') {
 				ignore = 1;
-			if (ignore) {
+				if (!a)
+					a = c + 1;
+			}
+			if (ignore || *c == ' ') {
 				c++;
 				continue;
 			}
+
 			ext_name[n] = *c++;
 			if (n < sizeof(ext_name) - 1)
 				n++;
@@ -676,7 +684,7 @@ check_extensions:
 
 		/* check we actually support it */
 
-		lwsl_ext("checking client ext %s\n", ext_name);
+		lwsl_notice("checking client ext %s\n", ext_name);
 
 		n = 0;
 		ext = lws_get_context(wsi)->extensions;
@@ -687,30 +695,56 @@ check_extensions:
 			}
 
 			n = 1;
-			lwsl_ext("instantiating client ext %s\n", ext_name);
+			lwsl_notice("instantiating client ext %s\n", ext_name);
 
 			/* instantiate the extension on this conn */
 
-			wsi->active_extensions_user[
-				wsi->count_active_extensions] =
-					 lws_zalloc(ext->per_session_data_size);
-			if (wsi->active_extensions_user[
-				wsi->count_active_extensions] == NULL) {
-				lwsl_err("Out of mem\n");
+			wsi->active_extensions[wsi->count_act_ext] = ext;
+
+			/* allow him to construct his ext instance */
+
+			ext->callback(lws_get_context(wsi), ext, wsi,
+				      LWS_EXT_CB_CLIENT_CONSTRUCT,
+				      (void *)&wsi->act_ext_user[wsi->count_act_ext],
+				      (void *)&opts, 0);
+
+			/*
+			 * allow the user code to override ext defaults if it
+			 * wants to
+			 */
+			ext_name[0] = '\0';
+			if (user_callback_handle_rxflow(wsi->protocol->callback,
+					wsi, LWS_CALLBACK_WS_EXT_DEFAULTS,
+					(char *)ext->name, ext_name,
+					sizeof(ext_name)))
+				goto bail2;
+
+			if (ext_name[0] &&
+			    lws_ext_parse_options(ext, wsi, wsi->act_ext_user[
+						  wsi->count_act_ext], opts, ext_name,
+						  strlen(ext_name))) {
+				lwsl_err("%s: unable to parse user defaults '%s'",
+					 __func__, ext_name);
 				goto bail2;
 			}
-			wsi->active_extensions[
-				  wsi->count_active_extensions] = ext;
 
-			/* allow him to construct his context */
+			/*
+			 * give the extension the server options
+			 */
+			if (a && lws_ext_parse_options(ext, wsi, wsi->act_ext_user[wsi->count_act_ext], opts, a, c - a)) {
+				lwsl_err("%s: unable to parse remote defaults '%s'", __func__, a);
+				goto bail2;
+			}
 
-			ext->callback(lws_get_context(wsi), ext, wsi,
-				      LWS_EXT_CALLBACK_CLIENT_CONSTRUCT,
-				      wsi->active_extensions_user[
-					 wsi->count_active_extensions],
-				      NULL, 0);
+			if (ext->callback(lws_get_context(wsi), ext, wsi,
+					LWS_EXT_CB_OPTION_CONFIRM,
+				      wsi->act_ext_user[wsi->count_act_ext],
+				      NULL, 0)) { /* nonzero: the extension rejects the options */
+				lwsl_err("%s: ext %s rejects server options %s", __func__, ext->name, a ? a : "");
+				goto bail2;
+			}
 
-			wsi->count_active_extensions++;
+			wsi->count_act_ext++;
 
 			ext++;
 		}
@@ -720,6 +754,7 @@ check_extensions:
 			goto bail2;
 		}
 
+		a = NULL;
 		n = 0;
 	}
 
@@ -772,9 +807,9 @@ check_accept:
 	n = wsi->protocol->rx_buffer_size;
 	if (!n)
 		n = LWS_MAX_SOCKET_IO_BUF;
-	n += LWS_SEND_BUFFER_PRE_PADDING;
-	wsi->u.ws.rx_user_buffer = lws_malloc(n);
-	if (!wsi->u.ws.rx_user_buffer) {
+	n += LWS_PRE;
+	wsi->u.ws.rx_ubuf = lws_malloc(n + 4 /* 0x0000ffff zlib */);
+	if (!wsi->u.ws.rx_ubuf) {
 		lwsl_err("Out of Mem allocating rx buffer %d\n", n);
 		goto bail2;
 	}
@@ -803,12 +838,12 @@ check_accept:
 
 	while (ext && ext->callback) {
 		v = NULL;
-		for (n = 0; n < wsi->count_active_extensions; n++)
+		for (n = 0; n < wsi->count_act_ext; n++)
 			if (wsi->active_extensions[n] == ext)
-				v = wsi->active_extensions_user[n];
+				v = wsi->act_ext_user[n];
 
 		ext->callback(context, ext, wsi,
-			  LWS_EXT_CALLBACK_ANY_WSI_ESTABLISHED, v, NULL, 0);
+			  LWS_EXT_CB_ANY_WSI_ESTABLISHED, v, NULL, 0);
 		ext++;
 	}
 #endif
@@ -816,7 +851,7 @@ check_accept:
 	return 0;
 
 bail3:
-	lws_free_set_NULL(wsi->u.ws.rx_user_buffer);
+	lws_free_set_NULL(wsi->u.ws.rx_ubuf);
 	close_reason = LWS_CLOSE_STATUS_NOSTATUS;
 
 bail2:
@@ -923,7 +958,7 @@ lws_generate_client_handshake(struct lws *wsi, char *pkt)
 	while (ext && ext->callback) {
 
 		n = lws_ext_cb_all_exts(context, wsi,
-			   LWS_EXT_CALLBACK_CHECK_OK_TO_PROPOSE_EXTENSION,
+			   LWS_EXT_CB_CHECK_OK_TO_PROPOSE_EXTENSION,
 			   (char *)ext->name, 0);
 		if (n) { /* an extension vetos us */
 			lwsl_ext("ext %s vetoed\n", (char *)ext->name);
@@ -951,7 +986,7 @@ lws_generate_client_handshake(struct lws *wsi, char *pkt)
 
 		if (ext_count)
 			*p++ = ',';
-		p += sprintf(p, "%s", ext->name);
+		p += sprintf(p, "%s", ext->client_offer);
 		ext_count++;
 
 		ext++;
diff --git a/lib/context.c b/lib/context.c
index 436216a7dd949ffc3dbb9cd4e59017e62c3a7720..113901b3f101f659de38eceb6b2a43ac40d6af0b 100644
--- a/lib/context.c
+++ b/lib/context.c
@@ -262,11 +262,11 @@ lws_create_context(struct lws_context_creation_info *info)
 	 */
 	if (info->port != CONTEXT_PORT_NO_LISTEN) {
 		if (lws_ext_cb_all_exts(context, NULL,
-			LWS_EXT_CALLBACK_SERVER_CONTEXT_CONSTRUCT, NULL, 0) < 0)
+			LWS_EXT_CB_SERVER_CONTEXT_CONSTRUCT, NULL, 0) < 0)
 			goto bail;
 	} else
 		if (lws_ext_cb_all_exts(context, NULL,
-			LWS_EXT_CALLBACK_CLIENT_CONTEXT_CONSTRUCT, NULL, 0) < 0)
+			LWS_EXT_CB_CLIENT_CONTEXT_CONSTRUCT, NULL, 0) < 0)
 			goto bail;
 
 	return context;
@@ -319,10 +319,10 @@ lws_context_destroy(struct lws_context *context)
 	 */
 
 	n = lws_ext_cb_all_exts(context, NULL,
-			LWS_EXT_CALLBACK_SERVER_CONTEXT_DESTRUCT, NULL, 0);
+			LWS_EXT_CB_SERVER_CONTEXT_DESTRUCT, NULL, 0);
 
 	n = lws_ext_cb_all_exts(context, NULL,
-			LWS_EXT_CALLBACK_CLIENT_CONTEXT_DESTRUCT, NULL, 0);
+			LWS_EXT_CB_CLIENT_CONTEXT_DESTRUCT, NULL, 0);
 
 	/*
 	 * inform all the protocols that they are done and will have no more
diff --git a/lib/extension-deflate-frame.c b/lib/extension-deflate-frame.c
deleted file mode 100644
index eebc053be12a3345713d18557da9b589ba8b9d8f..0000000000000000000000000000000000000000
--- a/lib/extension-deflate-frame.c
+++ /dev/null
@@ -1,284 +0,0 @@
-#include "private-libwebsockets.h"
-#include "extension-deflate-frame.h"
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#define LWS_ZLIB_WINDOW_BITS 15
-#define LWS_ZLIB_MEMLEVEL 8
-
-int lws_extension_callback_deflate_frame(
-		struct lws_context *context,
-		const struct lws_extension *ext,
-		struct lws *wsi,
-		enum lws_extension_callback_reasons reason,
-		void *user, void *in, size_t len)
-{
-	struct lws_ext_deflate_frame_conn *conn =
-				     (struct lws_ext_deflate_frame_conn *)user;
-	struct lws_tokens *eff_buf = (struct lws_tokens *)in;
-	size_t current_payload, remaining_payload, total_payload;
-	int n;
-	size_t len_so_far;
-
-	switch (reason) {
-
-	/*
-	 * for deflate-frame, both client and server sides act the same
-	 */
-
-	case LWS_EXT_CALLBACK_CLIENT_CONSTRUCT:
-	case LWS_EXT_CALLBACK_CONSTRUCT:
-		conn->zs_in.zalloc = conn->zs_out.zalloc = Z_NULL;
-		conn->zs_in.zfree = conn->zs_out.zfree = Z_NULL;
-		conn->zs_in.opaque = conn->zs_out.opaque = Z_NULL;
-		n = inflateInit2(&conn->zs_in, -LWS_ZLIB_WINDOW_BITS);
-		if (n != Z_OK) {
-			lwsl_ext("deflateInit returned %d\n", n);
-			return 1;
-		}
-		n = deflateInit2(&conn->zs_out,
-				 (context->listen_port ?
-					DEFLATE_FRAME_COMPRESSION_LEVEL_SERVER :
-					DEFLATE_FRAME_COMPRESSION_LEVEL_CLIENT),
-				 Z_DEFLATED,
-				 -LWS_ZLIB_WINDOW_BITS, LWS_ZLIB_MEMLEVEL,
-				 Z_DEFAULT_STRATEGY);
-		if (n != Z_OK) {
-			lwsl_ext("deflateInit2 returned %d\n", n);
-			return 1;
-		}
-		conn->buf_pre_used = 0;
-		conn->buf_pre_length = 0;
-		conn->buf_in_length = sizeof(conn->buf_in);
-		conn->buf_out_length = sizeof(conn->buf_out);
-		conn->compressed_out = 0;
-		conn->buf_pre = NULL;
-		conn->buf_in = lws_malloc(LWS_SEND_BUFFER_PRE_PADDING +
-					  conn->buf_in_length);
-		if (!conn->buf_in)
-			goto bail;
-		conn->buf_out = lws_malloc(LWS_SEND_BUFFER_PRE_PADDING +
-					   conn->buf_out_length);
-		if (!conn->buf_out)
-			goto bail;
-		lwsl_ext("zlibs constructed\n");
-		break;
-bail:
-		lwsl_err("Out of mem\n");
-		(void)inflateEnd(&conn->zs_in);
-		(void)deflateEnd(&conn->zs_out);
-		return -1;
-
-	case LWS_EXT_CALLBACK_DESTROY:
-		lws_free(conn->buf_pre);
-		lws_free(conn->buf_in);
-		lws_free(conn->buf_out);
-		conn->buf_pre_used = 0;
-		conn->buf_pre_length = 0;
-		conn->buf_in_length = 0;
-		conn->buf_out_length = 0;
-		conn->compressed_out = 0;
-		(void)inflateEnd(&conn->zs_in);
-		(void)deflateEnd(&conn->zs_out);
-		lwsl_ext("zlibs destructed\n");
-		break;
-
-	case LWS_EXT_CALLBACK_PAYLOAD_RX:
-		if (!(wsi->u.ws.rsv & 0x40))
-			return 0;
-
-		/*
-		 * inflate the incoming payload
-		 */
-		current_payload = eff_buf->token_len;
-
-		remaining_payload = wsi->u.ws.rx_packet_length;
-		if (remaining_payload) {
-			total_payload = conn->buf_pre_used +
-					current_payload +
-					remaining_payload;
-
-			if (conn->buf_pre_length < total_payload) {
-				conn->buf_pre_length = total_payload;
-				lws_free(conn->buf_pre);
-				conn->buf_pre = lws_malloc(total_payload + 4);
-				if (!conn->buf_pre) {
-					lwsl_err("Out of memory\n");
-					return -1;
-				}
-			}
-
-			memcpy(conn->buf_pre + conn->buf_pre_used,
-					      eff_buf->token, current_payload);
-			conn->buf_pre_used += current_payload;
-
-			eff_buf->token = NULL;
-			eff_buf->token_len = 0;
-
-			return 0;
-		}
-		if (conn->buf_pre_used) {
-			total_payload = conn->buf_pre_used +
-					current_payload;
-
-			memcpy(conn->buf_pre + conn->buf_pre_used,
-					      eff_buf->token, current_payload);
-			conn->buf_pre_used = 0;
-
-			conn->zs_in.next_in = conn->buf_pre;
-		} else {
-			total_payload = current_payload;
-
-			conn->zs_in.next_in = (unsigned char *)eff_buf->token;
-		}
-
-		conn->zs_in.next_in[total_payload + 0] = 0;
-		conn->zs_in.next_in[total_payload + 1] = 0;
-		conn->zs_in.next_in[total_payload + 2] = 0xff;
-		conn->zs_in.next_in[total_payload + 3] = 0xff;
-
-		conn->zs_in.avail_in = total_payload + 4;
-
-		conn->zs_in.next_out =
-				conn->buf_in + LWS_SEND_BUFFER_PRE_PADDING;
-		conn->zs_in.avail_out = conn->buf_in_length;
-
-		while (1) {
-			n = inflate(&conn->zs_in, Z_SYNC_FLUSH);
-			switch (n) {
-			case Z_NEED_DICT:
-			case Z_STREAM_ERROR:
-			case Z_DATA_ERROR:
-			case Z_MEM_ERROR:
-				/*
-				 * screwed.. close the connection...
-				 * we will get a destroy callback to take care
-				 * of closing nicely
-				 */
-				lwsl_info("zlib error inflate %d: %s\n",
-							   n, conn->zs_in.msg);
-				return -1;
-			}
-
-			if (conn->zs_in.avail_out)
-				break;
-
-			len_so_far = conn->zs_in.next_out -
-				(conn->buf_in + LWS_SEND_BUFFER_PRE_PADDING);
-
-			conn->buf_in_length *= 2;
-			if (conn->buf_in_length > LWS_MAX_ZLIB_CONN_BUFFER) {
-				lwsl_ext("zlib in buffer hit limit %u\n",
-						LWS_MAX_ZLIB_CONN_BUFFER);
-				return -1;
-			}
-			conn->buf_in = lws_realloc(conn->buf_in,
-						   LWS_SEND_BUFFER_PRE_PADDING +
-						   conn->buf_in_length);
-			if (!conn->buf_in) {
-				lwsl_err("Out of memory\n");
-				return -1;
-			}
-			lwsl_debug(
-				"deflate-frame ext RX did realloc to %ld\n",
-					conn->buf_in_length);
-			conn->zs_in.next_out = conn->buf_in +
-				LWS_SEND_BUFFER_PRE_PADDING + len_so_far;
-			conn->zs_in.avail_out =
-					conn->buf_in_length - len_so_far;
-		}
-
-		/* rewrite the buffer pointers and length */
-		eff_buf->token =
-			(char *)(conn->buf_in + LWS_SEND_BUFFER_PRE_PADDING);
-		eff_buf->token_len = (int)(conn->zs_in.next_out -
-				 (conn->buf_in + LWS_SEND_BUFFER_PRE_PADDING));
-
-		return 0;
-
-	case LWS_EXT_CALLBACK_PAYLOAD_TX:
-		/*
-		 * deflate the outgoing payload
-		 */
-		current_payload = eff_buf->token_len;
-
-		conn->zs_out.next_in = (unsigned char *)eff_buf->token;
-		conn->zs_out.avail_in = current_payload;
-
-		conn->zs_out.next_out =
-				conn->buf_out + LWS_SEND_BUFFER_PRE_PADDING;
-		conn->zs_out.avail_out = conn->buf_out_length;
-
-		while (1) {
-			n = deflate(&conn->zs_out, Z_SYNC_FLUSH);
-			if (n == Z_STREAM_ERROR) {
-				/*
-				 * screwed.. close the connection... we will
-				 * get a destroy callback to take care of
-				 * closing nicely
-				 */
-				lwsl_ext("zlib error deflate\n");
-
-				return -1;
-			}
-
-			if (conn->zs_out.avail_out)
-				break;
-
-			len_so_far = (conn->zs_out.next_out -
-					(conn->buf_out +
-						 LWS_SEND_BUFFER_PRE_PADDING));
-			conn->buf_out_length *= 2;
-			if (conn->buf_out_length > LWS_MAX_ZLIB_CONN_BUFFER) {
-				lwsl_ext("zlib out hit limit %u\n",
-						LWS_MAX_ZLIB_CONN_BUFFER);
-				return -1;
-			}
-			conn->buf_out = lws_realloc(conn->buf_out,
-						    LWS_SEND_BUFFER_PRE_PADDING +
-						    conn->buf_out_length);
-			if (!conn->buf_out) {
-				lwsl_err("Out of memory\n");
-				return -1;
-			}
-			lwsl_debug(
-				"deflate-frame ext TX did realloc to %ld\n",
-					conn->buf_out_length);
-
-			conn->zs_out.next_out = (conn->buf_out +
-				     LWS_SEND_BUFFER_PRE_PADDING + len_so_far);
-			conn->zs_out.avail_out =
-					   (conn->buf_out_length - len_so_far);
-		}
-
-		conn->compressed_out = 1;
-
-		/* rewrite the buffer pointers and length */
-		eff_buf->token = (char *)(conn->buf_out +
-						LWS_SEND_BUFFER_PRE_PADDING);
-		eff_buf->token_len = (int)(conn->zs_out.next_out -
-			    (conn->buf_out + LWS_SEND_BUFFER_PRE_PADDING)) - 4;
-
-		return 0;
-
-	case LWS_EXT_CALLBACK_PACKET_TX_PRESEND:
-		if (conn->compressed_out) {
-			conn->compressed_out = 0;
-			*((unsigned char *)eff_buf->token) |= 0x40;
-		}
-		break;
-
-	case LWS_EXT_CALLBACK_CHECK_OK_TO_PROPOSE_EXTENSION:
-		/* Avoid x-webkit-deflate-frame extension on client */
-		if (!strcmp((char *)in, "x-webkit-deflate-frame"))
-			return 1;
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
diff --git a/lib/extension-deflate-frame.h b/lib/extension-deflate-frame.h
deleted file mode 100644
index 5e644283462e0b2cbe392ae351209d878a3eeded..0000000000000000000000000000000000000000
--- a/lib/extension-deflate-frame.h
+++ /dev/null
@@ -1,23 +0,0 @@
-
-#include <zlib.h>
-
-#define DEFLATE_FRAME_COMPRESSION_LEVEL_SERVER 1
-#define DEFLATE_FRAME_COMPRESSION_LEVEL_CLIENT Z_DEFAULT_COMPRESSION
-
-struct lws_ext_deflate_frame_conn {
-	z_stream zs_in;
-	z_stream zs_out;
-	size_t buf_pre_used;
-	size_t buf_pre_length;
-	size_t buf_in_length;
-	size_t buf_out_length;
-	int compressed_out;
-	unsigned char *buf_pre;
-	unsigned char *buf_in;
-	unsigned char *buf_out;
-};
-
-extern int lws_extension_callback_deflate_frame(
-	struct lws_context *context, const struct lws_extension *ext,
-	struct lws *wsi, enum lws_extension_callback_reasons reason,
-	void *user, void *in, size_t len);
diff --git a/lib/extension-deflate-stream.c b/lib/extension-deflate-stream.c
deleted file mode 100644
index d1b4a7eb1e335bbead8aea0a48eaf64cdcfed913..0000000000000000000000000000000000000000
--- a/lib/extension-deflate-stream.c
+++ /dev/null
@@ -1,166 +0,0 @@
-#include "private-libwebsockets.h"
-#include "extension-deflate-stream.h"
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#define LWS_ZLIB_WINDOW_BITS 15
-#define LWS_ZLIB_MEMLEVEL 8
-
-int
-lws_extension_callback_deflate_stream(struct lws_context *context,
-				      const struct lws_extension *ext,
-				      struct lws *wsi,
-				      enum lws_extension_callback_reasons reason,
-				      void *user, void *in, size_t len)
-{
-	struct lws_ext_deflate_stream_conn *conn =
-				     (struct lws_ext_deflate_stream_conn *)user;
-	int n;
-	struct lws_tokens *eff_buf = (struct lws_tokens *)in;
-
-	switch (reason) {
-
-	/*
-	 * for deflate-stream, both client and server sides act the same
-	 */
-
-	case LWS_EXT_CALLBACK_CLIENT_CONSTRUCT:
-	case LWS_EXT_CALLBACK_CONSTRUCT:
-		conn->zs_in.zalloc = conn->zs_out.zalloc = Z_NULL;
-		conn->zs_in.zfree = conn->zs_out.zfree = Z_NULL;
-		conn->zs_in.opaque = conn->zs_out.opaque = Z_NULL;
-		n = inflateInit2(&conn->zs_in, -LWS_ZLIB_WINDOW_BITS);
-		if (n != Z_OK) {
-			lwsl_err("deflateInit returned %d\n", n);
-			return 1;
-		}
-		n = deflateInit2(&conn->zs_out,
-				 DEFLATE_STREAM_COMPRESSION_LEVEL, Z_DEFLATED,
-				 -LWS_ZLIB_WINDOW_BITS, LWS_ZLIB_MEMLEVEL,
-				 Z_DEFAULT_STRATEGY);
-		if (n != Z_OK) {
-			lwsl_err("deflateInit returned %d\n", n);
-			return 1;
-		}
-		lwsl_ext("zlibs constructed\n");
-		conn->remaining_in = 0;
-		break;
-
-	case LWS_EXT_CALLBACK_DESTROY:
-		(void)inflateEnd(&conn->zs_in);
-		(void)deflateEnd(&conn->zs_out);
-		lwsl_ext("zlibs destructed\n");
-		break;
-
-	case LWS_EXT_CALLBACK_PACKET_RX_PREPARSE:
-
-		/*
-		 * inflate the incoming compressed data
-		 * Notice, length may be 0 and pointer NULL
-		 * in the case we are flushing with nothing new coming in
-		 */
-		if (conn->remaining_in) {
-			conn->zs_in.next_in = conn->buf_in;
-			conn->zs_in.avail_in = conn->remaining_in;
-			conn->remaining_in = 0;
-		} else {
-			conn->zs_in.next_in = (unsigned char *)eff_buf->token;
-			conn->zs_in.avail_in = eff_buf->token_len;
-		}
-
-		conn->zs_in.next_out = conn->buf_out;
-		conn->zs_in.avail_out = sizeof(conn->buf_out);
-
-		n = inflate(&conn->zs_in, Z_SYNC_FLUSH);
-		switch (n) {
-		case Z_NEED_DICT:
-		case Z_DATA_ERROR:
-		case Z_MEM_ERROR:
-			/*
-			 * screwed.. close the connection... we will get a
-			 * destroy callback to take care of closing nicely
-			 */
-			lwsl_err("zlib error inflate %d\n", n);
-			return -1;
-		}
-
-		/* rewrite the buffer pointers and length */
-
-		eff_buf->token = (char *)conn->buf_out;
-		eff_buf->token_len =
-			sizeof(conn->buf_out) - conn->zs_in.avail_out;
-
-		/* copy avail data if not consumed */
-		if (conn->zs_in.avail_in > 0) {
-			conn->remaining_in = conn->zs_in.avail_in;
-			memcpy(conn->buf_in, conn->zs_in.next_in,
-							conn->zs_in.avail_in);
-			return 1;
-		}
-
-		/*
-		 * if we filled the output buffer, signal that we likely have
-		 * more and need to be called again
-		 */
-
-		if (eff_buf->token_len == sizeof(conn->buf_out))
-			return 1;
-
-		/* we don't need calling again until new input data comes */
-
-		return 0;
-
-	case LWS_EXT_CALLBACK_FLUSH_PENDING_TX:
-	case LWS_EXT_CALLBACK_PACKET_TX_PRESEND:
-
-		/*
-		 * deflate the outgoing compressed data
-		 */
-
-		conn->zs_out.next_in = (unsigned char *)eff_buf->token;
-		conn->zs_out.avail_in = eff_buf->token_len;
-
-		conn->zs_out.next_out = conn->buf_out;
-		conn->zs_out.avail_out = sizeof(conn->buf_out);
-
-		n = Z_PARTIAL_FLUSH;
-		if (reason == LWS_EXT_CALLBACK_FLUSH_PENDING_TX)
-			n = Z_FULL_FLUSH;
-
-		n = deflate(&conn->zs_out, n);
-		if (n == Z_STREAM_ERROR) {
-			/*
-			 * screwed.. close the connection... we will get a
-			 * destroy callback to take care of closing nicely
-			 */
-			lwsl_ext("zlib error deflate\n");
-
-			return -1;
-		}
-
-		/* rewrite the buffer pointers and length */
-
-		eff_buf->token = (char *)conn->buf_out;
-		eff_buf->token_len =
-				sizeof(conn->buf_out) - conn->zs_out.avail_out;
-
-		/*
-		 * if we filled the output buffer, signal that we likely have
-		 * more and need to be called again... even in deflate case
-		 * we might sometimes need to spill more than came in
-		 */
-
-		if (eff_buf->token_len == sizeof(conn->buf_out))
-			return 1;
-
-		/* we don't need calling again until new input data comes */
-
-		return 0;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
diff --git a/lib/extension-deflate-stream.h b/lib/extension-deflate-stream.h
deleted file mode 100644
index bbb844d57df43f19ba89a17e3b37973fa874caaf..0000000000000000000000000000000000000000
--- a/lib/extension-deflate-stream.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-#include <zlib.h>
-
-#define DEFLATE_STREAM_CHUNK 128
-#define DEFLATE_STREAM_COMPRESSION_LEVEL 1
-
-struct lws_ext_deflate_stream_conn {
-	z_stream zs_in;
-	z_stream zs_out;
-	int remaining_in;
-	unsigned char buf_in[LWS_MAX_SOCKET_IO_BUF];
-	unsigned char buf_out[LWS_MAX_SOCKET_IO_BUF];
-};
-
-extern int lws_extension_callback_deflate_stream(
-	struct lws_context *context, const struct lws_extension *ext,
-	struct lws *wsi, enum lws_extension_callback_reasons reason,
-	void *user, void *in, size_t len);
diff --git a/lib/extension-permessage-deflate.c b/lib/extension-permessage-deflate.c
new file mode 100644
index 0000000000000000000000000000000000000000..f63ddd23e9b5ec3e5f44303e6bc3b84554d8287a
--- /dev/null
+++ b/lib/extension-permessage-deflate.c
@@ -0,0 +1,411 @@
+/*
+ * ./lib/extension-permessage-deflate.c
+ *
+ *  Copyright (C) 2016 Andy Green <andy@warmcat.com>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation:
+ *  version 2.1 of the License.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA  02110-1301  USA
+ */
+
+#include "private-libwebsockets.h"
+#include "extension-permessage-deflate.h"
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#define LWS_ZLIB_MEMLEVEL 8
+
+const struct lws_ext_options lws_ext_pm_deflate_options[] = {
+	/* public RFC7692 settings */
+	{ "server_no_context_takeover", EXTARG_NONE },
+	{ "client_no_context_takeover", EXTARG_NONE },
+	{ "server_max_window_bits",	EXTARG_OPT_DEC },
+	{ "client_max_window_bits",	EXTARG_OPT_DEC },
+	/* ones only user code can set */
+	{ "rx_buf_size",		EXTARG_DEC },
+	{ "tx_buf_size",		EXTARG_DEC },
+	{ "compression_level",		EXTARG_DEC },
+	{ "mem_level",			EXTARG_DEC },
+	{ NULL, 0 }, /* sentinel */
+};
+
+int
+lws_extension_callback_pm_deflate(struct lws_context *context,
+				  const struct lws_extension *ext,
+				  struct lws *wsi,
+				  enum lws_extension_callback_reasons reason,
+				  void *user, void *in, size_t len)
+{
+	struct lws_ext_pm_deflate_priv *priv =
+				     (struct lws_ext_pm_deflate_priv *)user;
+	struct lws_tokens *eff_buf = (struct lws_tokens *)in;
+	static unsigned char trail[] = { 0, 0, 0xff, 0xff };
+	int n, ret = 0, was_fin = 0, extra;
+	struct lws_ext_option_arg *oa;
+
+	switch (reason) {
+	case LWS_EXT_CB_OPTION_SET:
+		oa = in;
+		lwsl_info("%s: option set: idx %d, %s, len %d\n", __func__, oa->option_index, oa->start ? oa->start : "(null)", oa->len);
+		if (oa->start)
+			priv->args[oa->option_index] = atoi(oa->start);
+		else
+			priv->args[oa->option_index] = 1;
+		break;
+	case LWS_EXT_CB_OPTION_CONFIRM:
+		if (priv->args[PMD_SERVER_MAX_WINDOW_BITS] < 8 ||
+		    priv->args[PMD_SERVER_MAX_WINDOW_BITS] > 15 ||
+		    priv->args[PMD_CLIENT_MAX_WINDOW_BITS] < 8 ||
+		    priv->args[PMD_CLIENT_MAX_WINDOW_BITS] > 15)
+			return -1;
+		break;
+
+	case LWS_EXT_CB_CLIENT_CONSTRUCT:
+	case LWS_EXT_CB_CONSTRUCT:
+		/* allocate the private state and hand it back through **user */
+		priv = lws_zalloc(sizeof(*priv));
+		*((void **)user) = priv;
+		lwsl_ext("%s: LWS_EXT_CB_*CONSTRUCT\n", __func__);
+		if (!priv) /* OOM: do not touch priv here or in the defaults below */
+			return -1;
+		memset(priv, 0, sizeof(*priv));
+		/* fill in pointer to options list */
+		if (in)
+			*((const struct lws_ext_options **)in) = lws_ext_pm_deflate_options;
+		/* fallthru */
+
+	case LWS_EXT_CB_OPTION_DEFAULT:
+
+		/* set the public, RFC7692 defaults... */
+
+		priv->args[PMD_SERVER_NO_CONTEXT_TAKEOVER] = 0;
+		priv->args[PMD_CLIENT_NO_CONTEXT_TAKEOVER] = 0;
+		priv->args[PMD_SERVER_MAX_WINDOW_BITS] = 15;
+		priv->args[PMD_CLIENT_MAX_WINDOW_BITS] = 15;
+
+		/* ...and the ones the user code can override */
+
+		priv->args[PMD_RX_BUF_PWR2] = 10; /* ie, 1024 */
+		priv->args[PMD_TX_BUF_PWR2] = 10; /* ie, 1024 */
+		priv->args[PMD_COMP_LEVEL] = 1;
+		priv->args[PMD_MEM_LEVEL] = 8;
+		break;
+
+	case LWS_EXT_CB_DESTROY:
+		lwsl_ext("%s: LWS_EXT_CB_DESTROY\n", __func__);
+		lws_free(priv->buf_rx_inflated);
+		lws_free(priv->buf_tx_deflated);
+		if (priv->rx_init)
+			(void)inflateEnd(&priv->rx);
+		if (priv->tx_init)
+			(void)deflateEnd(&priv->tx);
+		lws_free(priv);
+		return ret;
+
+	case LWS_EXT_CB_PAYLOAD_RX:
+		lwsl_ext(" %s: LWS_EXT_CB_PAYLOAD_RX: in %d, existing in %d\n",
+			 __func__, eff_buf->token_len, priv->rx.avail_in);
+		if (!(wsi->u.ws.rsv_first_msg & 0x40))
+			return 0;
+
+#if 0
+		for (n = 0; n < eff_buf->token_len; n++) {
+			printf("%02X ", (unsigned char)eff_buf->token[n]);
+			if ((n & 15) == 15)
+				printf("\n");
+		}
+		printf("\n");
+#endif
+		if (!priv->rx_init)
+			if (inflateInit2(&priv->rx, -priv->args[PMD_SERVER_MAX_WINDOW_BITS]) != Z_OK) {
+				lwsl_err("%s: inflateInit2 failed\n", __func__);
+				return -1;
+			}
+		priv->rx_init = 1;
+		if (!priv->buf_rx_inflated)
+			priv->buf_rx_inflated = lws_malloc(LWS_PRE + 7 + 5 +
+					    (1 << priv->args[PMD_RX_BUF_PWR2]));
+		if (!priv->buf_rx_inflated) {
+			lwsl_err("%s: OOM\n", __func__);
+			return -1;
+		}
+
+		/*
+		 * We have to leave the input stream alone if we didn't
+		 * finish with it yet.  The input stream is held in the wsi
+		 * rx buffer by the caller, so this assumption is safe while
+		 * we block new rx while draining the existing rx
+		 */
+		if (eff_buf->token && eff_buf->token_len) {
+			priv->rx.next_in = (unsigned char *)eff_buf->token;
+			priv->rx.avail_in = eff_buf->token_len;
+		}
+		priv->rx.next_out = priv->buf_rx_inflated + LWS_PRE;
+		eff_buf->token = (char *)priv->rx.next_out;
+		priv->rx.avail_out = 1 << priv->args[PMD_RX_BUF_PWR2];
+
+		if (priv->rx_held_valid) {
+			lwsl_ext("-- RX piling on held byte --\n");
+			*(priv->rx.next_out++) = priv->rx_held;
+			priv->rx.avail_out--;
+			priv->rx_held_valid = 0;
+		}
+
+		/* if...
+		 *
+		 *  - he has no remaining input content for this message, and
+		 *  - and this is the final fragment, and
+		 *  - we used everything that could be drained on the input side
+		 *
+		 * ...then put back the 00 00 FF FF the sender stripped as our
+		 * input to zlib
+		 */
+		if (!priv->rx.avail_in && wsi->u.ws.final &&
+		    !wsi->u.ws.rx_packet_length) {
+			lwsl_ext("RX APPEND_TRAILER-DO\n");
+			was_fin = 1;
+			priv->rx.next_in = trail;
+			priv->rx.avail_in = sizeof(trail);
+		}
+
+		n = inflate(&priv->rx, Z_NO_FLUSH);
+		lwsl_ext("inflate ret %d, avi %d, avo %d, wsifinal %d\n", n,
+			 priv->rx.avail_in, priv->rx.avail_out, wsi->u.ws.final);
+		switch (n) {
+		case Z_NEED_DICT:
+		case Z_STREAM_ERROR:
+		case Z_DATA_ERROR:
+		case Z_MEM_ERROR:
+			lwsl_info("zlib error inflate %d: %s\n",
+				  n, priv->rx.msg);
+			return -1;
+		}
+		/*
+		 * If we did not already send in the 00 00 FF FF, and he's
+		 * out of input, he did not EXACTLY fill the output buffer
+		 * (which is ambiguous and we will force it to go around
+		 * again by witholding a byte), and he's otherwise working on
+		 * being a FIN fragment, then do the FIN message processing
+		 * of faking up the 00 00 FF FF that the sender stripped.
+		 */
+		if (!priv->rx.avail_in && wsi->u.ws.final &&
+		    !wsi->u.ws.rx_packet_length && !was_fin &&
+		    priv->rx.avail_out /* ambiguous as to if it is the end */
+		) {
+			lwsl_ext("RX APPEND_TRAILER-DO\n");
+			was_fin = 1;
+			priv->rx.next_in = trail;
+			priv->rx.avail_in = sizeof(trail);
+			n = inflate(&priv->rx, Z_SYNC_FLUSH);
+			lwsl_ext("RX trailer inf returned %d, avi %d, avo %d\n", n,
+				 priv->rx.avail_in, priv->rx.avail_out);
+			switch (n) {
+			case Z_NEED_DICT:
+			case Z_STREAM_ERROR:
+			case Z_DATA_ERROR:
+			case Z_MEM_ERROR:
+				lwsl_info("zlib error inflate %d: %s\n",
+					  n, priv->rx.msg);
+				return -1;
+			}
+		}
+		/*
+		 * we must announce in our returncode now if there is more
+		 * output to be expected from inflate, so we can decide to
+		 * set the FIN bit on this bufferload or not.  However zlib
+		 * is ambiguous when we exactly filled the inflate buffer.  It
+		 * does not give us a clue as to whether we should understand
+		 * that to mean he ended on a buffer boundary, or if there is
+		 * more in the pipeline.
+		 *
+		 * So to work around that safely, if it used all output space
+		 * exactly, we ALWAYS say there is more coming and we withold
+		 * the last byte of the buffer to guarantee that is true.
+		 *
+		 * That still leaves us at least one byte to finish with a FIN
+		 * on, even if actually nothing more is coming from the next
+		 * inflate action itself.
+		 */
+		if (!priv->rx.avail_out) { /* he used all available out buf */
+			lwsl_ext("-- rx grabbing held --\n");
+			/* snip the last byte and hold it for next time */
+			priv->rx_held = *(--priv->rx.next_out);
+			priv->rx_held_valid = 1;
+		}
+
+		eff_buf->token_len = (char *)priv->rx.next_out - eff_buf->token;
+		priv->count_rx_between_fin += eff_buf->token_len;
+
+		lwsl_ext("  %s: RX leaving with new effbuff len %d, "
+			 "ret %d, rx.avail_in=%d, TOTAL RX since FIN %d\n",
+			 __func__, eff_buf->token_len, priv->rx_held_valid,
+			 priv->rx.avail_in, priv->count_rx_between_fin);
+
+		if (was_fin) {
+			priv->count_rx_between_fin = 0;
+			if (priv->args[PMD_SERVER_NO_CONTEXT_TAKEOVER]) {
+				(void)inflateEnd(&priv->rx);
+				priv->rx_init = 0;
+			}
+		}
+#if 0
+		for (n = 0; n < eff_buf->token_len; n++)
+			putchar(eff_buf->token[n]);
+		puts("\n");
+#endif
+
+		return priv->rx_held_valid;
+
+	case LWS_EXT_CB_PAYLOAD_TX:
+
+		if (!priv->tx_init)
+			/* args[] index: server's window bits if we listen, else client's */
+			if (deflateInit2(&priv->tx, priv->args[PMD_COMP_LEVEL], Z_DEFLATED,
+					 -priv->args[PMD_SERVER_MAX_WINDOW_BITS +
+						     !wsi->context->listen_port],
+					 priv->args[PMD_MEM_LEVEL],
+					 Z_DEFAULT_STRATEGY) != Z_OK) {
+				lwsl_ext("deflateInit2 failed\n");
+				return 1;
+			}
+		priv->tx_init = 1;
+		if (!priv->buf_tx_deflated)
+			priv->buf_tx_deflated = lws_malloc(LWS_PRE + 7 + 5 +
+					    (1 << priv->args[PMD_TX_BUF_PWR2]));
+		if (!priv->buf_tx_deflated) {
+			lwsl_err("%s: OOM\n", __func__);
+			return -1;
+		}
+
+		if (eff_buf->token) {
+			lwsl_ext("%s: TX: eff_buf length %d\n", __func__,
+				 eff_buf->token_len);
+			priv->tx.next_in = (unsigned char *)eff_buf->token;
+			priv->tx.avail_in = eff_buf->token_len;
+		}
+
+#if 0
+	for (n = 0; n < eff_buf->token_len; n++) {
+		printf("%02X ", (unsigned char)eff_buf->token[n]);
+		if ((n & 15) == 15)
+			printf("\n");
+	}
+	printf("\n");
+#endif
+
+		priv->tx.next_out = priv->buf_tx_deflated + LWS_PRE + 5;
+		eff_buf->token = (char *)priv->tx.next_out;
+		priv->tx.avail_out = 1 << priv->args[PMD_TX_BUF_PWR2];
+
+		n = deflate(&priv->tx, Z_SYNC_FLUSH);
+		if (n == Z_STREAM_ERROR) {
+			lwsl_ext("%s: Z_STREAM_ERROR\n", __func__);
+			return -1;
+		}
+
+		if (priv->tx_held_valid) {
+			priv->tx_held_valid = 0;
+			if (priv->tx.avail_out == 1 << priv->args[PMD_TX_BUF_PWR2])
+				/*
+				 * we can get a situation he took something in
+				 * but did not generate anything out, at the end
+				 * of a message (eg, next thing he sends is 80
+				 * 00, a zero length FIN, like Authobahn can
+				 * send).
+				 * If we have come back as a FIN, we must not
+				 * place the pending trailer 00 00 FF FF, just
+				 * the 1 byte of live data
+				 */
+				*(--eff_buf->token) = priv->tx_held[0];
+			else {
+				/* he generated data, prepend whole pending */
+				eff_buf->token -= 5;
+				for (n = 0; n < 5; n++)
+					eff_buf->token[n] = priv->tx_held[n];
+
+			}
+		}
+		priv->compressed_out = 1;
+		eff_buf->token_len = (int)(priv->tx.next_out -
+					   (unsigned char *)eff_buf->token);
+
+		/*
+		 * we must announce in our returncode now if there is more
+		 * output to be expected from inflate, so we can decide to
+		 * set the FIN bit on this bufferload or not.  However zlib
+		 * is ambiguous when we exactly filled the inflate buffer.  It
+		 * does not give us a clue as to whether we should understand
+		 * that to mean he ended on a buffer boundary, or if there is
+		 * more in the pipeline.
+		 *
+		 * Worse, the guy providing the stuff we are sending may not
+		 * know until after that this was, actually, the last chunk,
+		 * that can happen even if we did not fill the output buf, ie
+		 * he may send after this a zero-length FIN fragment.
+		 *
+		 * This is super difficult because we must snip the last 4
+		 * bytes in the case this is the last compressed output of the
+		 * message.  The only way to deal with it is defer sending the
+		 * last 5 bytes of each frame until the next one, when we will
+		 * be in a position to understand if that has a FIN or not.
+		 */
+
+		extra = !!(len & LWS_WRITE_NO_FIN) || !priv->tx.avail_out;
+
+		if (eff_buf->token_len >= 4 + extra) {
+			lwsl_ext("tx held %d\n", 4 + extra);
+			priv->tx_held_valid = extra;
+			for (n = 3 + extra; n >= 0; n--)
+				priv->tx_held[n] = *(--priv->tx.next_out);
+			eff_buf->token_len -= 4 + extra;
+		}
+		lwsl_ext("  TX rewritten with new effbuff len %d, ret %d\n",
+			 eff_buf->token_len, !priv->tx.avail_out);
+
+		return !priv->tx.avail_out; /* 1 == have more tx pending */
+
+	case LWS_EXT_CB_PACKET_TX_PRESEND:
+		if (!priv->compressed_out)
+			break;
+		priv->compressed_out = 0;
+
+		if ((*(eff_buf->token) & 0x80) && priv->args[PMD_CLIENT_NO_CONTEXT_TAKEOVER]) {
+			(void)deflateEnd(&priv->tx);
+			priv->tx_init = 0;
+		}
+
+		n = *(eff_buf->token) & 15;
+		/* set RSV1, but not on CONTINUATION */
+		if (n == LWSWSOPC_TEXT_FRAME || n == LWSWSOPC_BINARY_FRAME)
+			*eff_buf->token |= 0x40;
+#if 0
+		for (n = 0; n < eff_buf->token_len; n++) {
+			printf("%02X ", (unsigned char)eff_buf->token[n]);
+			if ((n & 15) == 15)
+				puts("\n");
+		}
+		puts("\n");
+#endif
+		lwsl_ext("%s: tx opcode 0x%02X\n", __func__,
+			 (unsigned char)*eff_buf->token);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
diff --git a/lib/extension-permessage-deflate.h b/lib/extension-permessage-deflate.h
new file mode 100644
index 0000000000000000000000000000000000000000..87377368974daa5a0e0287cea57104c63fb770a3
--- /dev/null
+++ b/lib/extension-permessage-deflate.h
@@ -0,0 +1,50 @@
+#ifndef LWS_EXTENSION_PERMESSAGE_DEFLATE_H
+#define LWS_EXTENSION_PERMESSAGE_DEFLATE_H
+
+#include <zlib.h>
+
+#define DEFLATE_FRAME_COMPRESSION_LEVEL_SERVER 1
+#define DEFLATE_FRAME_COMPRESSION_LEVEL_CLIENT Z_DEFAULT_COMPRESSION
+
+/*
+ * Indexes into lws_ext_pm_deflate_priv.args[].  The order has to track the
+ * rows of lws_ext_pm_deflate_options[], because the option parser reports
+ * the matched table row and that number is used directly as the args[] index.
+ */
+enum arg_indexes {
+	PMD_SERVER_NO_CONTEXT_TAKEOVER,
+	PMD_CLIENT_NO_CONTEXT_TAKEOVER,
+	PMD_SERVER_MAX_WINDOW_BITS,
+	PMD_CLIENT_MAX_WINDOW_BITS,
+	PMD_RX_BUF_PWR2,	/* log2 of the inflate output buffer size */
+	PMD_TX_BUF_PWR2,	/* log2 of the deflate output buffer size */
+	PMD_COMP_LEVEL,		/* zlib compression level for deflateInit2() */
+	PMD_MEM_LEVEL,		/* zlib memLevel for deflateInit2() */
+
+	PMD_ARG_COUNT
+};
+
+/* per-connection state of the permessage-deflate extension */
+struct lws_ext_pm_deflate_priv {
+	z_stream rx; /* inflate stream for received payload */
+	z_stream tx; /* deflate stream for outgoing payload */
+
+	unsigned char *buf_rx_inflated; /* RX inflated output buffer */
+	unsigned char *buf_tx_deflated; /* TX deflated output buffer */
+
+	size_t count_rx_between_fin; /* inflated bytes since the last FIN */
+
+	unsigned char args[PMD_ARG_COUNT]; /* option values, see arg_indexes */
+	unsigned char tx_held[5]; /* deferred tail of the last deflate output */
+	unsigned char rx_held; /* byte withheld when inflate filled its buffer */
+
+	unsigned char tx_init:1; /* tx has been through deflateInit2() */
+	unsigned char rx_init:1; /* rx has been through inflateInit2() */
+	unsigned char compressed_out:1; /* PAYLOAD_TX ran; PRESEND should act */
+	unsigned char rx_held_valid:1; /* rx_held carries a byte */
+	unsigned char tx_held_valid:1; /* tx_held carries deferred bytes */
+	unsigned char rx_append_trailer:1;
+	unsigned char pending_tx_trailer:1;
+};
+
+#endif /* LWS_EXTENSION_PERMESSAGE_DEFLATE_H */
diff --git a/lib/extension.c b/lib/extension.c
index 6e4797925c724f825d8d822fa3d5cde287e859b1..48682995973028f60731097690fa5e2b7b01396c 100644
--- a/lib/extension.c
+++ b/lib/extension.c
@@ -1,31 +1,6 @@
 #include "private-libwebsockets.h"
 
-#include "extension-deflate-frame.h"
-#include "extension-deflate-stream.h"
-
-struct lws_extension lws_internal_extensions[] = {
-#ifdef LWS_EXT_DEFLATE_STREAM
-	{
-		"deflate-stream",
-		lws_extension_callback_deflate_stream,
-		sizeof(struct lws_ext_deflate_stream_conn)
-	},
-#else
-	{
-		"x-webkit-deflate-frame",
-		lws_extension_callback_deflate_frame,
-		sizeof(struct lws_ext_deflate_frame_conn)
-	},
-	{
-		"deflate-frame",
-		lws_extension_callback_deflate_frame,
-		sizeof(struct lws_ext_deflate_frame_conn)
-	},
-#endif
-	{ /* terminator */
-		NULL, NULL, 0
-	}
-};
+#include "extension-permessage-deflate.h"
 
 LWS_VISIBLE void
 lws_context_init_extensions(struct lws_context_creation_info *info,
@@ -35,28 +10,164 @@ lws_context_init_extensions(struct lws_context_creation_info *info,
 	lwsl_info(" LWS_MAX_EXTENSIONS_ACTIVE: %u\n", LWS_MAX_EXTENSIONS_ACTIVE);
 }
 
-LWS_VISIBLE struct lws_extension *lws_get_internal_extensions()
+
+enum lws_ext_option_parser_states {
+	LEAPS_SEEK_NAME,	/* skipping spaces ahead of an option name */
+	LEAPS_EAT_NAME,		/* matching name chars against the option table */
+	LEAPS_SEEK_VAL,		/* name matched: expect '=', ';' or end of input */
+	LEAPS_EAT_DEC,		/* consuming a decimal value, optionally quoted */
+	LEAPS_SEEK_ARG_TERM	/* value done: expect ';' or ',' */
+};
+
+LWS_VISIBLE int
+lws_ext_parse_options(const struct lws_extension *ext, struct lws *wsi,
+		       void *ext_user, const struct lws_ext_options *opts, const char *in, int len)
 {
-	return lws_internal_extensions;
+	enum lws_ext_option_parser_states leap = LEAPS_SEEK_NAME;
+	unsigned int match_map = 0, n, m, w = 0, count_options = 0,
+		     pending_close_quote = 0;
+	struct lws_ext_option_arg oa;
+	/* report each option from @in that is listed in @opts; 0 = done, -1 = rejected */
+	while (opts[count_options].name) /* table ends at the first NULL name */
+		count_options++;
+	while (len) {
+		lwsl_ext("'%c'", *in);
+		switch (leap) {
+		case LEAPS_SEEK_NAME:
+			if (*in == ' ')
+				break;
+			if (*in == ',') {
+				len = 1; /* makes this the last char processed */
+				break;
+			}
+			match_map = (1 << count_options) - 1; /* one candidate bit per option */
+			leap = LEAPS_EAT_NAME;
+			w = 0;
+
+		/* fallthru */
+
+		case LEAPS_EAT_NAME:
+			oa.start = NULL;
+			oa.len = 0;
+			m = match_map;
+			n = 0;
+			/* w must persist between chars: it is the name offset being compared */
+			pending_close_quote = 0;
+			while (m) {
+				if (m & 1) {
+					/* option n is still a candidate: test its char at offset w */
+
+					if (*in == opts[n].name[w]) {
+						if (!opts[n].name[w + 1]) {
+							oa.option_index = n;
+							lwsl_ext("hit %d\n", oa.option_index);
+							leap = LEAPS_SEEK_VAL;
+							if (len == 1)
+								goto set_arg;
+							break;
+						}
+					} else {
+						match_map &= ~(1 << n);
+						if (!match_map)
+							return -1;
+					}
+				}
+				m >>= 1;
+				n++;
+			}
+			w++;
+			break;
+		case LEAPS_SEEK_VAL:
+			if (*in == ' ')
+				break;
+			if (*in == ',') {
+				len = 1;
+				break;
+			}
+			if (*in == ';' || len == 1) { /* ie,nonoptional */
+				if (opts[oa.option_index].type == EXTARG_DEC)
+					return -1;
+				leap = LEAPS_SEEK_NAME;
+				goto set_arg;
+			}
+			if (*in == '=') {
+				w = 0; /* from here on w counts value digits */
+				pending_close_quote = 0;
+				if (opts[oa.option_index].type == EXTARG_NONE)
+					return -1;
+
+				leap = LEAPS_EAT_DEC;
+				break;
+			}
+			return -1;
+
+		case LEAPS_EAT_DEC:
+			if (*in >= '0' && *in <= '9') {
+				if (!w)
+					oa.start = in;
+				w++;
+				if (len != 1)
+					break;
+			}
+			if (!w && *in == '"') {
+				pending_close_quote = 1;
+				break;
+			}
+			if (!w)
+				return -1;
+			if (pending_close_quote && *in != '"' && len != 1)
+				return -1;
+			leap = LEAPS_SEEK_ARG_TERM;
+			if (oa.start)
+				oa.len = in - oa.start;
+			if (len == 1)
+				oa.len++;
+
+set_arg:
+			m = ext->callback(lws_get_context(wsi),
+				ext, wsi, LWS_EXT_CB_OPTION_SET,
+				ext_user, (char *)&oa, 0);
+			if (len == 1)
+				break;
+			if (pending_close_quote && *in == '"')
+				break;
+
+			/* fallthru */
+
+		case LEAPS_SEEK_ARG_TERM:
+			if (*in == ' ')
+				break;
+			if (*in == ';') {
+				leap = LEAPS_SEEK_NAME;
+				break;
+			}
+			if (*in == ',') {
+				len = 1;
+				break;
+			}
+			return -1;
+		}
+
+		len--;
+		in++;
+	}
+
+	return 0;
 }
 
 
 /* 0 = nobody had nonzero return, 1 = somebody had positive return, -1 = fail */
 
-int lws_ext_cb_wsi_active_exts(struct lws *wsi, int reason, void *arg, int len)
+int lws_ext_cb_active(struct lws *wsi, int reason, void *arg, int len)
 {
 	int n, m, handled = 0;
 
-	for (n = 0; n < wsi->count_active_extensions; n++) {
-		m = wsi->active_extensions[n]->callback(
-			lws_get_context(wsi),
-			wsi->active_extensions[n], wsi,
-			reason,
-			wsi->active_extensions_user[n],
-			arg, len);
+	for (n = 0; n < wsi->count_act_ext; n++) {
+		m = wsi->active_extensions[n]->callback(lws_get_context(wsi),
+			wsi->active_extensions[n], wsi, reason,
+			wsi->act_ext_user[n], arg, len);
 		if (m < 0) {
-			lwsl_ext(
-			 "Extension '%s' failed to handle callback %d!\n",
+			lwsl_ext("Ext '%s' failed to handle callback %d!\n",
 				      wsi->active_extensions[n]->name, reason);
 			return -1;
 		}
@@ -77,9 +188,8 @@ int lws_ext_cb_all_exts(struct lws_context *context, struct lws *wsi,
 		m = ext->callback(context, ext, wsi, reason,
 				  (void *)(long)n, arg, len);
 		if (m < 0) {
-			lwsl_ext(
-			 "Extension '%s' failed to handle callback %d!\n",
-				      wsi->active_extensions[n]->name, reason);
+			lwsl_ext("Ext '%s' failed to handle callback %d!\n",
+				 wsi->active_extensions[n]->name, reason);
 			return -1;
 		}
 		if (m)
@@ -116,8 +226,8 @@ lws_issue_raw_ext_access(struct lws *wsi, unsigned char *buf, size_t len)
 		ret = 0;
 
 		/* show every extension the new incoming data */
-		m = lws_ext_cb_wsi_active_exts(wsi,
-			       LWS_EXT_CALLBACK_PACKET_TX_PRESEND, &eff_buf, 0);
+		m = lws_ext_cb_active(wsi,
+			       LWS_EXT_CB_PACKET_TX_PRESEND, &eff_buf, 0);
 		if (m < 0)
 			return -1;
 		if (m) /* handled */
@@ -191,13 +301,13 @@ lws_any_extension_handled(struct lws *wsi,
 
 	/* maybe an extension will take care of it for us */
 
-	for (n = 0; n < wsi->count_active_extensions && !handled; n++) {
+	for (n = 0; n < wsi->count_act_ext && !handled; n++) {
 		if (!wsi->active_extensions[n]->callback)
 			continue;
 
 		handled |= wsi->active_extensions[n]->callback(context,
 			wsi->active_extensions[n], wsi,
-			r, wsi->active_extensions_user[n], v, len);
+			r, wsi->act_ext_user[n], v, len);
 	}
 
 	return handled;
diff --git a/lib/handshake.c b/lib/handshake.c
index 00b543aefc1ccdba71f9dcb219fba38c77f4d759..e51305632e60109ca420f52e81a7e6bc5f3b9624 100644
--- a/lib/handshake.c
+++ b/lib/handshake.c
@@ -64,6 +64,8 @@ lws_read(struct lws *wsi, unsigned char *buf, size_t len)
 	int body_chunk_len;
 	size_t n;
 
+	lwsl_debug("%s: incoming len %d\n", __func__, (int)len);
+
 	switch (wsi->state) {
 #ifdef LWS_USE_HTTP2
 	case LWSS_HTTP2_AWAIT_CLIENT_PREFACE:
@@ -185,7 +187,7 @@ postbody_completion:
 		switch (wsi->mode) {
 		case LWSCM_WS_SERVING:
 
-			if (lws_interpret_incoming_packet(wsi, buf, len) < 0) {
+			if (lws_interpret_incoming_packet(wsi, &buf, len) < 0) {
 				lwsl_info("interpret_incoming_packet has bailed\n");
 				goto bail;
 			}
diff --git a/lib/header.c b/lib/header.c
index e0be8a404713f2576e5b677475bb5f9cac23e0c6..8f71a2894ed293205e5419160e2f8e0f169e28fe 100644
--- a/lib/header.c
+++ b/lib/header.c
@@ -178,10 +178,10 @@ lws_return_http_status(struct lws *wsi, unsigned int code, const char *html_body
 	int n, m;
 	struct lws_context *context = lws_get_context(wsi);
 	unsigned char *p = context->serv_buf +
-			   LWS_SEND_BUFFER_PRE_PADDING;
+			   LWS_PRE;
 	unsigned char *start = p;
 	unsigned char *end = p + sizeof(context->serv_buf) -
-			     LWS_SEND_BUFFER_PRE_PADDING;
+			     LWS_PRE;
 
 	if (!html_body)
 		html_body = "";
diff --git a/lib/http2.c b/lib/http2.c
index 76211de15fcdf041e2a4b6c04fddac50d5808f13..a2aa3f920311d456d5fa3ecb8def44796449881b 100644
--- a/lib/http2.c
+++ b/lib/http2.c
@@ -425,7 +425,7 @@ update_end_headers:
 
 int lws_http2_do_pps_send(struct lws_context *context, struct lws *wsi)
 {
-	unsigned char settings[LWS_SEND_BUFFER_PRE_PADDING + 6 * LWS_HTTP2_SETTINGS__COUNT];
+	unsigned char settings[LWS_PRE + 6 * LWS_HTTP2_SETTINGS__COUNT];
 	struct lws *swsi;
 	int n, m = 0;
 
@@ -436,12 +436,12 @@ int lws_http2_do_pps_send(struct lws_context *context, struct lws *wsi)
 		for (n = 1; n < LWS_HTTP2_SETTINGS__COUNT; n++)
 			if (wsi->u.http2.my_settings.setting[n] != lws_http2_default_settings.setting[n]) {
 				lws_http2_settings_write(wsi, n,
-							 &settings[LWS_SEND_BUFFER_PRE_PADDING + m]);
+							 &settings[LWS_PRE + m]);
 				m += sizeof(wsi->u.http2.one_setting);
 			}
 		n = lws_http2_frame_write(wsi, LWS_HTTP2_FRAME_TYPE_SETTINGS,
 		     			  0, LWS_HTTP2_STREAM_ID_MASTER, m,
-		     			  &settings[LWS_SEND_BUFFER_PRE_PADDING]);
+		     			  &settings[LWS_PRE]);
 		if (n != m) {
 			lwsl_info("send %d %d\n", n, m);
 			return 1;
@@ -451,7 +451,7 @@ int lws_http2_do_pps_send(struct lws_context *context, struct lws *wsi)
 		/* send ack ... always empty */
 		n = lws_http2_frame_write(wsi, LWS_HTTP2_FRAME_TYPE_SETTINGS,
 			1, LWS_HTTP2_STREAM_ID_MASTER, 0,
-			&settings[LWS_SEND_BUFFER_PRE_PADDING]);
+			&settings[LWS_PRE]);
 		if (n) {
 			lwsl_err("ack tells %d\n", n);
 			return 1;
@@ -490,11 +490,11 @@ int lws_http2_do_pps_send(struct lws_context *context, struct lws *wsi)
 		}
 		break;
 	case LWS_PPS_HTTP2_PONG:
-		memcpy(&settings[LWS_SEND_BUFFER_PRE_PADDING], wsi->u.http2.ping_payload, 8);
+		memcpy(&settings[LWS_PRE], wsi->u.http2.ping_payload, 8);
 		n = lws_http2_frame_write(wsi, LWS_HTTP2_FRAME_TYPE_PING,
 		     			  LWS_HTTP2_FLAG_SETTINGS_ACK,
 			    		  LWS_HTTP2_STREAM_ID_MASTER, 8,
-		     			  &settings[LWS_SEND_BUFFER_PRE_PADDING]);
+		     			  &settings[LWS_PRE]);
 		if (n != 8) {
 			lwsl_info("send %d %d\n", n, m);
 			return 1;
diff --git a/lib/libwebsockets.c b/lib/libwebsockets.c
index 3f4e1190216d1d4c2dbdbf554a700ce7f21afa26..b80a657377e00c70837130307d737d709941663b 100644
--- a/lib/libwebsockets.c
+++ b/lib/libwebsockets.c
@@ -130,8 +130,8 @@ lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason)
 	 * parent and just his ch1 aspect is closing?
 	 */
 
-	if (lws_ext_cb_wsi_active_exts(wsi,
-		      LWS_EXT_CALLBACK_CHECK_OK_TO_REALLY_CLOSE, NULL, 0) > 0) {
+	if (lws_ext_cb_active(wsi,
+		      LWS_EXT_CB_CHECK_OK_TO_REALLY_CLOSE, NULL, 0) > 0) {
 		lwsl_ext("extension vetoed close\n");
 		return;
 	}
@@ -148,8 +148,8 @@ lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason)
 
 		/* show every extension the new incoming data */
 
-		m = lws_ext_cb_wsi_active_exts(wsi,
-			  LWS_EXT_CALLBACK_FLUSH_PENDING_TX, &eff_buf, 0);
+		m = lws_ext_cb_active(wsi,
+			  LWS_EXT_CB_FLUSH_PENDING_TX, &eff_buf, 0);
 		if (m < 0) {
 			lwsl_ext("Extension reports fatal error\n");
 			goto just_kill_connection;
@@ -193,14 +193,14 @@ lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason)
 		/* if no prepared close reason, use 1000 and no aux data */
 		if (!wsi->u.ws.close_in_ping_buffer_len) {
 			wsi->u.ws.close_in_ping_buffer_len = 2;
-			wsi->u.ws.ping_payload_buf[LWS_SEND_BUFFER_PRE_PADDING] =
+			wsi->u.ws.ping_payload_buf[LWS_PRE] =
 				(reason >> 16) & 0xff;
-			wsi->u.ws.ping_payload_buf[LWS_SEND_BUFFER_PRE_PADDING + 1] =
+			wsi->u.ws.ping_payload_buf[LWS_PRE + 1] =
 				reason & 0xff;
 		}
 
 		n = lws_write(wsi, &wsi->u.ws.ping_payload_buf[
-						LWS_SEND_BUFFER_PRE_PADDING],
+						LWS_PRE],
 			      wsi->u.ws.close_in_ping_buffer_len,
 			      LWS_WRITE_CLOSE);
 		if (n >= 0) {
@@ -227,7 +227,7 @@ lws_close_free_wsi(struct lws *wsi, enum lws_close_status reason)
 
 just_kill_connection:
 
-	lwsl_debug("close: just_kill_connection\n");
+	lwsl_debug("close: just_kill_connection: %p\n", wsi);
 
 	/*
 	 * we won't be servicing or receiving anything further from this guy
@@ -246,7 +246,36 @@ just_kill_connection:
 	    wsi->mode == LWSCM_WS_SERVING ||
 	    wsi->mode == LWSCM_WS_CLIENT) {
 
-		lws_free_set_NULL(wsi->u.ws.rx_user_buffer);
+		if (wsi->u.ws.rx_draining_ext) {
+			struct lws **w = &wsi->context->rx_draining_ext_list;
+
+			wsi->u.ws.rx_draining_ext = 0;
+			/* remove us from context draining ext list */
+			while (*w) {
+				if (*w == wsi) {
+					*w = wsi->u.ws.rx_draining_ext_list;
+					break;
+				}
+				w = &((*w)->u.ws.rx_draining_ext_list);
+			}
+			wsi->u.ws.rx_draining_ext_list = NULL;
+		}
+
+		if (wsi->u.ws.tx_draining_ext) {
+			struct lws **w = &wsi->context->tx_draining_ext_list;
+
+			wsi->u.ws.tx_draining_ext = 0;
+			/* remove us from context draining ext list */
+			while (*w) {
+				if (*w == wsi) {
+					*w = wsi->u.ws.tx_draining_ext_list;
+					break;
+				}
+				w = &((*w)->u.ws.tx_draining_ext_list);
+			}
+			wsi->u.ws.tx_draining_ext_list = NULL;
+		}
+		lws_free_set_NULL(wsi->u.ws.rx_ubuf);
 
 		if (wsi->trunc_alloc)
 			/* not going to be completed... nuke it */
@@ -282,18 +311,14 @@ just_kill_connection:
 
 	/* deallocate any active extension contexts */
 
-	if (lws_ext_cb_wsi_active_exts(wsi, LWS_EXT_CALLBACK_DESTROY, NULL, 0) < 0)
+	if (lws_ext_cb_active(wsi, LWS_EXT_CB_DESTROY, NULL, 0) < 0)
 		lwsl_warn("extension destruction failed\n");
-#ifndef LWS_NO_EXTENSIONS
-	for (n = 0; n < wsi->count_active_extensions; n++)
-		lws_free(wsi->active_extensions_user[n]);
-#endif
 	/*
 	 * inform all extensions in case they tracked this guy out of band
 	 * even though not active on him specifically
 	 */
 	if (lws_ext_cb_all_exts(context, wsi,
-		       LWS_EXT_CALLBACK_DESTROY_ANY_WSI_CLOSING, NULL, 0) < 0)
+		       LWS_EXT_CB_DESTROY_ANY_WSI_CLOSING, NULL, 0) < 0)
 		lwsl_warn("ext destroy wsi failed\n");
 
 	if (!lws_ssl_close(wsi) && lws_socket_is_valid(wsi->sock)) {
@@ -763,7 +788,13 @@ lws_get_protocol(struct lws *wsi)
 LWS_VISIBLE int
 lws_is_final_fragment(struct lws *wsi)
 {
-	return wsi->u.ws.final && !wsi->u.ws.rx_packet_length;
+	/*
+	 * Nonzero only when the final flag is set, rx_packet_length is zero
+	 * and rx_draining_ext is clear; the three inputs are logged first.
+	 */
+	lwsl_info("%s: final %d, rx pk length %d, draining %d\n", __func__,
+		  wsi->u.ws.final, (int)wsi->u.ws.rx_packet_length, wsi->u.ws.rx_draining_ext);
+	return wsi->u.ws.final && !wsi->u.ws.rx_packet_length && !wsi->u.ws.rx_draining_ext;
 }
 
 LWS_VISIBLE unsigned char
@@ -959,11 +986,11 @@ lws_close_reason(struct lws *wsi, enum lws_close_status status,
 {
 	unsigned char *p, *start;
 	int budget = sizeof(wsi->u.ws.ping_payload_buf) -
-		     LWS_SEND_BUFFER_PRE_PADDING;
+		     LWS_PRE;
 
 	assert(wsi->mode == LWSCM_WS_SERVING || wsi->mode == LWSCM_WS_CLIENT);
 
-	start = p = &wsi->u.ws.ping_payload_buf[LWS_SEND_BUFFER_PRE_PADDING];
+	start = p = &wsi->u.ws.ping_payload_buf[LWS_PRE];
 
 	*p++ = (((int)status) >> 8) & 0xff;
 	*p++ = ((int)status) & 0xff;
@@ -979,8 +1006,10 @@ LWS_EXTERN int
 _lws_rx_flow_control(struct lws *wsi)
 {
 	/* there is no pending change */
-	if (!(wsi->rxflow_change_to & LWS_RXFLOW_PENDING_CHANGE))
+	if (!(wsi->rxflow_change_to & LWS_RXFLOW_PENDING_CHANGE)) {
+		lwsl_info("%s: no pending change\n", __func__);
 		return 0;
+	}
 
 	/* stuff is still buffered, not ready to really accept new input */
 	if (wsi->rxflow_buffer) {
@@ -1066,3 +1095,22 @@ lws_check_utf8(unsigned char *state, unsigned char *buf, size_t len)
 
 	return 0;
 }
+
+#ifdef LWS_NO_EXTENSIONS
+
+/* Built only under LWS_NO_EXTENSIONS: do-nothing stand-in for the internal
+ * permessage-deflate callback, so user code that refers to it still works
+ * against a library compiled without extensions.  Always returns 0.
+ */
+
+int
+lws_extension_callback_pm_deflate(struct lws_context *context,
+                                  const struct lws_extension *ext,
+                                  struct lws *wsi,
+                                  enum lws_extension_callback_reasons reason,
+                                  void *user, void *in, size_t len)
+{
+	return 0;
+}
+#endif
+
diff --git a/lib/libwebsockets.h b/lib/libwebsockets.h
index 7b771ea8b6c798a2a09cafffd07dbf0ef6d80f8f..d2a1b1b1e9685b78acd3e9b93f5dceafff1fe2ad 100644
--- a/lib/libwebsockets.h
+++ b/lib/libwebsockets.h
@@ -1,7 +1,7 @@
 /*
  * libwebsockets - small server side websockets and web server implementation
  *
- * Copyright (C) 2010-2015 Andy Green <andy@warmcat.com>
+ * Copyright (C) 2010-2016 Andy Green <andy@warmcat.com>
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Lesser General Public
@@ -323,6 +323,8 @@ enum lws_callback_reasons {
 	LWS_CALLBACK_OPENSSL_CONTEXT_REQUIRES_PRIVATE_KEY	= 37,
 	LWS_CALLBACK_WS_PEER_INITIATED_CLOSE			= 38,
 
+	LWS_CALLBACK_WS_EXT_DEFAULTS				= 39,
+
 	/****** add new things just above ---^ ******/
 
 	LWS_CALLBACK_USER = 1000, /* user code can use any including / above */
@@ -415,29 +417,32 @@ struct lws_plat_file_ops {
  * add it at where specified so existing users are unaffected.
  */
 enum lws_extension_callback_reasons {
-	LWS_EXT_CALLBACK_SERVER_CONTEXT_CONSTRUCT		=  0,
-	LWS_EXT_CALLBACK_CLIENT_CONTEXT_CONSTRUCT		=  1,
-	LWS_EXT_CALLBACK_SERVER_CONTEXT_DESTRUCT		=  2,
-	LWS_EXT_CALLBACK_CLIENT_CONTEXT_DESTRUCT		=  3,
-	LWS_EXT_CALLBACK_CONSTRUCT				=  4,
-	LWS_EXT_CALLBACK_CLIENT_CONSTRUCT			=  5,
-	LWS_EXT_CALLBACK_CHECK_OK_TO_REALLY_CLOSE		=  6,
-	LWS_EXT_CALLBACK_CHECK_OK_TO_PROPOSE_EXTENSION		=  7,
-	LWS_EXT_CALLBACK_DESTROY				=  8,
-	LWS_EXT_CALLBACK_DESTROY_ANY_WSI_CLOSING		=  9,
-	LWS_EXT_CALLBACK_ANY_WSI_ESTABLISHED			= 10,
-	LWS_EXT_CALLBACK_PACKET_RX_PREPARSE			= 11,
-	LWS_EXT_CALLBACK_PACKET_TX_PRESEND			= 12,
-	LWS_EXT_CALLBACK_PACKET_TX_DO_SEND			= 13,
-	LWS_EXT_CALLBACK_HANDSHAKE_REPLY_TX			= 14,
-	LWS_EXT_CALLBACK_FLUSH_PENDING_TX			= 15,
-	LWS_EXT_CALLBACK_EXTENDED_PAYLOAD_RX			= 16,
-	LWS_EXT_CALLBACK_CAN_PROXY_CLIENT_CONNECTION		= 17,
-	LWS_EXT_CALLBACK_1HZ					= 18,
-	LWS_EXT_CALLBACK_REQUEST_ON_WRITEABLE			= 19,
-	LWS_EXT_CALLBACK_IS_WRITEABLE				= 20,
-	LWS_EXT_CALLBACK_PAYLOAD_TX				= 21,
-	LWS_EXT_CALLBACK_PAYLOAD_RX				= 22,
+	LWS_EXT_CB_SERVER_CONTEXT_CONSTRUCT		=  0,
+	LWS_EXT_CB_CLIENT_CONTEXT_CONSTRUCT		=  1,
+	LWS_EXT_CB_SERVER_CONTEXT_DESTRUCT		=  2,
+	LWS_EXT_CB_CLIENT_CONTEXT_DESTRUCT		=  3,
+	LWS_EXT_CB_CONSTRUCT				=  4,
+	LWS_EXT_CB_CLIENT_CONSTRUCT			=  5,
+	LWS_EXT_CB_CHECK_OK_TO_REALLY_CLOSE		=  6,
+	LWS_EXT_CB_CHECK_OK_TO_PROPOSE_EXTENSION		=  7,
+	LWS_EXT_CB_DESTROY				=  8,
+	LWS_EXT_CB_DESTROY_ANY_WSI_CLOSING		=  9,
+	LWS_EXT_CB_ANY_WSI_ESTABLISHED			= 10,
+	LWS_EXT_CB_PACKET_RX_PREPARSE			= 11,
+	LWS_EXT_CB_PACKET_TX_PRESEND			= 12,
+	LWS_EXT_CB_PACKET_TX_DO_SEND			= 13,
+	LWS_EXT_CB_HANDSHAKE_REPLY_TX			= 14,
+	LWS_EXT_CB_FLUSH_PENDING_TX			= 15,
+	LWS_EXT_CB_EXTENDED_PAYLOAD_RX			= 16,
+	LWS_EXT_CB_CAN_PROXY_CLIENT_CONNECTION		= 17,
+	LWS_EXT_CB_1HZ					= 18,
+	LWS_EXT_CB_REQUEST_ON_WRITEABLE			= 19,
+	LWS_EXT_CB_IS_WRITEABLE				= 20,
+	LWS_EXT_CB_PAYLOAD_TX				= 21,
+	LWS_EXT_CB_PAYLOAD_RX				= 22,
+	LWS_EXT_CB_OPTION_DEFAULT			= 23,
+	LWS_EXT_CB_OPTION_SET				= 24, /* @in: struct lws_ext_option_arg *, sent by lws_ext_parse_options() */
+	LWS_EXT_CB_OPTION_CONFIRM			= 25,
 
 	/****** add new things just above ---^ ******/
 };
@@ -1056,14 +1061,13 @@ typedef int
 lws_callback_function(struct lws *wsi, enum lws_callback_reasons reason,
 		    void *user, void *in, size_t len);
 
-#ifndef LWS_NO_EXTENSIONS
 /**
  * typedef lws_extension_callback_function() - Hooks to allow extensions to operate
  * @context:	Websockets context
  * @ext:	This extension
  * @wsi:	Opaque websocket instance pointer
  * @reason:	The reason for the call
- * @user:	Pointer to per-session user data allocated by library
+ * @user:	Pointer to ptr to per-session user data allocated by library
  * @in:		Pointer used for some callback reasons
  * @len:	Length set for some callback reasons
  *
@@ -1075,26 +1079,26 @@ lws_callback_function(struct lws *wsi, enum lws_callback_reasons reason,
  *	each active extension on each connection.  That is what is pointed to
  *	by the @user parameter.
  *
- *	LWS_EXT_CALLBACK_CONSTRUCT:  called when the server has decided to
+ *	LWS_EXT_CB_CONSTRUCT:  called when the server has decided to
  *		select this extension from the list provided by the client,
  *		just before the server will send back the handshake accepting
  *		the connection with this extension active.  This gives the
  *		extension a chance to initialize its connection context found
  *		in @user.
  *
- *	LWS_EXT_CALLBACK_CLIENT_CONSTRUCT: same as LWS_EXT_CALLBACK_CONSTRUCT
+ *	LWS_EXT_CB_CLIENT_CONSTRUCT: same as LWS_EXT_CB_CONSTRUCT
  *		but called when client is instantiating this extension.  Some
  *		extensions will work the same on client and server side and then
  *		you can just merge handlers for both CONSTRUCTS.
  *
- *	LWS_EXT_CALLBACK_DESTROY:  called when the connection the extension was
+ *	LWS_EXT_CB_DESTROY:  called when the connection the extension was
  *		being used on is about to be closed and deallocated.  It's the
  *		last chance for the extension to deallocate anything it has
  *		allocated in the user data (pointed to by @user) before the
  *		user data is deleted.  This same callback is used whether you
  *		are in client or server instantiation context.
  *
- *	LWS_EXT_CALLBACK_PACKET_RX_PREPARSE: when this extension was active on
+ *	LWS_EXT_CB_PACKET_RX_PREPARSE: when this extension was active on
  *		a connection, and a packet of data arrived at the connection,
  *		it is passed to this callback to give the extension a chance to
  *		change the data, eg, decompress it.  @user is pointing to the
@@ -1106,21 +1110,22 @@ lws_callback_function(struct lws *wsi, enum lws_callback_reasons reason,
  *		a new buffer allocated in its private user context data and
  *		set the pointed-to lws_tokens members to point to its buffer.
  *
- *	LWS_EXT_CALLBACK_PACKET_TX_PRESEND: this works the same way as
- *		LWS_EXT_CALLBACK_PACKET_RX_PREPARSE above, except it gives the
+ *	LWS_EXT_CB_PACKET_TX_PRESEND: this works the same way as
+ *		LWS_EXT_CB_PACKET_RX_PREPARSE above, except it gives the
  *		extension a chance to change websocket data just before it will
  *		be sent out.  Using the same lws_token pointer scheme in @in,
  *		the extension can change the buffer and the length to be
  *		transmitted how it likes.  Again if it wants to grow the
  *		buffer safely, it should copy the data into its own buffer and
  *		set the lws_tokens token pointer to it.
+ *
+ *	LWS_EXT_CB_OPTION_SET: one parsed option; @in is a struct lws_ext_option_arg *
  */
 typedef int
 lws_extension_callback_function(struct lws_context *context,
 			      const struct lws_extension *ext, struct lws *wsi,
 			      enum lws_extension_callback_reasons reason,
 			      void *user, void *in, size_t len);
-#endif
 
 /**
  * struct lws_protocols -	List of protocols and handlers server
@@ -1140,7 +1145,7 @@ lws_extension_callback_function(struct lws_context *context,
  *		error, but the buffer will spill to the user callback when
  *		full, which you can detect by using
  *		lws_remaining_packet_payload().  Notice that you
- *		just talk about frame size here, the LWS_SEND_BUFFER_PRE_PADDING
+ *		just talk about frame size here, the LWS_PRE
  *		and post-padding are automatically also allocated on top.
  * @id:		ignored by lws, but useful to contain user information bound
  *		to the selected protocol.  For example if this protocol was
@@ -1174,30 +1179,65 @@ struct lws_protocols {
 	 * This is part of the ABI, don't needlessly break compatibilty */
 };
 
-#ifndef LWS_NO_EXTENSIONS
+enum lws_ext_options_types {
+	EXTARG_NONE,	/* option never takes a value */
+	EXTARG_DEC,	/* option must carry a decimal value */
+	EXTARG_OPT_DEC	/* decimal value may be given or left out */
+
+	/* Add new things just above here ---^
+	 * This is part of the ABI, don't needlessly break compatibility */
+};
+
+/**
+ * struct lws_ext_options -	Option arguments to the extension.  These are
+ *				used in the negotiation at ws upgrade time.
+ *				The helper function lws_ext_parse_options()
+ *				uses these to generate callbacks
+ *
+ * @name:			Option name, eg, "server_no_context_takeover"
+ * @type:			What kind of args the option can take
+ */
+struct lws_ext_options {
+	const char *name;
+	enum lws_ext_options_types type;
+
+	/* Add new things just above here ---^
+	 * This is part of the ABI, don't needlessly break compatibility */
+};
+
+struct lws_ext_option_arg {
+	int option_index;	/* which entry of the options table matched */
+	const char *start;	/* first char of the value text, NULL if none seen */
+	int len;		/* length of the value text in chars */
+};
+
 /**
  * struct lws_extension -	An extension we know how to cope with
  *
- * @name:			Formal extension name, eg, "deflate-stream"
+ * @name:			Formal extension name, eg, "permessage-deflate"
  * @callback:			Service callback
- * @per_session_data_size:	Libwebsockets will auto-malloc this much
- *				memory for the use of the extension, a pointer
- *				to it comes in the @user callback parameter
- * @per_context_private_data:   Optional storage for this extension that
- *				is per-context, so it can track stuff across
- *				all sessions, etc, if it wants
+ * @client_offer:		String containing exts and options client offers
  */
 
 struct lws_extension {
 	const char *name;
 	lws_extension_callback_function *callback;
-	size_t per_session_data_size;
-	void *per_context_private_data;
+	const char *client_offer; /* exts and options the client offers, as one string */
 
 	/* Add new things just above here ---^
 	 * This is part of the ABI, don't needlessly break compatibilty */
 };
-#endif
+
+/* 
+ * The internal exts are part of the public abi
+ * If we add more extensions, publish the callback here
+ */
+
+extern int lws_extension_callback_pm_deflate(
+	struct lws_context *context, const struct lws_extension *ext,
+	struct lws *wsi, enum lws_extension_callback_reasons reason,
+	void *user, void *in, size_t len);
+
 
 /**
  * struct lws_context_creation_info - parameters to create context with
@@ -1216,14 +1256,15 @@ struct lws_extension {
  *		extensions this context supports.  If you configured with
  *		--without-extensions, you should give NULL here.
  * @token_limits: NULL or struct lws_token_limits pointer which is initialized
- *      with a token length limit for each possible WSI_TOKEN_***
+ *		with a token length limit for each possible WSI_TOKEN_***
  * @ssl_cert_filepath:	If libwebsockets was compiled to use ssl, and you want
  *			to listen using SSL, set to the filepath to fetch the
  *			server cert from, otherwise NULL for unencrypted
  * @ssl_private_key_filepath: filepath to private key if wanting SSL mode;
  *			if this is set to NULL but sll_cert_filepath is set, the
- *			OPENSSL_CONTEXT_REQUIRES_PRIVATE_KEY callback is called to allow
- *			setting of the private key directly via openSSL library calls
+ *			OPENSSL_CONTEXT_REQUIRES_PRIVATE_KEY callback is called
+ *			to allow setting of the private key directly via openSSL
+ *			library calls
  * @ssl_ca_filepath: CA certificate filepath or NULL
  * @ssl_cipher_list:	List of valid ciphers to use (eg,
  * 			"RC4-MD5:RC4-SHA:AES128-SHA:AES256-SHA:HIGH:!DSS:!aNULL"
@@ -1231,7 +1272,8 @@ struct lws_extension {
  * @http_proxy_address: If non-NULL, attempts to proxy via the given address.
  *			If proxy auth is required, use format
  *			"username:password@server:port"
- * @http_proxy_port:	If http_proxy_address was non-NULL, uses this port at the address
+ * @http_proxy_port:	If http_proxy_address was non-NULL, uses this port at
+ * 			the address
  * @gid:	group id to change to after setting listen socket, or -1.
  * @uid:	user id to change to after setting listen socket, or -1.
  * @options:	0, or LWS_SERVER_OPTION_... bitfields
@@ -1297,6 +1339,22 @@ struct lws_context_creation_info {
 	void *_unused[8];
 };
 
+struct lws_client_connect_info { /* argument bundle taken by lws_client_connect_info() */
+	struct lws_context *context;
+	const char *address;
+	int port;
+	int ssl_connection;
+	const char *path;
+	const char *host;
+	const char *origin;
+	const char *protocol;
+	int ietf_version_or_minus_one;
+	void *userdata;
+	const struct lws_extension *client_exts; /* no counterpart among the lws_client_connect_extended() args */
+	void *_unused[4]; /* NOTE(review): presumably reserved for future members - confirm */
+};
+
+
 LWS_VISIBLE LWS_EXTERN void
 lws_set_log_level(int level,
 		  void (*log_emit_function)(int level, const char *line));
@@ -1397,14 +1455,14 @@ lws_set_timeout(struct lws *wsi, enum pending_timeout reason, int secs);
  * IMPORTANT NOTICE!
  *
  * When sending with websocket protocol
- * 
+ *
  * LWS_WRITE_TEXT,
  * LWS_WRITE_BINARY,
  * LWS_WRITE_CONTINUATION,
  * LWS_WRITE_PING,
  * LWS_WRITE_PONG
- * 
- * the send buffer has to have LWS_SEND_BUFFER_PRE_PADDING bytes valid BEFORE
+ *
+ * the send buffer has to have LWS_PRE bytes valid BEFORE
  * the buffer pointer you pass to lws_write().
  *
  * This allows us to add protocol info before and after the data, and send as
@@ -1413,27 +1471,27 @@ lws_set_timeout(struct lws *wsi, enum pending_timeout reason, int secs);
  * So for example you need this kind of code to use lws_write with a
  * 128-byte payload
  *
- *   char buf[LWS_SEND_BUFFER_PRE_PADDING + 128];
+ *   char buf[LWS_PRE + 128];
  *
  *   // fill your part of the buffer... for example here it's all zeros
- *   memset(&buf[LWS_SEND_BUFFER_PRE_PADDING], 0, 128);
+ *   memset(&buf[LWS_PRE], 0, 128);
+ *
+ *   lws_write(wsi, &buf[LWS_PRE], 128, LWS_WRITE_TEXT);
  *
- *   lws_write(wsi, &buf[LWS_SEND_BUFFER_PRE_PADDING], 128, LWS_WRITE_TEXT);
- * 
  * When sending HTTP, with
- * 
+ *
  * LWS_WRITE_HTTP,
  * LWS_WRITE_HTTP_HEADERS
  * LWS_WRITE_HTTP_FINAL
- * 
+ *
  * there is no protocol data prepended, and don't need to take care about the
- * LWS_SEND_BUFFER_PRE_PADDING bytes valid before the buffer pointer.
+ * LWS_PRE bytes valid before the buffer pointer.
  *
- * LWS_SEND_BUFFER_PRE_PADDING is at least the frame nonce + 2 header + 8 length
+ * LWS_PRE is at least the frame nonce + 2 header + 8 length
  * LWS_SEND_BUFFER_POST_PADDING is deprecated, it's now 0 and can be left off.
  * The example apps no longer use it.
  *
- * Pad LWS_SEND_BUFFER_PRE_PADDING to the CPU word size, so that word references
+ * Pad LWS_PRE to the CPU word size, so that word references
  * to the address immediately after the padding won't cause an unaligned access
  * error. Sometimes for performance reasons the recommended padding is even
  * larger than sizeof(void *).
@@ -1453,7 +1511,9 @@ lws_set_timeout(struct lws *wsi, enum pending_timeout reason, int secs);
 #endif
 #define _LWS_PAD(n) (((n) % _LWS_PAD_SIZE) ? \
 		((n) + (_LWS_PAD_SIZE - ((n) % _LWS_PAD_SIZE))) : (n))
-#define LWS_SEND_BUFFER_PRE_PADDING _LWS_PAD(4 + 10)
+#define LWS_PRE _LWS_PAD(4 + 10)
+/* used prior to 1.7 and retained for backward compatibility */
+#define LWS_SEND_BUFFER_PRE_PADDING LWS_PRE
 #define LWS_SEND_BUFFER_POST_PADDING 0
 
 LWS_VISIBLE LWS_EXTERN int
@@ -1542,12 +1602,13 @@ lws_remaining_packet_payload(struct lws *wsi);
 LWS_VISIBLE LWS_EXTERN size_t
 lws_get_peer_write_allowance(struct lws *wsi);
 
+/* deprecated, use lws_client_connect_info() */
 LWS_VISIBLE LWS_EXTERN struct lws *
 lws_client_connect(struct lws_context *clients, const char *address,
 		   int port, int ssl_connection, const char *path,
 		   const char *host, const char *origin, const char *protocol,
 		   int ietf_version_or_minus_one);
-
+/* deprecated, use lws_client_connect_info() */
 LWS_VISIBLE LWS_EXTERN struct lws *
 lws_client_connect_extended(struct lws_context *clients, const char *address,
 			    int port, int ssl_connection, const char *path,
@@ -1555,6 +1616,9 @@ lws_client_connect_extended(struct lws_context *clients, const char *address,
 			    const char *protocol, int ietf_version_or_minus_one,
 			    void *userdata);
 
+LWS_VISIBLE LWS_EXTERN struct lws *
+lws_client_connect_info(struct lws_client_connect_info * ccinfo);
+
 LWS_VISIBLE LWS_EXTERN const char *
 lws_canonical_hostname(struct lws_context *context);
 
@@ -1704,7 +1768,11 @@ LWS_VISIBLE LWS_EXTERN int
 lws_read(struct lws *wsi, unsigned char *buf, size_t len);
 
 #ifndef LWS_NO_EXTENSIONS
-LWS_VISIBLE LWS_EXTERN struct lws_extension *lws_get_internal_extensions();
+/* deprecated */
+#define lws_get_internal_extensions() NULL
+LWS_VISIBLE LWS_EXTERN int
+lws_ext_parse_options(const struct lws_extension *ext, struct lws *wsi,
+		       void *ext_user, const struct lws_ext_options *opts, const char *o, int len);
 #endif
 
 /*
diff --git a/lib/lws-plat-unix.c b/lib/lws-plat-unix.c
index 9d3a66ce6b64c143df2b1b8148d03d7d87f95ee6..3b82d9192566e95c85cdda8a45398e971cbae9ff 100644
--- a/lib/lws-plat-unix.c
+++ b/lib/lws-plat-unix.c
@@ -101,8 +101,9 @@ lws_plat_service(struct lws_context *context, int timeout_ms)
 	int n;
 	int m;
 	char buf;
+	struct lws *wsi;
 #ifdef LWS_OPENSSL_SUPPORT
-	struct lws *wsi, *wsi_next;
+	struct lws *wsi_next;
 #endif
 
 	/* stay dead once we are dead */
@@ -123,18 +124,25 @@ lws_plat_service(struct lws_context *context, int timeout_ms)
 	}
 	context->service_tid = context->service_tid_detected;
 
+	/* if we know we are draining rx ext, do not wait in poll */
+	if (context->rx_draining_ext_list)
+		timeout_ms = 0;
+
 #ifdef LWS_OPENSSL_SUPPORT
 	/* if we know we have non-network pending data, do not wait in poll */
-	if (lws_ssl_anybody_has_buffered_read(context))
+	if (lws_ssl_anybody_has_buffered_read(context)) {
 		timeout_ms = 0;
+		lwsl_err("ssl buffered read\n");
+	}
 #endif
+
 	n = poll(context->fds, context->fds_count, timeout_ms);
-	context->service_tid = 0;
 
 #ifdef LWS_OPENSSL_SUPPORT
-	if (!lws_ssl_anybody_has_buffered_read(context) && n == 0) {
+	if (!context->rx_draining_ext_list &&
+	    !lws_ssl_anybody_has_buffered_read(context) && n == 0) {
 #else
-	if (n == 0) /* poll timeout */ {
+	if (!context->rx_draining_ext_list && n == 0) /* poll timeout */ {
 #endif
 		lws_service_fd(context, NULL);
 		return 0;
@@ -146,6 +154,17 @@ lws_plat_service(struct lws_context *context, int timeout_ms)
 		return 0;
 	}
 
+	/*
+	 * For all guys with already-available ext data to drain, if they are
+	 * not flowcontrolled, fake their POLLIN status
+	 */
+	wsi = context->rx_draining_ext_list;
+	while (wsi) {
+		context->fds[wsi->position_in_fds_table].revents |=
+			context->fds[wsi->position_in_fds_table].events & POLLIN;
+		wsi = wsi->u.ws.rx_draining_ext_list;
+	}
+
 #ifdef LWS_OPENSSL_SUPPORT
 	/*
 	 * For all guys with buffered SSL read data already saved up, if they
diff --git a/lib/output.c b/lib/output.c
index 3e782c2a520b202c2ac7297747553b864a16d80d..9ba6d53ddc812dcae8e4d2e72e932baca2f1846e 100644
--- a/lib/output.c
+++ b/lib/output.c
@@ -25,15 +25,15 @@ static int
 lws_0405_frame_mask_generate(struct lws *wsi)
 {
 #if 0
-	wsi->u.ws.mask_nonce[0] = 0;
-	wsi->u.ws.mask_nonce[1] = 0;
-	wsi->u.ws.mask_nonce[2] = 0;
-	wsi->u.ws.mask_nonce[3] = 0;
+	wsi->u.ws.mask[0] = 0;
+	wsi->u.ws.mask[1] = 0;
+	wsi->u.ws.mask[2] = 0;
+	wsi->u.ws.mask[3] = 0;
 #else
 		int n;
 	/* fetch the per-frame nonce */
 
-	n = lws_get_random(lws_get_context(wsi), wsi->u.ws.mask_nonce, 4);
+	n = lws_get_random(lws_get_context(wsi), wsi->u.ws.mask, 4);
 	if (n != 4) {
 		lwsl_parser("Unable to read from random device %s %d\n",
 			    SYSTEM_RANDOM_FILEPATH, n);
@@ -41,7 +41,7 @@ lws_0405_frame_mask_generate(struct lws *wsi)
 	}
 #endif
 	/* start masking from first byte of masking key buffer */
-	wsi->u.ws.frame_mask_index = 0;
+	wsi->u.ws.mask_idx = 0;
 
 	return 0;
 }
@@ -112,8 +112,7 @@ int lws_issue_raw(struct lws *wsi, unsigned char *buf, size_t len)
 		assert(0);
 	}
 
-	m = lws_ext_cb_wsi_active_exts(wsi,
-			LWS_EXT_CALLBACK_PACKET_TX_DO_SEND, &buf, len);
+	m = lws_ext_cb_active(wsi, LWS_EXT_CB_PACKET_TX_DO_SEND, &buf, len);
 	if (m < 0)
 		return -1;
 	if (m) /* handled */ {
@@ -208,7 +207,7 @@ handle_truncated_send:
  * @wsi:	Websocket instance (available from user callback)
  * @buf:	The data to send.  For data being sent on a websocket
  *		connection (ie, not default http), this buffer MUST have
- *		LWS_SEND_BUFFER_PRE_PADDING bytes valid BEFORE the pointer.
+ *		LWS_PRE bytes valid BEFORE the pointer.
  *		This is so the protocol header data can be added in-situ.
  * @len:	Count of the data bytes in the payload starting from buf
  * @protocol:	Use LWS_WRITE_HTTP to reply to an http connection, and one
@@ -231,27 +230,48 @@ handle_truncated_send:
  *	pressure at any given time.
  */
 
-LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf,
-			  size_t len, enum lws_write_protocol protocol)
+LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf, size_t len,
+			  enum lws_write_protocol wp)
 {
-	int masked7 = wsi->mode == LWSCM_WS_CLIENT;
+	int masked7 = (wsi->mode == LWSCM_WS_CLIENT);
 	unsigned char is_masked_bit = 0;
 	unsigned char *dropmask = NULL;
 	struct lws_tokens eff_buf;
 	int pre = 0, n;
 	size_t orig_len = len;
 
-	if (protocol == LWS_WRITE_HTTP ||
-	    protocol == LWS_WRITE_HTTP_FINAL ||
-	    protocol == LWS_WRITE_HTTP_HEADERS)
+	if (wsi->state == LWSS_ESTABLISHED && wsi->u.ws.tx_draining_ext) {
+		/* remove us from the list */
+		struct lws **w = &wsi->context->tx_draining_ext_list;
+		lwsl_debug("%s: TX EXT DRAINING: Remove from list\n", __func__);
+		wsi->u.ws.tx_draining_ext = 0;
+		/* remove us from context draining ext list */
+		while (*w) {
+			if (*w == wsi) {
+				*w = wsi->u.ws.tx_draining_ext_list;
+				break;
+			}
+			w = &((*w)->u.ws.tx_draining_ext_list);
+		}
+		wsi->u.ws.tx_draining_ext_list = NULL;
+		wp = (wsi->u.ws.tx_draining_stashed_wp & 0xc0) |
+				LWS_WRITE_CONTINUATION;
+
+		lwsl_ext("FORCED draining wp to 0x%02X\n", wp);
+	}
+
+	if (wp == LWS_WRITE_HTTP ||
+	    wp == LWS_WRITE_HTTP_FINAL ||
+	    wp == LWS_WRITE_HTTP_HEADERS)
 		goto send_raw;
 
-	/* websocket protocol, either binary or text */
+	/* if not in a state to send stuff, then just send nothing */
 
 	if (wsi->state != LWSS_ESTABLISHED &&
-	    !(wsi->state == LWSS_RETURNED_CLOSE_ALREADY &&
-	      protocol == LWS_WRITE_CLOSE))
-		return -1;
+	    ((wsi->state != LWSS_RETURNED_CLOSE_ALREADY &&
+	      wsi->state != LWSS_AWAITING_CLOSE_ACK) ||
+			    wp != LWS_WRITE_CLOSE))
+		return 0;
 
 	/* if we are continuing a frame that already had its header done */
 
@@ -264,20 +284,56 @@ LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf,
 
 	/*
 	 * give a chance to the extensions to modify payload
-	 * pre-TX mangling is not allowed to truncate
+	 * the extension may decide to produce unlimited payload erratically
+	 * (eg, compression extension), so we require only that if he produces
+	 * something, it will be a complete fragment of the length known at
+	 * the time (just the fragment length known), and if he has
+	 * more we will come back next time he is writeable and allow him to
+	 * produce more fragments until he's drained.
+	 *
+	 * This allows what is sent each time it is writeable to be limited to
+	 * a size that can be sent without partial sends or blocking, allows
+	 * interleaving of control frames and other connection service.
 	 */
 	eff_buf.token = (char *)buf;
 	eff_buf.token_len = len;
 
-	switch ((int)protocol) {
+	switch ((int)wp) {
 	case LWS_WRITE_PING:
 	case LWS_WRITE_PONG:
 	case LWS_WRITE_CLOSE:
 		break;
 	default:
-		if (lws_ext_cb_wsi_active_exts(wsi, LWS_EXT_CALLBACK_PAYLOAD_TX,
-					       &eff_buf, 0) < 0)
+		n = lws_ext_cb_active(wsi, LWS_EXT_CB_PAYLOAD_TX, &eff_buf, wp);
+		if (n < 0)
 			return -1;
+
+		if (n && eff_buf.token_len) {
+			/* extension requires further draining */
+			wsi->u.ws.tx_draining_ext = 1;
+			wsi->u.ws.tx_draining_ext_list =
+					wsi->context->tx_draining_ext_list;
+			wsi->context->tx_draining_ext_list = wsi;
+			/* we must come back to do more */
+			lws_callback_on_writable(wsi);
+			/*
+			 * keep a copy of the write type for the overall
+			 * action that has provoked generation of these
+			 * fragments, so the last guy can use its FIN state.
+			 */
+			wsi->u.ws.tx_draining_stashed_wp = wp;
+			/* this is definitely not actually the last fragment
+			 * because the extension asserted he has more coming
+			 * So make sure this intermediate one doesn't go out
+			 * with a FIN.
+			 */
+			wp |= LWS_WRITE_NO_FIN;
+		}
+
+		if (eff_buf.token_len && wsi->u.ws.stashed_write_pending) {
+			wsi->u.ws.stashed_write_pending = 0;
+			wp = (wp &0xc0) | (int)wsi->u.ws.stashed_write_type;
+		}
 	}
 
 	/*
@@ -285,12 +341,24 @@ LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf,
 	 * compression extension, it has already updated its state according
 	 * to this being issued
 	 */
-	if ((char *)buf != eff_buf.token)
+	if ((char *)buf != eff_buf.token) {
+		/*
+		 * ext might eat it, but not have anything to issue yet;
+		 * in that case we have to follow his lead, but stash and
+		 * replace the write type that was lost here the first time.
+		 */
+		if (len && !eff_buf.token_len) {
+			if (!wsi->u.ws.stashed_write_pending)
+				wsi->u.ws.stashed_write_type = (char)wp & 0x3f;
+			wsi->u.ws.stashed_write_pending = 1;
+			return len;
+		}
 		/*
 		 * extension recreated it:
 		 * need to buffer this if not all sent
 		 */
 		wsi->u.ws.clean_buffer = 0;
+	}
 
 	buf = (unsigned char *)eff_buf.token;
 	len = eff_buf.token_len;
@@ -303,7 +371,7 @@ LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf,
 			is_masked_bit = 0x80;
 		}
 
-		switch (protocol & 0xf) {
+		switch (wp & 0xf) {
 		case LWS_WRITE_TEXT:
 			n = LWSWSOPC_TEXT_FRAME;
 			break;
@@ -324,11 +392,11 @@ LWS_VISIBLE int lws_write(struct lws *wsi, unsigned char *buf,
 			n = LWSWSOPC_PONG;
 			break;
 		default:
-			lwsl_warn("lws_write: unknown write opc / protocol\n");
+			lwsl_warn("lws_write: unknown write opc / wp\n");
 			return -1;
 		}
 
-		if (!(protocol & LWS_WRITE_NO_FIN))
+		if (!(wp & LWS_WRITE_NO_FIN))
 			n |= 1 << 7;
 
 		if (len < 126) {
@@ -370,10 +438,10 @@ do_more_inside_frame:
 
 	/*
 	 * Deal with masking if we are in client -> server direction and
-	 * the protocol demands it
+	 * the ws protocol demands it
 	 */
 
-	if (wsi->mode == LWSCM_WS_CLIENT) {
+	if (masked7) {
 		if (!wsi->u.ws.inside_frame)
 			if (lws_0405_frame_mask_generate(wsi)) {
 				lwsl_err("frame mask generation failed\n");
@@ -385,17 +453,16 @@ do_more_inside_frame:
 		 */
 		if (dropmask) { /* never set if already inside frame */
 			for (n = 4; n < (int)len + 4; n++)
-				dropmask[n] = dropmask[n] ^
-				wsi->u.ws.mask_nonce[
-					(wsi->u.ws.frame_mask_index++) & 3];
+				dropmask[n] = dropmask[n] ^ wsi->u.ws.mask[
+					(wsi->u.ws.mask_idx++) & 3];
 
 			/* copy the frame nonce into place */
-			memcpy(dropmask, wsi->u.ws.mask_nonce, 4);
+			memcpy(dropmask, wsi->u.ws.mask, 4);
 		}
 	}
 
 send_raw:
-	switch ((int)protocol) {
+	switch ((int)wp) {
 	case LWS_WRITE_CLOSE:
 /*		lwsl_hexdump(&buf[-pre], len); */
 	case LWS_WRITE_HTTP:
@@ -408,26 +475,26 @@ send_raw:
 			unsigned char flags = 0;
 
 			n = LWS_HTTP2_FRAME_TYPE_DATA;
-			if (protocol == LWS_WRITE_HTTP_HEADERS) {
+			if (wp == LWS_WRITE_HTTP_HEADERS) {
 				n = LWS_HTTP2_FRAME_TYPE_HEADERS;
 				flags = LWS_HTTP2_FLAG_END_HEADERS;
 				if (wsi->u.http2.send_END_STREAM)
 					flags |= LWS_HTTP2_FLAG_END_STREAM;
 			}
 
-			if ((protocol == LWS_WRITE_HTTP ||
-			     protocol == LWS_WRITE_HTTP_FINAL) &&
+			if ((wp == LWS_WRITE_HTTP ||
+			     wp == LWS_WRITE_HTTP_FINAL) &&
 			    wsi->u.http.content_length) {
 				wsi->u.http.content_remain -= len;
 				lwsl_info("%s: content_remain = %lu\n", __func__,
 					  wsi->u.http.content_remain);
 				if (!wsi->u.http.content_remain) {
 					lwsl_info("%s: selecting final write mode\n", __func__);
-					protocol = LWS_WRITE_HTTP_FINAL;
+					wp = LWS_WRITE_HTTP_FINAL;
 				}
 			}
 
-			if (protocol == LWS_WRITE_HTTP_FINAL && wsi->u.http2.END_STREAM) {
+			if (wp == LWS_WRITE_HTTP_FINAL && wsi->u.http2.END_STREAM) {
 				lwsl_info("%s: setting END_STREAM\n", __func__);
 				flags |= LWS_HTTP2_FLAG_END_STREAM;
 			}
@@ -441,8 +508,6 @@ send_raw:
 		break;
 	}
 
-	wsi->u.ws.inside_frame = 1;
-
 	/*
 	 * give any active extensions a chance to munge the buffer
 	 * before send.  We pass in a pointer to an lws_tokens struct
@@ -463,6 +528,7 @@ send_raw:
 	 */
 
 	n = lws_issue_raw_ext_access(wsi, buf - pre, len + pre);
+	wsi->u.ws.inside_frame = 1;
 	if (n <= 0)
 		return n;
 
diff --git a/lib/parsers.c b/lib/parsers.c
index 9df7a0e7d5dcacab7544d0a9dfd597b81cdad69a..71706206ed249e6695b5bad256862321173abe17 100644
--- a/lib/parsers.c
+++ b/lib/parsers.c
@@ -738,12 +738,33 @@ int
 lws_rx_sm(struct lws *wsi, unsigned char c)
 {
 	struct lws_tokens eff_buf;
-	int ret = 0;
+	int ret = 0, n, rx_draining_ext = 0;
 	int callback_action = LWS_CALLBACK_RECEIVE;
+	if (wsi->socket_is_permanently_unusable)
+		return -1;
 
 	switch (wsi->lws_rx_parse_state) {
 	case LWS_RXPS_NEW:
+		if (wsi->u.ws.rx_draining_ext) {
+			struct lws **w = &wsi->context->rx_draining_ext_list;
+
+			eff_buf.token = NULL;
+			eff_buf.token_len = 0;
+			wsi->u.ws.rx_draining_ext = 0;
+			/* remove us from context draining ext list */
+			while (*w) {
+				if (*w == wsi) {
+					*w = wsi->u.ws.rx_draining_ext_list;
+					break;
+				}
+				w = &((*w)->u.ws.rx_draining_ext_list);
+			}
+			wsi->u.ws.rx_draining_ext_list = NULL;
+			rx_draining_ext = 1;
+			lwsl_ext("%s: doing draining flow\n", __func__);
 
+			goto drain_extension;
+		}
 		switch (wsi->ietf_spec_revision) {
 		case 13:
 			/*
@@ -758,20 +779,20 @@ lws_rx_sm(struct lws *wsi, unsigned char c)
 			break;
 		}
 		break;
-	case LWS_RXPS_04_MASK_NONCE_1:
-		wsi->u.ws.mask_nonce[1] = c;
+	case LWS_RXPS_04_mask_1:
+		wsi->u.ws.mask[1] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
-		wsi->lws_rx_parse_state = LWS_RXPS_04_MASK_NONCE_2;
+		wsi->lws_rx_parse_state = LWS_RXPS_04_mask_2;
 		break;
-	case LWS_RXPS_04_MASK_NONCE_2:
-		wsi->u.ws.mask_nonce[2] = c;
+	case LWS_RXPS_04_mask_2:
+		wsi->u.ws.mask[2] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
-		wsi->lws_rx_parse_state = LWS_RXPS_04_MASK_NONCE_3;
+		wsi->lws_rx_parse_state = LWS_RXPS_04_mask_3;
 		break;
-	case LWS_RXPS_04_MASK_NONCE_3:
-		wsi->u.ws.mask_nonce[3] = c;
+	case LWS_RXPS_04_mask_3:
+		wsi->u.ws.mask[3] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 
@@ -780,7 +801,7 @@ lws_rx_sm(struct lws *wsi, unsigned char c)
 		 * this is the start of a frame with a new key
 		 */
 
-		wsi->u.ws.frame_mask_index = 0;
+		wsi->u.ws.mask_idx = 0;
 
 		wsi->lws_rx_parse_state = LWS_RXPS_04_FRAME_HDR_1;
 		break;
@@ -825,9 +846,22 @@ handle_first:
 		switch (wsi->u.ws.opcode) {
 		case LWSWSOPC_TEXT_FRAME:
 		case LWSWSOPC_BINARY_FRAME:
+			wsi->u.ws.rsv_first_msg = (c & 0x70);
 			wsi->u.ws.frame_is_binary =
 			     wsi->u.ws.opcode == LWSWSOPC_BINARY_FRAME;
 			break;
+		case 3:
+		case 4:
+		case 5:
+		case 6:
+		case 7:
+		case 0xb:
+		case 0xc:
+		case 0xd:
+		case 0xe:
+		case 0xf:
+			lwsl_info("illegal opcode\n");
+			return -1;
 		}
 		wsi->lws_rx_parse_state = LWS_RXPS_04_FRAME_HDR_LEN;
 		break;
@@ -944,33 +978,33 @@ handle_first:
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_1:
-		wsi->u.ws.mask_nonce[0] = c;
+		wsi->u.ws.mask[0] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_2;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_2:
-		wsi->u.ws.mask_nonce[1] = c;
+		wsi->u.ws.mask[1] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_3;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_3:
-		wsi->u.ws.mask_nonce[2] = c;
+		wsi->u.ws.mask[2] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state = LWS_RXPS_07_COLLECT_FRAME_KEY_4;
 		break;
 
 	case LWS_RXPS_07_COLLECT_FRAME_KEY_4:
-		wsi->u.ws.mask_nonce[3] = c;
+		wsi->u.ws.mask[3] = c;
 		if (c)
 			wsi->u.ws.all_zero_nonce = 0;
 		wsi->lws_rx_parse_state =
 					LWS_RXPS_PAYLOAD_UNTIL_LENGTH_EXHAUSTED;
-		wsi->u.ws.frame_mask_index = 0;
+		wsi->u.ws.mask_idx = 0;
 		if (wsi->u.ws.rx_packet_length == 0) {
 			wsi->lws_rx_parse_state = LWS_RXPS_NEW;
 			goto spill;
@@ -980,19 +1014,16 @@ handle_first:
 
 	case LWS_RXPS_PAYLOAD_UNTIL_LENGTH_EXHAUSTED:
 
-		if (!wsi->u.ws.rx_user_buffer) {
-			lwsl_err("NULL user buffer...\n");
-			return 1;
-		}
+		assert(wsi->u.ws.rx_ubuf);
 
 		if (wsi->u.ws.all_zero_nonce)
-			wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
-			       (wsi->u.ws.rx_user_buffer_head++)] = c;
+			wsi->u.ws.rx_ubuf[LWS_PRE +
+			       (wsi->u.ws.rx_ubuf_head++)] = c;
 		else
-			wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING +
-			       (wsi->u.ws.rx_user_buffer_head++)] =
-				   c ^ wsi->u.ws.mask_nonce[
-					    (wsi->u.ws.frame_mask_index++) & 3];
+			wsi->u.ws.rx_ubuf[LWS_PRE +
+			       (wsi->u.ws.rx_ubuf_head++)] =
+				   c ^ wsi->u.ws.mask[
+					    (wsi->u.ws.mask_idx++) & 3];
 
 		if (--wsi->u.ws.rx_packet_length == 0) {
 			/* spill because we have the whole frame */
@@ -1006,12 +1037,12 @@ handle_first:
 		 */
 
 		if (!wsi->protocol->rx_buffer_size &&
-			 		wsi->u.ws.rx_user_buffer_head !=
+			 		wsi->u.ws.rx_ubuf_head !=
 			 				  LWS_MAX_SOCKET_IO_BUF)
 			break;
 		else
 			if (wsi->protocol->rx_buffer_size &&
-					wsi->u.ws.rx_user_buffer_head !=
+					wsi->u.ws.rx_ubuf_head !=
 						  wsi->protocol->rx_buffer_size)
 			break;
 
@@ -1044,9 +1075,9 @@ spill:
 					wsi->protocol->callback, wsi,
 					LWS_CALLBACK_WS_PEER_INITIATED_CLOSE,
 					wsi->user_space,
-					&wsi->u.ws.rx_user_buffer[
-						LWS_SEND_BUFFER_PRE_PADDING],
-					wsi->u.ws.rx_user_buffer_head))
+					&wsi->u.ws.rx_ubuf[
+						LWS_PRE],
+					wsi->u.ws.rx_ubuf_head))
 				return -1;
 
 			lwsl_parser("server sees client close packet\n");
@@ -1057,7 +1088,7 @@ spill:
 
 		case LWSWSOPC_PING:
 			lwsl_info("received %d byte ping, sending pong\n",
-						 wsi->u.ws.rx_user_buffer_head);
+						 wsi->u.ws.rx_ubuf_head);
 
 			if (wsi->u.ws.ping_pending_flag) {
 				/*
@@ -1069,29 +1100,29 @@ spill:
 			}
 process_as_ping:
 			/* control packets can only be < 128 bytes long */
-			if (wsi->u.ws.rx_user_buffer_head > 128 - 3) {
+			if (wsi->u.ws.rx_ubuf_head > 128 - 3) {
 				lwsl_parser("DROP PING payload too large\n");
 				goto ping_drop;
 			}
 
 			/* stash the pong payload */
-			memcpy(wsi->u.ws.ping_payload_buf + LWS_SEND_BUFFER_PRE_PADDING,
-			       &wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING],
-				wsi->u.ws.rx_user_buffer_head);
+			memcpy(wsi->u.ws.ping_payload_buf + LWS_PRE,
+			       &wsi->u.ws.rx_ubuf[LWS_PRE],
+				wsi->u.ws.rx_ubuf_head);
 
-			wsi->u.ws.ping_payload_len = wsi->u.ws.rx_user_buffer_head;
+			wsi->u.ws.ping_payload_len = wsi->u.ws.rx_ubuf_head;
 			wsi->u.ws.ping_pending_flag = 1;
 
 			/* get it sent as soon as possible */
 			lws_callback_on_writable(wsi);
 ping_drop:
-			wsi->u.ws.rx_user_buffer_head = 0;
+			wsi->u.ws.rx_ubuf_head = 0;
 			return 0;
 
 		case LWSWSOPC_PONG:
 			lwsl_info("received pong\n");
-			lwsl_hexdump(&wsi->u.ws.rx_user_buffer[LWS_SEND_BUFFER_PRE_PADDING],
-			             wsi->u.ws.rx_user_buffer_head);
+			lwsl_hexdump(&wsi->u.ws.rx_ubuf[LWS_PRE],
+			             wsi->u.ws.rx_ubuf_head);
 
 			/* issue it */
 			callback_action = LWS_CALLBACK_RECEIVE_PONG;
@@ -1111,17 +1142,15 @@ ping_drop:
 			 * state machine.
 			 */
 
-			eff_buf.token = &wsi->u.ws.rx_user_buffer[
-						   LWS_SEND_BUFFER_PRE_PADDING];
-			eff_buf.token_len = wsi->u.ws.rx_user_buffer_head;
+			eff_buf.token = &wsi->u.ws.rx_ubuf[LWS_PRE];
+			eff_buf.token_len = wsi->u.ws.rx_ubuf_head;
 
-			if (lws_ext_cb_wsi_active_exts(wsi,
-				LWS_EXT_CALLBACK_EXTENDED_PAYLOAD_RX,
+			if (lws_ext_cb_active(wsi, LWS_EXT_CB_EXTENDED_PAYLOAD_RX,
 					&eff_buf, 0) <= 0) /* not handle or fail */
 				lwsl_ext("ext opc opcode 0x%x unknown\n",
 							      wsi->u.ws.opcode);
 
-			wsi->u.ws.rx_user_buffer_head = 0;
+			wsi->u.ws.rx_ubuf_head = 0;
 			return 0;
 		}
 
@@ -1131,13 +1160,34 @@ ping_drop:
 		 * so it can be sent straight out again using lws_write
 		 */
 
-		eff_buf.token = &wsi->u.ws.rx_user_buffer[
-						LWS_SEND_BUFFER_PRE_PADDING];
-		eff_buf.token_len = wsi->u.ws.rx_user_buffer_head;
+		eff_buf.token = &wsi->u.ws.rx_ubuf[LWS_PRE];
+		eff_buf.token_len = wsi->u.ws.rx_ubuf_head;
 
-		if (lws_ext_cb_wsi_active_exts(wsi,
-				LWS_EXT_CALLBACK_PAYLOAD_RX, &eff_buf, 0) < 0)
+drain_extension:
+		lwsl_ext("%s: passing %d to ext\n", __func__, eff_buf.token_len);
+
+		if (wsi->state == LWSS_RETURNED_CLOSE_ALREADY ||
+		    wsi->state == LWSS_AWAITING_CLOSE_ACK)
+			goto already_done;
+
+		n = lws_ext_cb_active(wsi, LWS_EXT_CB_PAYLOAD_RX, &eff_buf, 0);
+		if (n < 0) {
+			/*
+			 * we may rely on this to get RX, just drop connection
+			 */
+			wsi->socket_is_permanently_unusable = 1;
 			return -1;
+		}
+
+		if (rx_draining_ext && eff_buf.token_len == 0)
+			goto already_done;
+
+		if (n && eff_buf.token_len) {
+			/* extension had more... main loop will come back */
+			wsi->u.ws.rx_draining_ext = 1;
+			wsi->u.ws.rx_draining_ext_list = wsi->context->rx_draining_ext_list;
+			wsi->context->rx_draining_ext_list = wsi;
+		}
 
 		if (eff_buf.token_len > 0 ||
 		    callback_action == LWS_CALLBACK_RECEIVE_PONG) {
@@ -1146,7 +1196,7 @@ ping_drop:
 			if (wsi->protocol->callback) {
 
 				if (callback_action == LWS_CALLBACK_RECEIVE_PONG)
-				    lwsl_info("Doing pong callback\n");
+					lwsl_info("Doing pong callback\n");
 
 				ret = user_callback_handle_rxflow(
 						wsi->protocol->callback,
@@ -1160,7 +1210,8 @@ ping_drop:
 				lwsl_err("No callback on payload spill!\n");
 		}
 
-		wsi->u.ws.rx_user_buffer_head = 0;
+already_done:
+		wsi->u.ws.rx_ubuf_head = 0;
 		break;
 	}
 
diff --git a/lib/pollfd.c b/lib/pollfd.c
index 319abbd5b630b2fb77b86b029c2fcaf46f78c3b4..c41a3b8104b0fc0d6fbc48dfa5a4d19b0f2d2b2c 100644
--- a/lib/pollfd.c
+++ b/lib/pollfd.c
@@ -250,8 +250,8 @@ lws_callback_on_writable(struct lws *wsi)
 network_sock:
 #endif
 
-	if (lws_ext_cb_wsi_active_exts(wsi,
-				LWS_EXT_CALLBACK_REQUEST_ON_WRITEABLE, NULL, 0))
+	if (lws_ext_cb_active(wsi,
+				LWS_EXT_CB_REQUEST_ON_WRITEABLE, NULL, 0))
 		return 1;
 
 	if (wsi->position_in_fds_table < 0) {
diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h
index 36ae98fe24bbb9e82f76ddf8cdb2115ac7ab4dff..e618a1354b4c70eb89f95d721130736b79e4b25f 100644
--- a/lib/private-libwebsockets.h
+++ b/lib/private-libwebsockets.h
@@ -292,6 +292,9 @@ extern "C" {
 #ifndef LWS_MAX_EXTENSIONS_ACTIVE
 #define LWS_MAX_EXTENSIONS_ACTIVE 3
 #endif
+#ifndef LWS_MAX_EXT_OFFERS
+#define LWS_MAX_EXT_OFFERS 8
+#endif
 #ifndef SPEC_LATEST_SUPPORTED
 #define SPEC_LATEST_SUPPORTED 13
 #endif
@@ -373,9 +376,9 @@ enum lws_pending_protocol_send {
 enum lws_rx_parse_state {
 	LWS_RXPS_NEW,
 
-	LWS_RXPS_04_MASK_NONCE_1,
-	LWS_RXPS_04_MASK_NONCE_2,
-	LWS_RXPS_04_MASK_NONCE_3,
+	LWS_RXPS_04_mask_1,
+	LWS_RXPS_04_mask_2,
+	LWS_RXPS_04_mask_3,
 
 	LWS_RXPS_04_FRAME_HDR_1,
 	LWS_RXPS_04_FRAME_HDR_LEN,
@@ -525,6 +528,8 @@ struct lws_context {
 	const struct lws_protocols *protocols;
 	void *http_header_data;
 	struct allocated_headers *ah_pool;
+	struct lws *rx_draining_ext_list;
+	struct lws *tx_draining_ext_list;
 #ifdef LWS_OPENSSL_SUPPORT
 	SSL_CTX *ssl_ctx;
 	SSL_CTX *ssl_client_ctx;
@@ -795,7 +800,7 @@ struct _lws_http2_related {
 	unsigned int my_stream_id;
 	unsigned int child_count;
 	int my_priority;
-	
+
 	unsigned int END_STREAM:1;
 	unsigned int END_HEADERS:1;
 	unsigned int send_END_STREAM:1;
@@ -834,20 +839,25 @@ struct _lws_header_related {
 };
 
 struct _lws_websocket_related {
-	char *rx_user_buffer;
+	char *rx_ubuf;
+	struct lws *rx_draining_ext_list;
+	struct lws *tx_draining_ext_list;
 	size_t rx_packet_length;
-	unsigned int rx_user_buffer_head;
-	unsigned char mask_nonce[4];
+	unsigned int rx_ubuf_head;
+	unsigned char mask[4];
 	/* Also used for close content... control opcode == < 128 */
-	unsigned char ping_payload_buf[128 - 3 + LWS_SEND_BUFFER_PRE_PADDING];
+	unsigned char ping_payload_buf[128 - 3 + LWS_PRE];
 
 	unsigned char ping_payload_len;
-	unsigned char frame_mask_index;
+	unsigned char mask_idx;
 	unsigned char opcode;
 	unsigned char rsv;
+	unsigned char rsv_first_msg;
 	/* zero if no info, or length including 2-byte close code */
 	unsigned char close_in_ping_buffer_len;
 	unsigned char utf8;
+	unsigned char stashed_write_type;
+	unsigned char tx_draining_stashed_wp;
 
 	unsigned int final:1;
 	unsigned int frame_is_binary:1;
@@ -861,6 +871,10 @@ struct _lws_websocket_related {
 	unsigned int owed_a_fin:1;
 	unsigned int check_utf8:1;
 	unsigned int defeat_check_utf8:1;
+	unsigned int pmce_compressed_message:1;
+	unsigned int stashed_write_pending:1;
+	unsigned int rx_draining_ext:1;
+	unsigned int tx_draining_ext:1;
 };
 
 struct lws {
@@ -896,7 +910,7 @@ struct lws {
 	unsigned char *trunc_alloc; /* non-NULL means buffering in progress */
 #ifndef LWS_NO_EXTENSIONS
 	const struct lws_extension *active_extensions[LWS_MAX_EXTENSIONS_ACTIVE];
-	void *active_extensions_user[LWS_MAX_EXTENSIONS_ACTIVE];
+	void *act_ext_user[LWS_MAX_EXTENSIONS_ACTIVE];
 #endif
 #ifdef LWS_OPENSSL_SUPPORT
 	SSL *ssl;
@@ -936,7 +950,7 @@ struct lws {
 
 	/* chars */
 #ifndef LWS_NO_EXTENSIONS
-	unsigned char count_active_extensions;
+	unsigned char count_act_ext;
 #endif
 	unsigned char ietf_spec_revision;
 	char mode; /* enum connection_mode */
@@ -1042,15 +1056,16 @@ lws_any_extension_handled(struct lws *wsi,
 			  void *v, size_t len);
 
 LWS_EXTERN int
-lws_ext_cb_wsi_active_exts(struct lws *wsi, int reason,
+lws_ext_cb_active(struct lws *wsi, int reason,
 				 void *buf, int len);
 LWS_EXTERN int
 lws_ext_cb_all_exts(struct lws_context *context,
 					 struct lws *wsi, int reason,
 					 void *arg, int len);
+
 #else
 #define lws_any_extension_handled(_a, _b, _c, _d) (0)
-#define lws_ext_cb_wsi_active_exts(_a, _b, _c, _d) (0)
+#define lws_ext_cb_active(_a, _b, _c, _d) (0)
 #define lws_ext_cb_all_exts(_a, _b, _c, _d, _e) (0)
 #define lws_issue_raw_ext_access lws_issue_raw
 #define lws_context_init_extensions(_a, _b)
@@ -1137,7 +1152,7 @@ int lws_context_init_server(struct lws_context_creation_info *info,
 LWS_EXTERN int
 handshake_0405(struct lws_context *context, struct lws *wsi);
 LWS_EXTERN int
-lws_interpret_incoming_packet(struct lws *wsi, unsigned char *buf, size_t len);
+lws_interpret_incoming_packet(struct lws *wsi, unsigned char **buf, size_t len);
 LWS_EXTERN void
 lws_server_get_canonical_hostname(struct lws_context *context,
 				  struct lws_context_creation_info *info);
diff --git a/lib/server-handshake.c b/lib/server-handshake.c
index c306baa6e95226813906004a2dcfba763b382b26..7108cc7dbf42cd2697872b669deb07d0e3331912 100644
--- a/lib/server-handshake.c
+++ b/lib/server-handshake.c
@@ -26,20 +26,19 @@
 LWS_VISIBLE int
 lws_extension_server_handshake(struct lws *wsi, char **p)
 {
-	int n;
-	char *c;
-	char ext_name[128];
-	const struct lws_extension *ext;
 	struct lws_context *context = wsi->context;
+	const struct lws_extension *ext;
+	char ext_name[128];
 	int ext_count = 0;
+	char *c;//, *start;
 	int more = 1;
 	char ignore;
+	int n, m;
 
 	/*
 	 * Figure out which extensions the client has that we want to
 	 * enable on this connection, and give him back the list
 	 */
-
 	if (!lws_hdr_total_length(wsi, WSI_TOKEN_EXTENSIONS))
 		return 0;
 
@@ -49,21 +48,21 @@ lws_extension_server_handshake(struct lws *wsi, char **p)
 	 */
 
 	if (lws_hdr_copy(wsi, (char *)context->serv_buf,
-			sizeof(context->serv_buf),
-					      WSI_TOKEN_EXTENSIONS) < 0)
+			 sizeof(context->serv_buf), WSI_TOKEN_EXTENSIONS) < 0)
 		return 1;
 
 	c = (char *)context->serv_buf;
 	lwsl_parser("WSI_TOKEN_EXTENSIONS = '%s'\n", c);
-	wsi->count_active_extensions = 0;
+	wsi->count_act_ext = 0;
 	n = 0;
 	ignore = 0;
+//	start = c;
 	while (more) {
 
-		if (*c && (*c != ',' && *c != ' ' && *c != '\t')) {
-			if (ext_name[n] == ';')
+		if (*c && (*c != ',' && *c != '\t')) {
+			if (*c == ';')
 				ignore = 1;
-			if (ignore) {
+			if (ignore || *c == ' ') {
 				c++;
 				continue;
 			}
@@ -93,14 +92,21 @@ lws_extension_server_handshake(struct lws *wsi, char **p)
 				ext++;
 				continue;
 			}
-
+#if 0
+			m = ext->callback(lws_get_context(wsi), ext, wsi,
+					  LWS_EXT_CB_ARGS_VALIDATE,
+					  NULL, start + n, 0);
+			if (m) {
+				ext++;
+				continue;
+			}
+#endif
 			/*
 			 * oh, we do support this one he asked for... but let's
 			 * ask user code if it's OK to apply it on this
 			 * particular connection + protocol
 			 */
-
-			n = lws_get_context(wsi)->protocols[0].callback(wsi,
+			m = lws_get_context(wsi)->protocols[0].callback(wsi,
 				LWS_CALLBACK_CONFIRM_EXTENSION_OKAY,
 				wsi->user_space, ext_name, 0);
 
@@ -110,7 +116,7 @@ lws_extension_server_handshake(struct lws *wsi, char **p)
 			 * unhandled
 			 */
 
-			if (n) {
+			if (m) {
 				ext++;
 				continue;
 			}
@@ -120,40 +126,28 @@ lws_extension_server_handshake(struct lws *wsi, char **p)
 			if (ext_count)
 				*(*p)++ = ',';
 			else
-				LWS_CPYAPP(*p,
-				 "\x0d\x0aSec-WebSocket-Extensions: ");
+				LWS_CPYAPP(*p, "\x0d\x0aSec-WebSocket-Extensions: ");
 			*p += sprintf(*p, "%s", ext_name);
 			ext_count++;
 
 			/* instantiate the extension on this conn */
 
-			wsi->active_extensions_user[
-				wsi->count_active_extensions] =
-				     lws_zalloc(ext->per_session_data_size);
-			if (wsi->active_extensions_user[
-			     wsi->count_active_extensions] == NULL) {
-				lwsl_err("Out of mem\n");
-				return 1;
-			}
-
-			wsi->active_extensions[
-				  wsi->count_active_extensions] = ext;
+			wsi->active_extensions[wsi->count_act_ext] = ext;
 
 			/* allow him to construct his context */
 
-			ext->callback(lws_get_context(wsi),
-					ext, wsi,
-					LWS_EXT_CALLBACK_CONSTRUCT,
-					wsi->active_extensions_user[
-				wsi->count_active_extensions], NULL, 0);
+			ext->callback(lws_get_context(wsi), ext, wsi,
+				      LWS_EXT_CB_CONSTRUCT,
+				      (void *)&wsi->act_ext_user[wsi->count_act_ext],
+				      NULL, 0);
 
-			wsi->count_active_extensions++;
-			lwsl_parser("count_active_extensions <- %d\n",
-					  wsi->count_active_extensions);
+			wsi->count_act_ext++;
+			lwsl_parser("count_act_ext <- %d\n", wsi->count_act_ext);
 
 			ext++;
 		}
 
+		//start = c;
 		n = 0;
 	}
 
@@ -208,7 +202,7 @@ handshake_0405(struct lws_context *context, struct lws *wsi)
 
 	/* make a buffer big enough for everything */
 
-	response = (char *)context->serv_buf + MAX_WEBSOCKET_04_KEY_LEN + LWS_SEND_BUFFER_PRE_PADDING;
+	response = (char *)context->serv_buf + MAX_WEBSOCKET_04_KEY_LEN + LWS_PRE;
 	p = response;
 	LWS_CPYAPP(p, "HTTP/1.1 101 Switching Protocols\x0d\x0a"
 		      "Upgrade: WebSocket\x0d\x0a"
@@ -240,7 +234,7 @@ handshake_0405(struct lws_context *context, struct lws *wsi)
 
 	LWS_CPYAPP(p, "\x0d\x0a\x0d\x0a");
 
-	if (!lws_any_extension_handled(wsi, LWS_EXT_CALLBACK_HANDSHAKE_REPLY_TX,
+	if (!lws_any_extension_handled(wsi, LWS_EXT_CB_HANDSHAKE_REPLY_TX,
 				       response, p - response)) {
 
 		/* okay send the handshake response accepting the connection */
diff --git a/lib/server.c b/lib/server.c
index c24f9b1c9dd14f086045b96923dfaf1dc84554fe..07156454c2d6492136c83b6a96361de2bd7dbe17 100644
--- a/lib/server.c
+++ b/lib/server.c
@@ -539,9 +539,9 @@ upgrade_ws:
 		n = wsi->protocol->rx_buffer_size;
 		if (!n)
 			n = LWS_MAX_SOCKET_IO_BUF;
-		n += LWS_SEND_BUFFER_PRE_PADDING;
-		wsi->u.ws.rx_user_buffer = lws_malloc(n);
-		if (!wsi->u.ws.rx_user_buffer) {
+		n += LWS_PRE;
+		wsi->u.ws.rx_ubuf = lws_malloc(n + 4 /* 0x0000ffff zlib */);
+		if (!wsi->u.ws.rx_ubuf) {
 			lwsl_err("Out of Mem allocating rx buffer %d\n", n);
 			return 1;
 		}
@@ -876,11 +876,9 @@ LWS_VISIBLE int lws_serve_http_file(struct lws *wsi, const char *file,
 				    int other_headers_len)
 {
 	struct lws_context *context = lws_get_context(wsi);
-	unsigned char *response = context->serv_buf +
-				  LWS_SEND_BUFFER_PRE_PADDING;
+	unsigned char *response = context->serv_buf + LWS_PRE;
 	unsigned char *p = response;
-	unsigned char *end = p + sizeof(context->serv_buf) -
-			     LWS_SEND_BUFFER_PRE_PADDING;
+	unsigned char *end = p + sizeof(context->serv_buf) - LWS_PRE;
 	int ret = 0;
 
 	wsi->u.http.fd = lws_plat_file_open(wsi, file, &wsi->u.http.filelen,
@@ -929,38 +927,47 @@ LWS_VISIBLE int lws_serve_http_file(struct lws *wsi, const char *file,
 }
 
 int
-lws_interpret_incoming_packet(struct lws *wsi, unsigned char *buf, size_t len)
+lws_interpret_incoming_packet(struct lws *wsi, unsigned char **buf, size_t len)
 {
-	size_t n = 0;
 	int m;
 
+	lwsl_parser("%s: received %d byte packet\n", __func__, (int)len);
 #if 0
-	lwsl_parser("received %d byte packet\n", (int)len);
-	lwsl_hexdump(buf, len);
+	lwsl_hexdump(*buf, len);
 #endif
 
 	/* let the rx protocol state machine have as much as it needs */
 
-	while (n < len) {
+	while (len) {
 		/*
 		 * we were accepting input but now we stopped doing so
 		 */
 		if (!(wsi->rxflow_change_to & LWS_RXFLOW_ALLOW)) {
-			lws_rxflow_cache(wsi, buf, n, len);
-
+			lws_rxflow_cache(wsi, *buf, 0, len);
+			lwsl_parser("%s: cached %d\n", __func__, (int)len);
 			return 1;
 		}
 
+		if (wsi->u.ws.rx_draining_ext) {
+			m = lws_rx_sm(wsi, 0);
+			if (m < 0)
+				return -1;
+			continue;
+		}
+
 		/* account for what we're using in rxflow buffer */
 		if (wsi->rxflow_buffer)
 			wsi->rxflow_pos++;
 
 		/* process the byte */
-		m = lws_rx_sm(wsi, buf[n++]);
+		m = lws_rx_sm(wsi, *(*buf)++);
 		if (m < 0)
 			return -1;
+		len--;
 	}
 
+	lwsl_parser("%s: exit with %d unused\n", __func__, (int)len);
+
 	return 0;
 }
 
diff --git a/lib/service.c b/lib/service.c
index 8d5c2d5ba586ffab4f09b83d84c1174571dee611..136da2d2cd7cad6aecfb8a089af2e0b494ed0340 100644
--- a/lib/service.c
+++ b/lib/service.c
@@ -53,12 +53,18 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 #endif
 	int ret, m, n;
 
-	/* pending truncated sends have uber priority */
+	/*
+	 * user callback is lowest priority to get these notifications
+	 * actually, since other pending things cannot be disordered
+	 */
 
+	/* Priority 1: pending truncated sends are incomplete ws fragments
+	 *	       If anything else sent first the protocol would be
+	 *	       corrupted.
+	 */
 	if (wsi->trunc_len) {
-		if (lws_issue_raw(wsi, wsi->trunc_alloc +
-				wsi->trunc_offset,
-						wsi->trunc_len) < 0) {
+		if (lws_issue_raw(wsi, wsi->trunc_alloc + wsi->trunc_offset,
+				  wsi->trunc_len) < 0) {
 			lwsl_info("%s signalling to close\n", __func__);
 			return -1;
 		}
@@ -68,8 +74,10 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 		if (wsi->state == LWSS_FLUSHING_STORED_SEND_BEFORE_CLOSE)
 			return -1; /* retry closing now */
 
+
 #ifdef LWS_USE_HTTP2
-	/* protocol packets are next */
+	/* Priority 2: protocol packets
+	 */
 	if (wsi->pps) {
 		lwsl_info("servicing pps %d\n", wsi->pps);
 		switch (wsi->pps) {
@@ -86,8 +94,8 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 		return 0; /* leave POLLOUT active */
 	}
 #endif
-	/* pending control packets have next priority */
-
+	/* Priority 3: pending control packets (pong or close)
+	 */
 	if ((wsi->state == LWSS_ESTABLISHED &&
 	     wsi->u.ws.ping_pending_flag) ||
 	    (wsi->state == LWSS_RETURNED_CLOSE_ALREADY &&
@@ -96,8 +104,7 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 		if (wsi->u.ws.payload_is_close)
 			write_type = LWS_WRITE_CLOSE;
 
-		n = lws_write(wsi, &wsi->u.ws.ping_payload_buf[
-					LWS_SEND_BUFFER_PRE_PADDING],
+		n = lws_write(wsi, &wsi->u.ws.ping_payload_buf[LWS_PRE],
 			      wsi->u.ws.ping_payload_len, write_type);
 		if (n < 0)
 			return -1;
@@ -112,15 +119,30 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 		return 0;
 	}
 
-	/* if we are closing, don't confuse the user with writeable cb */
-
+	/* Priority 4: if we are closing, not allowed to send more data frags
+	 *	       which means user callback or tx ext flush banned now
+	 */
 	if (wsi->state == LWSS_RETURNED_CLOSE_ALREADY)
 		goto user_service;
 
-	/* if nothing critical, user can get the callback */
+	/* Priority 5: Tx path extension with more to send
+	 *
+	 *	       These are handled as new fragments each time around.
+	 *	       We must block new writeable callbacks to enforce
+	 *	       payload ordering, but since they are always complete
+	 *	       fragments, control packets can interleave OK.
+	 */
+	if (wsi->state == LWSS_ESTABLISHED && wsi->u.ws.tx_draining_ext) {
+		lwsl_ext("SERVICING TX EXT DRAINING\n");
+		if (lws_write(wsi, NULL, 0, LWS_WRITE_CONTINUATION) < 0)
+			return -1;
+		/* leave POLLOUT active */
+		return 0;
+	}
 
-	m = lws_ext_cb_wsi_active_exts(wsi, LWS_EXT_CALLBACK_IS_WRITEABLE,
-								       NULL, 0);
+	/* Priority 6: user can get the callback
+	 */
+	m = lws_ext_cb_active(wsi, LWS_EXT_CB_IS_WRITEABLE, NULL, 0);
 #ifndef LWS_NO_EXTENSIONS
 	if (!wsi->extension_data_pending)
 		goto user_service;
@@ -144,9 +166,9 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 
 		/* give every extension a chance to spill */
 
-		m = lws_ext_cb_wsi_active_exts(wsi,
-					LWS_EXT_CALLBACK_PACKET_TX_PRESEND,
-							           &eff_buf, 0);
+		m = lws_ext_cb_active(wsi,
+					LWS_EXT_CB_PACKET_TX_PRESEND,
+					       &eff_buf, 0);
 		if (m < 0) {
 			lwsl_err("ext reports fatal error\n");
 			return -1;
@@ -162,7 +184,7 @@ lws_handle_POLLOUT_event(struct lws *wsi, struct lws_pollfd *pollfd)
 
 		if (eff_buf.token_len) {
 			n = lws_issue_raw(wsi, (unsigned char *)eff_buf.token,
-							     eff_buf.token_len);
+					  eff_buf.token_len);
 			if (n < 0) {
 				lwsl_info("closing from POLLOUT spill\n");
 				return -1;
@@ -276,7 +298,7 @@ lws_service_timeout_check(struct lws *wsi, unsigned int sec)
 	 * if extensions want in on it (eg, we are a mux parent)
 	 * give them a chance to service child timeouts
 	 */
-	if (lws_ext_cb_wsi_active_exts(wsi, LWS_EXT_CALLBACK_1HZ,
+	if (lws_ext_cb_active(wsi, LWS_EXT_CB_1HZ,
 					     NULL, sec) < 0)
 		return 0;
 
@@ -495,7 +517,7 @@ lws_service_fd(struct lws_context *context, struct lws_pollfd *pollfd)
 	case LWSCM_WS_CLIENT:
 	case LWSCM_HTTP2_SERVING:
 
-		/* the guy requested a callback when it was OK to write */
+		/* 1: something requested a callback when it was OK to write */
 
 		if ((pollfd->revents & LWS_POLLOUT) &&
 		    (wsi->state == LWSS_ESTABLISHED ||
@@ -503,16 +525,77 @@ lws_service_fd(struct lws_context *context, struct lws_pollfd *pollfd)
 		     wsi->state == LWSS_HTTP2_ESTABLISHED_PRE_SETTINGS ||
 		     wsi->state == LWSS_RETURNED_CLOSE_ALREADY ||
 		     wsi->state == LWSS_FLUSHING_STORED_SEND_BEFORE_CLOSE) &&
-			   lws_handle_POLLOUT_event(wsi, pollfd)) {
+		    lws_handle_POLLOUT_event(wsi, pollfd)) {
 			if (wsi->state == LWSS_RETURNED_CLOSE_ALREADY)
 				wsi->state = LWSS_FLUSHING_STORED_SEND_BEFORE_CLOSE;
 			lwsl_info("lws_service_fd: closing\n");
 			goto close_and_handled;
 		}
+#if 1
+		if (wsi->state == LWSS_RETURNED_CLOSE_ALREADY ||
+		    wsi->state == LWSS_AWAITING_CLOSE_ACK) {
+			/*
+			 * we stopped caring about anything except control
+			 * packets.  Force flow control off, defeat tx
+			 * draining.
+			 */
+			lws_rx_flow_control(wsi, 1);
+			wsi->u.ws.tx_draining_ext = 0;
+		}
+#endif
+		if (wsi->u.ws.tx_draining_ext) {
+			/* we cannot deal with new RX until the TX ext
+			 * path has been drained.  It's because new
+			 * rx will, eg, overwrite the wsi rx buf that
+			 * may be needed to retain state.
+			 *
+			 * TX ext drain path MUST go through event loop
+			 * to avoid blocking.
+			 */
+			break;
+		}
+
+		if (!(wsi->rxflow_change_to & LWS_RXFLOW_ALLOW))
+			/* We cannot deal with any kind of new RX
+			 * because we are RX-flowcontrolled.
+			 */
+			break;
+
+		/* 2: RX Extension needs to be drained
+		 */
+
+		if (wsi->state == LWSS_ESTABLISHED &&
+		    wsi->u.ws.rx_draining_ext) {
+
+			lwsl_ext("%s: RX EXT DRAINING: Service\n", __func__);
+#ifndef LWS_NO_CLIENT
+			if (wsi->mode == LWSCM_WS_CLIENT) {
+				n = lws_client_rx_sm(wsi, 0);
+				if (n < 0)
+					/* we closed wsi */
+					n = 0;
+			} else
+#endif
+				n = lws_rx_sm(wsi, 0);
 
-		if (wsi->rxflow_buffer &&
-		    (wsi->rxflow_change_to & LWS_RXFLOW_ALLOW)) {
-			lwsl_info("draining rxflow\n");
+			goto handled;
+		}
+
+		if (wsi->u.ws.rx_draining_ext)
+			/*
+			 * We have RX EXT content to drain, but can't do it
+			 * right now.  That means we cannot do anything lower
+			 * priority either.
+			 */
+			break;
+
+		/* 3: RX Flowcontrol buffer needs to be drained
+		 */
+
+		if (wsi->rxflow_buffer) {
+			lwsl_info("draining rxflow (len %d)\n",
+				wsi->rxflow_len - wsi->rxflow_pos
+			);
 			/* well, drain it */
 			eff_buf.token = (char *)wsi->rxflow_buffer +
 						wsi->rxflow_pos;
@@ -521,14 +604,13 @@ lws_service_fd(struct lws_context *context, struct lws_pollfd *pollfd)
 			goto drain;
 		}
 
-		/* any incoming data ready? */
-		/* notice if rx flow going off raced poll(), rx flow wins */
-		if (wsi->rxflow_buffer ||
-		    !(pollfd->revents & pollfd->events & LWS_POLLIN))
+		/* 4: any incoming data ready?
+		 * notice if rx flow going off raced poll(), rx flow wins
+		 */
+		if (!(pollfd->revents & pollfd->events & LWS_POLLIN))
 			break;
 read:
-		eff_buf.token_len = lws_ssl_capable_read(wsi,
-					context->serv_buf,
+		eff_buf.token_len = lws_ssl_capable_read(wsi, context->serv_buf,
 					pending ? pending :
 					sizeof(context->serv_buf));
 		switch (eff_buf.token_len) {
@@ -562,8 +644,8 @@ drain:
 		do {
 			more = 0;
 
-			m = lws_ext_cb_wsi_active_exts(wsi,
-				LWS_EXT_CALLBACK_PACKET_RX_PREPARSE, &eff_buf, 0);
+			m = lws_ext_cb_active(wsi,
+				LWS_EXT_CB_PACKET_RX_PREPARSE, &eff_buf, 0);
 			if (m < 0)
 				goto close_and_handled;
 			if (m)
diff --git a/libwebsockets-api-doc.html b/libwebsockets-api-doc.html
index a3d4bbd33b2f2b01accd44904b7004e98f2e0450..e6772fc187102e7a5778a3d40a73268197d8b627 100644
--- a/libwebsockets-api-doc.html
+++ b/libwebsockets-api-doc.html
@@ -4,7 +4,7 @@
 (<i>struct lws *</i> <b>wsi</b>,
 <i>unsigned char *</i> <b>buf</b>,
 <i>size_t</i> <b>len</b>,
-<i>enum lws_write_protocol</i> <b>protocol</b>)
+<i>enum lws_write_protocol</i> <b>wp</b>)
 <h3>Arguments</h3>
 <dl>
 <dt><b>wsi</b>
@@ -12,16 +12,10 @@
 <dt><b>buf</b>
 <dd>The data to send.  For data being sent on a websocket
 connection (ie, not default http), this buffer MUST have
-LWS_SEND_BUFFER_PRE_PADDING bytes valid BEFORE the pointer.
+LWS_PRE bytes valid BEFORE the pointer.
 This is so the protocol header data can be added in-situ.
 <dt><b>len</b>
 <dd>Count of the data bytes in the payload starting from buf
-<dt><b>protocol</b>
-<dd>Use LWS_WRITE_HTTP to reply to an http connection, and one
-of LWS_WRITE_BINARY or LWS_WRITE_TEXT to send appropriate
-data on a websockets connection.  Remember to allow the extra
-bytes before and after buf if LWS_WRITE_BINARY or LWS_WRITE_TEXT
-are used.
 </dl>
 <h3>Description</h3>
 <blockquote>
@@ -110,50 +104,19 @@ Helper to report HTTP errors back to the client cleanly and
 consistently
 </blockquote>
 <hr>
-<h2>lws_client_connect - Connect to another websocket server</h2>
+<h2>lws_client_connect_info - Connect to another websocket server</h2>
 <i>struct lws *</i>
-<b>lws_client_connect</b>
-(<i>struct lws_context *</i> <b>context</b>,
-<i>const char *</i> <b>address</b>,
-<i>int</i> <b>port</b>,
-<i>int</i> <b>ssl_connection</b>,
-<i>const char *</i> <b>path</b>,
-<i>const char *</i> <b>host</b>,
-<i>const char *</i> <b>origin</b>,
-<i>const char *</i> <b>protocol</b>,
-<i>int</i> <b>ietf_version_or_minus_one</b>)
+<b>lws_client_connect_info</b>
+(<i>struct lws_client_connect_info *</i> <b>i</b>)
 <h3>Arguments</h3>
 <dl>
-<dt><b>context</b>
-<dd>Websocket context
-<dt><b>address</b>
-<dd>Remote server address, eg, "myserver.com"
-<dt><b>port</b>
-<dd>Port to connect to on the remote server, eg, 80
-<dt><b>ssl_connection</b>
-<dd>0 = ws://, 1 = wss:// encrypted, 2 = wss:// allow self
-signed certs
-<dt><b>path</b>
-<dd>Websocket path on server
-<dt><b>host</b>
-<dd>Hostname on server
-<dt><b>origin</b>
-<dd>Socket origin name
-<dt><b>protocol</b>
-<dd>Comma-separated list of protocols being asked for from
-the server, or just one.  The server will pick the one it
-likes best.  If you don't want to specify a protocol, which is
-legal, use NULL here.
-<dt><b>ietf_version_or_minus_one</b>
-<dd>-1 to ask to connect using the default, latest
-protocol supported, or the specific protocol ordinal
 </dl>
 <h3>Description</h3>
 <blockquote>
 This function creates a connection to a remote server
 </blockquote>
 <hr>
-<h2>lws_client_connect_extended - Connect to another websocket server</h2>
+<h2>lws_client_connect_extended - Connect to another websocket server DEPRECATED use lws_client_connect_info</h2>
 <i>struct lws *</i>
 <b>lws_client_connect_extended</b>
 (<i>struct lws_context *</i> <b>context</b>,
@@ -1228,7 +1191,7 @@ connection.
 <dt><b>reason</b>
 <dd>The reason for the call
 <dt><b>user</b>
-<dd>Pointer to per-session user data allocated by library
+<dd>Pointer to ptr to per-session user data allocated by library
 <dt><b>in</b>
 <dd>Pointer used for some callback reasons
 <dt><b>len</b>
@@ -1244,7 +1207,7 @@ Libwebsockets takes care of allocating and freeing "user" memory for
 each active extension on each connection.  That is what is pointed to
 by the <tt><b>user</b></tt> parameter.
 </blockquote>
-<h3>LWS_EXT_CALLBACK_CONSTRUCT</h3>
+<h3>LWS_EXT_CB_CONSTRUCT</h3>
 <blockquote>
 called when the server has decided to
 select this extension from the list provided by the client,
@@ -1253,14 +1216,14 @@ the connection with this extension active.  This gives the
 extension a chance to initialize its connection context found
 in <tt><b>user</b></tt>.
 </blockquote>
-<h3>LWS_EXT_CALLBACK_CLIENT_CONSTRUCT</h3>
+<h3>LWS_EXT_CB_CLIENT_CONSTRUCT</h3>
 <blockquote>
-same as LWS_EXT_CALLBACK_CONSTRUCT
+same as LWS_EXT_CB_CONSTRUCT
 but called when client is instantiating this extension.  Some
 extensions will work the same on client and server side and then
 you can just merge handlers for both CONSTRUCTS.
 </blockquote>
-<h3>LWS_EXT_CALLBACK_DESTROY</h3>
+<h3>LWS_EXT_CB_DESTROY</h3>
 <blockquote>
 called when the connection the extension was
 being used on is about to be closed and deallocated.  It's the
@@ -1269,7 +1232,7 @@ allocated in the user data (pointed to by <tt><b>user</b></tt>) before the
 user data is deleted.  This same callback is used whether you
 are in client or server instantiation context.
 </blockquote>
-<h3>LWS_EXT_CALLBACK_PACKET_RX_PREPARSE</h3>
+<h3>LWS_EXT_CB_PACKET_RX_PREPARSE</h3>
 <blockquote>
 when this extension was active on
 a connection, and a packet of data arrived at the connection,
@@ -1283,10 +1246,10 @@ length.  If the extension will grow the content, it should use
 a new buffer allocated in its private user context data and
 set the pointed-to lws_tokens members to point to its buffer.
 </blockquote>
-<h3>LWS_EXT_CALLBACK_PACKET_TX_PRESEND</h3>
+<h3>LWS_EXT_CB_PACKET_TX_PRESEND</h3>
 <blockquote>
 this works the same way as
-LWS_EXT_CALLBACK_PACKET_RX_PREPARSE above, except it gives the
+LWS_EXT_CB_PACKET_RX_PREPARSE above, except it gives the
 extension a chance to change websocket data just before it will
 be sent out.  Using the same lws_token pointer scheme in <tt><b>in</b></tt>,
 the extension can change the buffer and the length to be
@@ -1325,7 +1288,7 @@ you support.  If the frame size is exceeded, there is no
 error, but the buffer will spill to the user callback when
 full, which you can detect by using
 <b>lws_remaining_packet_payload</b>.  Notice that you
-just talk about frame size here, the LWS_SEND_BUFFER_PRE_PADDING
+just talk about frame size here, the LWS_PRE
 and post-padding are automatically also allocated on top.
 <dt><b>id</b>
 <dd>ignored by lws, but useful to contain user information bound
@@ -1354,27 +1317,33 @@ there is no agreed protocol name, that's true during HTTP part of the
 header.
 </blockquote>
 <hr>
+<h2>struct lws_ext_options - Option arguments to the extension.  These are used in the negotiation at ws upgrade time. The helper function lws_ext_parse_options() uses these to generate callbacks</h2>
+<b>struct lws_ext_options</b> {<br>
+&nbsp; &nbsp; <i>const char *</i> <b>name</b>;<br>
+&nbsp; &nbsp; <i>enum lws_ext_options_types</i> <b>type</b>;<br>
+};<br>
+<h3>Members</h3>
+<dl>
+<dt><b>name</b>
+<dd>Option name, eg, "server_no_context_takeover"
+<dt><b>type</b>
+<dd>What kind of args the option can take
+</dl>
+<hr>
 <h2>struct lws_extension - An extension we know how to cope with</h2>
 <b>struct lws_extension</b> {<br>
 &nbsp; &nbsp; <i>const char *</i> <b>name</b>;<br>
 &nbsp; &nbsp; <i>lws_extension_callback_function *</i> <b>callback</b>;<br>
-&nbsp; &nbsp; <i>size_t</i> <b>per_session_data_size</b>;<br>
-&nbsp; &nbsp; <i>void *</i> <b>per_context_private_data</b>;<br>
+&nbsp; &nbsp; <i>const char *</i> <b>client_offer</b>;<br>
 };<br>
 <h3>Members</h3>
 <dl>
 <dt><b>name</b>
-<dd>Formal extension name, eg, "deflate-stream"
+<dd>Formal extension name, eg, "permessage-deflate"
 <dt><b>callback</b>
 <dd>Service callback
-<dt><b>per_session_data_size</b>
-<dd>Libwebsockets will auto-malloc this much
-memory for the use of the extension, a pointer
-to it comes in the <tt><b>user</b></tt> callback parameter
-<dt><b>per_context_private_data</b>
-<dd>Optional storage for this extension that
-is per-context, so it can track stuff across
-all sessions, etc, if it wants
+<dt><b>client_offer</b>
+<dd>String containing exts and options client offers
 </dl>
 <hr>
 <h2>struct lws_context_creation_info - parameters to create context with</h2>
@@ -1434,8 +1403,9 @@ server cert from, otherwise NULL for unencrypted
 <dt><b>ssl_private_key_filepath</b>
 <dd>filepath to private key if wanting SSL mode;
 if this is set to NULL but sll_cert_filepath is set, the
-OPENSSL_CONTEXT_REQUIRES_PRIVATE_KEY callback is called to allow
-setting of the private key directly via openSSL library calls
+OPENSSL_CONTEXT_REQUIRES_PRIVATE_KEY callback is called
+to allow setting of the private key directly via openSSL
+library calls
 <dt><b>ssl_ca_filepath</b>
 <dd>CA certificate filepath or NULL
 <dt><b>ssl_cipher_list</b>
@@ -1447,7 +1417,8 @@ or you can leave it as NULL to get "DEFAULT"
 If proxy auth is required, use format
 "username:password<tt><b>server</b></tt>:port"
 <dt><b>http_proxy_port</b>
-<dd>If http_proxy_address was non-NULL, uses this port at the address
+<dd>If http_proxy_address was non-NULL, uses this port at
+the address
 <dt><b>gid</b>
 <dd>group id to change to after setting listen socket, or -1.
 <dt><b>uid</b>
diff --git a/test-server/test-client.c b/test-server/test-client.c
index b8e262bf52a7ecc27d04ab020baeb72f74d54d7e..25f991b6c69527a3d1060077bb90a8525cdfb0e1 100644
--- a/test-server/test-client.c
+++ b/test-server/test-client.c
@@ -109,14 +109,10 @@ callback_dumb_increment(struct lws *wsi, enum lws_callback_reasons reason,
 			lwsl_notice("denied deflate-stream extension\n");
 			return 1;
 		}
-		if ((strcmp(in, "deflate-frame") == 0) && deny_deflate) {
-			lwsl_notice("denied deflate-frame extension\n");
+		if ((strcmp(in, "x-webkit-deflate-frame") == 0))
 			return 1;
-		}
-		if ((strcmp(in, "x-google-mux") == 0) && deny_mux) {
-			lwsl_notice("denied x-google-mux extension\n");
+		if ((strcmp(in, "deflate-frame") == 0))
 			return 1;
-		}
 		break;
 
 	default:
@@ -134,7 +130,7 @@ static int
 callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 		    void *user, void *in, size_t len)
 {
-	unsigned char buf[LWS_SEND_BUFFER_PRE_PADDING + 4096];
+	unsigned char buf[LWS_PRE + 4096];
 	unsigned int rands[4];
 	int l = 0;
 	int n;
@@ -173,15 +169,15 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 	case LWS_CALLBACK_CLIENT_WRITEABLE:
 		for (n = 0; n < 1; n++) {
 			lws_get_random(lws_get_context(wsi), rands, sizeof(rands));
-			l += sprintf((char *)&buf[LWS_SEND_BUFFER_PRE_PADDING + l],
+			l += sprintf((char *)&buf[LWS_PRE + l],
 					"c #%06X %d %d %d;",
 					(int)rands[0] & 0xffffff,
-					(int)rands[1] % 500,
-					(int)rands[2] % 250,
-					(int)rands[3] % 24);
+					(int)abs(rands[1] % 500),
+					(int)abs(rands[2] % 250),
+					(int)abs(rands[3] % 24));
 		}
 
-		n = lws_write(wsi, &buf[LWS_SEND_BUFFER_PRE_PADDING], l,
+		n = lws_write(wsi, &buf[LWS_PRE], l,
 			      opts | LWS_WRITE_TEXT);
 		if (n < 0)
 			return -1;
@@ -225,6 +221,22 @@ static struct lws_protocols protocols[] = {
 	{ NULL, NULL, 0, 0 } /* end */
 };
 
+static const struct lws_extension exts[] = {
+	{
+		"permessage-deflate",
+		lws_extension_callback_pm_deflate,
+		"permessage-deflate; client_max_window_bits"
+	},
+	{
+		"deflate-frame",
+		lws_extension_callback_pm_deflate,
+		"deflate_frame"
+	},
+	{ NULL, NULL, NULL /* terminator */ }
+};
+
+
+
 void sighandler(int sig)
 {
 	force_exit = 1;
@@ -257,11 +269,11 @@ static int ratelimit_connects(unsigned int *last, unsigned int secs)
 
 int main(int argc, char **argv)
 {
-	int n = 0, ret = 0, port = 7681, use_ssl = 0;
+	int n = 0, ret = 0, port = 7681, use_ssl = 0, ietf_version = -1;
 	unsigned int rl_dumb = 0, rl_mirror = 0;
 	struct lws_context_creation_info info;
+	struct lws_client_connect_info i;
 	struct lws_context *context;
-	int ietf_version = -1; /* latest */
 	const char *address;
 
 	memset(&info, 0, sizeof info);
@@ -321,9 +333,6 @@ int main(int argc, char **argv)
 
 	info.port = CONTEXT_PORT_NO_LISTEN;
 	info.protocols = protocols;
-#ifndef LWS_NO_EXTENSIONS
-	info.extensions = lws_get_internal_extensions();
-#endif
 	info.gid = -1;
 	info.uid = -1;
 
@@ -333,6 +342,17 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	memset(&i, 0, sizeof(i));
+
+	i.context = context;
+	i.address = address;
+	i.port = port;
+	i.ssl_connection = use_ssl;
+	i.path = "/";
+	i.host = argv[optind];
+	i.origin = argv[optind];
+	i.ietf_version_or_minus_one = ietf_version;
+	i.client_exts = exts;
 	/*
 	 * sit there servicing the websocket context to handle incoming
 	 * packets, and drawing random circles on the mirror protocol websocket
@@ -347,19 +367,14 @@ int main(int argc, char **argv)
 
 		if (!wsi_dumb && ratelimit_connects(&rl_dumb, 2u)) {
 			lwsl_notice("dumb: connecting\n");
-			wsi_dumb = lws_client_connect(context, address, port,
-				use_ssl, "/", argv[optind], argv[optind],
-				protocols[PROTOCOL_DUMB_INCREMENT].name,
-				ietf_version);
+			i.protocol = protocols[PROTOCOL_DUMB_INCREMENT].name;
+			wsi_dumb = lws_client_connect_info(&i);
 		}
 
 		if (!wsi_mirror && ratelimit_connects(&rl_mirror, 2u)) {
 			lwsl_notice("mirror: connecting\n");
-			wsi_mirror = lws_client_connect(context,
-				address, port, use_ssl,  "/",
-				argv[optind], argv[optind],
-				protocols[PROTOCOL_LWS_MIRROR].name,
-				ietf_version);
+			i.protocol = protocols[PROTOCOL_LWS_MIRROR].name;
+			wsi_mirror = lws_client_connect_info(&i);
 		}
 
 		lws_service(context, 500);
diff --git a/test-server/test-echo.c b/test-server/test-echo.c
index 14be18e2f3145103bcde4c8677a63be8df2d04c8..46068608aa270706aad081c882688144d3825706 100644
--- a/test-server/test-echo.c
+++ b/test-server/test-echo.c
@@ -4,7 +4,7 @@
  * This implements both the client and server sides.  It defaults to
  * serving, use --client <remote address> to connect as client.
  *
- * Copyright (C) 2010-2013 Andy Green <andy@warmcat.com>
+ * Copyright (C) 2010-2016 Andy Green <andy@warmcat.com>
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Lesser General Public
@@ -44,11 +44,13 @@ static volatile int force_exit = 0;
 static int versa, state;
 static int times = -1;
 
-#define MAX_ECHO_PAYLOAD (128 * 1024)
 #define LOCAL_RESOURCE_PATH INSTALL_DATADIR"/libwebsockets-test-server"
 
+#define MAX_ECHO_PAYLOAD 1024
+
 struct per_session_data__echo {
-	unsigned char buf[LWS_SEND_BUFFER_PRE_PADDING + MAX_ECHO_PAYLOAD];
+	size_t rx, tx;
+	unsigned char buf[LWS_PRE + MAX_ECHO_PAYLOAD];
 	unsigned int len;
 	unsigned int index;
 	int final;
@@ -60,16 +62,17 @@ static int
 callback_echo(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 	      void *in, size_t len)
 {
-	struct per_session_data__echo *pss = (struct per_session_data__echo *)user;
+	struct per_session_data__echo *pss =
+			(struct per_session_data__echo *)user;
 	int n;
 
 	switch (reason) {
 
 #ifndef LWS_NO_SERVER
-	/* when the callback is used for server operations --> */
 
 	case LWS_CALLBACK_SERVER_WRITEABLE:
 do_tx:
+
 		n = LWS_WRITE_CONTINUATION;
 		if (!pss->continuation) {
 			if (pss->binary)
@@ -80,8 +83,11 @@ do_tx:
 		}
 		if (!pss->final)
 			n |= LWS_WRITE_NO_FIN;
+		lwsl_info("+++ test-echo: writing %d, with final %d\n",
+			  pss->len, pss->final);
 
-		n = lws_write(wsi, &pss->buf[LWS_SEND_BUFFER_PRE_PADDING], pss->len, n);
+		pss->tx += pss->len;
+		n = lws_write(wsi, &pss->buf[LWS_PRE], pss->len, n);
 		if (n < 0) {
 			lwsl_err("ERROR %d writing to socket, hanging up\n", n);
 			return 1;
@@ -98,15 +104,16 @@ do_tx:
 
 	case LWS_CALLBACK_RECEIVE:
 do_rx:
-		if (len > MAX_ECHO_PAYLOAD) {
-			lwsl_err("Server received packet bigger than %u, hanging up\n", MAX_ECHO_PAYLOAD);
-			return 1;
-		}
-		memcpy(&pss->buf[LWS_SEND_BUFFER_PRE_PADDING], in, len);
-		pss->len = (unsigned int)len;
 		pss->final = lws_is_final_fragment(wsi);
 		pss->binary = lws_frame_is_binary(wsi);
-		lwsl_info("len %d final %d\n", len, pss->final);
+		lwsl_info("+++ test-echo: RX len %d final %d, pss->len=%d\n",
+			  len, pss->final, (int)pss->len);
+
+		memcpy(&pss->buf[LWS_PRE], in, len);
+		assert((int)pss->len == -1);
+		pss->len = (unsigned int)len;
+		pss->rx += len;
+
 		lws_rx_flow_control(wsi, 0);
 		lws_callback_on_writable(wsi);
 		break;
@@ -124,6 +131,7 @@ do_rx:
 	case LWS_CALLBACK_CLIENT_ESTABLISHED:
 		lwsl_debug("Client has connected\n");
 		pss->index = 0;
+		pss->len = -1;
 		state = 2;
 		break;
 
@@ -140,16 +148,15 @@ do_rx:
 		if (versa) {
 			if (pss->len != (unsigned int)-1)
 				goto do_tx;
-			else {
-				lwsl_debug("****** writable with nothing new\n");
-				break;
-			}
+			break;
 		}
 #endif
 		/* we will send our packet... */
-		pss->len = sprintf((char *)&pss->buf[LWS_SEND_BUFFER_PRE_PADDING], "hello from libwebsockets-test-echo client pid %d index %d\n", getpid(), pss->index++);
-		lwsl_notice("Client TX: %s", &pss->buf[LWS_SEND_BUFFER_PRE_PADDING]);
-		n = lws_write(wsi, &pss->buf[LWS_SEND_BUFFER_PRE_PADDING], pss->len, LWS_WRITE_TEXT);
+		pss->len = sprintf((char *)&pss->buf[LWS_PRE],
+				   "hello from libwebsockets-test-echo client pid %d index %d\n",
+				   getpid(), pss->index++);
+		lwsl_notice("Client TX: %s", &pss->buf[LWS_PRE]);
+		n = lws_write(wsi, &pss->buf[LWS_PRE], pss->len, LWS_WRITE_TEXT);
 		if (n < 0) {
 			lwsl_err("ERROR %d writing to socket, hanging up\n", n);
 			return -1;
@@ -158,11 +165,14 @@ do_rx:
 			lwsl_err("Partial write\n");
 			return -1;
 		}
-		break;
-	case LWS_CALLBACK_OPENSSL_LOAD_EXTRA_CLIENT_VERIFY_CERTS:
-
 		break;
 #endif
+	case LWS_CALLBACK_CLIENT_CONFIRM_EXTENSION_SUPPORTED:
+		/* reject everything else except permessage-deflate */
+		if (strcmp(in, "permessage-deflate"))
+			return 1;
+		break;
+
 	default:
 		break;
 	}
@@ -176,15 +186,31 @@ static struct lws_protocols protocols[] = {
 	/* first protocol must always be HTTP handler */
 
 	{
-		"",		/* name */
-		callback_echo,		/* callback */
-		sizeof(struct per_session_data__echo)	/* per_session_data_size */
+		"",		/* name - can be overridden with -e */
+		callback_echo,
+		sizeof(struct per_session_data__echo),	/* per_session_data_size */
+		MAX_ECHO_PAYLOAD,
 	},
 	{
 		NULL, NULL, 0		/* End of list */
 	}
 };
 
+static const struct lws_extension exts[] = {
+	{
+		"permessage-deflate",
+		lws_extension_callback_pm_deflate,
+		"permessage-deflate; client_no_context_takeover; client_max_window_bits"
+	},
+	{
+		"deflate-frame",
+		lws_extension_callback_pm_deflate,
+		"deflate_frame"
+	},
+	{ NULL, NULL, NULL /* terminator */ }
+};
+
+
 void sighandler(int sig)
 {
 	force_exit = 1;
@@ -206,6 +232,7 @@ static struct option options[] = {
 	{ "passphrase", required_argument,	NULL, 'P' },
 	{ "interface",  required_argument,	NULL, 'i' },
 	{ "times",	required_argument,	NULL, 'n' },
+	{ "echogen",	no_argument,		NULL, 'e' },
 #ifndef LWS_NO_DAEMONIZE
 	{ "daemonize", 	no_argument,		NULL, 'D' },
 #endif
@@ -238,6 +265,8 @@ int main(int argc, char **argv)
 	struct lws *wsi;
 	int disallow_selfsigned = 0;
 	struct timeval tv;
+	const char *connect_protocol = NULL;
+	struct lws_client_connect_info i;
 #endif
 
 	int debug_level = 7;
@@ -255,7 +284,7 @@ int main(int argc, char **argv)
 #endif
 
 	while (n >= 0) {
-		n = getopt_long(argc, argv, "i:hsp:d:DC:k:P:vu:n:"
+		n = getopt_long(argc, argv, "i:hsp:d:DC:k:P:vu:n:e"
 #ifndef LWS_NO_CLIENT
 			"c:r:"
 #endif
@@ -313,6 +342,11 @@ int main(int argc, char **argv)
 		case 'v':
 			versa = 1;
 			break;
+		case 'e':
+			protocols[0].name = "lws-echogen";
+			connect_protocol = protocols[0].name;
+			lwsl_err("using lws-echogen\n");
+			break;
 		case 'i':
 			strncpy(interface_name, optarg, sizeof interface_name);
 			interface_name[(sizeof interface_name) - 1] = '\0';
@@ -370,7 +404,7 @@ int main(int argc, char **argv)
 	lws_set_log_level(debug_level, lwsl_emit_syslog);
 
 	lwsl_notice("libwebsockets echo test - "
-		    "(C) Copyright 2010-2015 Andy Green <andy@warmcat.com> - "
+		    "(C) Copyright 2010-2016 Andy Green <andy@warmcat.com> - "
 		    "licensed under LGPL2.1\n");
 #ifndef LWS_NO_CLIENT
 	if (client) {
@@ -380,7 +414,8 @@ int main(int argc, char **argv)
 			lwsl_info("allowing selfsigned\n");
 			use_ssl = 2;
 		} else {
-			lwsl_info("requiring server cert validation againts %s\n", ssl_cert);
+			lwsl_info("requiring server cert validation against %s\n",
+				  ssl_cert);
 			info.ssl_ca_filepath = ssl_cert;
 		}
 	} else {
@@ -396,9 +431,6 @@ int main(int argc, char **argv)
 	info.port = listen_port;
 	info.iface = _interface;
 	info.protocols = protocols;
-#ifndef LWS_NO_EXTENSIONS
-	info.extensions = lws_get_internal_extensions();
-#endif
 	if (use_ssl && !client) {
 		info.ssl_cert_filepath = ssl_cert;
 		info.ssl_private_key_filepath = ssl_key;
@@ -410,6 +442,9 @@ int main(int argc, char **argv)
 	info.gid = -1;
 	info.uid = -1;
 	info.options = opts | LWS_SERVER_OPTION_VALIDATE_UTF8;
+#ifndef LWS_NO_EXTENSIONS
+	info.extensions = exts;
+#endif
 
 	context = lws_create_context(&info);
 	if (context == NULL) {
@@ -430,19 +465,31 @@ int main(int argc, char **argv)
 #ifndef LWS_NO_CLIENT
 		if (client && !state && times) {
 			state = 1;
-			lwsl_notice("Client connecting to %s:%u....\n", address, port);
+			lwsl_notice("Client connecting to %s:%u....\n",
+				    address, port);
 			/* we are in client mode */
 
 			address[sizeof(address) - 1] = '\0';
 			sprintf(ads_port, "%s:%u", address, port & 65535);
 			if (times > 0)
 				times--;
-				
-			wsi = lws_client_connect(context, address,
-				port, use_ssl, uri, ads_port,
-				 ads_port, NULL, -1);
+
+			memset(&i, 0, sizeof(i));
+
+			i.context = context;
+			i.address = address;
+			i.port = port;
+			i.ssl_connection = use_ssl;
+			i.path = uri;
+			i.host = ads_port;
+			i.origin = ads_port;
+			i.protocol = connect_protocol;
+			i.client_exts = exts;
+
+			wsi = lws_client_connect_info(&i);
 			if (!wsi) {
-				lwsl_err("Client failed to connect to %s:%u\n", address, port);
+				lwsl_err("Client failed to connect to %s:%u\n",
+					 address, port);
 				goto bail;
 			}
 		}
@@ -458,7 +505,7 @@ int main(int argc, char **argv)
 					times--;
 			}
 		}
-		
+
 		if (client && !state && !times)
 			break;
 #endif
diff --git a/test-server/test-fraggle.c b/test-server/test-fraggle.c
index a04ad8a80856ec592e2afd2bb3460cec97b1c759..331d2981c61cc0c6f1a495746e4482b0f4452a91 100644
--- a/test-server/test-fraggle.c
+++ b/test-server/test-fraggle.c
@@ -57,13 +57,13 @@ callback_fraggle(struct lws *wsi, enum lws_callback_reasons reason,
 		 void *user, void *in, size_t len)
 {
 	int n;
-	unsigned char buf[LWS_SEND_BUFFER_PRE_PADDING + 8000];
+	unsigned char buf[LWS_PRE + 8000];
 	struct per_session_data__fraggle *psf = user;
 	int chunk;
 	int write_mode = LWS_WRITE_CONTINUATION;
 	unsigned long sum;
 	unsigned char *p = (unsigned char *)in;
-	unsigned char *bp = &buf[LWS_SEND_BUFFER_PRE_PADDING];
+	unsigned char *bp = &buf[LWS_PRE];
 	int ran;
 
 	switch (reason) {
diff --git a/test-server/test-ping.c b/test-server/test-ping.c
index d85af0a647a4c62010e04c2e3001da8545201dfa..b750313d3a1cb364bbabb8c2ce24387e19afe881 100644
--- a/test-server/test-ping.c
+++ b/test-server/test-ping.c
@@ -57,7 +57,7 @@ static unsigned int interval_us = 1000000;
 static unsigned int size = 64;
 static int flood;
 static const char *address;
-static unsigned char pingbuf[LWS_SEND_BUFFER_PRE_PADDING + MAX_MIRROR_PAYLOAD];
+static unsigned char pingbuf[LWS_PRE + MAX_MIRROR_PAYLOAD];
 static char peer_name[128];
 static unsigned long started;
 static int screen_width = 80;
@@ -220,7 +220,7 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 	case LWS_CALLBACK_CLIENT_WRITEABLE:
 
 		shift = 56;
-		p = &pingbuf[LWS_SEND_BUFFER_PRE_PADDING];
+		p = &pingbuf[LWS_PRE];
 
 		/* 64-bit ping index in network byte order */
 
@@ -229,7 +229,7 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 			shift -= 8;
 		}
 
-		while ((unsigned int)(p - &pingbuf[LWS_SEND_BUFFER_PRE_PADDING]) < size)
+		while ((unsigned int)(p - &pingbuf[LWS_PRE]) < size)
 			*p++ = 0;
 
 		gettimeofday(&tv, NULL);
@@ -257,11 +257,11 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 
 		if (use_mirror)
 			n = lws_write(wsi,
-				&pingbuf[LWS_SEND_BUFFER_PRE_PADDING],
+				&pingbuf[LWS_PRE],
 					size, write_options | LWS_WRITE_BINARY);
 		else
 			n = lws_write(wsi,
-				&pingbuf[LWS_SEND_BUFFER_PRE_PADDING],
+				&pingbuf[LWS_PRE],
 					size, write_options | LWS_WRITE_PING);
 
 		if (n < 0)
diff --git a/test-server/test-server-dumb-increment.c b/test-server/test-server-dumb-increment.c
index e8bb9c9c2e94cae834f81a512dcafe67fd43e77d..709c8a15fd9a2f034db7995fae5481c58f44add2 100644
--- a/test-server/test-server-dumb-increment.c
+++ b/test-server/test-server-dumb-increment.c
@@ -26,10 +26,10 @@ int
 callback_dumb_increment(struct lws *wsi, enum lws_callback_reasons reason,
 			void *user, void *in, size_t len)
 {
-	unsigned char buf[LWS_SEND_BUFFER_PRE_PADDING + 512];
+	unsigned char buf[LWS_PRE + 512];
 	struct per_session_data__dumb_increment *pss =
 			(struct per_session_data__dumb_increment *)user;
-	unsigned char *p = &buf[LWS_SEND_BUFFER_PRE_PADDING];
+	unsigned char *p = &buf[LWS_PRE];
 	int n, m;
 
 	switch (reason) {
diff --git a/test-server/test-server-echogen.c b/test-server/test-server-echogen.c
new file mode 100644
index 0000000000000000000000000000000000000000..2ae27e1c552d5da023131442fa4877cb7c887e25
--- /dev/null
+++ b/test-server/test-server-echogen.c
@@ -0,0 +1,144 @@
+/*
+ * libwebsockets-test-server - libwebsockets test implementation
+ *
+ * Copyright (C) 2016 Andy Green <andy@warmcat.com>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation:
+ *  version 2.1 of the License.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA  02110-1301  USA
+ */
+#include "test-server.h"
+
+/* echogen protocol
+ *
+ * if you connect to him using his protocol, he'll send you a file chopped
+ * up into fixed-size fragments, repeated until he reaches a limit.
+ */
+
+#define TOTAL 993840
+
+/*
+ * callback_lws_echogen() - protocol callback for "lws-echogen"
+ *
+ * ESTABLISHED: opens <resource_path>/test.html and asks for a writeable
+ *	callback.
+ * SERVER_WRITEABLE: sends the next fragment (up to pss->fragsize bytes) of
+ *	one fragmented text message, rewinding the file at EOF, until TOTAL
+ *	bytes have been sent.
+ * RECEIVE: counts received bytes; once exactly TOTAL have arrived it sets a
+ *	normal close reason and returns -1.
+ * CLOSED: closes the file.
+ *
+ * Returns 0 normally, -1 on error or after setting the close reason.
+ */
+int
+callback_lws_echogen(struct lws *wsi, enum lws_callback_reasons reason,
+			void *user, void *in, size_t len)
+{
+	unsigned char buf[LWS_PRE + 8192];
+	struct per_session_data__echogen *pss =
+			(struct per_session_data__echogen *)user;
+	unsigned char *p = &buf[LWS_PRE];
+	int n, m, want;
+
+	switch (reason) {
+
+	case LWS_CALLBACK_ESTABLISHED:
+		pss->total = TOTAL;
+		pss->fragsize = 2048;
+		pss->total_rx = 0;
+		sprintf((char *)buf, "%s/test.html", resource_path);
+		pss->fd = open((char *)buf, LWS_O_RDONLY);
+		if (pss->fd < 0) {
+			lwsl_err("Failed to open %s\n", buf);
+			return -1;
+		}
+		pss->wr = LWS_WRITE_TEXT | LWS_WRITE_NO_FIN;
+		lws_callback_on_writable(wsi);
+		break;
+
+	case LWS_CALLBACK_CLOSED:
+		if (pss->fd >= 0)
+			close(pss->fd);
+		break;
+
+	case LWS_CALLBACK_SERVER_WRITEABLE:
+		lwsl_err("%s: cb writeable, total left %ld\n", __func__, (long)pss->total);
+
+		/* a full fragment, or whatever is left if that is smaller */
+		want = pss->fragsize;
+		if ((size_t)want >= pss->total) {
+			want = (int)pss->total;
+			pss->wr = LWS_WRITE_CONTINUATION; /* ie, FIN */
+		}
+		n = read(pss->fd, p, want);
+		if (n < 0) {
+			lwsl_err("failed read\n");
+			return -1;
+		}
+		if (n < want) {
+			/* short read: rewind and top up from the file start */
+			if (lseek(pss->fd, 0, SEEK_SET) < 0)
+				return -1;
+			m = read(pss->fd, p + n, want - n);
+			if (m < 0)
+				return -1;
+			n += m;
+		}
+		if (want && !n) {
+			/* nothing readable (empty file?): bail, don't spin */
+			lwsl_err("failed read\n");
+			return -1;
+		}
+		pss->total -= n;
+
+		/* n is the whole fragment now; a short write is an error */
+		m = lws_write(wsi, p, n, pss->wr);
+		if (m < n) {
+			lwsl_err("ERROR %d writing to echogen socket\n", m);
+			return -1;
+		}
+		if (!pss->total) {
+			lwsl_err("Completed OK\n");
+			break;
+		}
+		pss->wr = LWS_WRITE_CONTINUATION | LWS_WRITE_NO_FIN;
+		lws_callback_on_writable(wsi);
+		break;
+
+	case LWS_CALLBACK_RECEIVE:
+		pss->total_rx += len;
+		lwsl_err("rx %ld\n", (long)pss->total_rx);
+		if (pss->total_rx == TOTAL) {
+			lws_close_reason(wsi, LWS_CLOSE_STATUS_NORMAL,
+					 (unsigned char *)"done", 4);
+			return -1;
+		}
+		break;
+
+	case LWS_CALLBACK_WS_PEER_INITIATED_CLOSE:
+		/* len is a size_t, so cast it to match %d */
+		lwsl_notice("LWS_CALLBACK_WS_PEER_INITIATED_CLOSE: len %d\n",
+			    (int)len);
+		for (n = 0; n < (int)len; n++)
+			lwsl_notice(" %d: 0x%02X\n", n,
+				    ((unsigned char *)in)[n]);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/test-server/test-server-http.c b/test-server/test-server-http.c
index 2f4dd083de783e45948e518813d34a2b97c0aefe..aa9502f2a1e9e93cea0fe80b4ec0c3c77fdd003a 100644
--- a/test-server/test-server-http.c
+++ b/test-server/test-server-http.c
@@ -169,8 +169,8 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 
 			/* well, let's demonstrate how to send the hard way */
 
-			p = buffer + LWS_SEND_BUFFER_PRE_PADDING;
-			end = p + sizeof(buffer) - LWS_SEND_BUFFER_PRE_PADDING;
+			p = buffer + LWS_PRE;
+			end = p + sizeof(buffer) - LWS_PRE;
 
 			pss->fd = lws_plat_file_open(wsi, leaf_path, &file_len,
 						     LWS_O_RDONLY);
@@ -217,8 +217,7 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 			 * this is mandated by changes in HTTP2
 			 */
 
-			n = lws_write(wsi, buffer + LWS_SEND_BUFFER_PRE_PADDING,
-				      p - (buffer + LWS_SEND_BUFFER_PRE_PADDING),
+			n = lws_write(wsi, buffer + LWS_PRE, p - (buffer + LWS_PRE),
 				      LWS_WRITE_HTTP_HEADERS);
 
 			if (n < 0) {
@@ -312,7 +311,7 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 		 */
 		do {
 			/* we'd like the send this much */
-			n = sizeof(buffer) - LWS_SEND_BUFFER_PRE_PADDING;
+			n = sizeof(buffer) - LWS_PRE;
 
 			/* but if the peer told us he wants less, we can adapt */
 			m = lws_get_peer_write_allowance(wsi);
@@ -328,7 +327,7 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 
 			n = lws_plat_file_read(wsi, pss->fd,
 					       &amount, buffer +
-					        LWS_SEND_BUFFER_PRE_PADDING, n);
+					        LWS_PRE, n);
 			/* problem reading, close conn */
 			if (n < 0)
 				goto bail;
@@ -343,7 +342,7 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 			 * is handled by the library itself if you sent a
 			 * content-length header
 			 */
-			m = lws_write(wsi, buffer + LWS_SEND_BUFFER_PRE_PADDING,
+			m = lws_write(wsi, buffer + LWS_PRE,
 				      n, LWS_WRITE_HTTP);
 			if (m < 0)
 				/* write failed, close conn */
diff --git a/test-server/test-server-mirror.c b/test-server/test-server-mirror.c
index a92922fa17a1f14604a15e4f42a4b78294c3cb99..365333d481ac3a876c50f54dd70ba94a03ada4c4 100644
--- a/test-server/test-server-mirror.c
+++ b/test-server/test-server-mirror.c
@@ -62,7 +62,7 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 
 			n = lws_write(wsi, (unsigned char *)
 				   ringbuffer[pss->ringbuffer_tail].payload +
-				   LWS_SEND_BUFFER_PRE_PADDING,
+				   LWS_PRE,
 				   ringbuffer[pss->ringbuffer_tail].len,
 								LWS_WRITE_TEXT);
 			if (n < 0) {
@@ -102,10 +102,10 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 			free(ringbuffer[ringbuffer_head].payload);
 
 		ringbuffer[ringbuffer_head].payload =
-				malloc(LWS_SEND_BUFFER_PRE_PADDING + len);
+				malloc(LWS_PRE + len);
 		ringbuffer[ringbuffer_head].len = len;
 		memcpy((char *)ringbuffer[ringbuffer_head].payload +
-					  LWS_SEND_BUFFER_PRE_PADDING, in, len);
+					  LWS_PRE, in, len);
 		if (ringbuffer_head == (MAX_MESSAGE_QUEUE - 1))
 			ringbuffer_head = 0;
 		else
diff --git a/test-server/test-server.c b/test-server/test-server.c
index 18c9bea08110d15786453fa642be414fe83226fd..cd31082f9fb265d62ce4438f721ed1150ef6126f 100644
--- a/test-server/test-server.c
+++ b/test-server/test-server.c
@@ -67,6 +67,7 @@ enum demo_protocols {
 
 	PROTOCOL_DUMB_INCREMENT,
 	PROTOCOL_LWS_MIRROR,
+	PROTOCOL_LWS_ECHOGEN,
 
 	/* always last */
 	DEMO_PROTOCOL_COUNT
@@ -95,6 +96,12 @@ static struct lws_protocols protocols[] = {
 		sizeof(struct per_session_data__lws_mirror),
 		128,
 	},
+	{
+		"lws-echogen",
+		callback_lws_echogen,
+		sizeof(struct per_session_data__echogen),
+		128,
+	},
 	{ NULL, NULL, 0, 0 } /* terminator */
 };
 
@@ -124,6 +131,22 @@ void sighandler(int sig)
 	lws_cancel_service(context);
 }
 
+/*
+ * Extensions offered by the test server.  The pm_deflate entries are left
+ * out under LWS_NO_EXTENSIONS, where that callback is presumably not built
+ * (CMake only adds extension-permessage-deflate.c with extensions enabled),
+ * so the list is then just the terminator and can always be assigned.
+ */
+static const struct lws_extension exts[] = {
+#ifndef LWS_NO_EXTENSIONS
+	{ "permessage-deflate", lws_extension_callback_pm_deflate,
+	  "permessage-deflate" },
+	{ "deflate-frame", lws_extension_callback_pm_deflate,
+	  "deflate_frame" },
+#endif
+	{ NULL, NULL, NULL /* terminator */ }
+};
+
 static struct option options[] = {
 	{ "help",	no_argument,		NULL, 'h' },
 	{ "debug",	required_argument,	NULL, 'd' },
@@ -258,10 +281,6 @@ int main(int argc, char **argv)
 
 	info.iface = iface;
 	info.protocols = protocols;
-#ifndef LWS_NO_EXTENSIONS
-	info.extensions = lws_get_internal_extensions();
-#endif
-
 	info.ssl_cert_filepath = NULL;
 	info.ssl_private_key_filepath = NULL;
 
@@ -285,8 +304,8 @@ int main(int argc, char **argv)
 	info.gid = -1;
 	info.uid = -1;
 	info.max_http_header_pool = 1;
-	info.options = opts;
-
+	info.options = opts | LWS_SERVER_OPTION_VALIDATE_UTF8;
+	info.extensions = exts;
 	context = lws_create_context(&info);
 	if (context == NULL) {
 		lwsl_err("libwebsocket init failed\n");
diff --git a/test-server/test-server.h b/test-server/test-server.h
index 7a8a922340dadadf5893f831048899dc425b4f4b..89c64a45db65eca3e5ad26dbb1cfd11fda2719d9 100644
--- a/test-server/test-server.h
+++ b/test-server/test-server.h
@@ -66,6 +66,14 @@ struct per_session_data__lws_mirror {
 	int ringbuffer_tail;
 };
 
+struct per_session_data__echogen {
+	size_t total;
+	size_t total_rx;
+	int fd;
+	int fragsize;
+	int wr;
+};
+
 extern int
 callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 	      void *in, size_t len);
@@ -75,6 +83,9 @@ callback_lws_mirror(struct lws *wsi, enum lws_callback_reasons reason,
 extern int
 callback_dumb_increment(struct lws *wsi, enum lws_callback_reasons reason,
 			void *user, void *in, size_t len);
+extern int
+callback_lws_echogen(struct lws *wsi, enum lws_callback_reasons reason,
+			void *user, void *in, size_t len);
 
 extern void
 dump_handshake_info(struct lws *wsi);
diff --git a/test-server/test.html b/test-server/test.html
index ee5986cd1123bd4dc2b8e900631e617a1b31f7a1..235c21f91e31c305717b7020574183a215baadb5 100644
--- a/test-server/test.html
+++ b/test-server/test.html
@@ -277,7 +277,7 @@ document.getElementById("number").textContent = get_appropriate_ws_url();
 	try {
 		socket_di.onopen = function() {
 			document.getElementById("wsdi_statustd").style.backgroundColor = "#40ff40";
-			document.getElementById("wsdi_status").textContent = " websocket connection opened ";
+			document.getElementById("wsdi_status").innerHTML = " <b>websocket connection opened</b><br>" + socket_di.extensions;
 		} 
 
 		socket_di.onmessage =function got_packet(msg) {
@@ -309,7 +309,7 @@ function ot_open() {
 	try {
 		socket_ot.onopen = function() {
 			document.getElementById("ot_statustd").style.backgroundColor = "#40ff40";
-			document.getElementById("ot_status").textContent = " websocket connection opened ";
+			document.getElementById("ot_status").innerHTML = " <b>websocket connection opened</b><br>" + socket_ot.extensions;
 			document.getElementById("ot_open_btn").disabled = true;
 			document.getElementById("ot_close_btn").disabled = false;
 			document.getElementById("ot_req_close_btn").disabled = false;
@@ -359,7 +359,7 @@ function ot_req_close() {
 	try {
 		socket_lm.onopen = function() {
 			document.getElementById("wslm_statustd").style.backgroundColor = "#40ff40";
-			document.getElementById("wslm_status").textContent = " websocket connection opened ";
+			document.getElementById("wslm_status").innerHTML = " <b>websocket connection opened</b><br>" + socket_lm.extensions;
 		} 
 
 		socket_lm.onmessage =function got_packet(msg) {