diff --git a/lib/libwebsockets.c b/lib/libwebsockets.c
index 178a24fbd1e2b524c9d5143571249ea152c9c403..04c2ef5fe241b562838372a60d199bb24de07914 100644
--- a/lib/libwebsockets.c
+++ b/lib/libwebsockets.c
@@ -1585,13 +1585,13 @@ lws_cgi(struct lws *wsi, char * const *exec_array, int timeout_secs)
 	lws_change_pollfd(cgi->stdwsi[LWS_STDERR], LWS_POLLOUT, LWS_POLLIN);
 
 	lwsl_debug("%s: fds in %d, out %d, err %d\n", __func__,
-			cgi->stdwsi[LWS_STDIN]->sock,
-			cgi->stdwsi[LWS_STDOUT]->sock,
-			cgi->stdwsi[LWS_STDERR]->sock);
+		   cgi->stdwsi[LWS_STDIN]->sock, cgi->stdwsi[LWS_STDOUT]->sock,
+		   cgi->stdwsi[LWS_STDERR]->sock);
 
 	lws_set_timeout(wsi, PENDING_TIMEOUT_CGI, timeout_secs);
 
-
+	/* the cgi stdout is always sending us http1.x header data first */
+	wsi->hdr_state = LCHS_HEADER;
 
 	/* add us to the pt list of active cgis */
 	cgi->cgi_list = pt->cgi_list;
@@ -1674,6 +1674,103 @@ bail1:
 
 	return -1;
 }
+/**
+ * lws_cgi_write_split_stdout_headers: forward cgi stdout to the client,
+ *	writing the leading http header section separately from the payload
+ *
+ * @wsi: connection that owns the cgi process
+ *
+ * Until wsi->hdr_state reaches LHCS_PAYLOAD, cgi stdout is read bytewise:
+ * header lines go out as LWS_WRITE_HTTP_HEADERS and the blank line ending
+ * them is dropped, with lws_finalize_http_header() called in its place.
+ * After that each call forwards one buffer of payload as LWS_WRITE_HTTP.
+ * Returns 0, or -1 on read / write error or bad header line endings.
+ */
+LWS_VISIBLE LWS_EXTERN int
+lws_cgi_write_split_stdout_headers(struct lws *wsi)
+{
+	int n, m;
+	char buf[LWS_PRE + 1024], *start = &buf[LWS_PRE], *p = start,
+	     *end = &buf[sizeof(buf) - 1 - LWS_PRE], c;
+
+	while (wsi->hdr_state != LHCS_PAYLOAD) {
+		/* header / finalize and payload chunks have to be written
+		 * separately, so the header part is scanned a byte at a time */
+		n = read(lws_get_socket_fd(wsi->cgi->stdwsi[LWS_STDOUT]), &c, 1);
+		if (n < 0) {
+			if (errno != EAGAIN)
+				return -1;
+			n = 0; /* nothing available right now */
+		}
+		if (n) {
+			switch (wsi->hdr_state) {
+			case LCHS_HEADER:
+				*p++ = c;
+				if (c == '\x0d')
+					wsi->hdr_state = LCHS_LF1;
+				break;
+			case LCHS_LF1:
+				/* we got \r[^\n]... it's unreasonable */
+				if (c != '\x0a')
+					return -1;
+				*p++ = c;
+				wsi->hdr_state = LCHS_CR2;
+				break;
+			case LCHS_CR2:
+				if (c == '\x0d') {
+					/* drop the \x0d */
+					wsi->hdr_state = LCHS_LF2;
+					break;
+				}
+				/* first byte of another header line: go back
+				 * to plain scanning so its own \r\n is kept
+				 */
+				*p++ = c;
+				wsi->hdr_state = LCHS_HEADER;
+				break;
+			case LCHS_LF2:
+				/* we got \r\n\r[^\n]... it's unreasonable */
+				if (c != '\x0a')
+					return -1;
+				/* drop the \x0a ... finalize adds it if needed */
+				if (lws_finalize_http_header(wsi,
+						(unsigned char **)&p,
+						(unsigned char *)end))
+					return -1;
+				wsi->hdr_state = LHCS_PAYLOAD;
+				break;
+			default:
+				/* LCHS_CR1 is never set by this function */
+				return -1;
+			}
+		}
+
+		/* ran out of input, ended the headers, or filled up the headers buf */
+		if (!n || wsi->hdr_state == LHCS_PAYLOAD || (p + 4) == end) {
+			m = lws_write(wsi, (unsigned char *)start,
+				      p - start, LWS_WRITE_HTTP_HEADERS);
+			if (m < 0)
+				return -1;
+			/* writeability becomes uncertain now we wrote
+			 * something, we must return to the event loop
+			 */
+			return 0;
+		}
+	}
+
+	lwsl_debug("%s: stdout\n", __func__);
+	n = read(lws_get_socket_fd(wsi->cgi->stdwsi[LWS_STDOUT]),
+		 start, sizeof(buf) - LWS_PRE);
+	if (n < 0 && errno != EAGAIN)
+		return -1;
+	if (n > 0) {
+		m = lws_write(wsi, (unsigned char *)start, n, LWS_WRITE_HTTP);
+		if (m < 0)
+			return -1;
+	}
+
+	return 0;
+}
 
 /**
  * lws_cgi_kill: terminate cgi process associated with wsi
diff --git a/lib/libwebsockets.h b/lib/libwebsockets.h
index 63442d2bea7cb10a1402fde4049c9f62e1b7af9a..f4b20c62e7f16832214f261e2b5cbc2e9b79c7e5 100644
--- a/lib/libwebsockets.h
+++ b/lib/libwebsockets.h
@@ -1866,16 +1866,29 @@ enum lws_enum_stdinouterr {
 	LWS_STDERR = 2,
 };
 
+enum lws_cgi_hdr_state {
+	LCHS_HEADER, /* copying header bytes, watching for CR */
+	LCHS_CR1, /* not referenced by the cgi stdout header parser */
+	LCHS_LF1, /* CR seen inside a header line, LF must follow */
+	LCHS_CR2, /* CRLF seen; a CR here starts the blank line ending the headers */
+	LCHS_LF2, /* CR of the blank line seen, LF must follow */
+	LHCS_PAYLOAD, /* header section ended, further cgi stdout is payload */
+};
+
 struct lws_cgi_args {
 	struct lws **stdwsi; /* get fd with lws_get_socket_fd() */
 	enum lws_enum_stdinouterr ch;
 	unsigned char *data; /* for messages with payload */
+	enum lws_cgi_hdr_state hdr_state; /* copy of the stdio wsi's hdr_state, filled in by the service loop */
 	int len;
 };
 
 LWS_VISIBLE LWS_EXTERN int
 lws_cgi(struct lws *wsi, char * const *exec_array, int timeout_secs);
 
+LWS_VISIBLE LWS_EXTERN int
+lws_cgi_write_split_stdout_headers(struct lws *wsi);
+
 LWS_VISIBLE LWS_EXTERN int
 lws_cgi_kill(struct lws *wsi);
 #endif
diff --git a/lib/private-libwebsockets.h b/lib/private-libwebsockets.h
index 70dfeaf81f7b55449e4e3131975eda8bf9c3c137..5f118ad765abe818ee4d941ca908ea9cbb03bfab 100644
--- a/lib/private-libwebsockets.h
+++ b/lib/private-libwebsockets.h
@@ -1054,6 +1054,7 @@ struct lws_cgi {
 
 	unsigned int being_closed:1;
 };
+
 #endif
 
 struct lws {
@@ -1158,6 +1159,7 @@ struct lws {
 	char tsi; /* thread service index we belong to */
 #ifdef LWS_WITH_CGI
 	char cgi_channel; /* which of stdin/out/err */
+	char hdr_state;
 #endif
 };
 
diff --git a/lib/service.c b/lib/service.c
index 75658953a798d047c9addb70dee611feacb58c13..ee872feacb9d71aaf94e665cdd857d183253f19d 100644
--- a/lib/service.c
+++ b/lib/service.c
@@ -874,6 +874,7 @@ handle_pending:
 
 			args.ch = wsi->cgi_channel;
 			args.stdwsi = &wsi->parent->cgi->stdwsi[0];
+			args.hdr_state = wsi->hdr_state;
 
 			if (user_callback_handle_rxflow(
 					wsi->parent->protocol->callback,
diff --git a/test-server/lws-cgi-test.sh b/test-server/lws-cgi-test.sh
index 2f5df11781c14c2b9bc263c8c672cd6dd2e95cd1..914673ff918214c9504f791c4848f7a5b45fbb70 100755
--- a/test-server/lws-cgi-test.sh
+++ b/test-server/lws-cgi-test.sh
@@ -1,18 +1,31 @@
 #!/bin/sh
 
-echo "lwstest script stdout"
+printf 'Content-type: text/html\r\n'
+printf '\r\n'
+# CGI header lines end in CRLF; printf is used as "echo -e -n" is not portable under /bin/sh
+echo "<html><body>"
+echo "<h1>lwstest script stdout</h1>"
 >&2 echo "lwstest script stderr"
 
-echo "REQUEST_METHOD=$REQUEST_METHOD"
+echo "<h2>REQUEST_METHOD=$REQUEST_METHOD</h2>"
 
 if [ "$REQUEST_METHOD" = "POST" ] ; then
 	read line
 	echo "read=\"$line\""
 else
-	cat /proc/meminfo
+	echo "<table>"
+	echo "<tr><td colspan=\"2\" style=\"font-size:120%;text-align:center\">/proc/meminfo</td></tr>"
+	cat /proc/meminfo | while read line ; do
+		A=`echo "$line" | cut -d: -f1`
+		B=`echo "$line" | tr -s ' ' | cut -d' ' -f2-`
+		printf '<tr><td style="background-color:#f0e8c0">%s</td>\n' "$A"
+		printf '<td style="text-align:right">%s</td></tr>\n' "$B"
+	done
+	echo "</table>"
 fi
 
-echo "done"
+echo "<br/>done"
+echo "</body></html>"
 
 exit 0
 
diff --git a/test-server/test-server-http.c b/test-server/test-server-http.c
index 4de62e8bd1fa7d5d83136db26312abf73a9fea0d..594c8dff3d759628d2caa29537f48a395d9b8759 100644
--- a/test-server/test-server-http.c
+++ b/test-server/test-server-http.c
@@ -213,19 +213,17 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 
 			if (lws_add_http_header_status(wsi, 200, &p, end))
 				return 1;
-			if (lws_add_http_header_by_token(wsi,
-					WSI_TOKEN_HTTP_CONTENT_TYPE,
-					(unsigned char *)"text/plain",
-					10, &p, end))
-				return 1;
 			if (lws_add_http_header_by_token(wsi, WSI_TOKEN_CONNECTION,
 					(unsigned char *)"close", 5, &p, end))
 				return 1;
-			if (lws_finalize_http_header(wsi, &p, end))
-				return 1;
 			n = lws_write(wsi, buffer + LWS_PRE,
 				      p - (buffer + LWS_PRE),
 				      LWS_WRITE_HTTP_HEADERS);
+
+			/* the cgi starts by outputting headers, we can't
+			 *  finalize the headers until we see the end of that
+			 */
+
 			break;
 		}
 #endif
@@ -399,20 +397,10 @@ int callback_http(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 			goto try_to_reuse;
 #ifdef LWS_WITH_CGI
 		if (pss->reason_bf) {
-			lwsl_debug("%s: stdout\n", __func__);
-			n = read(lws_get_socket_fd(pss->args.stdwsi[LWS_STDOUT]),
-					buf + LWS_PRE, sizeof(buf) - LWS_PRE);
-			//lwsl_notice("read %d (errno %d)\n", n, errno);
-			if (n < 0 && errno != EAGAIN)
-				return -1;
-			if (n > 0) {
-				m = lws_write(wsi, (unsigned char *)buf + LWS_PRE, n,
-					      LWS_WRITE_HTTP);
-				//lwsl_notice("write %d\n", m);
-				if (m < 0)
-					goto bail;
-				pss->reason_bf = 0;
-			}
+			if (lws_cgi_write_split_stdout_headers(wsi) < 0)
+				goto bail;
+
+			pss->reason_bf = 0;
 			break;
 		}
 #endif