gzip compression for BOSH HTTP traffic

Hi,

We created a patch to support BOSH compression in Ejabberd 2.1.11. The patch itself amounts to 27 changed lines in the original GPL code of the ‘ejabberd_http.erl’ file.
The code changes enable gzip compression for BOSH HTTP traffic. Support for BOSH, HTTP and gzip compression was already available as functions and modules in the Ejabberd source code, and we did not change anything in those files. We believe that the lack of gzip compression support for the BOSH protocol was an ‘oversight’: something the original author forgot to add to the source code.

List of files modified or added with version: src/web/ejabberd_http.erl (revision 9)

We release this in compliance with the GPL3 licensing of the code.

---start of patch

@@ -404,14 +404,14 @@
    %% URL path prefix.
    case process(RequestHandlers, Request) of
El when element(1, El) == xmlelement ->
-     make_xhtml_output(State, 200, [], El);
+     make_xhtml_output(State, 200, gzip_ce_header(RequestHeaders), El);
{Status, Headers, El} when
element(1, El) == xmlelement ->
-     make_xhtml_output(State, Status, Headers, El);
+     make_xhtml_output(State, Status, Headers ++ gzip_ce_header(RequestHeaders), El);
Output when is_list(Output) or is_binary(Output) ->
-     make_text_output(State, 200, [], Output);
+     make_text_output(State, 200, gzip_ce_header(RequestHeaders), Output);
{Status, Headers, Output} when is_list(Output) or is_binary(Output) ->
-     make_text_output(State, Status, Headers, Output)
+     make_text_output(State, Status, Headers ++ gzip_ce_header(RequestHeaders), Output)
    end
     end;

@@ -443,7 +443,7 @@
_ ->
    ok
     end,
-    Data = recv_data(State, Len),
+    Data = gzip_content_decoder(RequestHeaders, recv_data(State, Len)), %% conditionally decompress data
     ?DEBUG("client data: ~p~n", [Data]),
     case (catch url_decode_q_split(Path)) of
{'EXIT', _} ->
@@ -469,14 +469,14 @@
       ip = IP},
    case process(RequestHandlers, Request) of
El when element(1, El) == xmlelement ->
-     make_xhtml_output(State, 200, [], El);
+     make_xhtml_output(State, 200, gzip_ce_header(RequestHeaders), El);
{Status, Headers, El} when
element(1, El) == xmlelement ->
-     make_xhtml_output(State, Status, Headers, El);
+     make_xhtml_output(State, Status, Headers ++ gzip_ce_header(RequestHeaders), El);
Output when is_list(Output) or is_binary(Output) ->
-     make_text_output(State, 200, [], Output);
+     make_text_output(State, 200, gzip_ce_header(RequestHeaders), Output);
{Status, Headers, Output} when is_list(Output) or is_binary(Output) ->
-     make_text_output(State, Status, Headers, Output)
+     make_text_output(State, Status, Headers ++ gzip_ce_header(RequestHeaders), Output)
    end
     end;

@@ -537,7 +537,7 @@

make_xhtml_output(State, Status, Headers, XHTML) ->
-    Data = case lists:member(html, Headers) of
+    DataR= case lists:member(html, Headers) of
       true ->
   list_to_binary([?HTML_DOCTYPE,
   element_to_string(XHTML)]);
@@ -545,6 +545,7 @@
   list_to_binary([?XHTML_DOCTYPE,
   element_to_string(XHTML)])
   end,
+    Data = gzip_content_encoder(Headers, DataR), %% conditionally compress
     Headers1 = case lists:keysearch("Content-Type", 1, Headers) of
   {value, _} ->
       [{"Content-Length", integer_to_list(size(Data))} |
@@ -588,7 +589,8 @@
make_text_output(State, Status, Headers, Text) when is_list(Text) ->
     make_text_output(State, Status, Headers, list_to_binary(Text));

-make_text_output(State, Status, Headers, Data) when is_binary(Data) ->
+make_text_output(State, Status, Headers, DataR) when is_binary(DataR) ->
+    Data = gzip_content_encoder(Headers, DataR), %% conditionally compress
     Headers1 = case lists:keysearch("Content-Type", 1, Headers) of
   {value, _} ->
       [{"Content-Length", integer_to_list(size(Data))} |
@@ -628,6 +630,18 @@

     [SL, H, "\r\n", Data2].

+gzip_content_encoder(ResponseHeaders, Data) ->
+ case lists:keyfind("Content-Encoding", 1, ResponseHeaders) of
+ {_,"gzip"} -> zlib:gzip(Data);
+ _ -> Data end.
+gzip_ce_header(RequestHeaders) ->
+ case string:str(proplists:get_value('Accept-Encoding', RequestHeaders, []), "gzip") of
+ 0 -> [];
+ _ -> [ {"Content-Encoding", "gzip"} | [] ] end.
+gzip_content_decoder(RequestHeaders, Data) ->
+ case proplists:get_value('Content-Encoding', RequestHeaders, []) of
+ "gzip" -> binary_to_list(zlib:gunzip(list_to_binary(Data)));
+ _ -> Data end.

parse_lang(Langs) ->
     case string:tokens(Langs, ",; ") of
@@ -1090,6 +1104,8 @@
     {ok, {http_header,  undefined, 'Accept-Language', undefined, strip_spaces(Con)}};
encode_header("Accept-Encoding", Con) ->
     {ok, {http_header,  undefined, 'Accept-Encoding', undefined, strip_spaces(Con)}};
+encode_header("Content-Encoding", Con) ->
+    {ok, {http_header,  undefined, 'Content-Encoding', undefined, strip_spaces(Con)}};
encode_header(Name, Val) ->
     {ok, {http_header,  undefined, Name, undefined, strip_spaces(Val)}}.

---end of patch

Few comments: gzipping all

A few comments:

  • gzipping all of the content unconditionally is just a waste of CPU: due to the nature of BOSH, most server responses are very small (they contain only "<body xmlns='http://jabber.org/protocol/httpbind'/>") and fit into a single packet. Gzipping a single-packet response gives no gain at all, but obviously uses CPU; it would be better to gzip only responses that are larger than one packet (an HTTP body larger than 1200-1300 bytes, depending on the amount of HTTP headers)
  • ungzipping incoming traffic is useless: no browser will send such traffic, since there is no way for a browser to know beforehand that the server supports it
  • ejabberd uses GPL2, not GPL3 (but I'm not sure if that matters)
  • if you want to include your patch in ejabberd, then send a pull request on GitHub
Syndicate content