Ethereal-dev: Re: [Ethereal-dev] HTTP chunked encoding patch
Note: This archive is from the project's previous web site, ethereal.com. This list is no longer active.
From: Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
Date: Mon, 26 Apr 2004 06:06:07 -0700
On Mon, Apr 26, 2004 at 10:46:00AM +0200, Biot Olivier wrote: > Hi Jerry, > > Thanks for this valuable contribution!!! > > I extensively tested your patch, and it works in all situations... Except > for the odd case where a zero-length chunked-coded answer is generated, as > the whole (undecoded) chunk is passed to the subdissector and in fact > nothing should be passed to it in this peculiar situation. > > I refer to the following MMSE capture sent on ethereal-users: > http://www.ethereal.com/lists/ethereal-users/200404/msg00185.html > > The fault can be reproduced with the attached 2-packet capture extracted > from the capture attached to the referred message. Fixed in the attached patch. -- GPG public key: http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x9D5B8762
? .gdb-breakpoints ? cscope.out ? depcomp ? flags.sh ? gdbethereal ? epan/.tvbuff.c.swp Index: AUTHORS =================================================================== RCS file: /cvsroot/ethereal/AUTHORS,v retrieving revision 1.999 diff -u -r1.999 AUTHORS --- AUTHORS 22 Apr 2004 08:22:07 -0000 1.999 +++ AUTHORS 26 Apr 2004 13:03:41 -0000 @@ -285,7 +285,7 @@ updates to BGP (Border Gateway Protocol) support } -Jerry Talkington <jerryt[AT]netapp.com> { +Jerry Talkington <jtalkington[AT]users.sourceforge.net> { updates to HTTP support Filter selection/editing GUI improvements WCCP 1.0 support Index: packet-http.c =================================================================== RCS file: /cvsroot/ethereal/packet-http.c,v retrieving revision 1.96 diff -u -r1.96 packet-http.c --- packet-http.c 12 Apr 2004 22:14:37 -0000 1.96 +++ packet-http.c 26 Apr 2004 13:03:42 -0000 @@ -73,6 +73,9 @@ static gint ett_http = -1; static gint ett_http_ntlmssp = -1; static gint ett_http_request = -1; +static gint ett_http_chunked_response = -1; +static gint ett_http_chunk_data = -1; +static gint ett_http_encoded_entity = -1; static dissector_handle_t data_handle; static dissector_handle_t http_handle; @@ -125,6 +128,8 @@ static int is_http_request_or_reply(const gchar *data, int linelen, http_type_t *type, RequestDissector *req_dissector, int *req_strlen); +static int chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo, + proto_tree *tree, int offset); static void process_header(tvbuff_t *tvb, int offset, int next_offset, const guchar *line, int linelen, int colon_offset, packet_info *pinfo, proto_tree *tree, headers_t *eh_ptr); @@ -589,6 +594,7 @@ */ tvbuff_t *next_tvb; void *save_private_data = NULL; + gint chunks_decoded = 0; /* * Create a tvbuff for the payload. @@ -608,6 +614,38 @@ reported_datalen); /* + * Handle transfer encodings other than "identity". 
+ */ + if (headers.transfer_encoding != NULL && + strcasecmp(headers.transfer_encoding, "identity") != 0) { + if (strcasecmp(headers.transfer_encoding, "chunked") + == 0) { + + chunks_decoded = chunked_encoding_dissector( + &next_tvb, pinfo, tree, 0); + + if (chunks_decoded <= 0) { + /* + * The chunks weren't reassembled, + * or there was a single zero + * length chunk. + */ + goto body_dissected; + } + + } else { + /* + * We currently can't handle, for example, "gzip", + * "compress", or "deflate"; just handle them + * as data for now. + */ + call_dissector(data_handle, next_tvb, pinfo, + http_tree); + goto body_dissected; + } + } + + /* * Handle content encodings other than "identity" (which * shouldn't appear in a Content-Encoding header, but * we handle it in any case). @@ -619,23 +657,28 @@ * "compress", or "deflate"; just handle them as * data for now. */ - call_dissector(data_handle, next_tvb, pinfo, - http_tree); - goto body_dissected; - } + if (chunks_decoded != 0) { + /* + * There is a chunked response tree, so put + * the entity body below it. + */ + proto_item *e_ti = NULL; + proto_tree *e_tree = NULL; - /* - * Handle transfer encodings other than "identity". - */ - if (headers.transfer_encoding != NULL && - strcasecmp(headers.transfer_encoding, "identity") != 0) { - /* - * We currently can't handle, for example, "chunked", - * "gzip", "compress", or "deflate"; just handle them - * as data for now. - */ - call_dissector(data_handle, next_tvb, pinfo, - http_tree); + e_ti = proto_tree_add_text(tree, next_tvb, + 0, tvb_length(next_tvb), + "Encoded entity-body (%s)", + headers.content_encoding); + + e_tree = proto_item_add_subtree(e_ti, + ett_http_encoded_entity); + + call_dissector(data_handle, next_tvb, pinfo, + e_tree); + } else { + call_dissector(data_handle, next_tvb, pinfo, + http_tree); + } goto body_dissected; } @@ -751,6 +794,186 @@ } /* + * Dissect the http data chunks and add them to the tree. 
+ */ +static int +chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo, + proto_tree *tree, int offset) +{ + guint8 *chunk_string = NULL; + gint chunk_size = 0; + gint chunk_offset = 0; + gint datalen = 0; + gint linelen = 0; + gint chunks_decoded = 0; + tvbuff_t *tvb = NULL; + tvbuff_t *new_tvb = NULL; + gint chunked_data_size = 0; + proto_tree *subtree = NULL; + proto_item *ti = NULL; + + if (tvb_ptr == NULL || *tvb_ptr == NULL) { + return 0; + } + + tvb = *tvb_ptr; + + datalen = tvb_reported_length_remaining(tvb, offset); + + if (tree) { + ti = proto_tree_add_text(tree, tvb, offset, datalen, + "HTTP chunked response"); + subtree = proto_item_add_subtree(ti, ett_http_chunked_response); + } + + + while (datalen != 0) { + proto_item *chunk_ti = NULL; + proto_tree *chunk_subtree = NULL; + tvbuff_t *data_tvb = NULL; + gchar *c = NULL; + + linelen = tvb_find_line_end(tvb, offset, -1, &chunk_offset, TRUE); + + if (linelen <= 0) { + /* Can't get the chunk size line */ + return chunks_decoded; + } + + chunk_string = tvb_get_string(tvb, offset, linelen); + + if (chunk_string == NULL) { + /* Can't get the chunk size line */ + return chunks_decoded; + } + + c = chunk_string; + + /* + * We don't care about the extensions. + */ + if ((c = strchr(c, ';'))) { + *c = '\0'; + } + + if (sscanf(chunk_string, "%x", &chunk_size) != 1) { + g_free(chunk_string); + return chunks_decoded; + } + + g_free(chunk_string); + + + if (chunk_size > datalen) { + /* + * The chunk size is more than what's in the tvbuff, + * so either the user hasn't enabled decoding, or all + * of the segments weren't captured. 
+ */ + chunk_size = datalen; + }/* else if (new_tvb == NULL) { + new_tvb = tvb_new_composite(); + } + + + + if (new_tvb != NULL && chunk_size != 0) { + tvbuff_t *chunk_tvb = NULL; + + chunk_tvb = tvb_new_subset(tvb, chunk_offset, + chunk_size, datalen); + + tvb_composite_append(new_tvb, chunk_tvb); + + } + */ + + chunked_data_size += chunk_size; + + if (chunk_size != 0) { + guint8 *raw_data = g_malloc(chunked_data_size); + gint raw_len = 0; + + if (new_tvb != NULL) { + raw_len = tvb_length_remaining(new_tvb, 0); + tvb_memcpy(new_tvb, raw_data, 0, raw_len); + + tvb_free(new_tvb); + } + + tvb_memcpy(tvb, (guint8 *)(raw_data + raw_len), + chunk_offset, chunk_size); + + new_tvb = tvb_new_real_data(raw_data, + chunked_data_size, chunked_data_size); + + } + + if (subtree) { + if (chunk_size == 0) { + chunk_ti = proto_tree_add_text(subtree, tvb, + offset, + chunk_offset - offset + chunk_size + 2, + "Data chunk (last chunk)"); + } else { + chunk_ti = proto_tree_add_text(subtree, tvb, + offset, + chunk_offset - offset + chunk_size + 2, + "Data chunk (%u octets)", chunk_size); + } + + chunk_subtree = proto_item_add_subtree(chunk_ti, + ett_http_chunk_data); + + proto_tree_add_text(chunk_subtree, tvb, offset, + chunk_offset - offset, "Chunk size: %u octets", + chunk_size); + + data_tvb = tvb_new_subset(tvb, chunk_offset, chunk_size, + datalen); + + + if (chunk_size > 0) { + call_dissector(data_handle, data_tvb, pinfo, + chunk_subtree); + } + + proto_tree_add_text(chunk_subtree, tvb, chunk_offset + + chunk_size, 2, "Chunk boundry"); + } + + chunks_decoded++; + offset = chunk_offset + chunk_size + 2; + datalen = tvb_reported_length_remaining(tvb, offset); + } + + if (new_tvb != NULL) { + + /* + tvb_composite_finalize(new_tvb); + //tvb_set_reported_length(new_tvb, chunked_data_size); + */ + + tvb_set_child_real_data_tvbuff(tvb, new_tvb); + add_new_data_source(pinfo, new_tvb, "De-chunked entity body"); + + tvb_free(*tvb_ptr); + *tvb_ptr = new_tvb; + + } else { + /* + * We didn't 
create a new tvb, so don't allow sub dissectors + * try to decode the non-existant entity body. + */ + chunks_decoded = -1; + } + + return chunks_decoded; + +} + + +/* * XXX - this won't handle HTTP 0.9 replies, but they're all data * anyway. */ @@ -1271,6 +1494,9 @@ &ett_http, &ett_http_ntlmssp, &ett_http_request, + &ett_http_chunked_response, + &ett_http_chunk_data, + &ett_http_encoded_entity, }; module_t *http_module; Index: packet-wccp.c =================================================================== RCS file: /cvsroot/ethereal/packet-wccp.c,v retrieving revision 1.33 diff -u -r1.33 packet-wccp.c --- packet-wccp.c 28 Aug 2002 21:00:37 -0000 1.33 +++ packet-wccp.c 26 Apr 2004 13:03:43 -0000 @@ -1,6 +1,6 @@ /* packet-wccp.c * Routines for Web Cache Coordination Protocol dissection - * Jerry Talkington <jerryt@xxxxxxxxxx> + * Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx> * * $Id: packet-wccp.c,v 1.33 2002/08/28 21:00:37 jmayer Exp $ * Index: req_resp_hdrs.c =================================================================== RCS file: /cvsroot/ethereal/req_resp_hdrs.c,v retrieving revision 1.3 diff -u -r1.3 req_resp_hdrs.c --- req_resp_hdrs.c 29 Dec 2003 22:33:18 -0000 1.3 +++ req_resp_hdrs.c 26 Apr 2004 13:03:43 -0000 @@ -30,6 +30,7 @@ #include <glib.h> #include <epan/packet.h> #include <epan/strutil.h> +#include <string.h> #include "req_resp_hdrs.h" @@ -47,6 +48,7 @@ int linelen; long int content_length; gboolean content_length_found = FALSE; + gboolean chunked_encoding = FALSE; /* * Do header desegmentation if we've been told to. @@ -131,8 +133,8 @@ } /* - * Is this a Content-Length header? - * If not, it either means that we are in + * Is this a Content-Length or Transfer-Encoding + * header? 
If not, it either means that we are in * a different header line, or that we are * at the end of the headers, or that there * isn't enough data; the two latter cases @@ -151,6 +153,44 @@ "%li", &content_length) == 1) content_length_found = TRUE; + } else if (tvb_strncaseeql(tvb, + next_offset_sav, + "Transfer-Encoding:", 18) == 0) { + gchar *chunk_type = tvb_get_string(tvb, + next_offset_sav + 18, linelen - 18); + /* + * Find out if this Transfer-Encoding is + * chunked. It should be, since there + * really aren't any other types, but + * RFC 2616 allows for them. + */ + + if (chunk_type != NULL) { + gchar *c = chunk_type; + gint len = strlen(chunk_type); + + + /* start after any white-space */ + while (c != NULL && c < + chunk_type + len && + (*c == ' ' || + *c == 0x09)) { + c++; + } + + if (c <= chunk_type + len ) { + if (strncasecmp(c, "chunked", 7) + == 0) { + /* + * Don't bother looking for extensions; + * since we don't understand them, + * they should be ignored. + */ + chunked_encoding = TRUE; + } + } + g_free(chunk_type); + } } } } @@ -158,30 +198,139 @@ /* * The above loop ends when we reached the end of the headers, so - * there should be content_length byte after the 4 terminating bytes + * there should be content_length bytes after the 4 terminating bytes * and next_offset points to after the end of the headers. 
*/ - if (desegment_body && content_length_found) { - /* next_offset has been set because content-length was found */ - if (!tvb_bytes_exist(tvb, next_offset, content_length)) { - length_remaining = tvb_length_remaining(tvb, - next_offset); - reported_length_remaining = - tvb_reported_length_remaining(tvb, next_offset); - if (length_remaining < reported_length_remaining) { + if (desegment_body) { + if (content_length_found) { + /* next_offset has been set to the end of the headers */ + if (!tvb_bytes_exist(tvb, next_offset, content_length)) { + length_remaining = tvb_length_remaining(tvb, + next_offset); + reported_length_remaining = + tvb_reported_length_remaining(tvb, next_offset); + if (length_remaining < reported_length_remaining) { + /* + * It's a waste of time asking for more + * data, because that data wasn't captured. + */ + return TRUE; + } + if (length_remaining == -1) + length_remaining = 0; + pinfo->desegment_offset = offset; + pinfo->desegment_len = + content_length - length_remaining; + return FALSE; + } + } else if (chunked_encoding) { + /* + * This data is chunked, so we need to keep pulling + * data until we reach the end of the stream, or a + * zero sized chunk. + * + * XXX + * This doesn't bother with trailing headers; I don't + * think they are really used, and we'd have to use + * is_http_request_or_reply() to determine if it was + * a trailing header, or the start of a new response. 
+ */ + gboolean done_chunking = FALSE; + + while (!done_chunking) { + gint chunk_size = 0; + gint chunk_offset = 0; + gchar *chunk_string = NULL; + gchar *c = NULL; + + length_remaining = tvb_length_remaining(tvb, + next_offset); + reported_length_remaining = + tvb_reported_length_remaining(tvb, + next_offset); + + if (reported_length_remaining < 1) { + pinfo->desegment_offset = offset; + pinfo->desegment_len = 1; + return FALSE; + } + + linelen = tvb_find_line_end(tvb, next_offset, + -1, &chunk_offset, TRUE); + + if (linelen == -1 && + length_remaining >= + reported_length_remaining) { + pinfo->desegment_offset = offset; + pinfo->desegment_len = 2; + return FALSE; + } + + /* We have a line with the chunk size in it.*/ + chunk_string = tvb_get_string(tvb, next_offset, + linelen); + c = chunk_string; + /* - * It's a waste of time asking for more - * data, because that data wasn't captured. + * We don't care about the extensions. */ - return TRUE; + if ((c = strchr(c, ';'))) { + *c = '\0'; + } + + if ((sscanf(chunk_string, "%x", + &chunk_size) < 0) || chunk_size < 0) { + /* We couldn't get the chunk size, + * so stop trying. + */ + return TRUE; + } + + if (chunk_size == 0) { + /* + * This is the last chunk. Let's pull in the + * trailing CRLF. + */ + linelen = tvb_find_line_end(tvb, + chunk_offset, -1, &chunk_offset, TRUE); + + if (linelen == -1 && + length_remaining >= + reported_length_remaining) { + pinfo->desegment_offset = offset; + pinfo->desegment_len = 1; + return FALSE; + } + + pinfo->desegment_offset = chunk_offset; + pinfo->desegment_len = 0; + done_chunking = TRUE; + } else { + /* + * Skip to the next chunk if we + * already have it + */ + if (reported_length_remaining > + chunk_size) { + + next_offset = chunk_offset + + chunk_size + 2; + } else { + /* + * Fetch this chunk, plus the + * trailing CRLF. 
+ */ + pinfo->desegment_offset = offset; + pinfo->desegment_len = + chunk_size + 1 - + reported_length_remaining; + return FALSE; + } + } + } - if (length_remaining == -1) - length_remaining = 0; - pinfo->desegment_offset = offset; - pinfo->desegment_len = - content_length - length_remaining; - return FALSE; } + } /* Index: doc/ethereal.pod =================================================================== RCS file: /cvsroot/ethereal/doc/ethereal.pod,v retrieving revision 1.109 diff -u -r1.109 ethereal.pod --- doc/ethereal.pod 23 Apr 2004 19:53:36 -0000 1.109 +++ doc/ethereal.pod 26 Apr 2004 13:03:44 -0000 @@ -1893,7 +1893,7 @@ Warren Young <tangent[AT]mail.com> Heikki Vatiainen <hessu[AT]cs.tut.fi> Greg Hankins <gregh[AT]twoguys.org> - Jerry Talkington <jerryt[AT]netapp.com> + Jerry Talkington <jtalkington[AT]users.sourceforge.net> Dave Chapeskie <dchapes[AT]ddm.on.ca> James Coe <jammer[AT]cin.net> Bert Driehuis <driehuis[AT]playbeing.org>
- Follow-Ups:
- Re: [Ethereal-dev] HTTP chunked encoding patch
- From: Olivier Biot
- Re: [Ethereal-dev] HTTP chunked encoding patch
- References:
- RE: [Ethereal-dev] HTTP chunked encoding patch
- From: Biot Olivier
- RE: [Ethereal-dev] HTTP chunked encoding patch
- Prev by Date: RE: [Ethereal-dev] Unhandled exception in pdml xml output
- Next by Date: [Ethereal-dev] FW: [Ethereal-users] Re: Problem with -w - on Windows 2000 [follow-up]
- Previous by thread: RE: [Ethereal-dev] HTTP chunked encoding patch
- Next by thread: Re: [Ethereal-dev] HTTP chunked encoding patch
- Index(es):