Wireshark-dev: Re: [Wireshark-dev] Reduce memory consumption by re-reading data from file for r

From: Anders Broman <a.broman@xxxxxxxxx>
Date: Mon, 01 Nov 2010 22:30:02 +0100
Anders Broman skrev 2010-11-01 21:26:
Jeff Morriss skrev 2010-11-01 20:21:
Anders Broman wrote:
- Store the file pointer in the topmost TVBUFF_REAL_DATA, changing the
type to something like
    TVBUFF_REAL_DATA_FROM_FILE to differentiate it from tvbs not
constructed from file data.

- When making sub-tvbs, they'd have the type TVBUFF_SUBSET_FROM_FILE.
I think the top-level would also need to store the offset into the file.
    SUBSET and COMPOSITE TVBs would not have to change at all, except
maybe to have a bit that says their backing is persistent.
Sorry, I didn't mean the file handle but rather the file offset.  The
SUBSET tvbs would then need
their own file offset. I haven't looked at the COMPOSITE type - a problem?
- The reassembly routines should then be changed to store not the
fragments but rather the file offset
    and length; when all fragments are available and need to be presented,
that data is read from file and
    stuffed into a "reassembled data tvb".  Possibly data should be kept
around until the final packet in a reassembly sequence on the first pass,
for speed.
I had been thinking it would build up composite TVBs using the passed-in
TVBs.  Since the TVBs would now be "persistent" this would be okay (we
recently discussed composite TVBs and why they're not used much
now--basically it's because the underlying TVBs aren't persistent).

Did I miss something? Feasible?
I don't think so and I think yes.

(I came back from Sharkfest thinking I'd build a prototype of this, but
you can see how much work I've done on it! :-()
Should we go ahead and check in changes toward this goal in trunk, or
open up a test branch,
or just privately try to build a prototype? Collaboration might be
quicker. The needed changes to tvbuff.[ch]
shouldn't be that extensive - I hope.
Anders

Hi,
Here's the first set of changes (untested) if someone wants to have a look. Comments?
Regards
Anders
Index: tvbuff.c
===================================================================
--- tvbuff.c	(revision 34747)
+++ tvbuff.c	(working copy)
@@ -92,23 +92,26 @@
 	tvb_backing_t	*backing;
 	tvb_comp_t	*composite;
 
-	tvb->type		= type;
-	tvb->initialized	= FALSE;
-	tvb->usage_count	= 1;
-	tvb->length		= 0;
+	tvb->type				= type;
+	tvb->initialized		= FALSE;
+	tvb->usage_count		= 1;
+	tvb->length				= 0;
 	tvb->reported_length	= 0;
-	tvb->free_cb		= NULL;
-	tvb->real_data		= NULL;
-	tvb->raw_offset		= -1;
-	tvb->used_in		= NULL;
-	tvb->ds_tvb		= NULL;
+	tvb->free_cb			= NULL;
+	tvb->real_data			= NULL;
+	tvb->raw_offset			= -1;
+	tvb->used_in			= NULL;
+	tvb->ds_tvb				= NULL;
+	tvb->file_off			= 0;
 
 	switch(type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			/* Nothing */
 			break;
 
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			backing = &tvb->tvbuffs.subset;
 			backing->tvb	= NULL;
 			backing->offset	= 0;
@@ -146,9 +149,9 @@
 }
 
 static tvbuff_t*
-tvb_new_with_subset(const guint subset_tvb_offset, const guint subset_tvb_length)
+tvb_new_with_subset(const tvbuff_type type, const guint subset_tvb_offset, const guint subset_tvb_length)
 {
-	tvbuff_t *tvb = tvb_new(TVBUFF_SUBSET);
+	tvbuff_t *tvb = tvb_new(type);
 	tvb->tvbuffs.subset.offset = subset_tvb_offset;
 	tvb->tvbuffs.subset.length = subset_tvb_length;
 
@@ -166,6 +169,7 @@
 
 	if (tvb->usage_count == 0) {
 		switch (tvb->type) {
+			case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			if (tvb->free_cb) {
 				/*
@@ -176,6 +180,7 @@
 			break;
 
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			/* This will be NULL if tvb_new_subset() fails because
 			 * reported_length < -1 */
 			if (tvb->tvbuffs.subset.tvb) {
@@ -322,6 +327,28 @@
 }
 
 tvbuff_t*
+tvb_new_real_data_from_file(gint64 file_off, const guint8* data, const guint length, const gint reported_length)
+{
+	tvbuff_t	*tvb;
+
+	THROW_ON(reported_length < -1, ReportedBoundsError);
+
+	tvb = tvb_new(TVBUFF_REAL_DATA_FROM_FILE);
+
+	tvb_set_real_data_no_exceptions(tvb, data, length, reported_length);
+
+	tvb->file_off = file_off;
+
+	/*
+	 * This is the top-level real tvbuff for this data source,
+	 * so its data source tvbuff is itself.
+	 */
+	tvb->ds_tvb = tvb;
+
+	return tvb;
+}
+
+tvbuff_t*
 tvb_new_child_real_data(tvbuff_t *parent, const guint8* data, const guint length, const gint reported_length)
 {
 	tvbuff_t *tvb = tvb_new_real_data(data, length, reported_length);
@@ -525,7 +552,12 @@
 			&subset_tvb_offset,
 			&subset_tvb_length);
 
-	tvb = tvb_new_with_subset(subset_tvb_offset, subset_tvb_length);
+	if((backing->type==TVBUFF_REAL_DATA_FROM_FILE)||(backing->type==TVBUFF_SUBSET_FROM_FILE)){
+		tvb = tvb_new_with_subset(TVBUFF_SUBSET_FROM_FILE, subset_tvb_offset, subset_tvb_length);
+		tvb->file_off = backing->file_off+backing_offset;
+	}else{
+		tvb = tvb_new_with_subset(TVBUFF_SUBSET, subset_tvb_offset, subset_tvb_length);
+	}
 
 	tvb_set_subset_no_exceptions(tvb, backing, reported_length);
 
@@ -549,8 +581,14 @@
 			&subset_tvb_offset,
 			&subset_tvb_length);
 
-	tvb = tvb_new_with_subset(subset_tvb_offset, subset_tvb_length);
+	if((backing->type==TVBUFF_REAL_DATA_FROM_FILE)||(backing->type==TVBUFF_SUBSET_FROM_FILE)){
+		tvb = tvb_new_with_subset(TVBUFF_SUBSET_FROM_FILE, subset_tvb_offset, subset_tvb_length);
+		tvb->file_off = backing->file_off+subset_tvb_offset;
+	}else{
+		tvb = tvb_new_with_subset(TVBUFF_SUBSET, subset_tvb_offset, subset_tvb_length);
+	}
 
+
 	tvb_set_subset_no_exceptions(tvb, backing, -1 /* reported_length */);
 
 	/*
@@ -791,9 +829,11 @@
 	tvbuff_t	*member;
 
 	switch(tvb->type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			return tvb->real_data;
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			member = tvb->tvbuffs.subset.tvb;
 			return first_real_data_ptr(member);
 		case TVBUFF_COMPOSITE:
@@ -812,9 +852,11 @@
 	tvbuff_t	*member;
 
 	switch(tvb->type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			return counter;
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			member = tvb->tvbuffs.subset.tvb;
 			return offset_from_real_beginning(member, counter + tvb->tvbuffs.subset.offset);
 		case TVBUFF_COMPOSITE:
@@ -896,9 +938,11 @@
 	}
 	else {
 		switch(tvb->type) {
+			case TVBUFF_REAL_DATA_FROM_FILE:
 			case TVBUFF_REAL_DATA:
 				DISSECTOR_ASSERT_NOT_REACHED();
 			case TVBUFF_SUBSET:
+			case TVBUFF_SUBSET_FROM_FILE:
 				return ensure_contiguous_no_exception(tvb->tvbuffs.subset.tvb,
 						abs_offset - tvb->tvbuffs.subset.offset,
 						abs_length, NULL);
@@ -1064,10 +1108,12 @@
 	}
 
 	switch(tvb->type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			DISSECTOR_ASSERT_NOT_REACHED();
 
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			return tvb_memcpy(tvb->tvbuffs.subset.tvb, target,
 					abs_offset - tvb->tvbuffs.subset.offset,
 					abs_length);
@@ -1832,10 +1878,12 @@
 	}
 
 	switch(tvb->type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			DISSECTOR_ASSERT_NOT_REACHED();
 
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			return tvb_find_guint8(tvb->tvbuffs.subset.tvb,
 					abs_offset - tvb->tvbuffs.subset.offset,
 					limit, needle);
@@ -1897,10 +1945,12 @@
 	}
 
 	switch(tvb->type) {
+		case TVBUFF_REAL_DATA_FROM_FILE:
 		case TVBUFF_REAL_DATA:
 			DISSECTOR_ASSERT_NOT_REACHED();
 
 		case TVBUFF_SUBSET:
+		case TVBUFF_SUBSET_FROM_FILE:
 			return tvb_pbrk_guint8(tvb->tvbuffs.subset.tvb,
 					abs_offset - tvb->tvbuffs.subset.offset,
 					limit, needles, found_needle);
Index: tvbuff.h
===================================================================
--- tvbuff.h	(revision 34747)
+++ tvbuff.h	(working copy)
@@ -53,7 +53,9 @@
 
 /** The different types of tvbuff's */
 typedef enum {
+	TVBUFF_REAL_DATA_FROM_FILE,
 	TVBUFF_REAL_DATA,
+	TVBUFF_SUBSET_FROM_FILE,
 	TVBUFF_SUBSET,
 	TVBUFF_COMPOSITE
 } tvbuff_type;
@@ -95,7 +97,8 @@
 	GSList			*used_in;
 
 	/** TVBUFF_SUBSET and TVBUFF_COMPOSITE keep track
-	 * of the other tvbuff's they use */
+	 * of the other tvbuff's they use
+	 */
 	union {
 		tvb_backing_t	subset;
 		tvb_comp_t	composite;
@@ -115,9 +118,12 @@
 	/** Reported length. */
 	guint			reported_length;
 
-	/* Offset from beginning of first TVBUFF_REAL. */
+	/** Offset from beginning of first TVBUFF_REAL. */
 	gint			raw_offset;
 
+	 /** File offset */
+	gint64       file_off;   
+
 	/** Func to call when actually freed */
 	tvbuff_free_cb_t	free_cb;
 } tvbuff_t;
@@ -220,7 +226,10 @@
 extern tvbuff_t* tvb_new_real_data(const guint8* data, const guint length,
     const gint reported_length);
 
-
+/** Combination of tvb_new() and tvb_set_real_data(). Can throw ReportedBoundsError. Should only be called from proto.c
+ * Includes the file offset of the data in the topmost tvb.
+ */
+extern tvbuff_t* tvb_new_real_data_from_file(gint64 file_off, const guint8* data, const guint length, const gint reported_length);
 /** Define the subset of the backing buffer to use.
  *
  * 'backing_offset' can be negative, to indicate bytes from

Index: packet.c
===================================================================
--- packet.c	(revision 34747)
+++ packet.c	(working copy)
@@ -314,7 +314,7 @@
 	EP_CHECK_CANARY(("before dissecting frame %d",fd->num));
 
 	TRY {
-		edt->tvb = tvb_new_real_data(pd, fd->cap_len, fd->pkt_len);
+		edt->tvb = tvb_new_real_data_from_file(fd->file_off, pd, fd->cap_len, fd->pkt_len);
 		/* Add this tvbuffer into the data_src list */
 		add_new_data_source(&edt->pi, edt->tvb, "Frame");