Wireshark-dev: [Wireshark-dev] New packet list: Optimize memory usage

From: Jakub Zawadzki <darkjames@xxxxxxxxxxxxxxxx>
Date: Sun, 12 Jul 2009 21:48:14 +0200
Hi, 

This patch (Proof of Concept) removes allocating memory for columns data,
and makes them 'dynamic' (packets redissected when column data needed)

Getting column data is done in packet_list_record_get_column()
I'd be grateful if someone could give some hints how it could be optimized.
(Anyway I think it'd be nice if we have some column cache for some previous and next rows, or e.g. tcp.stream related)

Unfortunetly resorting for large dumps is PITA - we could also cache some columns (configurable by user?)

Some stats for loading 113MB dumpfile:
             Old          New             Diff
Loading:        16s         11s         (-5s)
  VmRSS:  587956 kB   386376 kB         (-201580 kB)

I haven't seen any visible lags while scrolling, CPU usage is higher but for GUI is still fluent :)
diff --git file.c file.c
index 71ac4d1..181c32f 100644
--- file.c
+++ file.c
@@ -1046,7 +1046,7 @@ add_packet_to_packet_list(frame_data *fdata, capture_file *cf,
      evaluated. */
   if ((dfcode != NULL && refilter) || color_filters_used() ||
       filtering_tap_listeners || (tap_flags & TL_REQUIRES_PROTO_TREE) ||
-      have_custom_cols(&cf->cinfo))
+      0 /* have_custom_cols(&cf->cinfo) */)
 	  create_proto_tree = TRUE;
 
   /* Dissect the frame. */
@@ -1060,7 +1060,7 @@ add_packet_to_packet_list(frame_data *fdata, capture_file *cf,
       color_filters_prime_edt(edt);
   }
 
-  col_custom_prime_edt(edt, &cf->cinfo);
+  /* col_custom_prime_edt(edt, &cf->cinfo); */
 
   tap_queue_init(edt);
   epan_dissect_run(edt, pseudo_header, buf, fdata, &cf->cinfo);
@@ -1090,7 +1090,7 @@ add_packet_to_packet_list(frame_data *fdata, capture_file *cf,
       cum_bytes += fdata->pkt_len;
     }
 
-    epan_dissect_fill_in_columns(edt);
+    /* epan_dissect_fill_in_columns(edt); */	/* dynamic */
 
     /* If we haven't yet seen the first frame, this is it.
 
diff --git gtk/new_packet_list.c gtk/new_packet_list.c
index 73f7209..84bd9bc 100644
--- gtk/new_packet_list.c
+++ gtk/new_packet_list.c
@@ -82,21 +82,10 @@ new_packet_list_create(void)
 }
 
 guint
-new_packet_list_append(column_info cinfo, frame_data *fdata)
+new_packet_list_append(column_info cinfo _U_, frame_data *fdata)
 {
-	gint i;
 	row_data_t row_data;
 
-	/* Allocate the array holding column data, the size is the current number of columns */
-	row_data.col_text = se_alloc0(sizeof(row_data.col_text)*cfile.cinfo.num_cols);
-	row_data.col_fmt = (gint *) se_alloc(sizeof(gint) * cfile.cinfo.num_cols);
-
-	for(i = 0; i < cfile.cinfo.num_cols; i++) {
-		row_data.col_text[i] =
-			se_strdup(cinfo.col_data[i]);
-		row_data.col_fmt[i] = cinfo.col_fmt[i];
-	}
-
 	row_data.fdata = fdata;
 
 	packet_list_append_record(packetlist, &row_data);
@@ -132,8 +121,7 @@ create_view_and_model(void)
 	for(i = 0; i < cfile.cinfo.num_cols; i++) {
 		col = gtk_tree_view_column_new();
 		gtk_tree_view_column_pack_start(col, renderer, TRUE);
-		gtk_tree_view_column_add_attribute(col, renderer, "text",
-						   cfile.cinfo.col_fmt[i]);
+		gtk_tree_view_column_add_attribute(col, renderer, "text", i);
 		gtk_tree_view_column_set_title(col, cfile.cinfo.col_title[i]);
 		gtk_tree_view_column_set_sort_column_id(col, i);
 		gtk_tree_view_column_set_resizable(col, TRUE);
diff --git gtk/packet_list_store.c gtk/packet_list_store.c
index 1b1cc14..898ef6e 100644
--- gtk/packet_list_store.c
+++ gtk/packet_list_store.c
@@ -205,31 +205,6 @@ packet_list_tree_model_init(GtkTreeModelIface *iface)
 static void
 packet_list_init(PacketList *packet_list)
 {
-	guint i;
-#if 0
-	gint fmt;
-
-	for(i = 0; i < (guint)cfile.cinfo.num_cols; i++) {
-		/* Get the format of the column, see column_info.h */
-		fmt = get_column_format(i);
-		switch(fmt){
-			/* if we wish to store data rater than strings for some
-			 * colum types add case statements to the switch.
-			 */
-			case COL_NUMBER:
-			default:
-				packet_list->column_types[i] = G_TYPE_STRING;
-				break;
-		}
-	}
-#endif
-
-	for(i = 0; i < NUM_COL_FMTS; i++) { /* XXX - Temporary? */
-		packet_list->column_types[i] = G_TYPE_STRING;
-	}
-	
-
-	packet_list->n_columns = NUM_COL_FMTS;
 	packet_list->num_rows = 0;
 	packet_list->rows = NULL;
 
@@ -267,17 +242,18 @@ packet_list_get_n_columns(GtkTreeModel *tree_model)
 {
 	g_return_val_if_fail(PACKETLIST_IS_LIST(tree_model), 0);
 
-	return PACKET_LIST(tree_model)->n_columns;
+	/* return NUM_COL_FMTS; */
+	return cfile.cinfo.num_cols;
 }
 
 static GType
 packet_list_get_column_type(GtkTreeModel *tree_model, gint index)
 {
 	g_return_val_if_fail(PACKETLIST_IS_LIST(tree_model), G_TYPE_INVALID);
-	g_return_val_if_fail(index < PACKET_LIST(tree_model)->n_columns &&
+	g_return_val_if_fail(index < cfile.cinfo.num_cols &&
 			     index >= 0, G_TYPE_INVALID);
 
-	return PACKET_LIST(tree_model)->column_types[index];
+	return G_TYPE_STRING;	/* always string */
 }
 
 static gboolean
@@ -340,6 +316,39 @@ packet_list_get_path(GtkTreeModel *tree_model, GtkTreeIter *iter)
 	return path;
 }
 
+static const gchar *
+packet_list_record_get_column(PacketListRecord *record, gint column)
+{
+	capture_file *cf = &cfile;
+	epan_dissect_t *edt;
+
+	gchar *err_info;
+	int err;
+
+	g_return_val_if_fail(column >= 0 && column < cfile.cinfo.num_cols, NULL);
+	
+#if 0	/* doesn't work, dunno how */
+	if (cf->current_frame == record->fdata) {
+		printf("hit cache: %p %p\n", cf->current_frame, record->fdata);
+		return cf->cinfo.col_data[column];
+	}
+#endif
+
+	if (!wtap_seek_read(cf->wth, record->fdata->file_off, &cf->pseudo_header,
+				cf->pd, record->fdata->cap_len, &err, &err_info))
+	{
+		return "[ERROR]";
+	}
+
+	edt = epan_dissect_new(have_custom_cols(&cf->cinfo), FALSE);
+	col_custom_prime_edt(edt, &cf->cinfo);
+	epan_dissect_run(edt, &cf->pseudo_header, cf->pd, record->fdata, &cf->cinfo);
+	epan_dissect_fill_in_columns(edt);
+	epan_dissect_free(edt);
+
+  	return cf->cinfo.col_data[column];
+}
+
 static void
 packet_list_get_value(GtkTreeModel *tree_model, GtkTreeIter *iter, gint column,
 		      GValue *value)
@@ -349,9 +358,9 @@ packet_list_get_value(GtkTreeModel *tree_model, GtkTreeIter *iter, gint column,
 
 	g_return_if_fail(PACKETLIST_IS_LIST(tree_model));
 	g_return_if_fail(iter != NULL);
-	g_return_if_fail(column < PACKET_LIST(tree_model)->n_columns);
+	g_return_if_fail(column >= 0 && column < cfile.cinfo.num_cols);
 
-	g_value_init(value, PACKET_LIST(tree_model)->column_types[column]);
+	g_value_init(value, G_TYPE_STRING);
 
 	packet_list = PACKET_LIST(tree_model);
 
@@ -360,7 +369,7 @@ packet_list_get_value(GtkTreeModel *tree_model, GtkTreeIter *iter, gint column,
 	if(record->pos >= packet_list->num_rows)
 		g_return_if_reached();
 
-	g_value_set_string(value, record->col_text[column]);
+	g_value_set_string(value, packet_list_record_get_column(record, column));
 }
 
 /* Takes an iter structure and sets it to point to the next row. */
@@ -505,7 +514,6 @@ packet_list_append_record(PacketList *packet_list, row_data_t *row_data)
 	GtkTreePath *path;
 	PacketListRecord *newrecord;
 	guint pos;
-	gint i;
 
 	g_return_if_fail(PACKETLIST_IS_LIST(packet_list));
 
@@ -516,18 +524,11 @@ packet_list_append_record(PacketList *packet_list, row_data_t *row_data)
  	packet_list->rows = g_renew(PacketListRecord*, packet_list->rows,
 				    packet_list->num_rows);
 
-	newrecord = se_alloc0(sizeof(PacketListRecord));
-	newrecord->col_text = se_alloc0(sizeof(row_data->col_text)* NUM_COL_FMTS);
-
-
-	/* XXX newrecord->col_text still uses the fmt index */
-	for(i = 0; i < cfile.cinfo.num_cols; i++)
-		newrecord->col_text[row_data->col_fmt[i]] = row_data->col_text[i];
-
+	newrecord        = se_alloc(sizeof(PacketListRecord));
 	newrecord->fdata = row_data->fdata;
+	newrecord->pos   = pos;
 
 	packet_list->rows[pos] = newrecord;
-	newrecord->pos = pos;
 
 	/* Inform the tree view and other interested objects (such as tree row
 	 * references) that we have inserted a new row and where it was
@@ -632,8 +633,8 @@ packet_list_compare_records(gint sort_id, PacketListRecord *a,
 	 * seen by the user, whereas the col_text array from a and b is a
 	 * column format number. This matches them to each other to get
 	 * the right column text. */
-	gchar *a_col_text = a->col_text[cfile.cinfo.col_fmt[sort_id]];
-	gchar *b_col_text = b->col_text[cfile.cinfo.col_fmt[sort_id]];
+	const gchar *a_col_text = packet_list_record_get_column(a, sort_id);
+	const gchar *b_col_text = packet_list_record_get_column(b, sort_id);
 
 	if((a_col_text) && (b_col_text))
 		return strcmp(a_col_text, b_col_text);
diff --git gtk/packet_list_store.h gtk/packet_list_store.h
index 3262ac6..721e18a 100644
--- gtk/packet_list_store.h
+++ gtk/packet_list_store.h
@@ -38,9 +38,6 @@
 #define PACKETLIST_LIST_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), PACKETLIST_TYPE_LIST, PacketListClass))
 
 typedef struct {
-	gint *col_fmt;
-	gchar **col_text;
-	gchar **col_filter;
 	frame_data *fdata;
 } row_data_t;
 
@@ -53,7 +50,6 @@ struct _PacketListRecord
 
 {
 	frame_data *fdata;
-	gchar **col_text;
 
 	/* admin stuff used by the custom list model */
 	guint pos; /* position within the array */
@@ -69,8 +65,6 @@ struct _PacketList
 				  * the PacketListRecord structure for each
 				  * row. */
 
-	gint n_columns;
-	GType column_types[NUM_COL_FMTS];
 	GtkWidget *view; /* XXX - Does this really belong here?? */
 
 	gint sort_id;