Wireshark-dev: [Wireshark-dev] RFC: Protocol fields list (reduce memory usage)

From: Jakub Zawadzki <darkjames-ws@xxxxxxxxxxxx>
Date: Mon, 8 Jul 2013 05:59:55 +0200
Hi,

Right nowe for every protocol we have linked list with protocols fields. This list in total takes about 1.5 MB 
(more than 100K hf fields, 16 bytes on 64bits per GList).

This list is only used by two functions: proto_get_first_protocol_field() and proto_get_next_protocol_field(), which are used only by gtk:
 - autocompletion
 - supported protocols list (Internals menu)
 - tree model for filter expression dialog

hf fields are commonly registered next to each other. We can reduce memory usage 
if we replace SList iter per hf, with some range structure storing first hf and last one.

Attaching working PoC, listing all fields ordered by protocol takes some more time
(1.07s vs 0.04s compiled with -O0), but output is the same.

I'd appreciate any comments / review / testing.

Cheers,
Jakub.
diff --git a/epan/proto.c b/epan/proto.c
index 5e4d3de..71925b3 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -239,14 +239,21 @@ static header_field_info hfi_text_only =
 	{ "Text item",	"text", FT_NONE, BASE_NONE, NULL, 0x0, NULL, HFILL };
 int hf_text_only = -1;
 
+struct _protocol_fields_range {
+	struct _protocol_fields_range *next;
+
+	int first;
+	int last;
+};
+
 /* Structure for information about a protocol */
 struct _protocol {
 	const char *name;         /* long description */
 	const char *short_name;   /* short description */
 	const char *filter_name;  /* name of this protocol in filters */
 	int         proto_id;     /* field ID for this protocol */
-	GList      *fields;       /* fields for this protocol */
-	GList      *last_field;   /* pointer to end of list of fields */
+	struct _protocol_fields_range *hf_fields;       /* list of hf's range (first...last) for this protocol */
+	struct _protocol_fields_range *last_hf_fields;  /* last hf item */
 	gboolean    is_enabled;   /* TRUE if protocol is enabled */
 	gboolean    can_toggle;   /* TRUE if is_enabled can be changed */
 	gboolean    is_private;   /* TRUE is protocol is private */
@@ -412,13 +419,22 @@ proto_cleanup(void)
 	}
 
 	while (protocols) {
+		struct _protocol_fields_range *range_list;
+
 		protocol_t        *protocol = (protocol_t *)protocols->data;
 		header_field_info *hfinfo;
 		PROTO_REGISTRAR_GET_NTH(protocol->proto_id, hfinfo);
 		DISSECTOR_ASSERT(protocol->proto_id == hfinfo->id);
 
 		g_slice_free(header_field_info, hfinfo);
-		g_list_free(protocol->fields);
+
+		for (range_list = protocol->hf_fields; range_list;) {
+			struct _protocol_fields_range *range_cur = range_list;
+
+			range_list = range_list->next;
+			g_slice_free(struct _protocol_fields_range, range_cur);
+		}
+
 		protocols = g_list_remove(protocols, protocol);
 		g_free(protocol);
 	}
@@ -4400,7 +4416,7 @@ proto_register_protocol(const char *name, const char *short_name,
 	protocol->name = name;
 	protocol->short_name = short_name;
 	protocol->filter_name = filter_name;
-	protocol->fields = NULL;
+	protocol->hf_fields = NULL;
 	protocol->is_enabled = TRUE; /* protocol is enabled by default */
 	protocol->can_toggle = TRUE;
 	protocol->is_private = FALSE;
@@ -4485,33 +4501,90 @@ proto_get_next_protocol(void **cookie)
 	return protocol->proto_id;
 }
 
+static const struct _protocol_fields_range *
+range_from_hf(int hf)
+{
+	const struct _protocol_fields_range *hf_range;
+	protocol_t *protocol;
+	header_field_info *hfi;
+
+	hfi = proto_registrar_get_nth(hf);
+	if (hfi == NULL)
+		return NULL;
+
+	protocol = find_protocol_by_id(hfi->parent);
+	if (protocol == NULL)
+		return NULL;
+
+	for (hf_range = protocol->hf_fields; hf_range; hf_range = hf_range->next) {
+		if (hf >= hf_range->first && hf <= hf_range->last)
+			return hf_range;
+	}
+
+	return NULL;
+}
+
+static header_field_info *
+range_get_field_info(const struct _protocol_fields_range *hf_range, int hf)
+{
+	static header_field_info *hfi;
+
+	while (!(hfi = proto_registrar_get_nth(hf))) {
+
+		hf++;
+		if (hf > hf_range->last) {
+			hf_range = hf_range->next;
+			if (!hf_range)
+				return NULL;
+			hf = hf_range->first;
+		}
+	}
+
+	return hfi;
+}
+
 header_field_info *
 proto_get_first_protocol_field(const int proto_id, void **cookie)
 {
-	protocol_t       *protocol = find_protocol_by_id(proto_id);
-	hf_register_info *ptr;
+	protocol_t *protocol = find_protocol_by_id(proto_id);
+	const struct _protocol_fields_range *hf_range;
+
+	header_field_info *hfi;
 
-	if ((protocol == NULL) || (protocol->fields == NULL))
+	if ((protocol == NULL) || (protocol->hf_fields == NULL))
 		return NULL;
 
-	*cookie = protocol->fields;
-	ptr = (hf_register_info *)protocol->fields->data;
-	return &ptr->hfinfo;
+	hf_range = protocol->hf_fields;
+	hfi = range_get_field_info(hf_range, hf_range->first);
+	if (hfi)
+		*cookie = GINT_TO_POINTER(hfi->id);
+
+	return hfi;
 }
 
 header_field_info *
 proto_get_next_protocol_field(void **cookie)
 {
-	GList            *list_item = (GList *)*cookie;
-	hf_register_info *ptr;
+	const struct _protocol_fields_range *hf_range;
+	header_field_info *hfi;
+	int hf = GPOINTER_TO_INT(*cookie);
 
-	list_item = g_list_next(list_item);
-	if (list_item == NULL)
-		return NULL;
+	hf_range = range_from_hf(hf);
+	/* assert(hf_range); */
 
-	*cookie = list_item;
-	ptr = (hf_register_info *)list_item->data;
-	return &ptr->hfinfo;
+	hf++;
+	if (hf > hf_range->last) {
+		hf_range = hf_range->next;
+		if (!hf_range)
+			return NULL;
+		hf = hf_range->first;
+	}
+
+	hfi = range_get_field_info(hf_range, hf);
+	if (hfi)
+		*cookie = GINT_TO_POINTER(hfi->id);
+
+	return hfi;
 }
 
 protocol_t *
@@ -4658,9 +4731,9 @@ proto_register_field_array(const int parent, hf_register_info *hf, const int num
 {
 	int		  field_id, i;
 	hf_register_info *ptr = hf;
-	protocol_t	 *proto;
 
-	proto = find_protocol_by_id(parent);
+	int proto_first_id = -1;
+
 	for (i = 0; i < num_records; i++, ptr++) {
 		/*
 		 * Make sure we haven't registered this yet.
@@ -4677,17 +4750,29 @@ proto_register_field_array(const int parent, hf_register_info *hf, const int num
 			return;
 		}
 
-		if (proto != NULL) {
-			if (proto->fields == NULL) {
-				proto->fields = g_list_append(NULL, ptr);
-				proto->last_field = proto->fields;
-			} else {
-				proto->last_field =
-					g_list_append(proto->last_field, ptr)->next;
-			}
-		}
 		field_id = proto_register_field_init(&ptr->hfinfo, parent);
 		*ptr->p_id = field_id;
+
+		if (proto_first_id == -1)
+			proto_first_id = field_id;
+	}
+
+	/* any field registered? */
+	if (proto_first_id != -1 && parent != -1) {
+		protocol_t *proto = find_protocol_by_id(parent);
+
+		struct _protocol_fields_range *hf_range = g_slice_new(struct _protocol_fields_range);
+
+		hf_range->first = proto_first_id;
+		hf_range->last  = field_id;
+		hf_range->next  = NULL;
+
+		if (proto->hf_fields == NULL)
+			proto->hf_fields = hf_range;
+		else
+			proto->last_hf_fields->next = hf_range;
+
+		proto->last_hf_fields = hf_range;
 	}
 }
 
@@ -4695,28 +4780,21 @@ proto_register_field_array(const int parent, hf_register_info *hf, const int num
 void
 proto_unregister_field (const int parent, gint hf_id)
 {
-	hf_register_info *hf;
-	protocol_t       *proto;
-	GList            *field;
+	header_field_info *hfinfo;
+	protocol_t        *proto;
 
 	if (hf_id == -1 || hf_id == 0)
 		return;
 
-	proto = find_protocol_by_id (parent);
-	if (!proto || !proto->fields) {
+	proto = find_protocol_by_id(parent);
+	if (!proto)
 		return;
-	}
 
-	for (field = g_list_first (proto->fields); field; field = g_list_next (field)) {
-		hf = (hf_register_info *)field->data;
-		if (*hf->p_id == hf_id) {
-			/* Found the hf_id in this protocol */
-			g_tree_steal (gpa_name_tree, hf->hfinfo.abbrev);
-			proto->fields = g_list_remove_link (proto->fields, field);
-			proto->last_field = g_list_last (proto->fields);
-			break;
-		}
-	}
+	PROTO_REGISTRAR_GET_NTH(hf_id, hfinfo);
+	g_assert(hfinfo->parent == parent);
+
+	g_tree_steal(gpa_name_tree, hfinfo->abbrev);
+	gpa_hfinfo.hfi[hf_id] = NULL;	/* XXX, need some unique 'unregister field' hfi to not care about NULL dereference */
 }
 
 /* chars allowed in field abbrev */