Wireshark-dev: Re: [Wireshark-dev] RFC: sorted value_string + bsearch

From: Jakub Zawadzki <darkjames@xxxxxxxxxxxxxxxx>
Date: Mon, 26 Apr 2010 21:13:54 +0200
On Fri, Apr 23, 2010 at 11:32:16PM +0200, Anders Broman wrote:
> like a full table starting at a number ( like 500 - 1000 ->value  match index minus 
> offset).

Uhm, sounds complicated :)

Maybe we can initialize match_type in proto_register_field_init()
so developer don't need to care if it's sorted or no...

We can do it also during first use of match_strval_fast() (which might be
better option)

Attaching patch, btw. I changed _fast to _ext.
diff --git epan/dfilter/semcheck.c epan/dfilter/semcheck.c
index 9d8032f..82cde98 100644
--- epan/dfilter/semcheck.c
+++ epan/dfilter/semcheck.c
@@ -232,6 +232,10 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
 	}
 	else {
 		const value_string *vals = hfinfo->strings;
+
+		if (hfinfo->display & BASE_EXT_STRING)
+			vals = ((value_string_ext *) vals)->vals;
+
 		while (vals->strptr != NULL) {
 			if (g_ascii_strcasecmp(s, vals->strptr) == 0) {
 				return mk_uint32_fvalue(vals->value);
diff --git epan/proto.c epan/proto.c
index 24a07f4..20dbc86 100644
--- epan/proto.c
+++ epan/proto.c
@@ -3368,6 +3368,8 @@ proto_custom_set(proto_tree* tree, const int field_id,
 			if (hfinfo->strings) {
 				if (hfinfo->display & BASE_RANGE_STRING) {
 					g_strlcpy(result, rval_to_str(u_integer, hfinfo->strings, "%u"), size);
+				} else if (hfinfo->display & BASE_EXT_STRING) {
+					g_strlcpy(result, val_to_str_ext(u_integer, (value_string_ext *) (hfinfo->strings), "%u"), size);
 				} else {
 					g_strlcpy(result, val_to_str(u_integer, cVALS(hfinfo->strings), "%u"), size);
 				}
@@ -3392,6 +3394,8 @@ proto_custom_set(proto_tree* tree, const int field_id,
 			if (hfinfo->strings) {
 				if (hfinfo->display & BASE_RANGE_STRING) {
 					g_strlcpy(result, rval_to_str(integer, hfinfo->strings, "%d"), size);
+				} else if (hfinfo->display & BASE_EXT_STRING) {
+					g_strlcpy(result, val_to_str_ext(integer, (value_string_ext *) (hfinfo->strings), "%d"), size);
 				} else {
 					g_strlcpy(result, val_to_str(integer, cVALS(hfinfo->strings), "%d"), size);
 			}
@@ -4350,12 +4354,39 @@ static void tmp_fld_check_assert(header_field_info *hfinfo) {
 	}
 }
 
+static void
+proto_reinit_value_string(value_string_ext *vse)
+{
+	const value_string *vals = vse->vals;
+	int type = VS_INDEX;
+	guint32 prev = 0;
+	guint i;
+
+	for (i = 0; i < vse->length; i++) {
+		if (type == VS_INDEX && vals[i].value != i)
+			type = VS_BIN_TREE;
+
+		if (type == VS_BIN_TREE && prev > vals[i].value) {
+			type = VS_SEARCH;
+			break;
+		}
+
+		prev = vals[i].value;
+	}
+
+	vse->match_type = type;
+	printf("%p: %d\n", vse, type);
+}
+
 static int
 proto_register_field_init(header_field_info *hfinfo, const int parent)
 {
 
 	tmp_fld_check_assert(hfinfo);
 
+	if (hfinfo->strings && (hfinfo->display & BASE_EXT_STRING) && IS_INT_TYPE(hfinfo->type))
+		proto_reinit_value_string((value_string_ext *) hfinfo->strings);
+
 	/* if this is a bitfield, compute bitshift */
 	if (hfinfo->bitmask) {
 		hfinfo->bitshift = wrs_count_bitshift(hfinfo->bitmask);
@@ -4748,6 +4779,10 @@ fill_label_bitfield(field_info *fi, gchar *label_str)
 			g_snprintf(p, ITEM_LABEL_LENGTH - bitfield_byte_length,
 					 format,  hfinfo->name,
 					 rval_to_str(value, hfinfo->strings, "Unknown"), value);
+		} else if (hfinfo->display & BASE_EXT_STRING) {
+			g_snprintf(p, ITEM_LABEL_LENGTH - bitfield_byte_length,
+					 format,  hfinfo->name,
+					 val_to_str_ext(value, (value_string_ext *) hfinfo->strings, "Unknown"), value);
 		} else {
 			g_snprintf(p, ITEM_LABEL_LENGTH - bitfield_byte_length,
 					 format,  hfinfo->name,
@@ -4790,6 +4825,10 @@ fill_label_uint(field_info *fi, gchar *label_str)
 			g_snprintf(label_str, ITEM_LABEL_LENGTH,
 					 format,  hfinfo->name,
 					 rval_to_str(value, hfinfo->strings, "Unknown"), value);
+		} else if (hfinfo->display & BASE_EXT_STRING) {
+			g_snprintf(label_str, ITEM_LABEL_LENGTH,
+					 format,  hfinfo->name,
+					 val_to_str_ext(value, (value_string_ext *) hfinfo->strings, "Unknown"), value);
 		} else {
 			g_snprintf(label_str, ITEM_LABEL_LENGTH,
 					 format,  hfinfo->name,
@@ -4853,6 +4892,10 @@ fill_label_int(field_info *fi, gchar *label_str)
 			g_snprintf(label_str, ITEM_LABEL_LENGTH,
 					 format,  hfinfo->name,
 					 rval_to_str(value, hfinfo->strings, "Unknown"), value);
+		} else if (hfinfo->display & BASE_EXT_STRING) {
+			g_snprintf(label_str, ITEM_LABEL_LENGTH,
+					 format,  hfinfo->name,
+					 val_to_str_ext(value, (value_string_ext *) hfinfo->strings, "Unknown"), value);
 		} else {
 			g_snprintf(label_str, ITEM_LABEL_LENGTH,
 					 format,  hfinfo->name,
@@ -5728,7 +5771,9 @@ proto_registrar_dump_values(void)
 				hfinfo->type == FT_INT32 ||
 				hfinfo->type == FT_INT64)) {
 
-				if ((hfinfo->display & BASE_RANGE_STRING) == 0) {
+				if ((hfinfo->display & BASE_EXT_STRING) == 0) {
+					vals = ((value_string_ext *) hfinfo->strings)->vals;
+				} if ((hfinfo->display & BASE_RANGE_STRING) == 0) {
 					vals = hfinfo->strings;
 				} else {
 					range = hfinfo->strings;
@@ -6064,6 +6109,8 @@ construct_match_selected_string(field_info *finfo, epan_dissect_t *edt,
 		case FT_INT32:
 			if (hfinfo->display & BASE_RANGE_STRING) {
 				str = match_strrval(fvalue_get_sinteger(&finfo->value), hfinfo->strings);
+			} else if (hfinfo->display & BASE_EXT_STRING) {
+				str = match_strval_ext(fvalue_get_sinteger(&finfo->value), hfinfo->strings);
 			} else {
 				str = match_strval(fvalue_get_sinteger(&finfo->value), hfinfo->strings);
 			}
@@ -6075,6 +6122,8 @@ construct_match_selected_string(field_info *finfo, epan_dissect_t *edt,
 		case FT_UINT32:
 			if (hfinfo->display & BASE_RANGE_STRING) {
 				str = match_strrval(fvalue_get_uinteger(&finfo->value), hfinfo->strings);
+			} else if (hfinfo->display & BASE_EXT_STRING) {
+				str = match_strval_ext(fvalue_get_uinteger(&finfo->value), hfinfo->strings);
 			} else {
 				str = match_strval(fvalue_get_uinteger(&finfo->value), hfinfo->strings);
 			}
@@ -6359,6 +6408,9 @@ proto_item_add_bitmask_tree(proto_item *item, tvbuff_t *tvb, const int offset, c
 				if (hf->display & BASE_RANGE_STRING) {
 					proto_item_append_text(item, "%s%s: %s", first ? "" : ", ",
 								   hf->name, rval_to_str(tmpval, hf->strings, "Unknown"));
+				} else if (hf->display & BASE_EXT_STRING) {
+					proto_item_append_text(item, "%s%s: %s", first ? "" : ", ",
+								   hf->name, val_to_str_ext(tmpval, hf->strings, "Unknown"));
 				} else {
 					proto_item_append_text(item, "%s%s: %s", first ? "" : ", ",
 								   hf->name, val_to_str(tmpval, cVALS(hf->strings), "Unknown"));
@@ -6569,6 +6621,8 @@ proto_tree_add_bits_ret_val(proto_tree *tree, const int hf_index, tvbuff_t *tvb,
 				"%s: %s (%u)",
 				str,	(hf_field->display & BASE_RANGE_STRING) ?
 					rval_to_str((guint32)value, hf_field->strings, "Unknown ") :
+					(hf_field->display & BASE_EXT_STRING) ?
+					val_to_str_ext((guint32)value, (value_string_ext *) (hf_field->strings), "Unknown ") :
 					val_to_str((guint32)value, cVALS(hf_field->strings), "Unknown "),
 				(guint32)value);
 			break;
diff --git epan/proto.h epan/proto.h
index e1c8bd0..97b934d 100644
--- epan/proto.h
+++ epan/proto.h
@@ -159,10 +159,15 @@ typedef enum {
 	BASE_CUSTOM	/**< call custom routine (in ->strings) to format */
 } base_display_e;
 
+#define IS_INT_TYPE(type) \
+  ((type) == FT_UINT8 || (type) == FT_UINT16 || (type) == FT_UINT24 || (type) == FT_UINT32 || (type) == FT_UINT64 || \
+   (type) == FT_INT8 || (type) == FT_INT16 || (type) == FT_INT24 || (type) == FT_INT32 || (type) == FT_INT64)
+
 /* Following constants have to be ORed with a base_display_e when dissector
  * want to use specials MACROs (for the moment, only RVALS) for a
  * header_field_info */
 #define BASE_RANGE_STRING 0x10
+#define BASE_EXT_STRING 0x20
 
 /* BASE_ values that cause the field value to be displayed twice */
 #define IS_BASE_DUAL(b) ((b)==BASE_DEC_HEX||(b)==BASE_HEX_DEC)
diff --git epan/value_string.c epan/value_string.c
index 0e43750..2ffd424 100644
--- epan/value_string.c
+++ epan/value_string.c
@@ -49,6 +49,19 @@ val_to_str(const guint32 val, const value_string *vs, const char *fmt) {
   return ep_strdup_printf(fmt, val);
 }
 
+const gchar*
+val_to_str_ext(const guint32 val, const value_string_ext *vs, const char *fmt) {
+  const gchar *ret;
+
+  g_assert(fmt != NULL);
+
+  ret = match_strval_ext(val, vs);
+  if (ret != NULL)
+    return ret;
+
+  return ep_strdup_printf(fmt, val);
+}
+
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr on a match.
    Returns 'unknown_str', on failure. */
@@ -65,6 +78,19 @@ val_to_str_const(const guint32 val, const value_string *vs, const char *unknown_
   return unknown_str;
 }
 
+const gchar*
+val_to_str_ext_const(const guint32 val, const value_string_ext *vs, const char *unknown_str) {
+  const gchar *ret;
+
+  g_assert(unknown_str != NULL);
+
+  ret = match_strval_ext(val, vs);
+  if (ret != NULL)
+    return ret;
+
+  return unknown_str;
+}
+
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr, and sets "*idx" to the index in
    that table, on a match, and returns NULL, and sets "*idx" to -1,
@@ -94,6 +120,39 @@ match_strval(const guint32 val, const value_string *vs) {
     return match_strval_idx(val, vs, &ignore_me);
 }
 
+const gchar*
+match_strval_ext(const guint32 val, const value_string_ext *vs) {
+  guint low, idx, max;
+  guint32 item;
+  if(vs) {
+    switch(vs->match_type){
+    case VS_DEFAULT: 
+      /* XXX: reinit? */
+    case VS_SEARCH:
+      return match_strval(val, vs->vals);
+    case VS_INDEX:
+      return (val < vs->length) ? vs->vals[val].strptr : NULL;
+    case VS_BIN_TREE:
+      for (low = 0, max = vs->length; low < max; ) {
+        idx = (low + max) / 2;
+        item = vs->vals[idx].value;
+
+        if (val < item)
+          max = idx;
+        else if (val > item)
+         low = idx + 1;
+        else
+          return vs->vals[idx].strptr;
+      }
+      break;
+    default:
+      g_assert_not_reached();
+      break;
+    }
+  }
+  return NULL;
+}
+
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr on a match.
    Formats val with fmt, and returns the resulting string, on failure. */
diff --git epan/value_string.h epan/value_string.h
index 7fcbb30..677ebb6 100644
--- epan/value_string.h
+++ epan/value_string.h
@@ -34,6 +34,25 @@ typedef struct _value_string {
   const gchar   *strptr;
 } value_string;
 
+/* The way matching of value is done in a value_string:
+ * 0 default, value will be set in proto_register_field_init()
+ * 1 Sequential search (as in a normal value string)
+ * 2 The value used as an index(the value string MUST have all values 0-max defined)
+ * 3 Binary search, the valuse MUST be in numerical order.
+ */
+#define VS_DEFAULT  0 
+#define VS_SEARCH   1
+#define VS_INDEX    2
+#define VS_BIN_TREE 3
+
+typedef struct {
+  guint match_type;             /* One of the values abowe */
+  guint length;                 /* length of the array */
+  const value_string *vals;     /* the value string */
+} value_string_ext;
+
+#define VALUE_STRING_EXT_INIT(x) { VS_DEFAULT, array_length(x)-1, x }
+
 /* Struct for the str_to_str, match_strstr_idx, and match_strstr functions */
 
 typedef struct _string_string {
@@ -59,16 +78,19 @@ extern const gchar* match_strval_idx(const guint32 val, const value_string *vs,
 
 /* Like match_strval_idx(), but doesn't return the index. */
 extern const gchar* match_strval(const guint32 val, const value_string *vs);
+extern const gchar* match_strval_ext(const guint32 val, const value_string_ext *vs);
 
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr on a match.
    Formats val with fmt, and returns the resulting string, on failure. */
 extern const gchar* val_to_str(const guint32 val, const value_string *vs, const char *fmt);
+extern const gchar* val_to_str_ext(const guint32 val, const value_string_ext *vs, const char *fmt);
 
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr on a match.
    Returns 'unknown_str', on failure. */
 extern const gchar* val_to_str_const(const guint32 val, const value_string *vs, const char *unknown_str);
+extern const gchar* val_to_str_ext_const(const guint32 val, const value_string_ext *vs, const char *unknown_str);
 
 /* Tries to match val against each element in the value_string array vs.
    Returns the associated string ptr, and sets "*idx" to the index in