[MUD-Dev] Re: Question on c++ switch optimization, and parsers in general.

Ola Fosheim Grøstad <olag@ifi.uio.no> Ola Fosheim Grøstad <olag@ifi.uio.no>
Tue Feb 9 01:09:12 CET 1999


Ben Greear wrote:

> Basically, I'll have a bunch of classes hashed into an array
> that will contain the keywords mapped to an enum.

I am a sloppy reader... you were already using hashtables... *slap*

> Currently, I'm using a huge (~400 cases) switch statement.
>
> So, the question is:  Is that efficient?  Does the compiler
> generate code that does better than a linear search down the
> case statements?  If not, I can manually hack a sort of n-ary
> tree performance into it, but I'd wrather not if I can help it.

The only way to find out is to generate assembly code or at least tell us
what compiler you are using .-)  Some compilers have trouble with large
switch statements so you might want to split it. Of course, your integral
values (enums) should only span a small range [0..399] if you want O(1)
performance. I don't think you can get anything faster than a switch
statement, if your code and compiler is doing what you they should do!
(could save you function call overhead)

> Also, I'll be curious if anyone wants to spew forth their own ideas
> on how to parse user's commands into callable methods.

As you already use hashing this is probably of little value to you, but
perhaps some of the non-posting listmembers can use this small tutorial to
gperf, a GNU program which generates non-minimal perfect hashfunctions...
(non-minimal means there will be unused values, so you need one extra level
of mapping)

gperf is at least reasonably easy to use for those who are just starting to
write their mud from scratch. One could find better hashfunctions and lookup
though...
---------------------------------------------------------------------------------------

**** first create a list with your datastructure, keywords and associated
**** values in a file, e.g. perfhash.keywords... Note: no empty lines!

unix> cat perfhash.keywords
struct entry {
char *name;
int value;
};
%%
look,1
l,1
examine,1
get,2
put,3
w,4
west,4
e,5
east,5
n,6
north,6
s,7
south,7
jump,13
say,8
yell,9
shout,10
whisper,11
wave,12
smile,12
grin,12
yawn,12
burp,12
wink,12
sneeze,12
zap,12
groom,13
die,14
quit,15
exit,15
sell,16
buy,17
sad,18
happy,19
unix>

**** then try to compile it with gperf and your fav options

unix> cat perfhash.keywords |
> gperf -g -G -o -t -a -p  -E '-Nfindentry' > perfhash.h
Key link: "exit" = "east", with key set "et".
1 input keys have identical hash values,
try different key positions or use option -D.

**** try again, use other characters than the default (first and last)
**** to do this use the -k option

unix> cat perfhash.keywords |
> gperf -g -G -o -t -a -p  -E '-Nfindentry' '-k1,2'> perfhash.h

Key link: "sad" = "say", with key set "as".
1 input keys have identical hash values,
try different key positions or use option -D.

**** rats! try again...

unix> cat perfhash.keywords |
> gperf -g -G -o -t -a -p  -E '-Nfindentry' '-k1,3'> perfhash.h

**** success! check out the result

unix> cat perfhash.h
/* C code produced by gperf version 2.5 (GNU C++ version) */
/* Command-line: gperf -g -G -o -t -a -p -E -Nfindentry -k1,3  */
struct entry {
char *name;
int value;
};
/* maximum key range = 83, duplicates = 0 */

#ifdef __GNUC__
inline
#endif
static unsigned int
hash (register const char *str, register int len)
{
  static unsigned char asso_values[] =
    {
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
     84, 84, 84, 84, 84, 84, 84,  5, 60, 84,
      5, 10, 84, 40, 15,  5,  0, 84, 20,  0,
     30, 10,  0, 50,  0,  0,  0,  0, 45,  5,
     84, 20, 10, 84, 84, 84, 84, 84,
    };
  register int hval = len;

  switch (hval)
    {
      default:
      case 3:
        hval += asso_values[str[2]];
      case 2:
      case 1:
        hval += asso_values[str[0]];
        break;
    }
  return hval;
}

static struct entry wordlist[] =
{
      {"",},
      {"s", 7},
      {"",},
      {"put", 3},
      {"jump", 13},
      {"south", 7},
      {"w", 4},
      {"",},
      {"sad", 18},
      {"west", 4},
      {"smile", 12},
      {"e", 5},
      {"",},
      {"zap", 12},
      {"east", 5},
      {"shout", 10},
      {"sneeze", 12},
      {"whisper", 11},
      {"die", 14},
      {"exit", 15},
      {"happy", 19},
      {"l", 1},
      {"examine", 1},
      {"say", 8},
      {"sell", 16},
      {"",}, {"",}, {"",}, {"",},
      {"yawn", 12},
      {"",},
      {"n", 6},
      {"",}, {"",},
      {"look", 1},
      {"north", 6},
      {"",}, {"",}, {"",},
      {"wink", 12},
      {"",}, {"",}, {"",},
      {"get", 2},
      {"yell", 9},
      {"",}, {"",}, {"",}, {"",},
      {"grin", 12},
      {"",}, {"",}, {"",}, {"",},
      {"wave", 12},
      {"groom", 13},
      {"",}, {"",}, {"",},
      {"quit", 15},
      {"",}, {"",}, {"",}, {"",},
      {"burp", 12},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},

      {"buy", 17},
};

#ifdef __GNUC__
inline
#endif
struct entry *
findentry (register const char *str, register int len)
{
  enum
    {
      TOTAL_KEYWORDS = 34,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 7,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 83,
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register char *s = wordlist[key].name;

          if (*s == *str && !strcmp (str + 1, s + 1))
            return &wordlist[key];
        }
    }
  return 0;
}
unix>

**** write a program. e.g. perfhash.c

#include <string.h>
#include <stdio.h>
#include "perfhash.h"

typedef struct entry entry;

char* test[] = {"look","eye","e","examine","","happy"};

int main(int argc,char **argv){
   entry* e; int i;
   (void)argc; (void)argv;

   for(i=0; i<(sizeof(test)/sizeof(char*)); i++){
      e = findentry(test[i],strlen(test[i]));
      if(e)
         printf("findentry(\"%s\") = \"%s\",
%d\n",test[i],e->name,e->value);
      else
         printf("findentry(\"%s\") = not found\n",test[i]);
   }
   return 0;
}

**** compile with gcc

unix> gcc perfhash.c
unix>

**** execute!

unix> ./a.out
findentry("look") = "look", 1
findentry("eye") = not found
findentry("e") = "e", 5
findentry("examine") = "examine", 1
findentry("") = not found
findentry("happy") = "happy", 19
unix>

**** then optimize for speed, try -S1 etc...
--
Ola Fosheim Groestad,Norway      http://www.stud.ifi.uio.no/~olag/







More information about the mud-dev-archive mailing list