Minor performance tweaks

darcs-hash:20051026105102-ac50b-ffa35c43fd9e1aad47229260e5d7da4249cacdcf.gz
author: axel <axel@liljencrantz.se> 2005-10-26 20:51:02 +1000
committer: axel <axel@liljencrantz.se> 2005-10-26 20:51:02 +1000
commit: 5ba0affdd7b18ed970c039961b75da2cb925d6cd (patch)
tree: 03d3494a2d3a83abc3d527af4074870e1932d19c /tokenizer.c
parent: 721f6169640d8fcf0697d225098141202c693ada (diff)
1 files changed, 19 insertions, 2 deletions
diff --git a/tokenizer.c b/tokenizer.c
index 42b2781a..5e0fa42c 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -46,7 +46,7 @@
 /**
   Characters that separate tokens. They are ordered by frequency of occurrence to increase parsing speed.
 */
-#define SEP L" \n;|#\t\r<>^&"
+#define SEP L" \n|\t;#\r<>^&"
 /**
    Tests if the tokenizer buffer is large enough to hold contents of
    the specified length, and if not, reallocates the tokenizer buffer.
@@ -192,8 +192,18 @@ static int is_string_char( wchar_t c )
 	{
 		return 0;
 	}
+	return 1;
+}
 
-  return 1;
+/**
+   Quick test to catch the most common 'non-magical' characters,
+   i.e. the plain letters a-z and A-Z. It makes read_string slightly
+   faster by giving those characters a fast path. This is obviously
+   not a suitable replacement for iswalpha.
+*/
+static int myal( wchar_t c )
+{
+	return (c>=L'a' && c<=L'z') || (c>=L'A'&&c<=L'Z');
+}
 
 /**
@@ -212,6 +222,11 @@ static void read_string( tokenizer *tok )
 
 	while( 1 )
 	{
+		
+		if( !myal( *tok->buff ) )
+		{
+//			debug(1, L"%lc", *tok->buff );
+
 		if( *tok->buff == L'\\' )
 		{
 			tok->buff++;
@@ -338,6 +353,8 @@ static void read_string( tokenizer *tok )
 				}
 				break;
 		}
+		}
+		
 
 		if( !do_loop )
 			break;
author	axel <axel@liljencrantz.se>	2005-10-26 20:51:02 +1000
committer	axel <axel@liljencrantz.se>	2005-10-26 20:51:02 +1000
commit	5ba0affdd7b18ed970c039961b75da2cb925d6cd (patch)
tree	03d3494a2d3a83abc3d527af4074870e1932d19c /tokenizer.c
parent	721f6169640d8fcf0697d225098141202c693ada (diff)