can't pull parse strings - BUG?
Jason
jason2... at jasonjobe.com
Mon Feb 5 17:51:18 UTC 2007
I've finally gotten a little time to start working with the new
pullscan example.
Everything was going well until I tried to add string tokens. The .rl
file (below) works as is, but as soon as I un-comment the String rules
all hell breaks loose.
Any pointers would be most welcome.
--- TEST input file -------
[target go: 124 with:(1..9) num:1,298,99 int:87 float:1,299.987]
(list "some string")
(list 'some other string')
-------------- mod_pullscan.rl ------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity, in bytes, of the input buffer embedded in each Scanner. */
#define BUFSIZE 4096

/*
 * Complete state of one pull scanner: the variables the generated machine
 * works with, the buffered input, and the description of the most recently
 * returned token.
 */
typedef struct _Scanner {
	/* Scanner state. */
	int   cs;          /* machine state; scan() compares it with Scanner_error */
	int   act;         /* reserved for the generated scanner code */
	int   have;        /* bytes kept at the front of buf when it is refilled */
	int   curline;     /* line counter; scan_init() starts it at 1 */
	char *tokstart;    /* start of the token in progress, or 0 when there is none */
	char *tokend;      /* end marker of the token in progress */
	char *p;           /* resume position saved by scan() between calls */
	char *pe;          /* end of buffered data saved by scan() between calls */
	FILE *file;        /* stream the input is read from */
	int   done;        /* set once a read returned fewer bytes than requested */

	/* Token data */
	char *data;        /* first byte of the token text inside buf */
	int   len;         /* number of bytes in the token text */
	int   value;       /* not referenced by the scanner code shown here */
	char *token_name;  /* printable name of the token kind */

	char  buf[BUFSIZE];
} Scanner;
/*
 * Put a scanner into its starting configuration.
 *
 * Clears every byte of *s, then records the stream to read from and starts
 * the line counter at 1.
 */
void scan_init( Scanner *s, FILE *file )
{
	memset( s, 0, sizeof *s );
	s->file = file;
	s->curline = 1;
}
/*
 * Token codes returned by scan(). The named codes start at 128; the
 * catch-all scanner rule reports the matched character itself as the code.
 */
#define TK_NO_TOKEN    (-1)   /* scan()'s initial value: no token reported yet */
#define TK_ERR         128    /* scanner failure */
#define TK_EOF         129    /* end of input */
#define TK_Identifier  130
#define TK_Number      131
#define TK_Keyword     132
#define TK_String      133
/*
 * Ragel section: emit the generated static data for machine Scanner at this
 * point in the file (scan() later refers to Scanner_error).
 */
%%{
machine Scanner;
write data;
}%%
/*
 * Token-reporting helpers for the scanner actions inside scan().
 *
 * Both expect two names to be in scope at the point of use: an int `token`
 * and a scanner pointer `s`. Each records the token code in `token`, points
 * s->data at the start of the matched text (s->tokstart) and stores a
 * printable name in s->token_name.
 *
 *   ret_tok(tok)   - the name is the spelling of the argument, e.g.
 *                    "TK_Number" for ret_tok( TK_Number ).
 *   ret_char(tok)  - the name is always "TK_Char"; used when the code is a
 *                    plain character value.
 *
 * Each definition is kept on one logical line with backslash continuations:
 * a #define ends at the first unescaped newline, so a definition broken
 * after "s-" does not compile. The do { } while (0) wrapper makes each
 * macro behave as a single statement, and the argument is parenthesized so
 * an expression argument cannot regroup.
 */
#define ret_tok( _tok ) \
	do { \
		token = (_tok); \
		s->data = s->tokstart; \
		s->token_name = #_tok; \
	} while (0)

#define ret_char( _tok ) \
	do { \
		token = (_tok); \
		s->data = s->tokstart; \
		s->token_name = "TK_Char"; \
	} while (0)
/*
 * Pull the next token from the scanner's input.
 *
 * Resumes from the position saved in s->p / s->pe. Whenever the working
 * pointer reaches the end of the buffered data, s->buf is refilled from
 * s->file; a token that is still being matched is first moved to the front
 * of the buffer so it is not lost. The generated machine then runs until an
 * action reports a token.
 *
 * On success the token text starts at s->data and is s->len bytes long,
 * s->token_name holds a printable name, and s->p / s->pe hold the position
 * at which the next call continues.
 *
 * Returns the token code chosen by the action (a TK_* value, or the
 * character itself for the catch-all rule). Returns TK_ERR when the buffer
 * fills up without completing a token or the machine reaches its error
 * state.
 */
int scan( Scanner *s )
{
char *p = s->p;
char *pe = s->pe;
int token = TK_NO_TOKEN;
int space, readlen;
while ( 1 ) {
/* All buffered input has been consumed: refill before running the machine. */
if ( p == pe ) {
printf("scanner: need more data\n");
/* No token is in progress, so nothing in the buffer has to be kept. */
if ( s->tokstart == 0 )
s->have = 0;
else {
/* There is data that needs to be shifted over. */
printf("scanner: buffer broken mid token\n");
s->have = pe - s->tokstart;
memmove( s->buf, s->tokstart, s->have );
/* Move tokend by the same distance tokstart is about to move. */
s->tokend -= (s->tokstart-s->buf);
s->tokstart = s->buf;
}
/* New input is appended after the bytes that were kept. */
p = s->buf + s->have;
space = BUFSIZE - s->have;
if ( space == 0 ) {
/* We filled up the buffer trying to scan a token. */
printf("scanner: out of buffer space\n");
return TK_ERR;
}
if ( s->done ) {
printf("scanner: end of file\n");
/* Feed a single NUL byte; the `0 =>` rule below reports it as TK_EOF. */
p[0] = 0;
readlen = 1;
}
else {
readlen = fread( p, 1, space, s->file );
/* A short read is taken to mean the input is exhausted. */
if ( readlen < space )
s->done = 1;
}
pe = p + readlen;
}
%%{
machine Scanner;
# The machine's persistent variables are fields of the Scanner struct.
access s->;
# newline bumps s->curline when it matches '\n'.
# NOTE(review): neither newline nor any_count_line is referenced by the
# rules in main below (whitespace uses a plain [ \t\n]), so as written
# s->curline does not appear to advance - confirm.
newline = '\n' @{s->curline += 1;};
any_count_line = any | newline;
main := |*
# Alphanumeric characters or underscore.
alnum_u = alnum | '_';
# Alpha characters or underscore.
alpha_u = alpha | '_';
ident = alpha_u alnum_u*;
# Identifiers
ident =>
{ ret_tok( TK_Identifier ); fbreak; };
# Keywords: an identifier immediately followed by ':'.
ident ':' => {
ret_tok (TK_Keyword); fbreak;
};
# Strings
# NOTE(review): unlike every other token action here, the two disabled
# rules below do not end with fbreak. If enabled as written, the machine
# would presumably keep running after ret_tok and a later action would
# overwrite `token` and s->data before scan() returns - verify, and add
# `fbreak;` after ret_tok when enabling them.
# "'" ( [^'\\] | /\\./ )* "'" => { ret_tok (TK_String); };
# '"' ( [^"\\] | /\\./ )* '"' => { ret_tok (TK_String); };
# Whitespace: matched with no action, so no token is reported for it.
[ \t\n];
# Number
digit+ =>
{ ret_tok( TK_Number ); fbreak; };
# EOF: the NUL byte appended by the refill code once the input is done.
0 =>
{ ret_tok( TK_EOF ); fbreak; };
# Anything else: the token code is the character at p.
any =>
{ ret_char( *p ); fbreak; };
*|;
write exec;
}%%
/* The machine stopped in its error state. */
if ( s->cs == Scanner_error )
return TK_ERR;
/* An action reported a token; otherwise loop around for more input. */
if ( token != TK_NO_TOKEN ) {
/* Save p and pe. fbreak does not advance p. */
s->p = p + 1;
s->pe = pe;
/* The token text runs from s->data up to the saved resume position. */
s->len = s->p - s->data;
return token;
}
}
}
/*
 * Driver: tokenize standard input and print one line per token in the form
 *   parser: NAME(code) "text"
 * Stops after reporting EOF or ERR. The exit status is always 0.
 */
int main (int argc, char** argv)
{
	Scanner scanner;

	scan_init( &scanner, stdin );

	for (;;) {
		int code = scan( &scanner );

		if ( code == TK_EOF ) {
			printf( "parser: EOF\n" );
			return 0;
		}
		if ( code == TK_ERR ) {
			printf( "parser: ERR\n" );
			return 0;
		}

		/* Ordinary token: name and numeric code, then the raw text in quotes. */
		printf( "parser: %s(%d) \"", scanner.token_name, code );
		fwrite( scanner.data, 1, scanner.len, stdout );
		printf( "\"\n" );
	}
}
More information about the ragel-users
mailing list