[ragel-users] can't pull parse strings - BUG?
Adrian Thurston
thurs... at cs.queensu.ca
Mon Feb 5 18:25:54 UTC 2007
Ahhh, sorry, this example is broken. It's missing a "%% write init;" statement
in the init function (scan_init). Move the "write data" block up above
scan_init, add the "write init" inside scan_init, and also add an fbreak after
each ret_tok(TK_String), and your troubles should go away.
Note that it only worked before you added the strings by chance: the start
state happened to have id 0, which is the value the memset in scan_init
leaves in cs.
-Adrian
Jason wrote:
> I've finally gotten a little time to start working with the new
> pullscan example.
>
> Everything was going well until I tried to add string tokens. The .rl
> file (below) works as is, but uncomment the String rules and all hell
> breaks loose.
>
> Any pointers would be most welcome.
>
> --- TEST input file -------
> [target go: 124 with:(1..9) num:1,298,99 int:87 float:1,299.987]
>
> (list "some string")
> (list 'some other string')
>
> -------------- mod_pullscan.rl ------------
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
>
> #define BUFSIZE 4096
>
> typedef struct _Scanner {
> /* Scanner state. */
> int cs;
> int act;
> int have;
> int curline;
> char *tokstart;
> char *tokend;
> char *p;
> char *pe;
> FILE *file;
> int done;
>
> /* Token data */
> char *data;
> int len;
> int value;
> char *token_name;
> char buf[BUFSIZE];
> } Scanner;
>
>
> void scan_init( Scanner *s, FILE *file )
> {
> memset (s, '\0', sizeof(Scanner));
> s->curline = 1;
> s->file = file;
> }
>
> #define TK_NO_TOKEN (-1)
> #define TK_ERR 128
> #define TK_EOF 129
> #define TK_Identifier 130
> #define TK_Number 131
> #define TK_Keyword 132
> #define TK_String 133
>
>
> %%{
> machine Scanner;
> write data;
> }%%
>
> #define ret_tok( _tok ) token = _tok; s->data = s->tokstart; s->token_name = #_tok
> #define ret_char( _tok ) token = _tok; s->data = s->tokstart; s->token_name = "TK_Char"
>
> int scan( Scanner *s )
> {
> char *p = s->p;
> char *pe = s->pe;
> int token = TK_NO_TOKEN;
> int space, readlen;
>
> while ( 1 ) {
> if ( p == pe ) {
> printf("scanner: need more data\n");
>
> if ( s->tokstart == 0 )
> s->have = 0;
> else {
> /* There is data that needs to be shifted over. */
> printf("scanner: buffer broken mid token\n");
> s->have = pe - s->tokstart;
> memmove( s->buf, s->tokstart, s->have );
> s->tokend -= (s->tokstart-s->buf);
> s->tokstart = s->buf;
> }
>
> p = s->buf + s->have;
> space = BUFSIZE - s->have;
>
> if ( space == 0 ) {
> /* We filled up the buffer trying to scan a token. */
> printf("scanner: out of buffer space\n");
> return TK_ERR;
> }
>
> if ( s->done ) {
> printf("scanner: end of file\n");
> p[0] = 0;
> readlen = 1;
> }
> else {
> readlen = fread( p, 1, space, s->file );
> if ( readlen < space )
> s->done = 1;
> }
>
> pe = p + readlen;
> }
>
> %%{
> machine Scanner;
> access s->;
>
> newline = '\n' @{s->curline += 1;};
> any_count_line = any | newline;
>
> main := |*
>
> # Alphanumeric characters or underscore.
> alnum_u = alnum | '_';
>
> # Alpha characters or underscore.
> alpha_u = alpha | '_';
>
> ident = alpha_u alnum_u*;
>
> # Identifiers
> ident =>
> { ret_tok( TK_Identifier ); fbreak; };
>
> # Keywords
> ident ':' => {
> ret_tok (TK_Keyword); fbreak;
> };
>
> # Strings
> # "'" ( [^'\\] | /\\./ )* "'" => { ret_tok (TK_String); };
> # '"' ( [^"\\] | /\\./ )* '"' => { ret_tok (TK_String); };
>
> # Whitespace
> [ \t\n];
>
> # Number
> digit+ =>
> { ret_tok( TK_Number ); fbreak; };
>
> # EOF
> 0 =>
> { ret_tok( TK_EOF ); fbreak; };
>
> # Anything else
> any =>
> { ret_char( *p ); fbreak; };
>
> *|;
>
> write exec;
> }%%
>
> if ( s->cs == Scanner_error )
> return TK_ERR;
>
> if ( token != TK_NO_TOKEN ) {
> /* Save p and pe. fbreak does not advance p. */
> s->p = p + 1;
> s->pe = pe;
> s->len = s->p - s->data;
> return token;
> }
> }
> }
>
>
> int main (int argc, char** argv)
> {
> Scanner ss;
> int tok;
>
> scan_init(&ss, stdin);
>
> while ( 1 ) {
> tok = scan (&ss);
> if ( tok == TK_EOF ) {
> printf ("parser: EOF\n");
> break;
> }
> else if ( tok == TK_ERR ) {
> printf ("parser: ERR\n");
> break;
> }
> else {
> printf ("parser: %s(%d) \"", ss.token_name, tok);
> fwrite ( ss.data, 1, ss.len, stdout );
> printf ("\"\n" );
> }
> }
>
> return 0;
> }
>
>
>
More information about the ragel-users
mailing list