can't pull parse strings - BUG?

Jason jason2... at jasonjobe.com
Mon Feb 5 17:51:18 UTC 2007


I've finally gotten a little time to start working with the new
pullscan example.

Everything was going well until I tried to add string tokens. The .rl
file (below) works as is, but uncomment the String rules and all hell
breaks loose.

Any pointers would be most welcome.

---      TEST input file -------
[target go: 124 with:(1..9) num:1,298,99 int:87 float:1,299.987]

(list "some string")
(list 'some other string')

-------------- mod_pullscan.rl ------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Capacity, in bytes, of the input buffer embedded in every Scanner. */
#define BUFSIZE 4096

/*
 * All state for one pull-scanner instance.
 *
 * The Ragel section inside scan() declares "access s->", so the machine
 * variables (cs, act, tokstart, tokend) must keep exactly these names.
 */
typedef struct _Scanner {
	/* --- scanner state --- */
	int cs;			/* current machine state; scan() checks it against Scanner_error */
	int act;		/* Ragel scanner bookkeeping; not touched by the hand-written C */
	int have;		/* bytes of an unfinished token kept at the front of buf on refill */
	int curline;		/* line counter; scan_init() starts it at 1 */
	char *tokstart;		/* start of the token being matched; 0 when none is in progress */
	char *tokend;		/* end of the token being matched */
	char *p;		/* read position saved between scan() calls */
	char *pe;		/* end of valid data in buf, saved between scan() calls */
	FILE *file;		/* input stream that scan() reads with fread() */
	int done;		/* set once fread() has returned fewer bytes than requested */

	/* --- data describing the token most recently returned --- */
	char *data;		/* first character of the token, pointing into buf */
	int len;		/* token length in bytes */
	int value;		/* not read or written by the code in this file */
	char *token_name;	/* printable token name, set by ret_tok / ret_char */
	char buf[BUFSIZE];	/* raw input buffer */
} Scanner;


/*
 * Prepare a scanner for its first scan() call.
 *
 * s:    scanner to reset; every byte of it is cleared first.
 * file: stream that scan() will read input from.
 *
 * After the call all fields are zero except curline (1) and file.
 *
 * NOTE(review): no Ragel "write init" is executed here, so cs keeps the
 * zero written by memset -- confirm that this matches the generated
 * start state of the Scanner machine.
 */
void scan_init( Scanner *s, FILE *file )
{
	memset( s, 0, sizeof *s );
	s->file = file;
	s->curline = 1;
}

/*
 * Token identifiers returned by scan().
 *
 * Named tokens start at 128; scan() also returns raw character values
 * (via ret_char) for input that matches no named rule.  TK_NO_TOKEN is
 * the "nothing produced yet" marker used inside scan().
 */
enum {
	TK_NO_TOKEN   = -1,
	TK_ERR        = 128,
	TK_EOF        = 129,
	TK_Identifier = 130,
	TK_Number     = 131,
	TK_Keyword    = 132,
	TK_String     = 133
};


/*
 * Ragel section: "write data" asks Ragel to emit the static data for the
 * machine named Scanner at this point in the generated C file.  Per the
 * Ragel guide this includes the machine's named constants; scan() relies
 * on one of them, Scanner_error.
 */
%%{
	machine Scanner;
	write data;
}%%

/*
 * Helpers for the scanner actions inside scan().  Both expect a local
 * `int token` and a scanner pointer `s` to be in scope.
 *
 * ret_tok(_tok):  select _tok as the token to return, remember where the
 *                 token text starts, and record the spelling of the
 *                 argument (e.g. "TK_Number") as the token name.
 * ret_char(_tok): same, but the name is always "TK_Char"; used when the
 *                 token value is a raw input character.
 *
 * Each #define must sit on one logical line (hence the backslashes): a
 * line break inside "s->token_name" ends the macro early.  The
 * do { } while (0) wrapper makes each use behave as a single statement.
 */
#define ret_tok( _tok ) \
	do { \
		token = (_tok); \
		s->data = s->tokstart; \
		s->token_name = #_tok; \
	} while (0)
#define ret_char( _tok ) \
	do { \
		token = (_tok); \
		s->data = s->tokstart; \
		s->token_name = "TK_Char"; \
	} while (0)

/*
 * Pull the next token from the scanner's input stream.
 *
 * Resumes from the position saved in s->p / s->pe, refilling s->buf from
 * s->file whenever the buffered data is exhausted, and runs the embedded
 * Ragel scanner until one of its rules selects a token.
 *
 * Returns a TK_* identifier, or the raw character value for input that
 * matches no named rule.  TK_ERR is returned when a single token fills
 * the whole buffer or when the machine reaches Scanner_error.
 *
 * On a token return, s->data points at the token text inside s->buf,
 * s->len is its length and s->token_name its printable name.  The text
 * lives in s->buf, which a later call may shift or overwrite.
 *
 * Progress messages ("scanner: ...") are printed to stdout.
 *
 * NOTE(review): the two commented-out string rules call ret_tok without
 * "fbreak;".  Every active rule that produces a token uses fbreak to
 * leave the machine, so re-enabling the string rules as written would
 * presumably let scanning continue past the string and overwrite the
 * token -- verify before uncommenting.
 *
 * NOTE(review): `newline` (which bumps s->curline) and `any_count_line`
 * are defined but not referenced by any pattern in `main`, so curline
 * does not appear to be updated -- confirm.
 */
int scan( Scanner *s )
{
	char *p = s->p;
	char *pe = s->pe;
	int token = TK_NO_TOKEN;
	int space, readlen;

	while ( 1 ) {
		/* All buffered input has been consumed: refill before scanning. */
		if ( p == pe ) {
			printf("scanner: need more data\n");

			/* No token in progress: the whole buffer can be reused. */
			if ( s->tokstart == 0 )
				s->have = 0;
			else {
				/* There is data that needs to be shifted over. */
				printf("scanner: buffer broken mid token\n");
				s->have = pe - s->tokstart;
				memmove( s->buf, s->tokstart, s->have );
				/* Keep tokend at the same offset from the moved tokstart. */
				s->tokend -= (s->tokstart-s->buf);
				s->tokstart = s->buf;
			}

			/* New data goes right after the bytes that were kept. */
			p = s->buf + s->have;
			space = BUFSIZE - s->have;

			if ( space == 0 ) {
				/* We filled up the buffer trying to scan a token. */
				printf("scanner: out of buffer space\n");
				return TK_ERR;
			}

			if ( s->done ) {
				/* Input is exhausted: feed a single NUL byte, which the
				 * "0 =>" rule below turns into TK_EOF. */
				printf("scanner: end of file\n");
				p[0] = 0;
				readlen = 1;
			}
			else {
				readlen = fread( p, 1, space, s->file );
				/* A short read is taken to mean there is no more input. */
				if ( readlen < space )
					s->done = 1;
			}

			pe = p + readlen;
		}

		%%{
			machine Scanner;
			access s->;

			newline = '\n' @{s->curline += 1;};
			any_count_line = any | newline;

			main := |*

			# Alpha numberic characters or underscore.
			alnum_u = alnum | '_';

			# Alpha charactres or underscore.
			alpha_u = alpha | '_';

			ident = alpha_u alnum_u*;

			# Identifiers
			ident =>
				{ ret_tok( TK_Identifier ); fbreak; };

			# Keywords
			ident ':' => {
				ret_tok (TK_Keyword); fbreak;
			};

	# Strings
#			"'" ( [^'\\] | /\\./ )* "'" => { ret_tok (TK_String); };
#			'"' ( [^"\\] | /\\./ )* '"' => { ret_tok (TK_String); };

			# Whitespace
			[ \t\n];

			# Number
			digit+ =>
				{ ret_tok( TK_Number ); fbreak; };

			# EOF
			0 =>
				{ ret_tok( TK_EOF ); fbreak; };

			# Anything else
			any =>
				{ ret_char( *p ); fbreak; };

			*|;

			write exec;
		}%%

		if ( s->cs == Scanner_error )
			return TK_ERR;

		/* A rule selected a token; otherwise loop around and refill. */
		if ( token != TK_NO_TOKEN ) {
			/* Save p and pe. fbreak does not advance p. */
			s->p = p + 1;
			s->pe = pe;
			/* Token text runs from s->data up to the saved read position. */
			s->len = s->p - s->data;
			return token;
		}
	}
}


/*
 * Test driver: tokenize standard input and print one line per token,
 * stopping at the first TK_EOF or TK_ERR.  The command-line arguments
 * are not used.  Always returns 0.
 */
int main (int argc, char** argv)
{
	Scanner ss;
	int tok;

	scan_init( &ss, stdin );

	/* Report every ordinary token: its name, numeric id and raw text. */
	while ( (tok = scan( &ss )) != TK_EOF && tok != TK_ERR ) {
		printf( "parser: %s(%d) \"", ss.token_name, tok );
		fwrite( ss.data, 1, ss.len, stdout );
		printf( "\"\n" );
	}

	/* The loop only ends on one of the two terminating tokens. */
	fputs( tok == TK_EOF ? "parser: EOF\n" : "parser: ERR\n", stdout );

	return 0;
}



More information about the ragel-users mailing list