[ragel-users] parser getting gigantic ?
M P
buserror at gmail.com
Thu Sep 2 14:41:36 UTC 2010
I decided to play with Ragel by making a parser for an extended
version of JSON.
After falling into every single pitfall possible, I managed to get the
whole thing working pretty well, but I just had a look at the
generated code and it reached 600KB total, using -G1 (!)
So, what am I doing wrong? I'm certain there's probably a lot of
stuff wrong with the way I made the parser, but really, I'm not sure
what, so having experts eyeball this would be very nice...
There are 2 different json parsers in the file; one is for the string
constants, one (the main one) is for the language proper...
/*
* IF YOU ARE LOOKING AT A .c FILE, YOU ARE LOOKING AT THE WRONG ONE
*
* This file is autogenerated from a .rl source file for the 'ragel'
* parser generator.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "json.h"
%%{
machine json_str;
# Ragel directive: emit the generated data for the json_str machine
# (the one executed inside json_parse_string) at this point of the file.
write data;
}%%
/*
 * Encode one code point as UTF-8 at 'dst'.
 *
 * dst       : destination buffer; the caller must guarantee enough room,
 *             no bounds check is made and no terminator is written.
 * inUnicode : code point to encode.
 *
 * Returns the position just past the last byte written.
 *
 * Values below 0x80 are stored as a single byte. Larger values get a lead
 * byte followed by 6-bit continuation bytes: 2 bytes up to 0x7ff, 3 bytes
 * up to 0xffff, 4 bytes up to 0x1fffff, and so on while the loop bound
 * allows. Surrogate halves are not given any special treatment.
 */
static char * json_append_utf8_glyph(
		char * dst,
		unsigned long inUnicode )
{
	unsigned char *cur = (unsigned char *)dst;

	if (!(inUnicode & ~0x7fUL)) {
		*cur++ = (unsigned char)inUnicode;
		return (char *)cur;	// that was easy
	}

	unsigned long currentMask = ~0x7ffUL;	// bits that do not fit in 2 bytes
	int bits = 6;				// shift of the lead byte payload
	int header = 5;				// lead byte marker is 0xfe << header

	while ((inUnicode & currentMask) && bits <= 24) {
		// One more continuation byte carries 6 bits, but the lead byte
		// gives up one payload bit to its longer marker, so the range
		// only grows by 5 bits (11 -> 16 -> 21 ...).
		currentMask <<= 5;
		bits += 6;
		header--;
	}

	// Lead byte: marker bits plus the topmost payload bits.
	*cur++ = (unsigned char)((0xfeu << header) | (inUnicode >> bits));

	// Continuation bytes, most significant group first.
	for (bits -= 6; bits >= 0; bits -= 6) {
		*cur++ = (unsigned char)(0x80 | ((inUnicode >> bits) & 0x3f));
	}
	return (char *)cur;
}
int json_parse_string(char * str, char *end, char * out)
{
char *p = str, *pe = end ? end : str + strlen( str ), *eof = pe;
int cs;
out = out ? out : str;
uint16_t u;
%%{
machine json_str;
xxdigit = (
([0-9] @{ u = (u << 4) | fc - '0'; }) |
([a-f] @{ u = (u << 4) | fc - 'a' + 0xa; }) |
([A-F] @{ u = (u << 4) | fc - 'A' + 0xa; })
);
utf16 = ( xxdigit{4} ) >{ u = 0; } @{ out = json_append_utf8_glyph(out, u); };
normal = any @{*out++ = fc;};
escape =
('\\' %{ *out++ = '\\'; } ) |
('t' %{ *out++ = '\t'; } ) |
('b' %{ *out++ = '\b'; } ) |
('f' %{ *out++ = '\f'; } ) |
('n' %{ *out++ = '\n'; } ) |
('r' %{ *out++ = '\r'; } ) |
('u' utf16 ) |
( normal -- [\\tbfntu] )
;
main := (
('\\' escape) |
( normal -- '\\' )
)*;
# Initialize and execute.
write init;
write exec;
}%%
*out = 0;
return 0;
}
%%{
machine json;
# Ragel directive: emit the generated data for the json machine
# (the one executed inside json_parse) at this point of the file.
write data;
}%%
int json_parse( json_driver_t *d, char * str )
{
char *p = str, *pe = str + strlen( str ), *eof = pe;
int cs;
int stack[32], top = 0;
int integer_sign; // for integer decode
char * float_start;
json_driver_value_t v;
uint32_t b64;
int b64_cnt;
%%{
machine json;
action obj_field_list_start { d->open_object(d); }
action obj_field_list_done { d->close_object(d); }
action obj_value_list_start { d->open_array(d); }
action obj_value_list_done { d->close_array(d); }
action obj_create_name { d->set_name(d, &v); }
action obj_set_flag { if (d->add_flag) d->add_flag(d, &v); }
action obj_set_string { d->set_value(d, json_driver_type_string, &v); }
action obj_set_integer { d->set_value(d, json_driver_type_integer, &v); }
action obj_set_float { d->set_value(d, json_driver_type_float, &v); }
action obj_set_hex { d->set_value(d, json_driver_type_hex, &v); }
action obj_set_true { v.u.v_bool = 1; d->set_value(d,
json_driver_type_bool, &v); }
action obj_set_false { v.u.v_bool = 0; d->set_value(d,
json_driver_type_bool, &v); }
action obj_set_null { d->set_value(d, json_driver_type_null, NULL); }
action obj_start_data { if (d->open_data) d->open_data(d); }
action obj_flush_data { if (d->add_data) for (int s=16,i = 0;
i<b64_cnt; i++,s-=8) d->add_data(d, (b64 >> s) & 0xff); }
action obj_end_data { if (d->close_data) d->close_data(d); }
W = [ \t\n]**;
#
# quoted or unquoted string
#
action str_init { v.u.v_str.start = v.u.v_str.end = fpc; }
action str_done { v.u.v_str.end = fpc; }
string = '"' ((([^"] | '\"')*) >str_init %str_done) '"';
ident = ((alpha | '_') (alnum | '_')*) >str_init %str_done;
#
# negative/positive Integer
#
action integer_init { v.u.v_int = 0; integer_sign = 1; }
action integer_minus { integer_sign = -1; }
action integer_digit { v.u.v_int = (v.u.v_int * 10) + (fc - '0'); }
action integer_done { v.u.v_int *= integer_sign; }
integer = (('-' @integer_minus | '+')? (digit+ @integer_digit))
>integer_init %integer_done;
#
# hex integer
#
xxdigit = (
([0-9] @{ v.u.v_int = (v.u.v_int << 4) | fc - '0'; }) |
([a-f] @{ v.u.v_int = (v.u.v_int << 4) | fc - 'a' + 0xa; }) |
([A-F] @{ v.u.v_int = (v.u.v_int << 4) | fc - 'A' + 0xa; })
);
hex = (('-' @integer_minus | '+')?( '0x' xxdigit+))
>integer_init %integer_done;
#
# float/double value
#
action float_init { float_start = fpc; }
action float_done { sscanf(float_start, "%lf", &v.u.v_float); }
#
# float values
#
float = (
('-' | '+')? digit* '.' digit+ [fd]?
) >float_init %float_done;
#
# base64 decoder
#
base64_char = (
([A-Z] @{ b64 = (b64 << 6) | (fc - 'A'); }) |
([a-z] @{ b64 = (b64 << 6) | (fc - 'a' + 26 ); }) |
([0-9] @{ b64 = (b64 << 6) | (fc - '0' + 52 ); }) |
('+' @{ b64 = (b64 << 6) | 62; }) |
('/' @{ b64 = (b64 << 6) | 63; })
);
base64_pad = '=' @{ b64 = (b64 << 6); };
base64_four = (
base64_char base64_char base64_char base64_char
) %{ b64_cnt = 3; } %obj_flush_data;
base64_padder = (
base64_char base64_char
(
(( base64_char base64_pad )
%{ b64_cnt = 2; } ) |
(( base64_pad base64_pad )
%{ b64_cnt = 1; } )
)
) %obj_flush_data;
base64 = ( base64_four** (base64_four | base64_padder) ) >{b64 = 0;}
%err{ printf("### base64 Error : '%s'\n", p); };
#
# JSON value, extended
#
json_value = (
(string %obj_set_string) |
(integer %obj_set_integer) |
(hex %obj_set_hex ) |
(float %obj_set_float) |
('true' %obj_set_true) |
('false' %obj_set_false) |
('null' %obj_set_null) |
('{' @{ fhold; fcall obj_field_list; } ) |
('[' @{ fhold; fcall json_value_list; } ) |
(('%' (W base64)* W '%') >obj_start_data %obj_end_data)
);
json_value_list := (
'[' (
'' |
(W json_value (W ',' W json_value)* )
) W ','? W ']'
) >obj_value_list_start @obj_value_list_done @{ fret; }
%err{ printf("### Array[%d] Error : '%s'\n", top, p); };
obj_field_flag = ( ident ) %obj_set_flag;
obj_field_flags = (
'(' W obj_field_flag (W ',' W obj_field_flag)** ')'
);
obj_field = ((string | ident) %obj_create_name) W obj_field_flags? W
':' W json_value;
obj_field_list := (
'{' (
'' |
(W obj_field (W ',' W obj_field)** )
) W ','? W '}'
) >obj_field_list_start @obj_field_list_done @{ fret; }
%err{ printf("### Object[%d] Error : '%s'\n", top, p); };
main := (
W json_value
) %err{ printf("### JSON Error : '%s'\n", p); };
# Initialize and execute.
write init;
write exec;
}%%
return 0;
};
#ifdef JSON_TEST_UNIT
/* Test driver: print the field name held in 'v' as a quoted key. */
static void d_set_name(struct json_driver_t *d,
		json_driver_value_t * v)
{
	const char *name = v->u.v_str.start;
	int width = v->u.v_str.end - name;

	// width doubles as minimum and maximum so exactly the slice is printed
	printf("\"%*.*s\": ", width, width, name);
}
/* Test driver: print the opening bracket of an array. */
static void d_open_array(struct json_driver_t *d)
{
	fputs("[", stdout);
	fflush(stdout);
}
/* Test driver: print the opening brace of an object. */
static void d_open_object(struct json_driver_t *d)
{
	fputs("{", stdout);
	fflush(stdout);
}
/*
 * Test driver: print one value according to 'type'.
 *
 * 'v' is not read for json_driver_type_null. For strings, 'v' holds the
 * raw [start, end) slice, which is decoded with json_parse_string()
 * before printing.
 */
static void d_set_value(struct json_driver_t *d,
		int type,
		json_driver_value_t * v)
{
	switch (type) {
		case json_driver_type_null:
			printf("null, ");
			break;
		case json_driver_type_bool:
			printf("%s, ", v->u.v_bool ? "true" : "false");
			break;
		case json_driver_type_integer:
			printf("%d, ", (int)v->u.v_int);
			break;
		case json_driver_type_hex:
			// %x expects an unsigned argument
			printf("0x%x, ", (unsigned int)v->u.v_int);
			break;
		case json_driver_type_float:
			printf("%f, ", (float)v->u.v_float);
			break;
		case json_driver_type_string: {
			// Decoding only ever shortens the text (every escape is at
			// least as long as what it produces), so the length of the
			// raw slice plus a terminator is always enough. A fixed
			// 256 byte buffer overflowed on longer strings.
			size_t raw_len = (size_t)(v->u.v_str.end - v->u.v_str.start);
			char *buf = malloc(raw_len + 1);
			if (buf) {
				json_parse_string(v->u.v_str.start, v->u.v_str.end, buf);
				printf("\"%s\": ", buf);
				free(buf);
			}
		}	break;
		default:
			break;
	}
	fflush(stdout);
}
/* Test driver: print the closing bracket of an array plus a separator. */
static void d_close_array(struct json_driver_t *d)
{
	fputs("],", stdout);
	fflush(stdout);
}
/* Test driver: print the closing brace of an object plus a separator. */
static void d_close_object(struct json_driver_t *d)
{
	fputs("},", stdout);
	fflush(stdout);
}
/* Test driver: print the marker that opens a binary data value. */
static void d_open_data(struct json_driver_t *d)
{
	// the doubled percent sign prints as a single one
	printf("%% '");
	fflush(stdout);
}
/* Test driver: print one decoded data byte as a raw character. */
static void d_add_data(struct json_driver_t *d, uint8_t data)
{
	putchar(data);
	fflush(stdout);
}
/* Test driver: print the marker that closes a binary data value. */
static void d_close_data(struct json_driver_t *d)
{
	// the doubled percent sign prints as a single one
	printf("' %%,");
	fflush(stdout);
}
/*
 * Driver used by the test program: every callback prints what it is given.
 * Members not listed here are left zero initialised.
 */
json_driver_t driver = {
	/* containers */
	.open_object = d_open_object,
	.close_object = d_close_object,
	.open_array = d_open_array,
	.close_array = d_close_array,

	/* names and values */
	.set_name = d_set_name,
	.set_value = d_set_value,

	/* binary data */
	.open_data = d_open_data,
	.add_data = d_add_data,
	.close_data = d_close_data,
};
/* Test program: parse every command line argument with the printing driver. */
int main(int argc, char * argv[])
{
	int idx = 1;	// argv[0] is skipped

	while (idx < argc) {
		char *text = argv[idx];

		printf("### parsing '%s'\n", text);
		json_parse(&driver, text);
		printf("\n");
		idx++;
	}
	return 0;
}
#endif
_______________________________________________
ragel-users mailing list
ragel-users at complang.org
http://www.complang.org/mailman/listinfo/ragel-users
More information about the ragel-users
mailing list