[ragel-users] ragel and memory usage
Adrian Thurston
thurs... at cs.queensu.ca
Sun Jan 21 06:00:15 UTC 2007
Hi, sorry I didn't notice the message text at the bottom. I'll have a look
at it to find out why it fails when adding the actions mentioned.
-Adrian
> Hi all,
>
> I'm trying to build a grammar for parsing SIP messages (as in rfc3261),
> but I have encountered a problem that I cannot solve - it seems that
> I've hit a state explosion, since ragel just keeps allocating memory
> and finally dies.
>
> Here's my grammar (sorry for pasting it here like this)
>
> --cut--
> %%{
> machine sip_parser;
>
> # Semantic actions referenced by the grammar below. In this listing every
> # body is empty except `done`, which executes `fbreak`.
> action mark { }
>
> # Embedded after the individual request-method literals in `Method`.
> action req_i { }
> action req_a { }
> action req_o { }
> action req_b { }
> action req_c { }
> action req_r { }
>
> # Request-line actions.
> action create_req { }
> action request_method { }
> action set_req_uri { }
>
> # Defined here but not embedded anywhere in the grammar shown.
> action done { fbreak; }
>
> # URI component actions.
> action uri_host { }
> action uri_port { }
> action uri_pass { }
> action uri_user { }
> action uri_scheme { }
> action uri_query { }
>
> # URI parameter actions.
> action add_param { }
> action add_param_value { }
>
> #### SIP PROTOCOL GRAMMAR
>
> # Single characters and line terminator.
> CRLF = "\r" "\n";
> SP = ' ';
> HTAB = "\t";
> DQUOTE = '"';
> LHEX = [0-9a-f];
>
> # Character sets shared by the URI rules.
> alphanum = alpha | digit;
> reserved = [;/?:@&=+$,];
> mark = "-" | [_.!~*'()];
> unreserved = alphanum | mark;
> escaped = "%" xdigit{2};
>
> # Linear whitespace. RFC 3261 section 25.1 defines
> #   LWS = [*WSP CRLF] 1*WSP   with   WSP = SP / HTAB
> # so tabs must be accepted as folding whitespace too, the same way HCOLON
> # already accepts them before the colon.
> WSP = (SP | HTAB);
> LWS = ((WSP* CRLF)? WSP+);
> SWS = (LWS?);
>
> # Header name/value separator: optional blanks, ":", optional whitespace.
> HCOLON = (WSP* ":" SWS);
>
> # Characters allowed in a token; `word` is built on the same set.
> token_char = alphanum | "-" | "." | "!" | "%" | "*" | "_" | "+" | "`" |
>     "'" | "~";
> token = token_char+;
> separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" |
>     DQUOTE | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HTAB;
> word = (token_char | "(" | ")" | "<" | ">" | ":" | "\\" | DQUOTE | "/" |
>     "[" | "]" | "?" | "{" | "}")+;
>
> # Punctuation with optional whitespace on the sides shown.
> STAR   = SWS "*" SWS;
> SLASH  = SWS "/" SWS;
> EQUAL  = SWS "=" SWS;
> LPAREN = SWS "(" SWS;
> RPAREN = SWS ")" SWS;
> RAQUOT = ">" SWS;
> LAQUOT = SWS "<";
> COMMA  = SWS "," SWS;
> SEMI   = SWS ";" SWS;
> COLON  = SWS ":" SWS;
> LDQUOT = SWS DQUOTE;
> RDQUOT = DQUOTE SWS;
>
> # Quoted strings and comments (RFC 3261 section 25.1).
> qdtext = (LWS | 0x21 | 0x23..0x5B | 0x5D..0x7E);
> quoted_pair = ("\\" (0x00..0x09 | 0x0B..0x0C | 0x0E..0x7F));
> quoted_string = (SWS DQUOTE (qdtext | quoted_pair)* DQUOTE);
> # ctext = %x21-27 / %x2A-5B / %x5D-7E / LWS. The upper bound of the first
> # range must be written in hex: a bare 27 is decimal (0x1B), which lies
> # below the 0x21 lower bound and makes the range inverted.
> ctext = (0x21..0x27 | 0x2A..0x5B | 0x5D..0x7E | LWS);
> # Non-recursive approximation of nested comments.
> comment_r = (LPAREN? (ctext | quoted_pair) RPAREN?);
> comment = (LPAREN (comment_r)* RPAREN);
>
> # Host, port and user-info components of a URI. `>mark` is embedded as an
> # entering action on each component and the `%uri_*` actions as leaving
> # actions.
> port = digit+;
> IPv4address = (digit{1,3} "." digit{1,3} "." digit{1,3} "."
> digit{1,3});
> toplabel = (alpha | (alpha (alphanum | "-")* alphanum));
> domainlabel = (alphanum | (alphanum (alphanum | "-")* alphanum));
> hostname = ((domainlabel ".")* toplabel "."?);
> host = (hostname | IPv4address);
> hostport = (host >mark %uri_host (":" port >mark %uri_port)?);
> password = ((unreserved | escaped | "&" | "=" | "+" | "$" | "," )*);
> user_unreserved = ("&" | "=" | "+" | "$" | "," | ";" | "?" | "/");
> user = ((unreserved | escaped | user_unreserved)+);
> # Note: userinfo itself consumes the trailing "@".
> userinfo = (user >mark %uri_user (":" password >mark %uri_pass)?
> "@");
>
> # Request method. Each of the six literal methods carries its own leaving
> # action; any token is also accepted through extension_method. The whole
> # alternation is wrapped with `mark` on entry and `request_method` on exit.
> extension_method = token;
> Method = (("INVITE" %req_i | "ACK" %req_a | "OPTIONS" %req_o | "BYE"
> %req_b | "CANCEL" %req_c | "REGISTER" %req_r | extension_method)) >mark
> %request_method;
>
> # URI headers ("?name=value&name=value").
> # RFC 3261: hnv-unreserved = "[" / "]" / "/" / "?" / ":" / "+" / "$".
> # The third member is the slash character; the ABNF alternation "/" must
> # not be transcribed as a literal "|".
> hnv_unreserved = ("[" | "]" | "/" | "?" | ":" | "+" | "$");
> hvalue = ((hnv_unreserved | unreserved | escaped)*);
> hname = ((hnv_unreserved | unreserved | escaped)+);
> header = (hname "=" hvalue);
> headers = ("?" header ("&" header)*);
> # URI parameters (";name" or ";name=value"). For every parameter the name
> # is bracketed by `mark` / `add_param` and the value, when present, by
> # `mark` / `add_param_value`.
> param_unreserved = ("[" | "]" | "/" | ":" | "&" | "+" | "$");
> paramchar = (param_unreserved | unreserved | escaped);
> pvalue = (paramchar+);
> pname = (paramchar+);
> other_param = (pname >mark %add_param ("=" pvalue >mark
> %add_param_value)?);
> lr_param = "lr" >mark %add_param;
> maddr_param = "maddr" >mark %add_param "=" host >mark
> %add_param_value;
> method_param = "method" >mark %add_param "=" Method >mark
> %add_param_value;
> ttl = (digit{1,3});
> ttl_param = "ttl" >mark %add_param "=" ttl >mark %add_param_value;
> other_user = token;
> user_param = "user" >mark %add_param "=" ("phone" | "ip" |
> other_user) >mark %add_param_value;
> other_transport = token;
> transport_param = ("transport" >mark %add_param "=" ("udp" | "tcp" |
> "sctp" | "tls" | other_transport) >mark %add_param_value);
> # The named parameters overlap with other_param (their names and values
> # are also valid pname/pvalue strings).
> uri_parameter = (transport_param | user_param | method_param |
> ttl_param | maddr_param | lr_param | other_param);
> uri_parameters = ((";" uri_parameter)*);
> # sip: and sips: URIs. The inline leaving action after the scheme literal
> # assigns the scheme string on m_data->m_uri.
> SIP_URI = ("sip:" %{ m_data->m_uri.scheme() = "sip"; } (userinfo)?
> hostport uri_parameters (headers)?);
> SIPS_URI = ("sips:" %{ m_data->m_uri.scheme() = "sips"; } (userinfo)?
> hostport uri_parameters (headers)?);
>
> # Media type and subtype tokens.
> x_token = "x-" token;
> ietf_token = token;
> extension_token = ietf_token | x_token;
> composite_type = "message" | "multipart" | extension_token;
> discrete_type =
>     "text" |
>     "image" |
>     "audio" |
>     "video" |
>     "application" |
>     extension_token;
> m_type = discrete_type | composite_type;
>
> # Media type parameters (attribute=value).
> m_value = token | quoted_string;
> m_attribute = token;
> m_parameter = m_attribute EQUAL m_value;
> iana_token = token;
> m_subtype = extension_token | iana_token;
>
> # Generic absolute URI (RFC 2396 syntax as referenced by RFC 3261).
> uric = (reserved | unreserved | escaped);
> query = (uric*);
> reg_name = (unreserved | escaped | "$" | "," | ";" | ":" | "@"
> | "&" | "=" | "+" )+;
> # RFC: srvr = [ [ userinfo "@" ] hostport ]. The userinfo rule in this
> # grammar already ends with "@", so it must not be followed by a second
> # "@" here (that would require "user@@host").
> srvr = ((userinfo)? hostport)?;
> authority = (srvr | reg_name);
> scheme = (alpha (alpha | digit | "+" | "-" | "." )*);
> pchar = (unreserved | escaped | ":" | "@" | "&" | "=" | "+"
> | "$" | ",");
> param = (pchar*);
> segment = (pchar* (";" param)*);
> path_segments = (segment ("/" segment)*);
> uric_no_slash = (unreserved | escaped | ";" | "?" | ":" | "@" | "&"
> | "=" | "+" | "$" | ",");
> opaque_part = (uric_no_slash uric*);
> abs_path = ("/" path_segments);
> net_path = ("//" authority (abs_path)?);
> # The query is bracketed by `mark` on entry and `uri_query` on exit.
> hier_part = ((net_path | abs_path) ("?" query >mark
> %uri_query)?);
> # The scheme is bracketed by `mark` on entry and `uri_scheme` on exit.
> absoluteURI = (scheme >mark %uri_scheme ":" (hier_part |
> opaque_part));
>
> # Generic parameters and the Accept header.
> gen_value = (token | host | quoted_string);
> generic_param = (token (EQUAL gen_value)?);
> qvalue = (("0" ("." digit{,3})?) | ("1" ("." ("0"){,3})?));
> accept_param = (("q" EQUAL qvalue) | generic_param);
> media_range = (("*" "/" "*" | ( m_type SLASH "*" ) | ( m_type SLASH
> m_subtype )) (SEMI m_parameter)*);
> accept_range = (media_range (SEMI accept_param)*);
> # RFC 3261: Accept = "Accept" HCOLON [ accept-range *(COMMA accept-range) ].
> # Only the range list is optional. Making the whole header optional would
> # let Accept match the empty string, so message_header would also match a
> # bare CRLF.
> Accept = ("Accept" HCOLON (accept_range (COMMA accept_range)*)?);
>
> # Accept-Encoding: optional comma-separated list of codings with parameters.
> content_coding = (token);
> codings = (content_coding | "*");
> encoding = (codings (SEMI accept_param)*);
> Accept_Encoding = ("Accept-Encoding" HCOLON (encoding (COMMA
> encoding)*)?);
>
> # Accept-Language: optional comma-separated list of language ranges.
> language_range = (((alpha{1,8} ("-" alpha{1,8})*) | "*" ));
> language = (language_range (SEMI accept_param)*);
> Accept_Language = ("Accept-Language" HCOLON (language (COMMA
> language)*)?);
>
> # Alert-Info: one or more <absoluteURI> entries with generic parameters.
> alert_param = (LAQUOT absoluteURI RAQUOT (SEMI generic_param
> )*);
> Alert_Info = ("Alert-Info" HCOLON alert_param (COMMA
> alert_param)*);
>
> # Allow: optional comma-separated list of methods. Method carries its own
> # embedded actions, so they are embedded here as well.
> Allow = ("Allow" HCOLON (Method (COMMA Method)*)?);
>
> # Authorization header: either "Digest" followed by a comma-separated list
> # of digest fields, or another scheme followed by generic auth parameters.
> opaque = ("opaque" EQUAL quoted_string);
> algorithm = ("algorithm" EQUAL ("MD5" | "MD5-sess" | token));
> realm_value = (quoted_string);
> realm = ("realm" EQUAL realm_value);
> auth_scheme = (token);
> auth_param_name = (token);
> auth_param = (auth_param_name EQUAL (token | quoted_string));
> other_response = (auth_scheme LWS auth_param (COMMA auth_param)*);
> # The request digest is exactly 32 lowercase hex digits in quotes.
> request_digest = (LDQUOT LHEX{32} RDQUOT);
> dresponse = ("response" EQUAL request_digest);
> nonce_value = (quoted_string);
> nonce = ("nonce" EQUAL nonce_value);
> # The nonce count is exactly 8 lowercase hex digits.
> nc_value = (LHEX{8});
> nonce_count = ("nc" EQUAL nc_value);
> cnonce_value = (nonce_value);
> cnonce = ("cnonce" EQUAL cnonce_value);
> qop_value = ("auth" | "auth-int" | token);
> message_qop = ("qop" EQUAL qop_value);
> digest_uri_value = (absoluteURI); # fixme
> digest_uri = ("uri" EQUAL LDQUOT digest_uri_value RDQUOT);
> username_value = (quoted_string);
> username = ("username" EQUAL username_value);
> # The named digest fields overlap with the generic auth_param alternative.
> dig_resp = (username | realm | nonce | digest_uri | dresponse
> | algorithm | cnonce | opaque | message_qop | nonce_count |
> auth_param);
> digest_response = (dig_resp (COMMA dig_resp)*);
> credentials = (("Digest" LWS digest_response) | other_response);
> Authorization = ("Authorization" HCOLON credentials);
>
> # Authentication-Info header: comma-separated list of ainfo fields.
> response_digest = (LDQUOT LHEX* RDQUOT);
> response_auth = ("rspauth" EQUAL response_digest);
> nextnonce = ("nextnonce" EQUAL nonce_value);
> ainfo = (nextnonce | message_qop | response_auth |
> cnonce | nonce_count);
> Authentication_Info = ("Authentication-Info" HCOLON ainfo (COMMA
> ainfo)*);
>
> # Call-ID header (long name or compact form "i"): word, optionally "@" word.
> callid = (word ("@" word)?);
> Call_ID = (( "Call-ID" | "i" ) HCOLON callid);
>
> # Call-Info header: one or more <absoluteURI> entries, each with optional
> # "purpose" or generic parameters.
> info_param = (("purpose" EQUAL ("icon" | "info" | "card" | token)) |
> generic_param);
> info = (LAQUOT absoluteURI RAQUOT (SEMI info_param)*);
> Call_Info = ("Call-Info" HCOLON info (COMMA info)*);
>
> # Contact header (long name or compact form "m"): "*" or a comma-separated
> # list of addresses, each with optional parameters.
> delta_seconds = (digit+);
> contact_extension = (generic_param);
> c_p_expires = ("expires" EQUAL delta_seconds);
> c_p_q = ("q" EQUAL qvalue);
> # RFC 3261: contact-params = c-p-q / c-p-expires / contact-extension.
> # contact_extension must be part of the alternation, otherwise any
> # parameter other than "q" and "expires" is rejected.
> contact_params = (c_p_q | c_p_expires | contact_extension);
> display_name = ((token LWS)* | quoted_string);
> addr_spec = (SIP_URI | SIPS_URI | absoluteURI);
> name_addr = ((display_name)? LAQUOT addr_spec RAQUOT);
> contact_param = ((name_addr | addr_spec) (SEMI contact_params)*);
> Contact = (("Contact" | "m" ) HCOLON (STAR | (contact_param
> (COMMA contact_param)*)));
>
> # Content-Disposition header: disposition type plus optional parameters.
> disp_extension_token = (token);
> other_handling = (token);
> handling_param = ("handling" EQUAL ( "optional" | "required" |
> other_handling));
> disp_param = (handling_param | generic_param);
> disp_type = ("render" | "session" | "icon" | "alert" |
> disp_extension_token);
> Content_Disposition = ("Content-Disposition" HCOLON disp_type (SEMI
> disp_param)*);
>
> # Content-Encoding header (long name or compact form "e").
> Content_Encoding = (("Content-Encoding" | "e" ) HCOLON
> content_coding (COMMA content_coding)*);
>
> # Content-Language header: comma-separated list of language tags.
> subtag = (alpha{1,8});
> primary_tag = (alpha{1,8});
> language_tag = (primary_tag ("-" subtag)*);
> Content_Language = ("Content-Language" HCOLON language_tag (COMMA
> language_tag)*);
>
> # Content-Length header (long name or compact form "l").
> Content_Length = (("Content-Length" | "l") HCOLON digit+);
>
> # Content-Type header (long name or compact form "c").
> media_type = (m_type SLASH m_subtype (SEMI m_parameter)*);
> Content_Type = (("Content-Type" | "c" ) HCOLON media_type);
>
> # CSeq header: sequence number, whitespace, then the method (with the
> # actions embedded in Method).
> CSeq = ("CSeq" HCOLON digit+ LWS Method);
>
> # Any one supported header line, terminated by CRLF.
> message_header = (Accept |
> Accept_Encoding |
> Accept_Language |
> Alert_Info |
> Allow |
> Authentication_Info |
> Authorization |
> Call_ID |
> Call_Info |
> Contact |
> Content_Disposition |
> Content_Encoding |
> Content_Language |
> Content_Length |
> Content_Type |
> CSeq) CRLF;
>
> # RFC 3261: SIP-Version = "SIP" "/" 1*DIGIT "." 1*DIGIT, so each version
> # component may have more than one digit.
> SIP_Version = "SIP" "/" digit+ "." digit+;
> Request_URI = SIP_URI | SIPS_URI | absoluteURI;
> # `create_req` runs when the method has been consumed and `set_req_uri`
> # when the request URI has been consumed.
> Request_Line = Method %create_req SP Request_URI %set_req_uri SP
> SIP_Version CRLF;
> # Request line, any number of header lines, then the empty line.
> Request = Request_Line (message_header)* CRLF;
>
> main := Request;
> }%%
>
> %% write data;
> --cut--
>
> I think that the problem is somewhere around this line:
>
> hier_part = ((net_path | abs_path) ("?" query >mark %uri_query)?);
>
> if I remove the ">mark %uri_query" part, ragel is able to complete the
> operation.
> Can anyone give me some clues what's gone bad?
>
> tia,
> d
>
>
> >
More information about the ragel-users
mailing list