ragel and memory usage

Damir Nedzibovic dam... at gmail.com
Sat Jan 20 21:40:53 UTC 2007


Hi all,

I'm trying to build a grammar for parsing SIP messages (as specified in
RFC 3261), but I have encountered a problem that I cannot solve. It seems
that I've hit a state explosion: ragel just keeps allocating memory
and finally dies.

Here's my grammar (sorry for pasting it here like this)

--cut--
%%{
  machine sip_parser;

  # User actions referenced by the grammar. Every body is empty in this
  # posting except `done`.

  # Entering action embedded as ">mark" on many sub-expressions.
  action mark { }

  # Leaving actions embedded on the literal method names in Method:
  # INVITE, ACK, OPTIONS, BYE, CANCEL and REGISTER, in that order.
  action req_i { }
  action req_a { }
  action req_o { }
  action req_b { }
  action req_c { }
  action req_r { }

  # Leaving action on Method inside Request_Line.
  action create_req { }

  # Leaving action on the Method alternation as a whole.
  action request_method { }

  # Leaving action on host inside hostport.
  action uri_host { }

  # Executes fbreak; not embedded in any rule of this grammar.
  action done { fbreak; }

  # Leaving actions on port (hostport), password and user (userinfo).
  action uri_port { }
  action uri_pass { }
  action uri_user { }

  # Leaving action on scheme inside absoluteURI.
  action uri_scheme { }

  # Leaving action on Request_URI inside Request_Line.
  action set_req_uri { }

  # Leaving actions on URI parameter names and values respectively.
  action add_param { }
  action add_param_value { }

  # Leaving action on query inside hier_part.
  action uri_query { }

#### SIP PROTOCOL GRAMMAR

  # Single-character building blocks, written as character codes/classes.
  CRLF = "\r" "\n";
  SP = 0x20;
  HTAB = 0x09;
  DQUOTE = 0x22;
  LHEX = [0-9a-f];

  alphanum = alpha | digit;
  reserved = [;/?:@&=+$,];
  mark = [_.!~*()] | "-" | "'";
  unreserved = alphanum | mark;
  # Percent-escape: "%" followed by exactly two hex digits.
  escaped = "%" xdigit{2};

  # Linear whitespace: optional fold (whitespace then CRLF) followed by at
  # least one whitespace character. RFC 3261 defines LWS over WSP, which is
  # SP or HTAB, so tabs are accepted here just as HCOLON already accepts them.
  LWS = (((" " | "\t")* CRLF)? (" " | "\t")+);
  # Optional linear whitespace.
  SWS = (LWS?);

  # Header-name separator: optional blanks, a colon, optional whitespace.
  HCOLON = ((" " | "\t")* ":" SWS);

  # One or more token characters.
  token = (alphanum | [.!%*_+`~] | "-" | "'")+;
  # Separator characters (not referenced by any other rule in this grammar).
  separators = [()<>@,;:/?=] | "\\" | DQUOTE | "[" | "]" | "{" | "}"
             | SP | HTAB;
  # Token characters plus a set of punctuation; used by callid.
  word = (alphanum | [.!%*_+`~()<>:/?] | "-" | "'" | "\\" | DQUOTE
        | "[" | "]" | "{" | "}")+;

  # Punctuation with optional surrounding whitespace.
  STAR   = SWS "*" SWS;
  SLASH  = SWS "/" SWS;
  EQUAL  = SWS "=" SWS;
  LPAREN = SWS "(" SWS;
  RPAREN = SWS ")" SWS;
  # Angle and double quotes only absorb whitespace on their outer side.
  RAQUOT = ">" SWS;
  LAQUOT = SWS "<";
  COMMA  = SWS "," SWS;
  SEMI   = SWS ";" SWS;
  COLON  = SWS ":" SWS;
  LDQUOT = SWS DQUOTE;
  RDQUOT = DQUOTE SWS;

  # Text allowed inside a quoted string: LWS or any printable ASCII
  # character except the double quote (0x22) and backslash (0x5C).
  qdtext = (LWS | 0x21 | 0x23..0x5B | 0x5D..0x7E);
  # Backslash escape of any 7-bit character except LF (0x0A) and CR (0x0D).
  quoted_pair = ("\\" (0x00..0x09 | 0x0B..0x0C | 0x0E..0x7F));
  quoted_string = (SWS DQUOTE (qdtext | quoted_pair)* DQUOTE);
  # Text allowed inside a comment: printable ASCII except "(" (0x28),
  # ")" (0x29) and backslash (0x5C). The first range's upper bound must be
  # written in hex (0x27); a bare 27 is decimal and lies below 0x21.
  ctext    = (0x21..0x27 | 0x2A..0x5B | 0x5D..0x7E | LWS);
  # Non-recursive approximation of nested comments.
  comment_r = (LPAREN? (ctext | quoted_pair) RPAREN?);
  comment  = (LPAREN (comment_r)* RPAREN);

  # Host, port and user-info parts of a URI, followed by the request method.
  # ">name" embeds an entering action and "%name" a leaving action on the
  # sub-expression immediately to its left.
  port = digit+;
  IPv4address =  (digit{1,3} "." digit{1,3} "." digit{1,3} "."
digit{1,3});
  # Labels start and end with an alphanumeric; toplabel must start with a
  # letter.
  toplabel = (alpha | (alpha (alphanum | "-")* alphanum));
  domainlabel = (alphanum | (alphanum (alphanum | "-")* alphanum));
  hostname = ((domainlabel ".")* toplabel "."?);
  host = (hostname | IPv4address);
  # mark/uri_host wrap the host; mark/uri_port wrap the optional port.
  hostport = (host >mark %uri_host (":" port >mark %uri_port)?);
  password = ((unreserved | escaped | "&" | "=" | "+" | "$" | "," )*);
  user_unreserved = ("&" | "=" | "+" | "$" | "," | ";" | "?" | "/");
  user = ((unreserved | escaped | user_unreserved)+);
  # Note that userinfo itself consumes the trailing "@".
  userinfo = (user >mark %uri_user (":" password >mark %uri_pass)?
"@");

  # Any token is accepted as a method; the six literal names additionally
  # fire their own req_* leaving action. mark/request_method wrap the whole
  # alternation.
  extension_method = token;
  Method = (("INVITE" %req_i | "ACK" %req_a | "OPTIONS" %req_o | "BYE"
%req_b | "CANCEL" %req_c | "REGISTER" %req_r | extension_method)) >mark
%request_method;

  # Characters allowed unescaped in URI header names and values. RFC 3261
  # lists "/" here (as param_unreserved does), not the pipe character.
  hnv_unreserved = ("[" | "]" | "/" | "?" | ":" | "+" | "$");
  # A header value may be empty; a header name may not.
  hvalue = ((hnv_unreserved | unreserved | escaped)*);
  hname = ((hnv_unreserved | unreserved | escaped)+);
  header = (hname "=" hvalue);
  # "?" introduces the header list; "&" separates entries.
  headers = ("?" header ("&" header)*);
  # URI parameters. Each parameter name is wrapped in mark/add_param and each
  # value in mark/add_param_value.
  param_unreserved = ("[" | "]" | "/" | ":" | "&" | "+" | "$");
  paramchar = (param_unreserved | unreserved | escaped);
  pvalue = (paramchar+);
  pname = (paramchar+);
  # Generic name[=value] parameter. pname accepts any run of paramchar, so
  # it also matches the literal names of the specific parameters below.
  other_param = (pname >mark %add_param ("=" pvalue >mark
%add_param_value)?);
  lr_param = "lr" >mark %add_param;
  maddr_param = "maddr" >mark %add_param "=" host >mark
%add_param_value;
  method_param = "method" >mark %add_param "=" Method >mark
%add_param_value;
  ttl = (digit{1,3});
  ttl_param = "ttl" >mark %add_param "=" ttl >mark %add_param_value;
  other_user = token;
  user_param = "user" >mark %add_param "=" ("phone" | "ip" |
other_user) >mark %add_param_value;
  other_transport = token;
  transport_param = ("transport" >mark %add_param "=" ("udp" | "tcp" |
"sctp" | "tls" | other_transport) >mark %add_param_value);
  uri_parameter = (transport_param | user_param | method_param |
ttl_param | maddr_param | lr_param | other_param);
  # Zero or more ";"-prefixed parameters.
  uri_parameters = ((";" uri_parameter)*);
  # The inline leaving action on the scheme literal assigns the scheme
  # string through m_data, which is declared in host code outside this
  # grammar.
  SIP_URI = ("sip:" %{ m_data->m_uri.scheme() = "sip"; } (userinfo)?
hostport uri_parameters (headers)?);
  SIPS_URI = ("sips:" %{ m_data->m_uri.scheme() = "sips"; } (userinfo)?
hostport uri_parameters (headers)?);

  # Media type tokens. Every alternative list ends in a plain token, so the
  # literal names are only special cases of the generic form.
  x_token = "x-" token;
  ietf_token = token;
  extension_token = ietf_token | x_token;
  composite_type = "message"
                 | "multipart"
                 | extension_token;
  discrete_type = "text"
                | "image"
                | "audio"
                | "video"
                | "application"
                | extension_token;
  m_type = discrete_type | composite_type;

  # Media parameter: attribute "=" value, with optional whitespace around "=".
  m_value = token | quoted_string;
  m_attribute = token;
  m_parameter = m_attribute EQUAL m_value;
  iana_token = token;
  m_subtype = extension_token | iana_token;

  # Generic (non-SIP) absolute URI.
  uric           = (reserved | unreserved | escaped);
  query          = (uric*);
  reg_name       = (unreserved | escaped | "$" | "," | ";" | ":" | "@"
                    | "&" | "=" | "+" )+;
  # userinfo already ends with "@", so it must not be followed by a second
  # literal "@" here; otherwise "user@@host" would be required.
  srvr           = (userinfo? hostport)?;
  authority      = (srvr | reg_name);
  scheme         = (alpha (alpha | digit | "+" | "-" | "." )*);
  pchar          = (unreserved | escaped | ":" | "@" | "&" | "=" | "+"
                    | "$" | ",");
  param          = (pchar*);
  segment        = (pchar* (";" param)*);
  path_segments  = (segment ("/" segment)*);
  uric_no_slash  = (unreserved | escaped | ";" | "?" | ":" | "@" | "&"
                    | "=" | "+" | "$" | ",");
  opaque_part    = (uric_no_slash uric*);
  abs_path       = ("/" path_segments);
  net_path       = ("//" authority (abs_path)?);
  # mark/uri_query wrap the optional query that follows "?".
  hier_part      = ((net_path | abs_path) ("?" query >mark %uri_query)?);
  # mark/uri_scheme wrap the scheme in front of ":".
  absoluteURI    = (scheme >mark %uri_scheme ":" (hier_part | opaque_part));

  # Generic header parameter: token, optionally "=" token/host/quoted string.
  gen_value = (token | host | quoted_string);
  generic_param = (token (EQUAL gen_value)?);
  # Quality value: 0 or 1 with up to three decimals (only zeros after "1.").
  qvalue = (("0" ("." digit{,3})?) | ("1" ("." ("0"){,3})?));
  accept_param = (("q" EQUAL qvalue) | generic_param);
  media_range = (("*" "/" "*" | ( m_type SLASH "*" ) | ( m_type SLASH
                 m_subtype )) (SEMI m_parameter)*);
  accept_range = (media_range (SEMI accept_param)*);
  # Only the list of ranges is optional. The header name and HCOLON are
  # mandatory, so Accept never matches the empty string (which would let
  # message_header match a bare CRLF).
  Accept = ("Accept" HCOLON (accept_range (COMMA accept_range)*)?);

  # Accept-Encoding: optional comma-separated list of codings with params.
  content_coding = token;
  codings = "*" | content_coding;
  encoding = codings (SEMI accept_param)*;
  Accept_Encoding = "Accept-Encoding" HCOLON
                    (encoding (COMMA encoding)*)?;

  # Accept-Language: "*" or dash-separated runs of 1 to 8 letters.
  language_range = "*" | (alpha{1,8} ("-" alpha{1,8})*);
  language = language_range (SEMI accept_param)*;
  Accept_Language = "Accept-Language" HCOLON
                    (language (COMMA language)*)?;

  # Alert-Info: one or more <absoluteURI> entries with generic parameters.
  alert_param = LAQUOT absoluteURI RAQUOT (SEMI generic_param)*;
  Alert_Info = "Alert-Info" HCOLON alert_param (COMMA alert_param)*;

  # Allow: optional comma-separated list of methods.
  Allow = "Allow" HCOLON (Method (COMMA Method)*)?;

  # Digest credential fields; each is a name, EQUAL, and a value.
  opaque = "opaque" EQUAL quoted_string;
  algorithm = "algorithm" EQUAL ("MD5" | "MD5-sess" | token);
  realm_value = quoted_string;
  realm = "realm" EQUAL realm_value;
  auth_scheme = token;
  auth_param_name = token;
  auth_param = auth_param_name EQUAL (token | quoted_string);
  # Non-Digest credentials: scheme, whitespace, then a parameter list.
  other_response = auth_scheme LWS auth_param (COMMA auth_param)*;
  # Exactly 32 lowercase hex digits between double quotes.
  request_digest = LDQUOT LHEX{32} RDQUOT;
  dresponse = "response" EQUAL request_digest;
  nonce_value = quoted_string;
  nonce = "nonce" EQUAL nonce_value;
  # Exactly 8 lowercase hex digits.
  nc_value = LHEX{8};
  nonce_count = "nc" EQUAL nc_value;
  cnonce_value = nonce_value;
  cnonce = "cnonce" EQUAL cnonce_value;
  qop_value = "auth" | "auth-int" | token;
  message_qop = "qop" EQUAL qop_value;
  digest_uri_value = absoluteURI; # fixme
  digest_uri = "uri" EQUAL LDQUOT digest_uri_value RDQUOT;
  username_value = quoted_string;
  username = "username" EQUAL username_value;
  dig_resp = username
           | realm
           | nonce
           | digest_uri
           | dresponse
           | algorithm
           | cnonce
           | opaque
           | message_qop
           | nonce_count
           | auth_param;
  digest_response = dig_resp (COMMA dig_resp)*;
  credentials = ("Digest" LWS digest_response) | other_response;
  Authorization = "Authorization" HCOLON credentials;

  # Authentication-Info: one or more comma-separated ainfo fields.
  response_digest = LDQUOT LHEX* RDQUOT;
  response_auth = "rspauth" EQUAL response_digest;
  nextnonce = "nextnonce" EQUAL nonce_value;
  ainfo = nextnonce
        | message_qop
        | response_auth
        | cnonce
        | nonce_count;
  Authentication_Info = "Authentication-Info" HCOLON ainfo (COMMA ainfo)*;

  # Call-ID (compact form "i"): word, optionally followed by "@" word.
  callid = word ("@" word)?;
  Call_ID = ("Call-ID" | "i") HCOLON callid;

  # Call-Info: one or more <absoluteURI> entries with info parameters.
  info_param = ("purpose" EQUAL ("icon" | "info" | "card" | token))
             | generic_param;
  info = LAQUOT absoluteURI RAQUOT (SEMI info_param)*;
  Call_Info = "Call-Info" HCOLON info (COMMA info)*;

  # Contact header (compact form "m").
  delta_seconds      = (digit+);
  contact_extension  = (generic_param);
  c_p_expires    = ("expires" EQUAL delta_seconds);
  c_p_q          = ("q" EQUAL qvalue);
  # RFC 3261 contact-params also allows arbitrary extension parameters;
  # contact_extension was defined above but had been left out of this list.
  contact_params = (c_p_q | c_p_expires | contact_extension);
  # Either a (possibly empty) run of tokens each followed by LWS, or a
  # quoted string.
  display_name   = ((token LWS)* | quoted_string);
  addr_spec      = (SIP_URI | SIPS_URI | absoluteURI);
  name_addr      = ((display_name)? LAQUOT addr_spec RAQUOT);
  contact_param  = ((name_addr | addr_spec) (SEMI contact_params)*);
  # Value is either "*" or a comma-separated list of contacts.
  Contact     = (("Contact" | "m" ) HCOLON (STAR | (contact_param
                 (COMMA contact_param)*)));

  # Content-Disposition: disposition type followed by ";" parameters.
  disp_extension_token = token;
  other_handling = token;
  handling_param = "handling" EQUAL ("optional" | "required" | other_handling);
  disp_param = handling_param | generic_param;
  disp_type = "render"
            | "session"
            | "icon"
            | "alert"
            | disp_extension_token;
  Content_Disposition = "Content-Disposition" HCOLON disp_type
                        (SEMI disp_param)*;

  # Content-Encoding (compact form "e"): one or more codings.
  Content_Encoding = ("Content-Encoding" | "e") HCOLON
                     content_coding (COMMA content_coding)*;

  # Content-Language: tags made of dash-separated runs of 1 to 8 letters.
  subtag = alpha{1,8};
  primary_tag = alpha{1,8};
  language_tag = primary_tag ("-" subtag)*;
  Content_Language = "Content-Language" HCOLON
                     language_tag (COMMA language_tag)*;

  # Content-Length (compact form "l"): one or more digits.
  Content_Length = ("Content-Length" | "l") HCOLON digit+;

  # Content-Type (compact form "c"): type "/" subtype plus parameters.
  media_type = m_type SLASH m_subtype (SEMI m_parameter)*;
  Content_Type = ("Content-Type" | "c") HCOLON media_type;

  # CSeq: sequence number, whitespace, method.
  CSeq = "CSeq" HCOLON digit+ LWS Method;

  # Any one of the supported headers, terminated by CRLF.
  message_header = (
      Accept | Accept_Encoding | Accept_Language
    | Alert_Info | Allow
    | Authentication_Info | Authorization
    | Call_ID | Call_Info
    | Contact
    | Content_Disposition | Content_Encoding | Content_Language
    | Content_Length | Content_Type
    | CSeq
  ) CRLF;

  # RFC 3261 allows one or more digits in each half of the version number,
  # not just a single digit.
  SIP_Version    = "SIP" "/" digit+ "." digit+;
  Request_URI    = SIP_URI | SIPS_URI | absoluteURI;
  # create_req runs on leaving Method, set_req_uri on leaving Request_URI.
  Request_Line   = Method %create_req SP Request_URI %set_req_uri SP
                   SIP_Version CRLF;
  # Request line, zero or more headers, then the blank line ending the head.
  Request        = Request_Line (message_header)* CRLF;

  # Entry point of the machine.
main := Request;
}%%

%% write data;
--cut--

I think the problem is somewhere around this line:

hier_part      = ((net_path | abs_path) ("?" query >mark %uri_query)?);

If I remove the ">mark %uri_query" part, ragel is able to complete the
operation.
Can anyone give me some clues as to what's gone wrong?

tia,
d



More information about the ragel-users mailing list