Simple URL parser
hsanson
hsan... at gmail.com
Fri May 23 06:10:47 UTC 2008
To learn how to use Ragel, I am implementing a simple URL parser that
receives something like "http://www.ragel.com:8080/file.txt" and
returns each part (scheme, hostname, port, path) as a string. As I
understand it, doing this with Ragel should be a breeze.
Still, there is something I am not getting right, and I would like some
advice; see the code below:
The scheme part seems to work, so I assume my understanding of Ragel is
not that bad. The problem is with the hostname and port parts. The
hostname action gets called for each character of the hostname, which
is not the intended behavior, and the port action never gets called.
Any tips to get me back on track would be greatly appreciated.
Horacio
//###################################
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Components of a parsed URL. Every member is a pointer to a separately
 * allocated, NUL-terminated string, or NULL when that component has not
 * been filled in.
 */
typedef struct {
    char *scheme;   /* set by the "scheme" parser action */
    char *hostname; /* set by the "host" parser action */
    char *service;  /* set by the "port" parser action */
    char *path;     /* declared but not assigned by the code shown here */
    char *uri;      /* copy of the string handed to suj_url_new() */
} suj_url;
%%{
machine uri_parser;
# Actions
action mark_start {
start = fpc;
printf("Mark start at %c\n", fc);
}
action scheme {
size_t len = fpc - start + 1;
url->scheme = calloc(len,sizeof(char));
strncpy(url->scheme, start, len);
url->scheme[len]='\0';
printf("scheme: %s\n",url->scheme);
}
action host {
size_t len = fpc - start + 1;
url->hostname = calloc(len,sizeof(char));
strncpy(url->hostname, start, len);
url->hostname[len]='\0';
printf("host: %s\n",url->hostname);
}
action port {
size_t len = fpc - start + 1;
url->service = calloc(len,sizeof(char));
strncpy(url->service, start, len);
url->service[len]='\0';
printf("service: %s\n",url->service);
}
# Grammar
escaped = ("%" xdigit xdigit);
scheme = ("http"i | "rtsp"i | "rtp"i) >mark_start @scheme;
port = (":" digit+) >mark_start %port;
host = (any* -- ("/" | ":")) >mark_start @host;
uri = (scheme "://" host port ) . '\0';
# Main
main := uri;
}%%
%%write data;
/*
 * Run the uri_parser machine over `uri` and return a newly allocated
 * suj_url describing it.
 *
 * uri: NUL-terminated string to parse; it is read but not modified.
 *
 * Returns NULL when `uri` is NULL or when the result cannot be allocated.
 * Otherwise returns a suj_url whose `uri` member is a private copy of the
 * input. The remaining members are filled in by the machine's actions as the
 * corresponding tokens are recognised and stay NULL for anything the machine
 * did not capture. Parsing is best effort: a string the grammar rejects still
 * yields a (partially filled) result.
 */
suj_url * suj_url_new(char *uri)
{
    suj_url *url;
    char *start; /* token start, written and read by the machine's actions */
    char *p;
    char *pe;
    size_t uri_len;
    int cs;

    if (uri == NULL)
        return NULL;

    uri_len = strlen(uri);

    /* sizeof *url is the struct; sizeof(url) would only be a pointer. */
    url = calloc(1, sizeof *url);
    if (url == NULL)
        return NULL;

    url->uri = malloc(uri_len + 1);
    if (url->uri == NULL) {
        free(url);
        return NULL;
    }
    memcpy(url->uri, uri, uri_len + 1); /* copies the terminator as well */

    start = uri;
    p = uri;
    /*
     * The grammar ends with an explicit '\0', so the terminator must be fed
     * to the machine: pe points one past the NUL. Stopping at the NUL leaves
     * the machine short of its final state and the last leaving action
     * (the port capture) never runs.
     */
    pe = uri + uri_len + 1;

    %% write init;
    %% write exec;

    return url;
}
/*
 * Demo driver: parses one hard-coded URL. The parser actions print each
 * component as it is recognised; the returned structure is not inspected.
 */
int main(int argc, char **argv)
{
    suj_url *parsed = suj_url_new("rtp://www.ragel.org:8080");

    /* Neither the arguments nor the result are used by this demo. */
    (void)argc;
    (void)argv;
    (void)parsed;

    return 0;
}
More information about the ragel-users
mailing list