(unexpected) problem with accented chars
jupp_g
jup... at hotmail.de
Thu Jan 10 22:05:40 UTC 2008
If you feed the following console app with chars like [ÄÖÜäöüß] they
are not accepted, which is somewhat unexpected (for me). The dot
output looks fine with proper states and transitions. The problem is
definetely not related to the code page switching, but I didn't delete
the code to enable showing the chars on the console (cmd.exe defaults
to code page 850, which doesn't have some of the chars I want). If I
use the "all" machine things work as expected.
Any hints?
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include <stdio.h>
#define BUF_LEN 128
%%{
machine cp1252;
alphtype unsigned char;
action finish_line {
++count;
res = (cs >= cp1252_first_final);
printf( "==> \"%s\"\n", line );
}
action start_line {
line_idx = 0;
}
action add_char {
line[line_idx++] = fc;
}
action end_line {
line[line_idx] = '\0';
}
more = (0xC0..0xFF);
what_i_want = ('0'..'9')|('A'..'Z')|('a'..'z')|(0xC0..0xFF);
all = (0x30..0xFF);
# fails:
main := ( ( (alnum|more)+ > start_line $ add_char %
end_line ) ) 0 @ finish_line;
# fails: main := ( ( (what_i_want)+ > start_line $ add_char %
end_line ) ) 0 @ finish_line;
# only almost what I want, but works:
#main := ( ( (all)+ > start_line $ add_char % end_line ) ) 0 @
finish_line;
}%%
%% write data;
int main( int argc, char* argv[] )
{
#ifdef _WIN32
SetConsoleCP( 1252 );
SetConsoleOutputCP( 1252 );
#endif
for ( int i = 1; i < argc; ++i ) {
int cs;
char *p = argv[i];
char *pe = p + strlen(p) + 1;
int res = 0;
int count = 0;
char line[BUF_LEN];
int line_idx = -1;
line[0] = '\0';
printf( "argv[%d] = \"%s\"\n", i, p );
%% write init;
%% write exec;
printf( "\tresult = %d (count=%d)\n", res, count );
}
return 0;
}
More information about the ragel-users
mailing list