%{
#include "context.h"
#include "libformatmanglingdextype_la-type_gram.h"

/* See lemoda.net/c/reentrant-parser */

/*
 * Tell whether two consecutive input bytes form an accepted name character.
 *
 * The pair is read as the high byte (lead) and low byte (next) of a 16-bit
 * code point.  Accepted ranges:
 *
 *     U+00a1 ... U+1fff
 *     U+2010 ... U+2027
 *     U+2030 ... U+d7ff
 *     U+e000 ... U+ffef
 *
 * Code points above U+ffff (U+10000 ... U+10ffff) are not handled.
 *
 * Both parameters are unsigned char on purpose: plain char may be signed,
 * in which case a byte such as 0xff would compare as -1 and never match.
 *
 * Returns non-zero when the pair is accepted, 0 otherwise.
 */
static int is_name_pair(unsigned char lead, unsigned char next)
{
    if (lead == 0x00) {
        /* U+0000 ... U+00a0 are excluded. */
        return next >= 0xa1;
    }
    if (lead <= 0x1f) {
        /* U+0100 ... U+1fff */
        return 1;
    }
    if (lead == 0x20) {
        /* U+2010 ... U+2027 and U+2030 ... U+20ff */
        return (next >= 0x10 && next <= 0x27) || next >= 0x30;
    }
    if (lead <= 0xd7) {
        /* U+2100 ... U+d7ff */
        return 1;
    }
    if (lead <= 0xdf) {
        /* U+d800 ... U+dfff are excluded. */
        return 0;
    }
    if (lead == 0xff) {
        /* U+fff0 ... U+ffff are excluded. */
        return next <= 0xef;
    }
    /* U+e000 ... U+feff */
    return 1;
}
%}

%option noyywrap
%option yylineno
%option nounput
%option noinput
%option reentrant
%option bison-bridge

/*
 * "string" is entered after the "L" token and left on ";".  It is an
 * exclusive start condition, so a rule is only active there when it is
 * explicitly tagged with <string>; untagged rules apply to INITIAL only.
 */
%x string

ASCII   [A-Za-z0-9]
SIMPLE  {ASCII}|"$"|"-"|"_"
OTHER   [^A-Za-z0-9$_/;\[\n-]
ANY     .|\n

%%

    /*
     * Single-letter type tokens.  They are deliberately left untagged so
     * that they are recognised in INITIAL only; inside the "string" state
     * the same letters are returned as TEXT.
     */

"V"     { return V; }
"Z"     { return Z; }
"B"     { return B; }
"S"     { return S; }
"C"     { return C; }
"I"     { return I; }
"J"     { return J; }
"F"     { return F; }
"D"     { return D; }

"L"     { BEGIN(string); return L; }

<INITIAL,string>"["+ {
    /* The semantic value is the number of consecutive '[' characters. */
    yylval->adeep = strlen(yytext);
    return ARRAY;
}

<INITIAL,string>"/" {
    return SLASH;
}

<INITIAL,string>";" {
    BEGIN(INITIAL);
    return SEMICOLON;
}

<INITIAL,string>{SIMPLE}+ {
    /*
     * text points into the scanner's own token buffer, which flex reuses
     * on the next call to yylex().
     * NOTE(review): the parser presumably copies it before requesting the
     * next token -- confirm against the grammar actions.
     */
    yylval->text = yytext;
    return TEXT;
}

<INITIAL,string>{OTHER}{ANY} {
    /*
     * Any byte not claimed by the rules above is taken together with the
     * byte that follows it.  Both bytes are matched by the pattern, so
     * yytext already holds the NUL-terminated pair and can be handed over
     * exactly like the TEXT rule above.
     */
    const unsigned char lead = (unsigned char)yytext[0];
    const unsigned char next = (unsigned char)yytext[1];

    if (is_name_pair(lead, next)) {
        yylval->text = yytext;
        return TEXT;
    }

    /*
     * Not an accepted pair: treat the lead byte the way the default rule
     * would (copy it to yyout) and push the second byte back so that it is
     * scanned again as the start of the next token.
     */
    yyless(1);
    ECHO;
}

%%