diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | src/format/mangling/dex/type_gram.y | 16 | ||||
-rw-r--r-- | src/format/mangling/dex/type_tok.l | 115 |
3 files changed, 125 insertions, 12 deletions
@@ -1,3 +1,9 @@ +16-09-29 Cyrille Bagard <nocbos@gmail.com> + + * src/format/mangling/dex/type_gram.y: + * src/format/mangling/dex/type_tok.l: + Add partial support for Dalvik MUTF-8 encodings. + 16-09-28 Cyrille Bagard <nocbos@gmail.com> * plugins/readdex/class.c: diff --git a/src/format/mangling/dex/type_gram.y b/src/format/mangling/dex/type_gram.y index 1176bd2..d5c3f26 100644 --- a/src/format/mangling/dex/type_gram.y +++ b/src/format/mangling/dex/type_gram.y @@ -23,6 +23,7 @@ bool demangle_dex_type(GDexDemangler *, const char *); #include "../../../analysis/types/basic.h" #include "../../../analysis/types/cse.h" +#include "../../../common/extstr.h" } @@ -45,7 +46,7 @@ bool demangle_dex_type(GDexDemangler *, const char *); %type <type> type_descriptor field_type_descriptor non_array_field_type_descriptor full_class_name -%type <text> TEXT +%type <text> TEXT simple_name %{ @@ -87,18 +88,15 @@ non_array_field_type_descriptor: | L full_class_name SEMICOLON { $$ = $2; } full_class_name: - TEXT { $$ = g_class_enum_type_new(CET_CLASS, $1); } - | full_class_name SLASH TEXT { + simple_name { $$ = g_class_enum_type_new(CET_CLASS, $1); } /* NOTE(review): $1 is now heap-allocated by simple_name; confirm g_class_enum_type_new takes ownership of it */ + | full_class_name SLASH simple_name { $$ = g_class_enum_type_new(CET_CLASS, $3); g_data_type_set_namespace($$, $1); g_object_unref($1); } - | full_class_name DOLLAR TEXT { - $$ = g_class_enum_type_new(CET_CLASS, $3); - g_data_type_set_namespace($$, $1); - g_object_unref($1); - } - +simple_name: + TEXT { $$ = strdup($1); } /* copy the token text at once: the lexer only hands out a pointer into storage it reuses */ + | simple_name TEXT { $$ = stradd($1, $2); } /* consecutive TEXT tokens are joined into one name; stradd presumably appends $2 to $1 and returns the result -- confirm in common/extstr.h */ %% diff --git a/src/format/mangling/dex/type_tok.l b/src/format/mangling/dex/type_tok.l index 7b8a8d3..9c24085 100644 --- a/src/format/mangling/dex/type_tok.l +++ b/src/format/mangling/dex/type_tok.l @@ -10,10 +10,13 @@ %option noyywrap %option yylineno %option nounput -%option noinput + /*%option noinput*/ /* kept disabled: the <string>. rule calls input() */ %x string +ASCII [A-Za-z0-9] +SIMPLE {ASCII}|"$"|"-"|"_" + %% "V" { return V; } @@ -28,10 +31,116 @@ "L" { BEGIN(string); return L; } "["* { type_lval.adeep = 
strlen(yytext); return ARRAY; } <string>"/" { return SLASH; } -<string>"$" { return DOLLAR; } <string>";" { BEGIN(INITIAL); return SEMICOLON; } -<string>[A-Za-z0-9_-]* { type_lval.text = yytext; return TEXT; } +<string>{SIMPLE}* { type_lval.text = yytext; return TEXT; } + +<string>. { /* Any other byte inside a class name: pair it with the byte that follows and return the two as one TEXT token, or REJECT the match. */ + unsigned char next; /* byte obtained from input() right after yytext[0] */ + static char mutf8[4]; /* static: type_lval.text points here after the action returns, so the storage must outlive it; the grammar copies TEXT values with strdup/stradd */ + + switch ((unsigned char)yytext[0]) + { + /* U+00a1 ... U+1fff */ + case 0x00 ... 0x1f: + + next = input(); + + if (yytext[0] == 0x00 && next < 0xa1) + { + REJECT; + } + + else + { + mutf8[0] = yytext[0]; + mutf8[1] = next; + mutf8[2] = '\0'; + + type_lval.text = mutf8; return TEXT; /* hand out the buffer itself instead of copying through a pointer this rule never set */ + + } + + break; + + /* U+2010 ... U+2027 / U+2030 ... U+d7ff */ + case 0x20: + + next = input(); + + switch (next) + { + case 0x10 ... 0x27: + case 0x30 ... 0xff: + + mutf8[0] = yytext[0]; + mutf8[1] = next; + mutf8[2] = '\0'; + + type_lval.text = mutf8; return TEXT; + break; + + default: + REJECT; + break; + + } + + break; + + /* ~ U+2030 ... U+d7ff */ + case 0x21 ... 0xd7: + + next = input(); + + mutf8[0] = yytext[0]; + mutf8[1] = next; + mutf8[2] = '\0'; + + type_lval.text = mutf8; return TEXT; + break; + + /* U+e000 ... U+ffef */ + case 0xe0 ... 0xff: + + next = input(); + + if ((unsigned char)yytext[0] == 0xff && next > 0xef) /* cast as in the switch: plain char may be signed, which would make == 0xff always false */ + { + REJECT; + } + + else + { + mutf8[0] = yytext[0]; + mutf8[1] = next; + mutf8[2] = '\0'; + + type_lval.text = mutf8; return TEXT; + + } + + break; + + /* U+10000 ... U+10ffff */ + /* + case 0x10: + + mutf8[0] = yytext[0]; + mutf8[1] = input(); + mutf8[2] = input(); + mutf8[3] = '\0'; + + strcpy(type_lval.text, mutf8); return TEXT; + break; + */ + + default: + REJECT; + break; + + } + } %% |