summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- src/format/mangling/dex/type_gram.y 16
-rw-r--r-- src/format/mangling/dex/type_tok.l 115
3 files changed, 125 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 78421b8..332e6fa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+16-09-29 Cyrille Bagard <nocbos@gmail.com>
+
+ * src/format/mangling/dex/type_gram.y:
+ * src/format/mangling/dex/type_tok.l:
+ Add partial support for Dalvik MUTF-8 encodings.
+
16-09-28 Cyrille Bagard <nocbos@gmail.com>
* plugins/readdex/class.c:
diff --git a/src/format/mangling/dex/type_gram.y b/src/format/mangling/dex/type_gram.y
index 1176bd2..d5c3f26 100644
--- a/src/format/mangling/dex/type_gram.y
+++ b/src/format/mangling/dex/type_gram.y
@@ -23,6 +23,7 @@ bool demangle_dex_type(GDexDemangler *, const char *);
#include "../../../analysis/types/basic.h"
#include "../../../analysis/types/cse.h"
+#include "../../../common/extstr.h"
}
@@ -45,7 +46,7 @@ bool demangle_dex_type(GDexDemangler *, const char *);
%type <type> type_descriptor field_type_descriptor non_array_field_type_descriptor full_class_name
-%type <text> TEXT
+%type <text> TEXT simple_name
%{
@@ -87,18 +88,15 @@ non_array_field_type_descriptor:
| L full_class_name SEMICOLON { $$ = $2; }
full_class_name:
- TEXT { $$ = g_class_enum_type_new(CET_CLASS, $1); }
- | full_class_name SLASH TEXT {
+ simple_name { $$ = g_class_enum_type_new(CET_CLASS, $1); }
+ | full_class_name SLASH simple_name {
$$ = g_class_enum_type_new(CET_CLASS, $3);
g_data_type_set_namespace($$, $1);
g_object_unref($1);
}
- | full_class_name DOLLAR TEXT {
- $$ = g_class_enum_type_new(CET_CLASS, $3);
- g_data_type_set_namespace($$, $1);
- g_object_unref($1);
- }
-
+simple_name: /* one or more consecutive TEXT tokens merged into a single string */
+ TEXT { $$ = strdup($1); } /* first piece: take a private copy of the token text */
+ | simple_name TEXT { $$ = stradd($1, $2); } /* following pieces: presumably appended to the copy by stradd() -- TODO confirm in common/extstr.h */
%%
diff --git a/src/format/mangling/dex/type_tok.l b/src/format/mangling/dex/type_tok.l
index 7b8a8d3..9c24085 100644
--- a/src/format/mangling/dex/type_tok.l
+++ b/src/format/mangling/dex/type_tok.l
@@ -10,10 +10,13 @@
%option noyywrap
%option yylineno
%option nounput
-%option noinput
+ /*%option noinput*/
%x string
+ASCII [A-Za-z0-9]
+SIMPLE {ASCII}|"$"|"-"|"_"
+
%%
"V" { return V; }
@@ -28,10 +31,116 @@
"L" { BEGIN(string); return L; }
"["* { type_lval.adeep = strlen(yytext); return ARRAY; }
<string>"/" { return SLASH; }
-<string>"$" { return DOLLAR; }
<string>";" { BEGIN(INITIAL); return SEMICOLON; }
-<string>[A-Za-z0-9_-]* { type_lval.text = yytext; return TEXT; }
+<string>{SIMPLE}* { type_lval.text = yytext; return TEXT; }
+
+<string>. {
+    /*
+     * Fallback for the <string> start condition: the matched byte is paired
+     * with the next input byte, and the two bytes are returned to the parser
+     * as one TEXT token when the pair lies in one of the ranges below.
+     * Otherwise the match is rejected.
+     *
+     * The token text is stored in a static buffer, so it stays valid after
+     * this action returns but is overwritten by the next token built here.
+     * NOTE(review): the parser is assumed to copy a TEXT value before it
+     * asks for another token -- confirm against the grammar.
+     *
+     * NOTE(review): the ranges compare the two bytes as if they were the
+     * high and low halves of a code point; real MUTF-8 multi-byte sequences
+     * use 0xc0-0xdf / 0xe0-0xef lead bytes -- confirm the intended encoding.
+     *
+     * NOTE(review): input() returns an int; an end-of-input indicator is
+     * not detected here and ends up stored as a plain byte.
+     */
+    static char mutf8[4];
+    unsigned char first;
+    unsigned char next;
+    int accepted;
+
+    /* yytext holds plain chars, which may be signed: always compare the byte as unsigned. */
+    first = (unsigned char)yytext[0];
+    next = 0;
+    accepted = 0;
+
+    switch (first)
+    {
+        /* U+00a1 ... U+1fff */
+        case 0x00 ... 0x1f:
+            next = input();
+            accepted = (first != 0x00 || next >= 0xa1);
+            break;
+
+        /* U+2010 ... U+2027 / U+2030 ... U+d7ff */
+        case 0x20:
+            next = input();
+            accepted = ((next >= 0x10 && next <= 0x27) || next >= 0x30);
+            break;
+
+        /* ~ U+2030 ... U+d7ff */
+        case 0x21 ... 0xd7:
+            next = input();
+            accepted = 1;
+            break;
+
+        /* U+e000 ... U+ffef */
+        case 0xe0 ... 0xff:
+            next = input();
+            accepted = (first != 0xff || next <= 0xef);
+            break;
+
+        /* Remaining lead bytes; U+10000 ... U+10ffff sequences are not supported yet. */
+        default:
+            break;
+
+    }
+
+    if (!accepted)
+    {
+        REJECT;
+    }
+
+    mutf8[0] = (char)first;
+    mutf8[1] = (char)next;
+    mutf8[2] = '\0';
+
+    /* Point the token value at our own storage instead of writing through whatever it held before. */
+    type_lval.text = mutf8;
+
+    return TEXT;
+
+ }
%%