From c6409e2c6a390a7cca40da8572c93a5268e90a27 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 1 Dec 2009 23:13:37 +0000
Subject: Improved the Itanium demangling.

git-svn-id: svn://svn.gna.org/svn/chrysalide/trunk@139 abbe820e-26c8-41b2-8c08-b7b2b41f8b0a
---
 ChangeLog                          | 14 +++++++
 src/format/elf/helper_x86.c        | 43 +++++++++++++++++++--
 src/format/elf/symbols.c           | 24 +++++++++++-
 src/format/mangling/itanium_gram.y | 77 +++++++++++++++++++++++++++++++++++---
 src/format/mangling/itanium_tok.l  | 49 ++++++++++++++++++++++++
 src/format/symbol.c                | 12 ++++--
 src/format/symbol.h                |  5 ++-
 7 files changed, 207 insertions(+), 17 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dd752ac..1deb97d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+09-12-02  Cyrille Bagard <nocbos@gmail.com>
+
+	* src/format/elf/helper_x86.c:
+	* src/format/elf/symbols.c:
+	Begin to demangle C++ function names.
+
+	* src/format/mangling/itanium_gram.y:
+	* src/format/mangling/itanium_tok.l:
+	Improve the Itanium demangling.
+
+	* src/format/symbol.c:
+	* src/format/symbol.h:
+	Use the attached routine to provide a name when possible.
+
 09-11-20  Cyrille Bagard <nocbos@gmail.com>
 
 	* src/configuration.c:
diff --git a/src/format/elf/helper_x86.c b/src/format/elf/helper_x86.c
index 7113a18..02d4785 100644
--- a/src/format/elf/helper_x86.c
+++ b/src/format/elf/helper_x86.c
@@ -31,6 +31,7 @@
 
 #include "elf-int.h"
 #include "../symbol.h"
+#include "../mangling/demangler.h"
 #include "../../arch/immediate.h"
 #include "../../arch/processor.h"
 #include "../../arch/x86/instruction.h"
@@ -290,9 +291,27 @@ void translate_exe_elf_relocations(GElfFormat *format, GArchInstruction **instru
 
                         /* Routine */
 
-                        routine = g_binary_routine_new();
+                        //printf("++ routine :: %s\n", "_ZN1N1TIiiE2mfES0_IddE"/*g_binary_symbol_to_string(symbols[j])*/);
+                        printf("++ routine :: %s\n", g_binary_symbol_to_string(symbols[j]));
+                        fflush(NULL);
+
+                        routine = try_to_demangle_routine(get_demangler_by_type(DGT_ITANIUM), "_ZN1N1TIiiE2mfES0_IddE");
+                        routine = try_to_demangle_routine(get_demangler_by_type(DGT_ITANIUM), g_binary_symbol_to_string(symbols[j]));
+
+                        if (routine == NULL)
+                        {
+                            routine = g_binary_routine_new();
+                            g_binary_routine_set_name(routine, strdup(g_binary_symbol_to_string(symbols[j])));
+                            printf("++failed\n");
+                        }
+                        else printf("++success\n");
+
+                        printf(" -->> '%s'\n", g_binary_routine_get_name(routine));
+
+                        //if (strcmp(g_binary_symbol_to_string(symbols[j]), "_ZNSt8ios_base4InitC1Ev") == 0)
+                        //if (strcmp(g_binary_symbol_to_string(symbols[j]), "_ZNSolsEPFRSoS_E") == 0)
+                        //    exit(0);
 
-                        g_binary_routine_set_name(routine, new_name);
                         g_binary_routine_set_address(routine, address);
 
                         g_binary_format_add_routine(G_BIN_FORMAT(format), routine);
@@ -300,6 +319,9 @@ void translate_exe_elf_relocations(GElfFormat *format, GArchInstruction **instru
                         /* Symbole uniquement */
 
                         symbol = g_binary_symbol_new(STP_FUNCTION, new_name, address);
+
+                        g_binary_symbol_attach_routine(symbol, routine);
+
                         g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol);
 
                         break;
@@ -396,9 +418,19 @@ void translate_dyn_elf_relocations(GElfFormat *format, GArchInstruction **instru
 
             /* Routine */
 
-            routine = g_binary_routine_new();
+            printf("++ routine :: %s\n", name);
+            fflush(NULL);
+
+            routine = try_to_demangle_routine(get_demangler_by_type(DGT_ITANIUM), name);
+
+            if (routine == NULL)
+            {
+                routine = g_binary_routine_new();
+                g_binary_routine_set_name(routine, strdup(name));
+                printf("++failed\n");
+            }
+            else printf("++success\n");
 
-            g_binary_routine_set_name(routine, new_name);
             g_binary_routine_set_address(routine, address);
 
             g_binary_format_add_routine(G_BIN_FORMAT(format), routine);
@@ -406,6 +438,9 @@ void translate_dyn_elf_relocations(GElfFormat *format, GArchInstruction **instru
             /* Symbole uniquement */
 
             symbol = g_binary_symbol_new(STP_FUNCTION, new_name, address);
+
+            g_binary_symbol_attach_routine(symbol, routine);
+
             g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol);
 
  next_op:
diff --git a/src/format/elf/symbols.c b/src/format/elf/symbols.c
index 94f10e3..3928c27 100644
--- a/src/format/elf/symbols.c
+++ b/src/format/elf/symbols.c
@@ -31,6 +31,7 @@
 #include "elf-int.h"
 #include "helper_x86.h"
 #include "section.h"
+#include "../mangling/demangler.h"
 #include "../../panels/log.h"
 
 
@@ -217,9 +218,28 @@ static bool load_elf_internal_symbols(GElfFormat *format)
 
                 /* Routine */
 
-                routine = g_binary_routine_new();
+                printf("routine :: %s\n", name);
+                fflush(NULL);
+
+                if (1/*strcmp(name, "_Z41__static_initialization_and_destruction_0ii") == 0*/)
+                {
+
+                routine = try_to_demangle_routine(get_demangler_by_type(DGT_ITANIUM), name);
+
+                if (routine == NULL)
+                {
+                    routine = g_binary_routine_new();
+                    g_binary_routine_set_name(routine, strdup(name));
+                    printf("failed\n");
+                }
+                else printf("success\n");
+
+                printf(" -->> '%s'\n", g_binary_routine_get_name(routine));
+
+                //exit(0);
+
+                }
 
-                g_binary_routine_set_name(routine, strdup(name));
                 g_binary_routine_set_address(routine, ELF_SYM(format, sym, st_value));
                 g_binary_routine_set_size(routine, ELF_SYM(format, sym, st_size));
 
diff --git a/src/format/mangling/itanium_gram.y b/src/format/mangling/itanium_gram.y
index 4fa7bcb..da1ea10 100644
--- a/src/format/mangling/itanium_gram.y
+++ b/src/format/mangling/itanium_gram.y
@@ -122,6 +122,15 @@ char *strmerge(char *str1, const char *sep, char *str2);
 %token ST SA SB SS SI SO SD
 
 
+%token OPER_NEW OPER_NEW_ARRAY OPER_DELETE OPER_DELETE_ARRAY OPER_PLUS_UNARY OPER_NEG_UNARY
+%token OPER_AND_UNARY OPER_DE_UNARY OPER_COMPL OPER_PLUS OPER_MINUS OPER_MUL OPER_DIV OPER_MOD
+%token OPER_AND OPER_OR OPER_EXCL_OR OPER_AS OPER_PLUS_EQ OPER_MINUS_EQ OPER_MUL_EQ OPER_DIV_EQ
+%token OPER_MOD_EQ OPER_AND_EQ OPER_OR_EQ OPER_EXCL_OR_EQ OPER_LEFT_SHIFT OPER_RIGHT_SHIFT
+%token OPER_LEFT_SHIFT_EQ OPER_RIGHT_SHIFT_EQ OPER_EQUAL OPER_NOT_EQ OPER_LESS OPER_GREATER
+%token OPER_LESS_EQ OPER_GREATER_EQ OPER_NOT OPER_AND_AND OPER_OR_OR OPER_PLUS_PLUS OPER_MINUS_MINUS
+%token OPER_COMMA OPER_PRIV_MEMB OPER_POINTER_TO OPER_CLASS OPER_INDEX
+
+
 %token NUMBER CHAR
 
 
@@ -155,7 +164,7 @@ char *strmerge(char *str1, const char *sep, char *str2);
 
 
 %type <text> name unscoped_name unscoped_template_name nested_name
-%type <text> unqualified_name
+%type <text> unqualified_name operator_name
 %type <text> prefix source_name
 
 
@@ -208,7 +217,7 @@ encoding:
 
 name:
     nested_name                     { $$ = $1; g_binary_routine_set_name(routine, $1); }
-    | unscoped_name                 { $$ = $1; /*g_binary_routine_set_name(routine, $1);*/ }
+    | unscoped_name                 { $$ = $1; g_binary_routine_set_name(routine, $1); /* name the routine, as for nested_name */ }
     | unscoped_template_name template_args  { $$ = stradd($1, $2); /* TODO : merge -> free */ }
     ;
 
@@ -231,15 +240,17 @@ nested_name:
 
 
 prefix:
-    /* vide */                      { $$ = NULL; }
+    /* empty: no scope collected */ { $$ = NULL; }
     | prefix unqualified_name       { $$ = ($2 != NULL ? strmerge($1, "::", $2) : $1); }
+    | substitution                  { $$ = $1; /* e.g. "std"; later names are appended with "::" by the rule above */ }
     ;
 
 
 
 
 unqualified_name:
-    ctor_dtor_name                  { $$ = NULL; }
+    operator_name                   { $$ = strdup($1); /* operator_name yields a string literal; hand on a heap copy */ }
+    | ctor_dtor_name                { $$ = NULL; /* contributes no text; prefix skips NULL names */ }
     | source_name                   { $$ = $1; }
     ;
 
@@ -255,6 +266,60 @@ identifier:
     | CHAR                          { build_itanium_identifier(demangler, $1); }
     ;
 
+operator_name:                      /* C++ spelling of each operator token; values are string literals (unqualified_name duplicates them) */
+    OPER_NEW                        { $$ = "new"; }
+    | OPER_NEW_ARRAY                { $$ = "new[]"; }
+    | OPER_DELETE                   { $$ = "delete"; }
+    | OPER_DELETE_ARRAY             { $$ = "delete[]"; }
+    | OPER_PLUS_UNARY               { $$ = "+"; /* unary form */ }
+    | OPER_NEG_UNARY                { $$ = "-"; /* unary form */ }
+    | OPER_AND_UNARY                { $$ = "&"; /* unary form (address-of) */ }
+    | OPER_DE_UNARY                 { $$ = "*"; /* unary form (dereference) */ }
+    | OPER_COMPL                    { $$ = "~"; }
+    | OPER_PLUS                     { $$ = "+"; }
+    | OPER_MINUS                    { $$ = "-"; }
+    | OPER_MUL                      { $$ = "*"; }
+    | OPER_DIV                      { $$ = "/"; }
+    | OPER_MOD                      { $$ = "%"; }
+    | OPER_AND                      { $$ = "&"; }
+    | OPER_OR                       { $$ = "|"; }
+    | OPER_EXCL_OR                  { $$ = "^"; }
+    | OPER_AS                       { $$ = "="; }
+    | OPER_PLUS_EQ                  { $$ = "+="; }
+    | OPER_MINUS_EQ                 { $$ = "-="; }
+    | OPER_MUL_EQ                   { $$ = "*="; }
+    | OPER_DIV_EQ                   { $$ = "/="; }
+    | OPER_MOD_EQ                   { $$ = "%="; }
+    | OPER_AND_EQ                   { $$ = "&="; }
+    | OPER_OR_EQ                    { $$ = "|="; }
+    | OPER_EXCL_OR_EQ               { $$ = "^="; }
+    | OPER_LEFT_SHIFT               { $$ = "<<"; }
+    | OPER_RIGHT_SHIFT              { $$ = ">>"; }
+    | OPER_LEFT_SHIFT_EQ            { $$ = "<<="; }
+    | OPER_RIGHT_SHIFT_EQ           { $$ = ">>="; }
+    | OPER_EQUAL                    { $$ = "=="; }
+    | OPER_NOT_EQ                   { $$ = "!="; }
+    | OPER_LESS                     { $$ = "<"; }
+    | OPER_GREATER                  { $$ = ">"; }
+    | OPER_LESS_EQ                  { $$ = "<="; }
+    | OPER_GREATER_EQ               { $$ = ">="; }
+    | OPER_NOT                      { $$ = "!"; }
+    | OPER_AND_AND                  { $$ = "&&"; }
+    | OPER_OR_OR                    { $$ = "||"; }
+    | OPER_PLUS_PLUS                { $$ = "++"; }
+    | OPER_MINUS_MINUS              { $$ = "--"; }
+    | OPER_COMMA                    { $$ = ","; }
+    | OPER_PRIV_MEMB                { $$ = "->*"; /* pointer-to-member access */ }
+    | OPER_POINTER_TO               { $$ = "->"; }
+    | OPER_CLASS                    { $$ = "()"; /* function-call operator */ }
+    | OPER_INDEX                    { $$ = "[]"; }
+    ;
+
+
+
+
+
+
 ctor_dtor_name:
     C1                              { g_binary_routine_set_type(routine, RTT_CONSTRUCTOR); }
     | C2                            { g_binary_routine_set_type(routine, RTT_CONSTRUCTOR); }
@@ -331,7 +396,7 @@ class_enum_type:
 
 
 template_args:
-    II template_arg_list EE         { $$ = stradd(strprep($2, "<"), ">"); }
+    II template_arg_list EE         { $$ = stradd(strprep($2, "<"), ">"); /* wrap the argument list in <...> */ }
     ;
 
 template_arg_list:
@@ -344,7 +409,7 @@ template_arg:
     ;
 
 substitution:
-    ST                              { $$ = strdup("std::"); }
+    ST                              { $$ = strdup("std"); }
     | SA                            { $$ = strdup("std::allocator"); }
     | SB                            { $$ = strdup("std::basic_string"); }
     | SS                            { $$ = strdup("std::string"); }
diff --git a/src/format/mangling/itanium_tok.l b/src/format/mangling/itanium_tok.l
index 5eef799..bc5f5f1 100644
--- a/src/format/mangling/itanium_tok.l
+++ b/src/format/mangling/itanium_tok.l
@@ -85,6 +85,55 @@ So                      { return SO; }
 Sd                      { return SD; }
 
 
+nw                      { return OPER_NEW; /* new -- this run maps two-letter operator codes to grammar tokens */ }
+na                      { return OPER_NEW_ARRAY; /* new[] */ }
+dl                      { return OPER_DELETE; /* delete */ }
+da                      { return OPER_DELETE_ARRAY; /* delete[] */ }
+ps                      { return OPER_PLUS_UNARY; /* + (unary) */ }
+ng                      { return OPER_NEG_UNARY; /* - (unary) */ }
+ad                      { return OPER_AND_UNARY; /* & (unary) */ }
+de                      { return OPER_DE_UNARY; /* * (unary) */ }
+co                      { return OPER_COMPL; /* ~ */ }
+pl                      { return OPER_PLUS; /* + */ }
+mi                      { return OPER_MINUS; /* - */ }
+ml                      { return OPER_MUL; /* * */ }
+dv                      { return OPER_DIV; /* / */ }
+rm                      { return OPER_MOD; /* % */ }
+an                      { return OPER_AND; /* & */ }
+or                      { return OPER_OR; /* | */ }
+eo                      { return OPER_EXCL_OR; /* ^ */ }
+aS                      { return OPER_AS; /* = */ }
+pL                      { return OPER_PLUS_EQ; /* += */ }
+mI                      { return OPER_MINUS_EQ; /* -= */ }
+mL                      { return OPER_MUL_EQ; /* *= */ }
+dV                      { return OPER_DIV_EQ; /* /= */ }
+rM                      { return OPER_MOD_EQ; /* %= */ }
+aN                      { return OPER_AND_EQ; /* &= */ }
+oR                      { return OPER_OR_EQ; /* |= */ }
+eO                      { return OPER_EXCL_OR_EQ; /* ^= */ }
+ls                      { return OPER_LEFT_SHIFT; /* << */ }
+rs                      { return OPER_RIGHT_SHIFT; /* >> */ }
+lS                      { return OPER_LEFT_SHIFT_EQ; /* <<= */ }
+rS                      { return OPER_RIGHT_SHIFT_EQ; /* >>= */ }
+eq                      { return OPER_EQUAL; /* == */ }
+ne                      { return OPER_NOT_EQ; /* != */ }
+lt                      { return OPER_LESS; /* < */ }
+gt                      { return OPER_GREATER; /* > */ }
+le                      { return OPER_LESS_EQ; /* <= */ }
+ge                      { return OPER_GREATER_EQ; /* >= */ }
+nt                      { return OPER_NOT; /* ! */ }
+aa                      { return OPER_AND_AND; /* && */ }
+oo                      { return OPER_OR_OR; /* || */ }
+pp                      { return OPER_PLUS_PLUS; /* ++ */ }
+mm                      { return OPER_MINUS_MINUS; /* -- */ }
+cm                      { return OPER_COMMA; /* , */ }
+pm                      { return OPER_PRIV_MEMB; /* ->* */ }
+pt                      { return OPER_POINTER_TO; /* -> */ }
+cl                      { return OPER_CLASS; /* () */ }
+ix                      { return OPER_INDEX; /* [] */ }
+
+
+
 [0-9]+                  { yylval.val = atoi(yytext); return NUMBER; }
 
 <identifier>.           { if (--itanium_txt_length == 0) BEGIN(INITIAL); yylval.car = *yytext; return CHAR; }
diff --git a/src/format/symbol.c b/src/format/symbol.c
index b5e329b..3ac2d6d 100644
--- a/src/format/symbol.c
+++ b/src/format/symbol.c
@@ -27,9 +27,6 @@
 #include <string.h>
 
 
-#include "../analysis/routine.h"
-
-
 
 /* Symbole d'exécutable (instance) */
 struct _GBinSymbol
@@ -169,7 +166,14 @@ SymbolType g_binary_symbol_get_target_type(const GBinSymbol *symbol)
 
 const char *g_binary_symbol_to_string(const GBinSymbol *symbol)
 {
-    return (symbol->alt != NULL ? symbol->alt : symbol->name);
+    const char *result;                     /* Label to return             */
+    /* Prefer the attached routine's name; otherwise the alternate name, else the raw name. */
+    if (symbol->extra.routine != NULL)
+        result = g_binary_routine_get_name(symbol->extra.routine);
+
+    else result = (symbol->alt != NULL ? symbol->alt : symbol->name);
+    /* NOTE(review): the routine branch assumes extra.routine stays NULL until a routine is attached -- confirm */
+    return result;
 
 }
 
diff --git a/src/format/symbol.h b/src/format/symbol.h
index 17fdbcf..ad847f4 100644
--- a/src/format/symbol.h
+++ b/src/format/symbol.h
@@ -28,7 +28,7 @@
 #include <glib-object.h>
 
 
-#include "../arch/archbase.h"
+#include "../analysis/routine.h"
 
 
 
@@ -77,6 +77,9 @@ off_t g_binary_symbol_get_size(const GBinSymbol *);
 /* Définit un autre nom pour le symbole. */
 void g_binary_symbol_set_alt_name(GBinSymbol *, char *);
 
+/* Attache la routine associée au symbole. */
+void g_binary_symbol_attach_routine(GBinSymbol *, GBinRoutine *);
+
 
 
 #endif  /* _FORMAT_SYMBOL_H */
-- 
cgit v0.11.2-87-g4458