From ed5541867f94b36e701708757a0489298fe77135 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard Date: Fri, 13 May 2016 21:59:41 +0200 Subject: Inserted symbols and routines using an optimized 100 times faster method. --- ChangeLog | 27 +++++++++ src/analysis/disass/area.c | 4 +- src/common/Makefile.am | 1 + src/common/sort.c | 139 ++++++++++++++++++++++++++++++++++++++++++++ src/common/sort.h | 41 +++++++++++++ src/format/dex/class.c | 4 +- src/format/dex/dex.c | 2 - src/format/dwarf/symbols.c | 4 +- src/format/elf/helper_arm.c | 2 +- src/format/elf/symbols.c | 5 +- src/format/executable-int.c | 2 +- src/format/format-int.h | 2 - src/format/format.c | 123 +++++++++++++-------------------------- src/format/format.h | 7 +-- 14 files changed, 258 insertions(+), 105 deletions(-) create mode 100644 src/common/sort.c create mode 100644 src/common/sort.h diff --git a/ChangeLog b/ChangeLog index 7057d72..25b8ab8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +16-05-13 Cyrille Bagard + + * src/analysis/disass/area.c: + Take concurrency into account when dealing with new symbols. + + * src/common/Makefile.am: + Add the 'sort.[ch]' files to libcommon_la_SOURCES. + + * src/common/sort.c: + * src/common/sort.h: + New entries: create methods to replace heavy calls to qsort(). + + * src/format/dex/class.c: + * src/format/dex/dex.c: + * src/format/dwarf/symbols.c: + * src/format/elf/helper_arm.c: + * src/format/elf/symbols.c: + * src/format/executable-int.c: + Update code. + + * src/format/format-int.h: + Delete useless fields as arrays of symbols and routines are always sorted. + + * src/format/format.c: + * src/format/format.h: + Insert symbols and routines using an optimized 100 times faster method. + 16-05-07 Cyrille Bagard * src/analysis/disass/routines.c: diff --git a/src/analysis/disass/area.c b/src/analysis/disass/area.c index c1807b7..1abb402 100644 --- a/src/analysis/disass/area.c +++ b/src/analysis/disass/area.c @@ -402,9 +402,9 @@ void load_code_from_mem_area_v2(mem_area_v2 *area, mem_area_v2 *list, size_t cou has_new_sym = g_proc_context_pop_new_symbol_at(ctx, &sym_addr)) { has_new_sym = g_binary_format_find_symbol_at(format, &sym_addr, &symbol); - assert(has_new_sym); - insert_extra_symbol_into_mem_areas_v2(list, count, symbol); + if (has_new_sym) + insert_extra_symbol_into_mem_areas_v2(list, count, symbol); } diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 4c9c2a1..9e9969d 100755 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -16,6 +16,7 @@ libcommon_la_SOURCES = \ macros.h \ net.h net.c \ pathname.h pathname.c \ + sort.h sort.c \ sqlite.h sqlite.c \ xdg.h xdg.c \ xml.h xml.c diff --git a/src/common/sort.c b/src/common/sort.c new file mode 100644 index 0000000..df1df82 --- /dev/null +++ b/src/common/sort.c @@ -0,0 +1,139 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * sort.c - opérations sur des tableaux triés + * + * Copyright (C) 2016 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see . + */ + + +#include "sort.h" + + +#include +#include + + + +/****************************************************************************** +* * +* Paramètres : key = élément de comparaison à retrouver ou approcher. * +* base = adresse du tableau à parcourir. * +* nmemb = nombre d'éléments présents au total. * +* size = taille de chaque élément du tableau. * +* compar = méthode de comparaison entre éléments. * +* index = indice de cellule d'insertion. * +* * +* Description : Effectue une recherche dichotomique dans un tableau. * +* * +* Retour : true si l'élément visé à été trouvé, false sinon. * +* * +* Remarques : - * +* * +******************************************************************************/ + +bool bsearch_index(const void *key, const void *base, size_t nmemb, size_t size, __compar_fn_t compar, size_t *index) +{ + bool result; /* Bilan à faire remonter */ + size_t lower; /* Borne inférieure de fenêtre */ + size_t upper; /* Borne supérieure qde fenêtre*/ + size_t idx; /* Indice de cellule analysée */ + const void *cell; /* Cellule en question */ + int status; /* Bilan d'une comparaison */ + + if (nmemb == 0) + { + *index = 0; + return false; + } + + /* Parcours minimal */ + + lower = 0; + upper = nmemb; + + while (lower < upper) + { + idx = (lower + upper) / 2; + cell = (void *)(((const char *)base) + (idx * size)); + + status = (*compar)(key, cell); + + if (status < 0) + upper = idx; + + else if (status > 0) + lower = idx + 1; + + else + break; + + } + + /* Bilan des recherches */ + + result = (status == 0); + + if (status < 0) + *index = idx; + + else if (status > 0) + *index = idx + 1; + + else + *index = idx; + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : base = adresse du tableau à parcourir. * +* nmemb = nombre d'éléments présents au total. [OUT] * +* size = taille de chaque élément du tableau. * +* compar = méthode de comparaison entre éléments. * +* new = nouvel élément à insérer. * +* * +* Description : Ajoute au bon endroit un élément dans un tableau trié. * +* * +* Retour : Nouvel emplacement du tableau agrandi. * +* * +* Remarques : - * +* * +******************************************************************************/ + +void *qinsert(void *base, size_t *nmemb, size_t size, __compar_fn_t compar, void *new) +{ + void *result; /* Tableau trié à retourner */ + size_t index; /* Indice du point d'insertion */ + + bsearch_index(&new, base, *nmemb, size, compar, &index); + + result = realloc(base, (*nmemb + 1) * size); + + if (index < *nmemb) + memmove((char *)result + (index + 1) * size, (char *)result + index * size, (*nmemb - index) * size); + + (*nmemb)++; + + memcpy((char *)result + index * size, &new, size); + + return result; + +} diff --git a/src/common/sort.h b/src/common/sort.h new file mode 100644 index 0000000..4f41313 --- /dev/null +++ b/src/common/sort.h @@ -0,0 +1,41 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * sort.h - prototypes pour les opérations sur des tableaux triés + * + * Copyright (C) 2016 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * OpenIDA is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * OpenIDA is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Foobar. If not, see . + */ + + +#ifndef _COMMON_SORT_H +#define _COMMON_SORT_H + + +#include +#include + + + +/* Effectue une recherche dichotomique dans un tableau. */ +bool bsearch_index(const void *, const void *, size_t, size_t, __compar_fn_t, size_t *); + +/* Ajoute au bon endroit un élément dans un tableau trié. */ +void *qinsert(void *, size_t *, size_t, __compar_fn_t, void *); + + + +#endif /* _COMMON_SORT_H */ diff --git a/src/format/dex/class.c b/src/format/dex/class.c index 02ac19d..37fdd04 100644 --- a/src/format/dex/class.c +++ b/src/format/dex/class.c @@ -233,7 +233,7 @@ GDexClass *g_dex_class_new(GDexFormat *format, const class_def_item *def) symbol = g_binary_symbol_new(STP_ROUTINE); g_binary_symbol_attach_routine(symbol, routine); - _g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol, false); + g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol); } @@ -256,7 +256,7 @@ GDexClass *g_dex_class_new(GDexFormat *format, const class_def_item *def) symbol = g_binary_symbol_new(STP_ROUTINE); g_binary_symbol_attach_routine(symbol, routine); - _g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol, false); + g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol); } diff --git a/src/format/dex/dex.c b/src/format/dex/dex.c index 51938f9..2897ee3 100755 --- a/src/format/dex/dex.c +++ b/src/format/dex/dex.c @@ -273,8 +273,6 @@ GBinFormat *g_dex_format_new(GBinContent *content, GExeFormat *parent) if (!load_all_dex_classes(result)) goto gdfn_error; - g_binary_format_sort_symbols(base); - if (!g_binary_format_complete_loading(base)) goto gdfn_error; diff --git a/src/format/dwarf/symbols.c b/src/format/dwarf/symbols.c index ceaee9e..c8d6b66 100644 --- a/src/format/dwarf/symbols.c +++ b/src/format/dwarf/symbols.c @@ -117,7 +117,7 @@ static bool load_routine_as_symbol_from_dwarf(GDwarfFormat *format, const dw_die symbol = g_binary_symbol_new(STP_ROUTINE); g_binary_symbol_attach_routine(symbol, routine); - _g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol, false); + g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol); @@ -226,7 +226,7 @@ static bool load_object_as_symbol_from_dwarf(GDwarfFormat *format, const dw_die symbol = g_binary_symbol_new(STP_OBJECT); g_binary_symbol_attach_routine(symbol, routine); - _g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol, false); + g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol); */ diff --git a/src/format/elf/helper_arm.c b/src/format/elf/helper_arm.c index f966296..cf86cfa 100644 --- a/src/format/elf/helper_arm.c +++ b/src/format/elf/helper_arm.c @@ -132,7 +132,7 @@ bool load_elf_arm_relocated_symbols(GElfFormat *format, const elf_shdr *relxxx, } if (symbol != NULL) - _g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol, false); + g_binary_format_add_symbol(G_BIN_FORMAT(format), symbol); } diff --git a/src/format/elf/symbols.c b/src/format/elf/symbols.c index 91a4b71..29a8c3c 100644 --- a/src/format/elf/symbols.c +++ b/src/format/elf/symbols.c @@ -141,7 +141,6 @@ bool load_elf_symbols(GElfFormat *format) - g_binary_format_sort_symbols(G_BIN_FORMAT(format)); result &= load_all_elf_basic_entry_points(format); @@ -207,7 +206,7 @@ static void register_elf_entry_point(GElfFormat *format, virt_t vaddr, phys_t le symbol = g_binary_symbol_new(STP_ENTRY_POINT); g_binary_symbol_attach_routine(symbol, routine); - _g_binary_format_add_symbol(base, symbol, false); + g_binary_format_add_symbol(base, symbol); /* Comptabilisation pour le désassemblage brut */ g_binary_format_register_code_point(base, vaddr, true); @@ -670,7 +669,7 @@ static bool load_elf_internal_symbols(GElfFormat *format) } if (symbol != NULL) - _g_binary_format_add_symbol(base, symbol, false); + g_binary_format_add_symbol(base, symbol); } diff --git a/src/format/executable-int.c b/src/format/executable-int.c index e04a791..bc86eec 100644 --- a/src/format/executable-int.c +++ b/src/format/executable-int.c @@ -44,7 +44,7 @@ bool g_exe_format_without_virt_translate_offset_into_vmpa(const GExeFormat *form /** * On ne peut pas initialiser la partie virtuelle à VMPA_NO_VIRTUAL * car les manipulations au niveau des formats (par exemple, cf. la fonction - * _g_binary_format_add_symbol()) attendent des définitions complètes. + * g_binary_format_add_symbol()) attendent des définitions complètes. */ init_vmpa(pos, off, off); diff --git a/src/format/format-int.h b/src/format/format-int.h index 90b278e..f8a7204 100644 --- a/src/format/format-int.h +++ b/src/format/format-int.h @@ -60,11 +60,9 @@ struct _GBinFormat GBinSymbol **symbols; /* Liste des symboles trouvés */ size_t symbols_count; /* Quantité de ces symboles */ GRWLock syms_lock; /* Accès à la liste de symboles*/ - gint unsorted_symbols; /* Etat du tri des symboles */ GBinRoutine **routines; /* Liste des routines trouvées */ size_t routines_count; /* Nombre de ces routines */ - bool unsorted_routines; /* Etat du tri des routines */ const char **src_files; /* Nom des fichiers source */ size_t src_count; /* Taille de la liste */ diff --git a/src/format/format.c b/src/format/format.c index 88f59b8..0ae6107 100644 --- a/src/format/format.c +++ b/src/format/format.c @@ -37,6 +37,7 @@ #include "java/java.h" #include "pe/pe.h" #include "../arch/processor.h" +#include "../common/sort.h" #include "../decomp/expr/block.h" #include "../gui/panels/log.h" #include "../plugins/pglist.h" @@ -56,7 +57,7 @@ static void _g_binary_format_remove_symbol(GBinFormat *, size_t); static void g_binary_format_delete_duplicated_symbols(GBinFormat *); /* Recherche le symbole associé à une adresse. */ -static bool _g_binary_format_find_symbol(const GBinFormat *, const vmpa2t *, __compar_fn_t, GBinSymbol **); +static bool _g_binary_format_find_symbol(const GBinFormat *, const vmpa2t *, __compar_fn_t, size_t *, GBinSymbol **); @@ -97,9 +98,6 @@ static void g_binary_format_class_init(GBinFormatClass *klass) static void g_binary_format_init(GBinFormat *format) { g_rw_lock_init(&format->syms_lock); - g_atomic_int_set(&format->unsorted_symbols, false); - - format->unsorted_routines = false; } @@ -270,7 +268,6 @@ void g_binary_format_setup_disassembling_context(const GBinFormat *format, GProc * * * Paramètres : format = informations chargées à compléter. * * symbol = symbole à ajouter à la liste. * -* sort = fait état d'un obligation de tri final. * * * * Description : Ajoute un symbole à la collection du format binaire. * * * @@ -280,7 +277,7 @@ void g_binary_format_setup_disassembling_context(const GBinFormat *format, GProc * * ******************************************************************************/ -void _g_binary_format_add_symbol(GBinFormat *format, GBinSymbol *symbol, bool sort) +void g_binary_format_add_symbol(GBinFormat *format, GBinSymbol *symbol) { #ifndef NDEBUG const mrange_t *range; /* Couverture du symbole */ @@ -307,25 +304,17 @@ void _g_binary_format_add_symbol(GBinFormat *format, GBinSymbol *symbol, bool so g_rw_lock_writer_lock(&format->syms_lock); - format->symbols = (GBinSymbol **)realloc(format->symbols, - ++format->symbols_count * sizeof(GBinSymbol *)); - - format->symbols[format->symbols_count - 1] = symbol; - - g_atomic_int_set(&format->unsorted_symbols, true); + format->symbols = qinsert(format->symbols, &format->symbols_count, + sizeof(GBinSymbol *), (__compar_fn_t)g_binary_symbol_cmp, symbol); switch (g_binary_symbol_get_target_type(symbol)) { case STP_ROUTINE: case STP_ENTRY_POINT: - format->routines = (GBinRoutine **)realloc(format->routines, - ++format->routines_count * sizeof(GBinRoutine *)); - - format->routines[format->routines_count - 1] = g_binary_symbol_get_routine(symbol); - - format->unsorted_routines = true; - + format->routines = qinsert(format->routines, &format->routines_count, + sizeof(GBinRoutine *), (__compar_fn_t)g_binary_routine_compare, + g_binary_symbol_get_routine(symbol)); break; default: @@ -335,9 +324,6 @@ void _g_binary_format_add_symbol(GBinFormat *format, GBinSymbol *symbol, bool so g_rw_lock_writer_unlock(&format->syms_lock); - if (sort) - g_binary_format_sort_symbols(format); - } @@ -434,43 +420,6 @@ void g_binary_format_remove_symbol(GBinFormat *format, GBinSymbol *symbol) /****************************************************************************** * * -* Paramètres : format = informations chargées à arranger. * -* * -* Description : Réorganise si besoin est les symboles collectés. * -* * -* Retour : - * -* * -* Remarques : - * -* * -******************************************************************************/ - -void g_binary_format_sort_symbols(GBinFormat *format) -{ - if (g_atomic_int_compare_and_exchange(&format->unsorted_symbols, true, false)) - { - g_rw_lock_writer_lock(&format->syms_lock); - - qsort(format->symbols, format->symbols_count, - sizeof(GBinSymbol *), (__compar_fn_t)g_binary_symbol_cmp); - - if (format->unsorted_routines) - { - qsort(format->routines, format->routines_count, - sizeof(GBinRoutine *), (__compar_fn_t)g_binary_routine_compare); - - format->unsorted_routines = false; - - } - - g_rw_lock_writer_unlock(&format->syms_lock); - - } - -} - - -/****************************************************************************** -* * * Paramètres : format = informations chargées à corriger si besoin est. * * * * Description : Supprime les éventuels doublons au sein des symboles. * @@ -488,8 +437,6 @@ static void g_binary_format_delete_duplicated_symbols(GBinFormat *format) mrange_t last; /* Dernière localisation vue */ size_t index; /* Indice de suppression */ - g_binary_format_sort_symbols(G_BIN_FORMAT(format)); - g_rw_lock_writer_lock(&format->syms_lock); if (format->symbols_count > 1) @@ -684,6 +631,7 @@ bool g_binary_format_find_symbol_by_label(const GBinFormat *format, const char * * Paramètres : format = informations chargées à consulter. * * addr = adresse à cibler lors des recherches. * * fn = méthode de comparaison des symboles. * +* index = indice de l'éventuel symbole trouvé ou NULL. [OUT] * * symbol = éventuel symbole trouvé à déréfenrencer. [OUT] * * * * Description : Recherche le symbole associé à une adresse. * @@ -694,7 +642,7 @@ bool g_binary_format_find_symbol_by_label(const GBinFormat *format, const char * * * ******************************************************************************/ -static bool _g_binary_format_find_symbol(const GBinFormat *format, const vmpa2t *addr, __compar_fn_t fn, GBinSymbol **symbol) +static bool _g_binary_format_find_symbol(const GBinFormat *format, const vmpa2t *addr, __compar_fn_t fn, size_t *index, GBinSymbol **symbol) { bool result; /* Bilan à retourner */ void *found; /* Résultat de recherches */ @@ -703,14 +651,13 @@ static bool _g_binary_format_find_symbol(const GBinFormat *format, const vmpa2t *symbol = NULL; /* TODO : vérifier les doublons côtés appelants */ - g_rw_lock_reader_lock(&format->syms_lock); - found = bsearch(addr, format->symbols, format->symbols_count, sizeof(GBinSymbol *), fn); - g_rw_lock_reader_unlock(&format->syms_lock); - if (found != NULL) { + if (index != NULL) + *index = (GBinSymbol **)found - format->symbols; + *symbol = *(GBinSymbol **)found; g_object_ref(G_OBJECT(*symbol)); @@ -751,7 +698,11 @@ bool g_binary_format_find_symbol_at(const GBinFormat *format, const vmpa2t *addr } - result = _g_binary_format_find_symbol(format, addr, (__compar_fn_t)find_symbol, symbol); + g_rw_lock_reader_lock(&format->syms_lock); + + result = _g_binary_format_find_symbol(format, addr, (__compar_fn_t)find_symbol, NULL, symbol); + + g_rw_lock_reader_unlock(&format->syms_lock); return result; @@ -786,7 +737,11 @@ bool g_binary_format_find_symbol_for(const GBinFormat *format, const vmpa2t *add } - result = _g_binary_format_find_symbol(format, addr, (__compar_fn_t)find_symbol, symbol); + g_rw_lock_reader_lock(&format->syms_lock); + + result = _g_binary_format_find_symbol(format, addr, (__compar_fn_t)find_symbol, NULL, symbol); + + g_rw_lock_reader_unlock(&format->syms_lock); return result; @@ -810,32 +765,32 @@ bool g_binary_format_find_symbol_for(const GBinFormat *format, const vmpa2t *add bool g_binary_format_find_next_symbol_at(const GBinFormat *format, const vmpa2t *addr, GBinSymbol **symbol) { bool result; /* Bilan à retourner */ - size_t i; /* Boucle de parcours */ - const mrange_t *range; /* Espace mémoire parcouru */ + size_t index; /* Indice à considérer */ - result = false; - - *symbol = NULL; + int find_symbol(const vmpa2t *addr, const GBinSymbol **sym) + { + const mrange_t *range; /* Espace mémoire parcouru */ - g_rw_lock_reader_lock(&format->syms_lock); + range = g_binary_symbol_get_range(*sym); - /* TODO : ajouter un peu de dichotomie ici ! */ + return cmp_mrange_with_vmpa(range, addr); - for (i = 0; i < format->symbols_count && !result; i++) - { - range = g_binary_symbol_get_range(format->symbols[i]); + } - if (cmp_vmpa(get_mrange_addr(range), addr) == 0 && (i + 1) < format->symbols_count) - { - *symbol = format->symbols[i + 1]; - g_object_ref(G_OBJECT(*symbol)); + g_rw_lock_reader_lock(&format->syms_lock); - result = true; + result = _g_binary_format_find_symbol(format, addr, (__compar_fn_t)find_symbol, &index, symbol); - } + if (result && (index + 1) < format->symbols_count) + { + *symbol = format->symbols[index + 1]; + g_object_ref(G_OBJECT(*symbol)); } + else + result = false; + g_rw_lock_reader_unlock(&format->syms_lock); return result; diff --git a/src/format/format.h b/src/format/format.h index 4a56d47..915b62f 100644 --- a/src/format/format.h +++ b/src/format/format.h @@ -68,16 +68,11 @@ void g_binary_format_register_code_point(GBinFormat *, virt_t, bool); void g_binary_format_setup_disassembling_context(const GBinFormat *, GProcContext *); /* Ajoute un symbole à la collection du format binaire. */ -void _g_binary_format_add_symbol(GBinFormat *, GBinSymbol *, bool); - -#define g_binary_format_add_symbol(fmt, sym) _g_binary_format_add_symbol(fmt, sym, true) +void g_binary_format_add_symbol(GBinFormat *, GBinSymbol *); /* Retire un symbole de la collection du format binaire. */ void g_binary_format_remove_symbol(GBinFormat *, GBinSymbol *); -/* Réorganise si besoin est les symboles collectés. */ -void g_binary_format_sort_symbols(GBinFormat *); - /* Fournit la liste de tous les symboles détectés. */ GBinSymbol **g_binary_format_get_symbols(const GBinFormat *, size_t *); -- cgit v0.11.2-87-g4458