From bbe5b85affc8456753da76a31649bb16ea480576 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard <nocbos@gmail.com> Date: Fri, 22 Dec 2023 10:01:05 +0100 Subject: Refactor the ACISM backend a little bit. --- src/analysis/scan/patterns/backends/acism-int.h | 63 +++++++++++++++++-------- src/analysis/scan/patterns/backends/acism.c | 63 +++++++++++++------------ 2 files changed, 77 insertions(+), 49 deletions(-) diff --git a/src/analysis/scan/patterns/backends/acism-int.h b/src/analysis/scan/patterns/backends/acism-int.h index 57c3c73..a8cfd59 100644 --- a/src/analysis/scan/patterns/backends/acism-int.h +++ b/src/analysis/scan/patterns/backends/acism-int.h @@ -36,7 +36,7 @@ -//#define __USE_BYTE_FREQ +#define __USE_BYTE_FREQ //#define __SORT_BEFORE_BITMASK @@ -68,6 +68,8 @@ typedef uint16_t acism_code_t; #define MIN_ACISM_CODE 0 #define MAX_ACISM_CODE 0xffff +#define ROOT_STATE_INDEX 0 + /* Noeud de l'arborescence brute */ typedef struct _acism_trie_node_t { @@ -91,35 +93,58 @@ typedef struct _acism_trie_node_t } acism_trie_node_t; +#if __LONG_WIDTH__ < 64 + /* Cellule du tableau compressé final */ -typedef union _acism_state_t +typedef struct _acism_state_t { - uint32_t raw; /* Valeur brute */ - - struct + union { - union + /* Indice 0 */ + struct { - /* Indice 0 */ - struct - { - unsigned int match : 1; /* Correspondance ici */ - unsigned int suffix : 1; /* Correspondance ailleurs */ - unsigned int unused : 4; /* Espace encore disponible */ - unsigned int atom_size : 3; /* Taille d'atome représenté */ - }; - - /* Indice 1 et + */ - unsigned int code : 9; /* Position depuis la base */ - + unsigned int match : 1; /* Correspondance ici */ + unsigned int unused : 4; /* Espace encore disponible */ + unsigned int atom_size : 3; /* Taille d'atome représenté */ + unsigned int suffix : 1; /* Correspondance ailleurs */ }; - unsigned int index : 23; /* Indice de saut */ + /* Indice 1 et + */ + unsigned int code : 9; /* Position depuis la base */ + + }; + + unsigned int index : 23; /* Indice de saut */ + +} acism_state_t; + +#else +/* Cellule du tableau compressé final */ +typedef union _acism_state_t +{ + /* Indice 0 */ + struct + { + uint8_t match : 1; /* Correspondance ici */ + uint8_t atom_size; /* Indice de saut */ + uint8_t suffix : 1; /* Correspondance ailleurs */ + }; + + /* Indice 1 et + */ + uint32_t code; /* Position depuis la base */ + + /* Tous */ + struct + { + uint32_t any; /* Saut de bits */ + uint32_t index; /* Indice de saut */ }; } acism_state_t; +#endif + /* Méthode de recherche basée sur l'algorithme Acism (instance) */ struct _GAcismBackend { diff --git a/src/analysis/scan/patterns/backends/acism.c b/src/analysis/scan/patterns/backends/acism.c index 7c174d5..bceca09 100644 --- a/src/analysis/scan/patterns/backends/acism.c +++ b/src/analysis/scan/patterns/backends/acism.c @@ -475,7 +475,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend) /* 0 == racine */ backend->codes_count++; -#if 0 for (i = 0, iter = backend->frequencies; i < 256; i++, iter++) { if (iter->frequency == 0) @@ -484,10 +483,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend) backend->codes_for_bytes[iter->rank] = backend->codes_count++; } -#else - for (i = 0; i < 256; i++) - backend->codes_for_bytes[i] = backend->codes_count++; -#endif } @@ -1051,7 +1046,7 @@ static void g_acism_backend_build_interleave_array(GAcismBackend *backend) if (node->matched_atom > 0) { base[0].match = 1; - base[0].atom_size = backend->sources[node->matched_atom - 1].len; + base[0].atom_size = backend->sources[node->matched_atom - 1].len - 1; backend->pids[node->state_index] = backend->sources[node->matched_atom - 1].pid; @@ -1156,15 +1151,18 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext vmpa2t pos; /* Point de départ ciblé */ const bin_t *data; /* Données à analyser */ #ifdef __USE_BYTE_FREQ - acism_code_t *codes_for_bytes; + acism_code_t codes_for_bytes[256]; /* Copie des codes d'accès */ #endif acism_state_t *root; /* Racine de l'arborescence */ - acism_state_t *state; /* Tête de lecture courante */ + patid_t *pids; /* Identifiants de motifs */ + unsigned int state; /* Tête de lecture courante */ phys_t i; /* Boucle de parcours #1 */ acism_code_t code; /* Code du caractère courant */ - acism_state_t *next; /* Prochaine tête à valider */ - acism_state_t *iter; /* Boucle de parcours #2 */ - acism_state_t *test; /* Test de validité alternative*/ + unsigned int next; /* Prochaine tête à valider */ + acism_state_t next_state; /* Prochaine tête à valider */ + unsigned int iter; /* Boucle de parcours #2 */ + acism_state_t test_state; /* Test de validité alternative*/ + acism_state_t sub_state; /* Test de validité alternative*/ content = g_scan_context_get_content(context); @@ -1176,13 +1174,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext /* Suivi via l'arborescence aplatie */ #ifdef __USE_BYTE_FREQ - codes_for_bytes = backend->codes_for_bytes; + memcpy(&codes_for_bytes, backend->codes_for_bytes, 256 * sizeof(acism_code_t)); #endif root = backend->states; if (root == NULL) goto done; - state = root; + pids = backend->pids; + + state = ROOT_STATE_INDEX; for (i = 0; i < dlen; i++) { @@ -1198,12 +1198,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext next = state + code; - if (next->code == code) - next = root + next->index; + if (root[next].code == code) + { + next = root[next].index; + next_state = root[next]; + } - else if (state != root) + else if (state != ROOT_STATE_INDEX) { - state = root + state->index; + state = root[state].index; goto retry; } @@ -1212,35 +1215,35 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext /* Remontée d'éventuels résultats */ - if (next->match) + if (next_state.match) { g_scan_context_register_atom_match(context, - backend->pids[next - root], - i + 1 - next->atom_size); + pids[next], + i - next_state.atom_size); - if (next->suffix) + if (next_state.suffix) { - for (iter = root + state->index; ; iter = root + iter->index) + for (iter = root[state].index; ; iter = root[iter].index) { - test = iter + code; + test_state = root[iter + code]; - if (test->code == code) + if (test_state.code == code) { - test = root + test->index; + sub_state = root[test_state.index]; - if (test->match) + if (sub_state.match) { - assert(test->atom_size < next->atom_size); + assert(sub_state.atom_size < next_state.atom_size); g_scan_context_register_atom_match(context, - backend->pids[test - root], - i + 1 - test->atom_size); + pids[test_state.index], + i - sub_state.atom_size); } } - if (iter == root) + if (iter == ROOT_STATE_INDEX) break; } -- cgit v0.11.2-87-g4458