summaryrefslogtreecommitdiff
path: root/src/analysis
diff options
context:
space:
mode:
authorCyrille Bagard <nocbos@gmail.com>2023-12-22 09:01:05 (GMT)
committerCyrille Bagard <nocbos@gmail.com>2023-12-22 09:01:05 (GMT)
commitbbe5b85affc8456753da76a31649bb16ea480576 (patch)
treec84ec75bd14fd196c9507aea4814e03339509f71 /src/analysis
parent55d110939d76e7860b2a313f6b20651c63070179 (diff)
Refactor the ACISM backend a little bit.
Diffstat (limited to 'src/analysis')
-rw-r--r--src/analysis/scan/patterns/backends/acism-int.h63
-rw-r--r--src/analysis/scan/patterns/backends/acism.c63
2 files changed, 77 insertions, 49 deletions
diff --git a/src/analysis/scan/patterns/backends/acism-int.h b/src/analysis/scan/patterns/backends/acism-int.h
index 57c3c73..a8cfd59 100644
--- a/src/analysis/scan/patterns/backends/acism-int.h
+++ b/src/analysis/scan/patterns/backends/acism-int.h
@@ -36,7 +36,7 @@
-//#define __USE_BYTE_FREQ
+#define __USE_BYTE_FREQ
//#define __SORT_BEFORE_BITMASK
@@ -68,6 +68,8 @@ typedef uint16_t acism_code_t;
#define MIN_ACISM_CODE 0
#define MAX_ACISM_CODE 0xffff
+#define ROOT_STATE_INDEX 0
+
/* Noeud de l'arborescence brute */
typedef struct _acism_trie_node_t
{
@@ -91,35 +93,58 @@ typedef struct _acism_trie_node_t
} acism_trie_node_t;
+#if __LONG_WIDTH__ < 64
+
/* Cellule du tableau compressé final */
-typedef union _acism_state_t
+typedef struct _acism_state_t
{
- uint32_t raw; /* Valeur brute */
-
- struct
+ union
{
- union
+ /* Indice 0 */
+ struct
{
- /* Indice 0 */
- struct
- {
- unsigned int match : 1; /* Correspondance ici */
- unsigned int suffix : 1; /* Correspondance ailleurs */
- unsigned int unused : 4; /* Espace encore disponible */
- unsigned int atom_size : 3; /* Taille d'atome représenté */
- };
-
- /* Indice 1 et + */
- unsigned int code : 9; /* Position depuis la base */
-
+ unsigned int match : 1; /* Correspondance ici */
+ unsigned int unused : 4; /* Espace encore disponible */
+ unsigned int atom_size : 3; /* Taille d'atome représenté */
+ unsigned int suffix : 1; /* Correspondance ailleurs */
};
- unsigned int index : 23; /* Indice de saut */
+ /* Indice 1 et + */
+ unsigned int code : 9; /* Position depuis la base */
+
+ };
+
+ unsigned int index : 23; /* Indice de saut */
+
+} acism_state_t;
+
+#else
+/* Cellule du tableau compressé final */
+typedef union _acism_state_t
+{
+ /* Indice 0 */
+ struct
+ {
+ uint8_t match : 1; /* Correspondance ici */
+ uint8_t atom_size; /* Indice de saut */
+ uint8_t suffix : 1; /* Correspondance ailleurs */
+ };
+
+ /* Indice 1 et + */
+ uint32_t code; /* Position depuis la base */
+
+ /* Tous */
+ struct
+ {
+ uint32_t any; /* Saut de bits */
+ uint32_t index; /* Indice de saut */
};
} acism_state_t;
+#endif
+
/* Méthode de recherche basée sur l'algorithme Acism (instance) */
struct _GAcismBackend
{
diff --git a/src/analysis/scan/patterns/backends/acism.c b/src/analysis/scan/patterns/backends/acism.c
index 7c174d5..bceca09 100644
--- a/src/analysis/scan/patterns/backends/acism.c
+++ b/src/analysis/scan/patterns/backends/acism.c
@@ -475,7 +475,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend)
/* 0 == racine */
backend->codes_count++;
-#if 0
for (i = 0, iter = backend->frequencies; i < 256; i++, iter++)
{
if (iter->frequency == 0)
@@ -484,10 +483,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend)
backend->codes_for_bytes[iter->rank] = backend->codes_count++;
}
-#else
- for (i = 0; i < 256; i++)
- backend->codes_for_bytes[i] = backend->codes_count++;
-#endif
}
@@ -1051,7 +1046,7 @@ static void g_acism_backend_build_interleave_array(GAcismBackend *backend)
if (node->matched_atom > 0)
{
base[0].match = 1;
- base[0].atom_size = backend->sources[node->matched_atom - 1].len;
+ base[0].atom_size = backend->sources[node->matched_atom - 1].len - 1;
backend->pids[node->state_index] = backend->sources[node->matched_atom - 1].pid;
@@ -1156,15 +1151,18 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
vmpa2t pos; /* Point de départ ciblé */
const bin_t *data; /* Données à analyser */
#ifdef __USE_BYTE_FREQ
- acism_code_t *codes_for_bytes;
+ acism_code_t codes_for_bytes[256]; /* Copie des codes d'accès */
#endif
acism_state_t *root; /* Racine de l'arborescence */
- acism_state_t *state; /* Tête de lecture courante */
+ patid_t *pids; /* Identifiants de motifs */
+ unsigned int state; /* Tête de lecture courante */
phys_t i; /* Boucle de parcours #1 */
acism_code_t code; /* Code du caractère courant */
- acism_state_t *next; /* Prochaine tête à valider */
- acism_state_t *iter; /* Boucle de parcours #2 */
- acism_state_t *test; /* Test de validité alternative*/
+ unsigned int next; /* Prochaine tête à valider */
+ acism_state_t next_state; /* Prochaine tête à valider */
+ unsigned int iter; /* Boucle de parcours #2 */
+ acism_state_t test_state; /* Test de validité alternative*/
+ acism_state_t sub_state; /* Test de validité alternative*/
content = g_scan_context_get_content(context);
@@ -1176,13 +1174,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
/* Suivi via l'arborescence aplatie */
#ifdef __USE_BYTE_FREQ
- codes_for_bytes = backend->codes_for_bytes;
+ memcpy(&codes_for_bytes, backend->codes_for_bytes, 256 * sizeof(acism_code_t));
#endif
root = backend->states;
if (root == NULL) goto done;
- state = root;
+ pids = backend->pids;
+
+ state = ROOT_STATE_INDEX;
for (i = 0; i < dlen; i++)
{
@@ -1198,12 +1198,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
next = state + code;
- if (next->code == code)
- next = root + next->index;
+ if (root[next].code == code)
+ {
+ next = root[next].index;
+ next_state = root[next];
+ }
- else if (state != root)
+ else if (state != ROOT_STATE_INDEX)
{
- state = root + state->index;
+ state = root[state].index;
goto retry;
}
@@ -1212,35 +1215,35 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
/* Remontée d'éventuels résultats */
- if (next->match)
+ if (next_state.match)
{
g_scan_context_register_atom_match(context,
- backend->pids[next - root],
- i + 1 - next->atom_size);
+ pids[next],
+ i - next_state.atom_size);
- if (next->suffix)
+ if (next_state.suffix)
{
- for (iter = root + state->index; ; iter = root + iter->index)
+ for (iter = root[state].index; ; iter = root[iter].index)
{
- test = iter + code;
+ test_state = root[iter + code];
- if (test->code == code)
+ if (test_state.code == code)
{
- test = root + test->index;
+ sub_state = root[test_state.index];
- if (test->match)
+ if (sub_state.match)
{
- assert(test->atom_size < next->atom_size);
+ assert(sub_state.atom_size < next_state.atom_size);
g_scan_context_register_atom_match(context,
- backend->pids[test - root],
- i + 1 - test->atom_size);
+ pids[test_state.index],
+ i - sub_state.atom_size);
}
}
- if (iter == root)
+ if (iter == ROOT_STATE_INDEX)
break;
}