From bbe5b85affc8456753da76a31649bb16ea480576 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Fri, 22 Dec 2023 10:01:05 +0100
Subject: Refactor the ACISM backend a little bit.

---
 src/analysis/scan/patterns/backends/acism-int.h | 63 +++++++++++++++++--------
 src/analysis/scan/patterns/backends/acism.c     | 63 +++++++++++++------------
 2 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/src/analysis/scan/patterns/backends/acism-int.h b/src/analysis/scan/patterns/backends/acism-int.h
index 57c3c73..a8cfd59 100644
--- a/src/analysis/scan/patterns/backends/acism-int.h
+++ b/src/analysis/scan/patterns/backends/acism-int.h
@@ -36,7 +36,7 @@
 
 
 
-//#define __USE_BYTE_FREQ
+#define __USE_BYTE_FREQ
 //#define __SORT_BEFORE_BITMASK
 
 
@@ -68,6 +68,8 @@ typedef uint16_t acism_code_t;
 #define MIN_ACISM_CODE 0
 #define MAX_ACISM_CODE 0xffff
 
+#define ROOT_STATE_INDEX 0
+
 /* Noeud de l'arborescence brute */
 typedef struct _acism_trie_node_t
 {
@@ -91,35 +93,58 @@ typedef struct _acism_trie_node_t
 
 } acism_trie_node_t;
 
+#if __LONG_WIDTH__ < 64
+
 /* Cellule du tableau compressé final */
-typedef union _acism_state_t
+typedef struct _acism_state_t
 {
-    uint32_t raw;                           /* Valeur brute                */
-
-    struct
+    union
     {
-        union
+        /* Indice 0 */
+        struct
         {
-            /* Indice 0 */
-            struct
-            {
-                unsigned int match : 1;     /* Correspondance ici          */
-                unsigned int suffix : 1;    /* Correspondance ailleurs     */
-                unsigned int unused : 4;    /* Espace encore disponible    */
-                unsigned int atom_size : 3; /* Taille d'atome représenté   */
-            };
-
-            /* Indice 1 et + */
-            unsigned int code : 9;          /* Position depuis la base     */
-
+            unsigned int match : 1;         /* Correspondance ici          */
+            unsigned int unused : 4;        /* Espace encore disponible    */
+            unsigned int atom_size : 3;     /* Taille d'atome représenté   */
+            unsigned int suffix : 1;        /* Correspondance ailleurs     */
         };
 
-        unsigned int index : 23;            /* Indice de saut              */
+        /* Indice 1 et + */
+        unsigned int code : 9;              /* Position depuis la base     */
+
+    };
+
+    unsigned int index : 23;                /* Indice de saut              */
+
+} acism_state_t;
+
+#else
 
+/* Cellule du tableau compressé final */
+typedef union _acism_state_t
+{
+    /* Indice 0 */
+    struct
+    {
+        uint8_t match : 1;                  /* Correspondance ici          */
+        uint8_t atom_size;                  /* Indice de saut              */
+        uint8_t suffix : 1;                 /* Correspondance ailleurs     */
+    };
+
+    /* Indice 1 et + */
+    uint32_t code;                          /* Position depuis la base     */
+
+    /* Tous */
+    struct
+    {
+        uint32_t any;                       /* Saut de bits                */
+        uint32_t index;                     /* Indice de saut              */
     };
 
 } acism_state_t;
 
+#endif
+
 /* Méthode de recherche basée sur l'algorithme Acism (instance) */
 struct _GAcismBackend
 {
diff --git a/src/analysis/scan/patterns/backends/acism.c b/src/analysis/scan/patterns/backends/acism.c
index 7c174d5..bceca09 100644
--- a/src/analysis/scan/patterns/backends/acism.c
+++ b/src/analysis/scan/patterns/backends/acism.c
@@ -475,7 +475,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend)
     /* 0 == racine */
     backend->codes_count++;
 
-#if 0
     for (i = 0, iter = backend->frequencies; i < 256; i++, iter++)
     {
         if (iter->frequency == 0)
@@ -484,10 +483,6 @@ static void g_acism_backend_define_codes(GAcismBackend *backend)
         backend->codes_for_bytes[iter->rank] = backend->codes_count++;
 
     }
-#else
-    for (i = 0; i < 256; i++)
-        backend->codes_for_bytes[i] = backend->codes_count++;
-#endif
 
 }
 
@@ -1051,7 +1046,7 @@ static void g_acism_backend_build_interleave_array(GAcismBackend *backend)
         if (node->matched_atom > 0)
         {
             base[0].match = 1;
-            base[0].atom_size = backend->sources[node->matched_atom - 1].len;
+            base[0].atom_size = backend->sources[node->matched_atom - 1].len - 1;
 
             backend->pids[node->state_index] = backend->sources[node->matched_atom - 1].pid;
 
@@ -1156,15 +1151,18 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
     vmpa2t pos;                             /* Point de départ ciblé       */
     const bin_t *data;                      /* Données à analyser          */
 #ifdef __USE_BYTE_FREQ
-    acism_code_t *codes_for_bytes;
+    acism_code_t codes_for_bytes[256];      /* Copie des codes d'accès     */
 #endif
     acism_state_t *root;                    /* Racine de l'arborescence    */
-    acism_state_t *state;                   /* Tête de lecture courante    */
+    patid_t *pids;                          /* Identifiants de motifs      */
+    unsigned int state;                     /* Tête de lecture courante    */
     phys_t i;                               /* Boucle de parcours #1       */
     acism_code_t code;                      /* Code du caractère courant   */
-    acism_state_t *next;                    /* Prochaine tête à valider    */
-    acism_state_t *iter;                    /* Boucle de parcours #2       */
-    acism_state_t *test;                    /* Test de validité alternative*/
+    unsigned int next;                      /* Prochaine tête à valider    */
+    acism_state_t next_state;               /* Prochaine tête à valider    */
+    unsigned int iter;                      /* Boucle de parcours #2       */
+    acism_state_t test_state;               /* Test de validité alternative*/
+    acism_state_t sub_state;                /* Test de validité alternative*/
 
     content = g_scan_context_get_content(context);
 
@@ -1176,13 +1174,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
     /* Suivi via l'arborescence aplatie */
 
 #ifdef __USE_BYTE_FREQ
-    codes_for_bytes = backend->codes_for_bytes;
+    memcpy(&codes_for_bytes, backend->codes_for_bytes, 256 * sizeof(acism_code_t));
 #endif
 
     root = backend->states;
     if (root == NULL) goto done;
 
-    state = root;
+    pids = backend->pids;
+
+    state = ROOT_STATE_INDEX;
 
     for (i = 0; i < dlen; i++)
     {
@@ -1198,12 +1198,15 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
 
         next = state + code;
 
-        if (next->code == code)
-            next = root + next->index;
+        if (root[next].code == code)
+        {
+            next = root[next].index;
+            next_state = root[next];
+        }
 
-        else if (state != root)
+        else if (state != ROOT_STATE_INDEX)
         {
-            state = root + state->index;
+            state = root[state].index;
             goto retry;
         }
 
@@ -1212,35 +1215,35 @@ static void g_acism_backend_run_scan(const GAcismBackend *backend, GScanContext
 
         /* Remontée d'éventuels résultats */
 
-        if (next->match)
+        if (next_state.match)
         {
             g_scan_context_register_atom_match(context,
-                                               backend->pids[next - root],
-                                               i + 1 - next->atom_size);
+                                               pids[next],
+                                               i - next_state.atom_size);
 
-            if (next->suffix)
+            if (next_state.suffix)
             {
-                for (iter = root + state->index; ; iter = root + iter->index)
+                for (iter = root[state].index; ; iter = root[iter].index)
                 {
-                    test = iter + code;
+                    test_state = root[iter + code];
 
-                    if (test->code == code)
+                    if (test_state.code == code)
                     {
-                        test = root + test->index;
+                        sub_state = root[test_state.index];
 
-                        if (test->match)
+                        if (sub_state.match)
                         {
-                            assert(test->atom_size < next->atom_size);
+                            assert(sub_state.atom_size < next_state.atom_size);
 
                             g_scan_context_register_atom_match(context,
-                                                               backend->pids[test - root],
-                                                               i + 1 - test->atom_size);
+                                                               pids[test_state.index],
+                                                               i - sub_state.atom_size);
 
                         }
 
                     }
 
-                    if (iter == root)
+                    if (iter == ROOT_STATE_INDEX)
                         break;
 
                 }
-- 
cgit v0.11.2-87-g4458