summaryrefslogtreecommitdiff
path: root/src/analysis/scan
diff options
context:
space:
mode:
authorCyrille Bagard <nocbos@gmail.com>2024-02-08 22:08:58 (GMT)
committerCyrille Bagard <nocbos@gmail.com>2024-02-08 22:08:58 (GMT)
commita32ea5bc193d8ea2f3349742cf816f8233354c8d (patch)
tree43bcd22411f81b76a76a53fe5eaabc36f2d6f93d /src/analysis/scan
parentcb97f097d9e7b9bc338fdec8689b45ab40c905c4 (diff)
Discard bit fields to find the best atoms.
Diffstat (limited to 'src/analysis/scan')
-rw-r--r--src/analysis/scan/patterns/tokens/atom.c120
-rw-r--r--src/analysis/scan/patterns/tokens/atom.h8
2 files changed, 95 insertions, 33 deletions
diff --git a/src/analysis/scan/patterns/tokens/atom.c b/src/analysis/scan/patterns/tokens/atom.c
index 580ad30..f59c81c 100644
--- a/src/analysis/scan/patterns/tokens/atom.c
+++ b/src/analysis/scan/patterns/tokens/atom.c
@@ -44,7 +44,8 @@
/******************************************************************************
* *
* Paramètres : ch = octet dont la valeur est à analyser. *
-* seen = suivi des octets déjà rencontrés. [OUT] *
+* seen = suivi des octets déjà rencontrés. [OUT] *
+* uniq = volume d'octets originaux à actualiser. [OUT] *
* letters = nombre de lettres rencontrées. [OUT] *
* *
* Description : Note l'intêret de rechercher un octet particulier. *
@@ -55,7 +56,7 @@
* *
******************************************************************************/
-int rate_byte_quality(bin_t ch, bitfield_t *seen, size_t *letters)
+int rate_byte_quality(bin_t ch, uint8_t *seen, size_t *uniq, size_t *letters)
{
int result; /* Note à retourner */
@@ -86,7 +87,8 @@ int rate_byte_quality(bin_t ch, bitfield_t *seen, size_t *letters)
}
- set_in_bit_field(seen, ch, 1);
+ if (seen[ch]++ == 0)
+ (*uniq)++;
return result;
@@ -95,8 +97,67 @@ int rate_byte_quality(bin_t ch, bitfield_t *seen, size_t *letters)
/******************************************************************************
* *
-* Paramètres : seen = suivi des octets déjà rencontrés. *
-* max = nombre d'octets considérés à la base. *
+* Paramètres : ch = octet dont la valeur est à analyser. *
+* seen = suivi des octets déjà rencontrés. [OUT] *
+* uniq = volume d'octets originaux à actualiser. [OUT] *
+* letters = nombre de lettres rencontrées. [OUT] *
+* *
+* Description : Annihile l'intérêt de rechercher un octet particulier. *
+* *
+* Retour : Note positive ou négative. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+int unrate_byte_quality(bin_t ch, uint8_t *seen, size_t *uniq, size_t *letters)
+{ /* Withdraws one occurrence of ch from the seen/uniq/letters counters and returns the amount tied to ch (12, 18 or 20) */
+ int result; /* Rating to return */
+
+ switch (ch) /* Select the amount according to the byte value */
+ {
+ case 0x00: /* These five values all yield 12... */
+ case 0x20:
+ case 0x90:
+ case 0xcc:
+ case 0xff:
+ result = 12; /* NOTE(review): finish_quality_rating() tests rating % 12 when uniq == 1 - presumably to spot these bytes; confirm */
+ break;
+
+ case 'A' ... 'Z': /* ASCII letters, both cases (case-range extension) */
+ case 'a' ... 'z':
+ if (letters == NULL) /* No letter counter provided: same amount as the default case */
+ result = 20;
+ else
+ {
+ result = 18;
+ assert(*letters > 0); /* The counter must not go below zero */
+ (*letters)--; /* One letter less in the tracked set */
+ }
+ break;
+
+ default: /* Any other byte value */
+ result = 20;
+ break;
+
+ }
+
+ if (--seen[ch] == 0) /* Drop one occurrence; true once no occurrence of ch remains */
+ {
+ assert(*uniq > 0); /* The distinct-value counter must not go below zero */
+ (*uniq)--; /* One distinct byte value less */
+ }
+
+ return result; /* NOTE(review): presumably meant to be subtracted from a running rating - confirm against the caller */
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : rating = note d'évaluation courante. *
+* uniq = volume d'octets originaux relevés. *
+* max = nombre d'octets considérés à la base. *
* *
* Description : Termine la notation d'un ensemble d'octets. *
* *
@@ -106,21 +167,14 @@ int rate_byte_quality(bin_t ch, bitfield_t *seen, size_t *letters)
* *
******************************************************************************/
-int finish_quality_rating(const bitfield_t *seen, size_t max)
+int finish_quality_rating(int rating, size_t uniq, size_t max)
{
int result; /* Note à retourner */
- size_t uniq; /* Quantié d'octets uniques */
bool bad; /* Indice de mauvaise qualité */
- uniq = popcount_for_bit_field(seen);
-
if (uniq == 1)
{
- bad = test_in_bit_field(seen, 0x00)
- || test_in_bit_field(seen, 0x20)
- || test_in_bit_field(seen, 0x90)
- || test_in_bit_field(seen, 0xcc)
- || test_in_bit_field(seen, 0xff);
+ bad = (rating % 12) == 0;
result = (bad ? -10 * max : 2);
@@ -129,6 +183,8 @@ int finish_quality_rating(const bitfield_t *seen, size_t max)
else
result = uniq * 2;
+ result += rating;
+
return result;
}
@@ -156,11 +212,14 @@ void find_best_atom(const sized_binary_t *raw, size_t maxsize, tracked_scan_atom
size_t best_letters; /* Mémorisation de décompte */
size_t *ptr_letters; /* Pointeur vers le décompte */
int best_rating; /* Meilleur notation obtenue */
- bitfield_t *seen; /* Mémorise les octets déjà vus*/
+ uint8_t seen[256]; /* Mémorisation des passages */
+ size_t uniq; /* Nombre d'octets originaux */
size_t max_loop; /* Limitation des itérations */
+ const bin_t *last; /* Dernier caractère étudié */
size_t k; /* Boucle de parcours #2 */
size_t local_letters; /* Décompte courant des lettres*/
int local_rating; /* Notation courante */
+ const bin_t *first; /* Premier caractère étudié */
/* Si la chaîne fournie est plus petite que la taille d'un atome... */
if (raw->len <= maxsize)
@@ -201,12 +260,15 @@ void find_best_atom(const sized_binary_t *raw, size_t maxsize, tracked_scan_atom
best_letters = 0;
best_rating = 0;
- seen = create_bit_field(256, false);
+ memset(seen, 0, sizeof(seen));
+ uniq = 0;
+
+ last = raw->static_bin_data;
for (k = 0; k < maxsize; k++)
- best_rating += rate_byte_quality(raw->data[k], seen, ptr_letters);
+ best_rating += rate_byte_quality(*last++, seen, &uniq, ptr_letters);
- best_rating += finish_quality_rating(seen, maxsize);
+ best_rating = finish_quality_rating(best_rating, uniq, maxsize);
/* Parcours du reste du contenu */
@@ -214,21 +276,21 @@ void find_best_atom(const sized_binary_t *raw, size_t maxsize, tracked_scan_atom
ptr_letters = (letters != NULL ? &local_letters : NULL);
- for (i = 1; i < max_loop; i++)
- {
- local_letters = 0;
- local_rating = 0;
+ local_letters = best_letters;
+ local_rating = best_rating;
- reset_all_in_bit_field(seen);
+ first = raw->static_bin_data;
- for (k = 0; k < maxsize; k++)
- local_rating += rate_byte_quality(raw->data[i + k], seen, ptr_letters);
+ for (i = 0; i < max_loop; i++)
+ {
+ local_rating += rate_byte_quality(*last++, seen, &uniq, ptr_letters);
+ local_rating -= rate_byte_quality(*first++, seen, &uniq, ptr_letters);
- local_rating += finish_quality_rating(seen, maxsize);
+ local_rating = finish_quality_rating(local_rating, uniq, maxsize);
if (local_rating > best_rating)
{
- atom->pos = i;
+ atom->pos = maxsize + i;
best_letters = local_letters;
best_rating = local_rating;
@@ -239,8 +301,6 @@ void find_best_atom(const sized_binary_t *raw, size_t maxsize, tracked_scan_atom
/* Conclusion */
- delete_bit_field(seen);
-
atom->rem = raw->len - atom->pos - maxsize;
atom->fast_check = false;
@@ -447,7 +507,7 @@ bool enroll_prepared_atom(const sized_binary_t *raw, GEngineBackend *backend, tr
bool result; /* Statut à retourner */
const bin_t *data; /* Données à rechercher */
- data = raw->data + atom->pos;
+ data = raw->static_bin_data + atom->pos;
result = g_engine_backend_enroll_plain_pattern(backend, data, atom->len, atom->tmp_id);
diff --git a/src/analysis/scan/patterns/tokens/atom.h b/src/analysis/scan/patterns/tokens/atom.h
index 1d912d7..1ef8f40 100644
--- a/src/analysis/scan/patterns/tokens/atom.h
+++ b/src/analysis/scan/patterns/tokens/atom.h
@@ -30,7 +30,6 @@
#include "../backend.h"
#include "../../../../arch/vmpa.h"
-#include "../../../../common/bits.h"
#include "../../../../common/szstr.h"
@@ -51,10 +50,13 @@ typedef struct _tracked_scan_atom_t
} tracked_scan_atom_t;
/* Note l'intêret de rechercher un octet particulier. */
-int rate_byte_quality(bin_t, bitfield_t *, size_t *);
+int rate_byte_quality(bin_t, uint8_t *, size_t *, size_t *);
+
+/* Annihile l'intérêt de rechercher un octet particulier. */
+int unrate_byte_quality(bin_t, uint8_t *, size_t *, size_t *);
/* Termine la notation d'un ensemble d'octets. */
-int finish_quality_rating(const bitfield_t *, size_t);
+int finish_quality_rating(int, size_t, size_t);
/* Détermine la portion idéale de recherche. */
void find_best_atom(const sized_binary_t *, size_t , tracked_scan_atom_t *, size_t *);