From 55faf115708f8448c1dbbb1798ac115e8021e45f Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Mon, 25 Oct 2021 00:13:40 +0200
Subject: Use the loaded content class hints in the DB archives.

---
 plugins/pychrysalide/analysis/db/analyst.c | 17 ++++++++---
 src/analysis/binary.c                      |  4 +--
 src/analysis/db/analyst.c                  | 48 ++++++++++++++++++++++--------
 src/analysis/db/analyst.h                  |  3 +-
 src/analysis/db/cdb.c                      | 27 +++++++++++++----
 src/analysis/db/cdb.h                      |  6 ++--
 src/analysis/db/server.c                   | 32 +++++++++++++++-----
 7 files changed, 102 insertions(+), 35 deletions(-)

diff --git a/plugins/pychrysalide/analysis/db/analyst.c b/plugins/pychrysalide/analysis/db/analyst.c
index bb9af30..289db31 100644
--- a/plugins/pychrysalide/analysis/db/analyst.c
+++ b/plugins/pychrysalide/analysis/db/analyst.c
@@ -37,6 +37,7 @@
 #include "client.h"
 #include "collection.h"
 #include "../content.h"
+#include "../loaded.h"
 #include "../../access.h"
 #include "../../helpers.h"
 #include "../../struct.h"
@@ -95,7 +96,9 @@ static PyObject *py_analyst_client_get_current_snapshot(PyObject *, void *);
 static PyObject *py_analyst_client_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *result;                       /* Instance à retourner        */
+    GLoadedContent *loaded;                 /* Contenu local déjà chargé   */
     const char *hash;                       /* Empreinte du binaire visé   */
+    const char *class;                      /* Nature du contenu analysé   */
     PyObject *list;                         /* Liste Python de collections */
     int ret;                                /* Bilan de lecture des args.  */
     Py_ssize_t length;                      /* Nombre d'éléments collectés */
@@ -113,11 +116,15 @@ static PyObject *py_analyst_client_new(PyTypeObject *type, PyObject *args, PyObj
     "\n"                                                                                \
     "Instances can be created using the following constructor:\n"                       \
     "\n"                                                                                \
-    "    AnalystClient(hash, list)"                                                     \
+    "    AnalystClient(hash, class, list, loaded=None)"                                 \
     "\n"                                                                                \
-    "Where hash is a SHA256 fingerprint of the studied binary and list is a list of"    \
+    "Where *hash* is a SHA256 fingerprint of the studied binary, *class* refers to"     \
+    " the nature description of the loaded content (as provided from"                   \
+    " pychrysalide.analysis.LoadedContent.content_class), *list* is a list of"          \
     " pychrysalide.analysis.db.DbCollection instances ; this kind of list can be"       \
     " retrived with the pychrysalide.analysis.LoadedBinary.collections attribute."      \
+    " The *loaded* object is an optional local already loaded content which has to"     \
+    " be a pychrysalide.analysis.LoadedContent instance or *None*."                     \
     "\n"                                                                                \
     "AnalystClient instances emit the following signals:\n"                             \
     "* 'snapshots-updated'\n"                                                           \
@@ -131,7 +138,9 @@ static PyObject *py_analyst_client_new(PyTypeObject *type, PyObject *args, PyObj
     "    Handlers are expected to have only one argument: the client managing the"      \
     "    snapshots."
 
-    ret = PyArg_ParseTuple(args, "sO", &hash, &list);
+    loaded = NULL;
+
+    ret = PyArg_ParseTuple(args, "ssO|O&", &hash, &class, &list, convert_to_loaded_content, &loaded);
     if (!ret) return NULL;
 
     if (!PySequence_Check(list))
@@ -164,7 +173,7 @@ static PyObject *py_analyst_client_new(PyTypeObject *type, PyObject *args, PyObj
 
     }
 
-    client = g_analyst_client_new(hash, collections);
+    client = g_analyst_client_new(hash, class, collections, loaded);
 
     if (client != NULL)
     {
diff --git a/src/analysis/binary.c b/src/analysis/binary.c
index 5d604be..9c3b3b9 100644
--- a/src/analysis/binary.c
+++ b/src/analysis/binary.c
@@ -609,7 +609,7 @@ static bool g_loaded_binary_connect_internal(GLoadedBinary *binary)
 
     /* Tentative de connexion */
 
-    binary->client = g_analyst_client_new(checksum, binary->collections);
+    binary->client = g_analyst_client_new(checksum, "NULL", binary->collections, NULL);
 
     result = g_hub_client_start_internal(G_HUB_CLIENT(binary->client));
 
@@ -646,7 +646,7 @@ static bool g_loaded_binary_connect_remote(GLoadedBinary *binary)
 
     /* Tentative de connexion */
 
-    binary->client = g_analyst_client_new(checksum, binary->collections);
+    binary->client = g_analyst_client_new(checksum, "NULL", binary->collections, NULL);
 
     result = g_hub_client_start_remote(G_HUB_CLIENT(binary->client),
                                        binary->remote_host, binary->remote_port, true);
diff --git a/src/analysis/db/analyst.c b/src/analysis/db/analyst.c
index 49585c2..ab12cc1 100644
--- a/src/analysis/db/analyst.c
+++ b/src/analysis/db/analyst.c
@@ -40,7 +40,10 @@ struct _GAnalystClient
 {
     GHubClient parent;                      /* A laisser en premier        */
 
-    char *hash;                             /* Empreinte du binaire lié    */
+    char *cnt_hash;                         /* Empreinte du binaire lié    */
+    char *cnt_class;                        /* Interprétation du contenu   */
+
+    GLoadedContent *loaded;                 /* Contenu chargé              */
     GList *collections;                     /* Collections d'un binaire    */
 
     bool can_get_updates;                   /* Réception de maj possibles ?*/
@@ -159,7 +162,10 @@ static void g_analyst_client_class_init(GAnalystClientClass *klass)
 
 static void g_analyst_client_init(GAnalystClient *client)
 {
-    client->hash = NULL;
+    client->cnt_hash = NULL;
+    client->cnt_class = NULL;
+
+    client->loaded = NULL;
     client->collections = NULL;
 
     client->can_get_updates = false;
@@ -195,6 +201,8 @@ static void g_analyst_client_dispose(GAnalystClient *client)
 
     g_mutex_clear(&client->snap_lock);
 
+    g_clear_object(&client->loaded);
+
     G_OBJECT_CLASS(g_analyst_client_parent_class)->dispose(G_OBJECT(client));
 
 }
@@ -216,8 +224,11 @@ static void g_analyst_client_finalize(GAnalystClient *client)
 {
     size_t i;                               /* Boucle de parcours          */
 
-    if (client->hash != NULL)
-        free(client->hash);
+    if (client->cnt_hash != NULL)
+        free(client->cnt_hash);
+
+    if (client->cnt_class != NULL)
+        free(client->cnt_class);
 
     if (client->snapshots != NULL)
     {
@@ -236,7 +247,9 @@ static void g_analyst_client_finalize(GAnalystClient *client)
 /******************************************************************************
 *                                                                             *
 *  Paramètres  : hash        = empreinte d'un binaire en cours d'analyse.     *
+*                class       = nature de l'interprétation de ce contenu.      *
 *                collections = ensemble de collections existantes.            *
+*                loaded      = éventuel élément local préchargé.              *
 *                                                                             *
 *  Description : Prépare un client pour une connexion à une BD.               *
 *                                                                             *
@@ -246,13 +259,18 @@ static void g_analyst_client_finalize(GAnalystClient *client)
 *                                                                             *
 ******************************************************************************/
 
-GAnalystClient *g_analyst_client_new(const char *hash, GList *collections)
+GAnalystClient *g_analyst_client_new(const char *hash, const char *class, GList *collections, GLoadedContent *loaded)
 {
     GAnalystClient *result;                     /* Adresse à retourner         */
 
     result = g_object_new(G_TYPE_ANALYST_CLIENT, NULL);
 
-    result->hash = strdup(hash);
+    result->cnt_hash = strdup(hash);
+    result->cnt_class = strdup(class);
+
+    result->loaded = loaded;
+    if (loaded != NULL) g_object_ref(G_OBJECT(loaded));
+
     result->collections = collections;
 
     return result;
@@ -278,7 +296,13 @@ static bool g_analyst_client_complete_hello(GAnalystClient *client, packed_buffe
     bool result;                            /* Bilan à retourner           */
     rle_string str;                         /* Chaîne à communiquer        */
 
-    init_static_rle_string(&str, client->hash);
+    init_static_rle_string(&str, client->cnt_hash);
+
+    result = pack_rle_string(&str, pbuf);
+    exit_rle_string(&str);
+    if (!result) return false;              /* Hash not packed: give up    */
+
+    init_static_rle_string(&str, client->cnt_class);
 
     result = pack_rle_string(&str, pbuf);
 
@@ -414,10 +438,10 @@ static void *g_analyst_client_update(GAnalystClient *client)
                     error = tmp32;
 
                     if (error == DBE_NONE)
-                        log_variadic_message(LMT_INFO, _("Archive saved for binary '%s'"), client->hash);
+                        log_variadic_message(LMT_INFO, _("Archive saved for binary '%s'"), client->cnt_hash);
                     else
                         log_variadic_message(LMT_ERROR, _("Failed to save the archive for binary '%s'"),
-                                             client->hash);
+                                             client->cnt_hash);
 
                     break;
 
@@ -669,10 +693,10 @@ bool g_analyst_client_send_content(GAnalystClient *client, GBinContent *content)
 
     hash = g_binary_content_get_checksum(content);
 
-    if (strcmp(hash, client->hash) != 0)
+    if (strcmp(hash, client->cnt_hash) != 0)
     {
         log_variadic_message(LMT_ERROR, _("Provided ontent does not match client content (hash: '%s')"),
-                             client->hash);
+                             client->cnt_hash);
         goto exit;
     }
 
@@ -680,7 +704,7 @@ bool g_analyst_client_send_content(GAnalystClient *client, GBinContent *content)
 
     init_packed_buffer(&cnt_pbuf);
 
-    storage = g_object_storage_new(client->hash);
+    storage = g_object_storage_new(client->cnt_hash);
 
     result = g_object_storage_store_object(storage, "contents", G_SERIALIZABLE_OBJECT(content), &pos);
     if (!result) goto exit_with_failure;
diff --git a/src/analysis/db/analyst.h b/src/analysis/db/analyst.h
index 7b11f53..9f7b32b 100644
--- a/src/analysis/db/analyst.h
+++ b/src/analysis/db/analyst.h
@@ -34,6 +34,7 @@
 #include "collection.h"
 #include "misc/snapshot.h"
 #include "../content.h"
+#include "../loaded.h"
 
 
 
@@ -56,7 +57,7 @@ typedef struct _GAnalystClientClass GAnalystClientClass;
 GType g_analyst_client_get_type(void);
 
 /* Prépare un client pour une connexion à une BD. */
-GAnalystClient *g_analyst_client_new(const char *, GList *);
+GAnalystClient *g_analyst_client_new(const char *, const char *, GList *, GLoadedContent *);
 
 /* Envoie un contenu binaire pour conservation côté serveur. */
 bool g_analyst_client_send_content(GAnalystClient *, GBinContent *);
diff --git a/src/analysis/db/cdb.c b/src/analysis/db/cdb.c
index 62de6a7..08410c4 100644
--- a/src/analysis/db/cdb.c
+++ b/src/analysis/db/cdb.c
@@ -80,6 +80,7 @@ struct _GCdbArchive
     GServerBackend parent;                  /* A laisser en premier        */
 
     rle_string hash;                        /* Empreinte cryptographique   */
+    rle_string class;                       /* Nature du contenu analysé   */
 
     char *filename;                         /* Chemin d'accès à l'archive  */
     char *tmpdir;                           /* Répertoire de travail       */
@@ -235,6 +236,7 @@ static void g_cdb_archive_class_init(GCdbArchiveClass *klass)
 static void g_cdb_archive_init(GCdbArchive *archive)
 {
     setup_empty_rle_string(&archive->hash);
+    setup_empty_rle_string(&archive->class);
 
     archive->filename = NULL;
     archive->tmpdir = NULL;
@@ -315,6 +317,7 @@ static void g_cdb_archive_finalize(GCdbArchive *archive)
     if (archive->filename != NULL)
         free(archive->filename);
 
+    exit_rle_string(&archive->class);
     exit_rle_string(&archive->hash);
 
     G_OBJECT_CLASS(g_cdb_archive_parent_class)->finalize(G_OBJECT(archive));
@@ -327,6 +330,7 @@ static void g_cdb_archive_finalize(GCdbArchive *archive)
 *  Paramètres  : basedir = répertoire de stockage des enregistrements.        *
 *                tmpdir  = répertoire de travail temporaire.                  *
 *                hash    = empreinte du binaire à représenter.                *
+*                class   = nature du contenu analysé associé.                 *
 *                error   = indication éventuelle en cas d'échec. [OUT]        *
 *                                                                             *
 *  Description : Définit ou ouvre une archive d'éléments utilisateur.         *
@@ -338,7 +342,7 @@ static void g_cdb_archive_finalize(GCdbArchive *archive)
 *                                                                             *
 ******************************************************************************/
 
-GCdbArchive *g_cdb_archive_new(const char *basedir, const char *tmpdir, const rle_string *hash, DBError *error)
+GCdbArchive *g_cdb_archive_new(const char *basedir, const char *tmpdir, const rle_string *hash, const rle_string *class, DBError *error)
 {
     GCdbArchive *result;                    /* Adresse à retourner         */
     int ret;                                /* Retour d'un appel           */
@@ -347,13 +351,16 @@ GCdbArchive *g_cdb_archive_new(const char *basedir, const char *tmpdir, const rl
     result = g_object_new(G_TYPE_CDB_ARCHIVE, NULL);
 
     dup_into_rle_string(&result->hash, get_rle_string(hash));
+    dup_into_rle_string(&result->class, get_rle_string(class));
 
     *error = DBE_SYS_ERROR;
 
     /* Chemin de l'archive */
 
     result->filename = strdup(basedir);
-    result->filename = stradd(result->filename, hash->data);
+    result->filename = stradd(result->filename, get_rle_string(hash));
+    result->filename = stradd(result->filename, "-");
+    result->filename = stradd(result->filename, get_rle_string(class));
     result->filename = stradd(result->filename, ".cdb.tar.xz");
 
     if (!mkpath(result->filename))
@@ -679,18 +686,26 @@ DBError g_cdb_archive_write(const GCdbArchive *archive)
 *                                                                             *
 *  Paramètres  : archive = informations quant à l'archive à consulter.        *
 *                hash    = empreinte extérieure à comparer.                   *
+*                class   = nature du contenu analysé.                         *
 *                                                                             *
-*  Description : Détermine si une empreinte correspond à celle d'une archive. *
+*  Description : Détermine si l'archive correspond à une cible recherchée.    *
 *                                                                             *
-*  Retour      : Résultat de la comparaison : -1, 0 ou 1.                     *
+*  Retour      : Bilan de l'opération.                                        *
 *                                                                             *
 *  Remarques   : -                                                            *
 *                                                                             *
 ******************************************************************************/
 
-int g_cdb_archive_compare_hash(const GCdbArchive *archive, const rle_string *hash)
+bool g_cdb_archive_compare_is_suitable_for(const GCdbArchive *archive, const rle_string *hash, const rle_string *class)
 {
-    return cmp_rle_string(&archive->hash, hash);
+    bool result;                            /* Bilan à retourner           */
+
+    result = (cmp_rle_string(&archive->hash, hash) == 0);
+
+    if (result)
+        result = (cmp_rle_string(&archive->class, class) == 0);
+
+    return result;
 
 }
 
diff --git a/src/analysis/db/cdb.h b/src/analysis/db/cdb.h
index b2c3fc3..7a557f2 100644
--- a/src/analysis/db/cdb.h
+++ b/src/analysis/db/cdb.h
@@ -54,7 +54,7 @@ typedef struct _GCdbArchiveClass GCdbArchiveClass;
 GType g_cdb_archive_get_type(void);
 
 /* Prépare un client pour une connexion à une BD. */
-GCdbArchive *g_cdb_archive_new(const char *, const char *, const rle_string *, DBError *);
+GCdbArchive *g_cdb_archive_new(const char *, const char *, const rle_string *, const rle_string *, DBError *);
 
 /* Construit un chemin pour un fichier propre à l'archive. */
 char *g_cdb_archive_get_tmp_filename(const GCdbArchive *, const char *);
@@ -62,8 +62,8 @@ char *g_cdb_archive_get_tmp_filename(const GCdbArchive *, const char *);
 /* Enregistre une archive avec tous les éléments à conserver. */
 DBError g_cdb_archive_write(const GCdbArchive *);
 
-/* Détermine si une empreinte correspond à celle d'une archive. */
-int g_cdb_archive_compare_hash(const GCdbArchive *, const rle_string *);
+/* Détermine si l'archive correspond à une cible recherchée. */
+bool g_cdb_archive_compare_is_suitable_for(const GCdbArchive *, const rle_string *, const rle_string *);
 
 
 
diff --git a/src/analysis/db/server.c b/src/analysis/db/server.c
index b08962c..5c6fd18 100644
--- a/src/analysis/db/server.c
+++ b/src/analysis/db/server.c
@@ -1028,7 +1028,7 @@ static GServerBackend *g_hub_server_handle_admin(GHubServer *server, packed_buff
 
     if (has_more_data_in_packed_buffer(in_pbuf))
     {
-        log_variadic_message(LMT_ERROR, _("The client from '%s' provided to much data!"), peer_name);
+        log_variadic_message(LMT_ERROR, _("The client from '%s' provided too much data!"), peer_name);
 
         result = NULL;
 
@@ -1074,6 +1074,7 @@ static GServerBackend *g_hub_server_handle_analyst(GHubServer *server, packed_bu
 {
     GCdbArchive *result;                    /* Support de suivi à retourner*/
     rle_string hash;                        /* Empreinte du binaire visé   */
+    rle_string class;                       /* Nature du contenu visé      */
     bool status;                            /* Bilan d'une opération       */
     GList *iter;                            /* Boucle de parcours          */
     GCdbArchive *archive;                   /* Destinataire final du client*/
@@ -1097,13 +1098,26 @@ static GServerBackend *g_hub_server_handle_analyst(GHubServer *server, packed_bu
     if (is_rle_string_empty(&hash))
     {
         log_variadic_message(LMT_ERROR, _("The submitted binary hash from '%s' is empty!"), peer_name);
-        goto wrong_receiving;
+        goto wrong_receiving_0;
+    }
+
+    status = unpack_rle_string(&class, in_pbuf);
+    if (!status)
+    {
+        log_variadic_message(LMT_ERROR, _("Error while getting the content class from '%s'..."), peer_name);
+        goto wrong_receiving_0;
+    }
+
+    if (is_rle_string_empty(&class))
+    {
+        log_variadic_message(LMT_ERROR, _("The submitted content class from '%s' is empty!"), peer_name);
+        goto wrong_receiving_1;
     }
 
     if (has_more_data_in_packed_buffer(in_pbuf))
     {
-        log_variadic_message(LMT_ERROR, _("The client from '%s' provided to much data!"), peer_name);
-        goto wrong_receiving;
+        log_variadic_message(LMT_ERROR, _("The client from '%s' provided too much data!"), peer_name);
+        goto wrong_receiving_1;
     }
 
     /* Recherche d'un support existant adapté */
@@ -1114,7 +1128,7 @@ static GServerBackend *g_hub_server_handle_analyst(GHubServer *server, packed_bu
     {
         archive = G_CDB_ARCHIVE(iter->data);
 
-        if (g_cdb_archive_compare_hash(archive, &hash) == 0)
+        if (g_cdb_archive_compare_is_suitable_for(archive, &hash, &class))
             break;
 
     }
@@ -1137,7 +1151,7 @@ static GServerBackend *g_hub_server_handle_analyst(GHubServer *server, packed_bu
         tmpdir = strdup(server->working);
         tmpdir = stradd(tmpdir, "tmp" G_DIR_SEPARATOR_S);
 
-        result = g_cdb_archive_new(basedir, tmpdir, &hash, error);
+        result = g_cdb_archive_new(basedir, tmpdir, &hash, &class, error);
 
         free(tmpdir);
         free(basedir);
@@ -1146,7 +1160,11 @@ static GServerBackend *g_hub_server_handle_analyst(GHubServer *server, packed_bu
 
     }
 
- wrong_receiving:
+ wrong_receiving_1:
+
+    exit_rle_string(&class);
+
+ wrong_receiving_0:
 
     exit_rle_string(&hash);
 
-- 
cgit v0.11.2-87-g4458