From d122453f3ce11f45a63fb870e889f4ce91b34527 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 10 Oct 2023 08:11:17 +0200
Subject: Introduce a "wide" function in order to deal with UTF-16 in match
 conditions.

---
 src/analysis/scan/core.c                   |   2 +
 src/analysis/scan/items/string/Makefile.am |   3 +-
 src/analysis/scan/items/string/wide.c      | 270 +++++++++++++++++++++++++++++
 src/analysis/scan/items/string/wide.h      |  58 +++++++
 tests/analysis/scan/functions.py           |  16 ++
 5 files changed, 348 insertions(+), 1 deletion(-)
 create mode 100644 src/analysis/scan/items/string/wide.c
 create mode 100644 src/analysis/scan/items/string/wide.h

diff --git a/src/analysis/scan/core.c b/src/analysis/scan/core.c
index 461bfdc..fdee1c7 100644
--- a/src/analysis/scan/core.c
+++ b/src/analysis/scan/core.c
@@ -41,6 +41,7 @@
 #include "items/string/lower.h"
 #include "items/string/to_int.h"
 #include "items/string/upper.h"
+#include "items/string/wide.h"
 #include "items/time/make.h"
 #include "items/time/now.h"
 #include "patterns/modifiers/hex.h"
@@ -320,6 +321,7 @@ bool populate_main_scan_namespace(GScanNamespace *space)
         if (result) result = REGISTER_FUNC(ns, g_scan_string_lower_function_new());
         if (result) result = REGISTER_FUNC(ns, g_scan_string_to_int_function_new());
         if (result) result = REGISTER_FUNC(ns, g_scan_string_upper_function_new());
+        if (result) result = REGISTER_FUNC(ns, g_scan_string_wide_function_new());
 
         g_object_unref(G_OBJECT(ns));
 
diff --git a/src/analysis/scan/items/string/Makefile.am b/src/analysis/scan/items/string/Makefile.am
index c9ce6a3..6f8d6c5 100644
--- a/src/analysis/scan/items/string/Makefile.am
+++ b/src/analysis/scan/items/string/Makefile.am
@@ -5,7 +5,8 @@ noinst_LTLIBRARIES  = libanalysisscanitemsstring.la
 libanalysisscanitemsstring_la_SOURCES =		\
 	lower.h lower.c							\
 	to_int.h to_int.c						\
-	upper.h upper.c
+	upper.h upper.c							\
+	wide.h wide.c
 
 libanalysisscanitemsstring_la_CFLAGS = $(LIBGOBJ_CFLAGS)
 
diff --git a/src/analysis/scan/items/string/wide.c b/src/analysis/scan/items/string/wide.c
new file mode 100644
index 0000000..378f21c
--- /dev/null
+++ b/src/analysis/scan/items/string/wide.c
@@ -0,0 +1,270 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * wide.c - bascule de texte ASCII en UTF-16
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Chrysalide.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "wide.h"
+
+
+#include <ctype.h>
+
+
+#include "../../item-int.h"
+#include "../../exprs/literal.h"
+
+
+
+/* ------------------------- INTRODUCTION OF A NEW FUNCTION ------------------------- */
+
+
+/* Initializes the class of ASCII to UTF-16 text converters. */
+static void g_scan_string_wide_function_class_init(GScanStringWideFunctionClass *);
+
+/* Initializes an ASCII to UTF-16 text converter instance. */
+static void g_scan_string_wide_function_init(GScanStringWideFunction *);
+
+/* Releases all external references. */
+static void g_scan_string_wide_function_dispose(GScanStringWideFunction *);
+
+/* Performs the complete release of the memory. */
+static void g_scan_string_wide_function_finalize(GScanStringWideFunction *);
+
+
+
+/* ---------------------- IMPLEMENTATION OF THE CLASS FUNCTIONS --------------------- */
+
+
+/* Provides the name associated with an evaluation expression. */
+static char *g_scan_string_wide_function_get_name(const GScanStringWideFunction *);
+
+/* Builds the wide form of a string literal: each byte plus a zero byte. */
+static bool g_scan_string_wide_function_run_call(GScanStringWideFunction *, GScanExpression **, size_t, GScanContext *, GScanScope *, GObject **);
+
+
+
+/* ---------------------------------------------------------------------------------- */
+/*                           INTRODUCTION OF A NEW FUNCTION                           */
+/* ---------------------------------------------------------------------------------- */
+
+
+/* Provides the type defined for an ASCII to UTF-16 text conversion. */
+G_DEFINE_TYPE(GScanStringWideFunction, g_scan_string_wide_function, G_TYPE_SCAN_REGISTERED_ITEM);
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : klass = class to initialize.                                 *
+*                                                                             *
+*  Description : Initializes the class of ASCII to UTF-16 text converters.    *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : dispose reuses the GObjectFinalizeFunc cast (see '!').       *
+*                                                                             *
+******************************************************************************/
+
+static void g_scan_string_wide_function_class_init(GScanStringWideFunctionClass *klass)
+{
+    GObjectClass *object;                   /* Class as a GObjectClass     */
+    GScanRegisteredItemClass *registered;   /* Parent class version        */
+
+    object = G_OBJECT_CLASS(klass);
+
+    object->dispose = (GObjectFinalizeFunc/* ! */)g_scan_string_wide_function_dispose;
+    object->finalize = (GObjectFinalizeFunc)g_scan_string_wide_function_finalize;
+
+    registered = G_SCAN_REGISTERED_ITEM_CLASS(klass);
+
+    registered->get_name = (get_registered_item_name_fc)g_scan_string_wide_function_get_name;
+    registered->run_call = (run_registered_item_call_fc)g_scan_string_wide_function_run_call;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : func = instance to initialize.                               *
+*                                                                             *
+*  Description : Initializes an ASCII to UTF-16 text converter instance.      *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static void g_scan_string_wide_function_init(GScanStringWideFunction *func)
+{
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : func = GLib object instance to process.                      *
+*                                                                             *
+*  Description : Releases all external references.                            *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : only chains up to the parent class dispose handler.          *
+*                                                                             *
+******************************************************************************/
+
+static void g_scan_string_wide_function_dispose(GScanStringWideFunction *func)
+{
+    G_OBJECT_CLASS(g_scan_string_wide_function_parent_class)->dispose(G_OBJECT(func));
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : func = GLib object instance to process.                      *
+*                                                                             *
+*  Description : Performs the complete release of the memory.                 *
+*                                                                             *
+*  Return      : -                                                            *
+*                                                                             *
+*  Remarks     : only chains up to the parent class finalize handler.         *
+*                                                                             *
+******************************************************************************/
+
+static void g_scan_string_wide_function_finalize(GScanStringWideFunction *func)
+{
+    G_OBJECT_CLASS(g_scan_string_wide_function_parent_class)->finalize(G_OBJECT(func));
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : -                                                            *
+*                                                                             *
+*  Description : Creates an ASCII to UTF-16 text conversion function.         *
+*                                                                             *
+*  Return      : Newly created function.                                      *
+*                                                                             *
+*  Remarks     : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+GScanRegisteredItem *g_scan_string_wide_function_new(void)
+{
+    GScanRegisteredItem *result;            /* Instance to return          */
+
+    result = g_object_new(G_TYPE_SCAN_STRING_WIDE_FUNCTION, NULL);
+
+    return result;
+
+}
+
+
+
+/* ---------------------------------------------------------------------------------- */
+/*                       IMPLEMENTATION OF THE CLASS FUNCTIONS                        */
+/* ---------------------------------------------------------------------------------- */
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : item = call item to query.                                   *
+*                                                                             *
+*  Description : Provides the name associated with an evaluation expression.  *
+*                                                                             *
+*  Return      : Human-readable name, as a string allocated by strdup().      *
+*                                                                             *
+*  Remarks     : presumably the caller frees the copy - TODO confirm.         *
+*                                                                             *
+******************************************************************************/
+
+static char *g_scan_string_wide_function_get_name(const GScanStringWideFunction *item)
+{
+    char *result;                           /* Name to return              */
+
+    result = strdup("wide");
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Parameters  : item  = call item to query.                                  *
+*                args  = list of provided arguments, if any.                  *
+*                count = size of this list.                                   *
+*                ctx   = context tracking the current analysis.               *
+*                scope = current scope of the local variables.                *
+*                out   = location receiving the computed result. [OUT]        *
+*                                                                             *
+*  Description : Builds the wide form of a string: each byte plus a zero.     *
+*                                                                             *
+*  Return      : true if a new literal was stored in out, false otherwise.    *
+*                                                                             *
+*  Remarks     : fails on bad arguments or on memory allocation failure.      *
+*                                                                             *
+******************************************************************************/
+
+static bool g_scan_string_wide_function_run_call(GScanStringWideFunction *item, GScanExpression **args, size_t count, GScanContext *ctx, GScanScope *scope, GObject **out)
+{
+    bool result;                            /* Status to return            */
+    GScanLiteralExpression *literal;        /* More convenient version     */
+    const sized_string_t *string;           /* Input string description    */
+    sized_string_t new;                     /* Transformed description     */
+    size_t i;                               /* Loop index                  */
+
+    /* Arguments validation: exactly one string literal is expected */
+
+    result = (count == 1);
+    if (!result) goto exit;
+
+    result = G_IS_SCAN_LITERAL_EXPRESSION(args[0]);
+    if (!result) goto exit;
+
+    literal = G_SCAN_LITERAL_EXPRESSION(args[0]);
+
+    result = (g_scan_literal_expression_get_value_type(literal) == LVT_STRING);
+    if (!result) goto exit;
+
+    result = g_scan_literal_expression_get_string_value(literal, &string);
+    if (!result) goto exit;
+
+    /* Expected operation: calloc() zeroes the buffer, so odd bytes stay null */
+
+    new.len = string->len * 2;
+    new.data = calloc(new.len, sizeof(bin_t));
+
+    result = (new.data != NULL || new.len == 0);    /* "" may yield NULL */
+    if (!result) goto exit;
+
+    for (i = 0; i < string->len; i++)
+        new.data[i * 2] = string->data[i];
+
+    *out = G_OBJECT(g_scan_literal_expression_new(LVT_STRING, &new));
+
+    exit_szstr(&new);
+
+ exit:
+
+    return result;
+
+}
diff --git a/src/analysis/scan/items/string/wide.h b/src/analysis/scan/items/string/wide.h
new file mode 100644
index 0000000..65195bd
--- /dev/null
+++ b/src/analysis/scan/items/string/wide.h
@@ -0,0 +1,58 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * wide.h - prototypes pour la bascule de texte ASCII en UTF-16
+ *
+ * Copyright (C) 2023 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Chrysalide.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef _ANALYSIS_SCAN_ITEMS_STRING_WIDE_H
+#define _ANALYSIS_SCAN_ITEMS_STRING_WIDE_H
+
+
+#include <glib-object.h>
+
+
+#include "../../item.h"
+
+
+
+#define G_TYPE_SCAN_STRING_WIDE_FUNCTION            g_scan_string_wide_function_get_type()
+#define G_SCAN_STRING_WIDE_FUNCTION(obj)            (G_TYPE_CHECK_INSTANCE_CAST((obj), G_TYPE_SCAN_STRING_WIDE_FUNCTION, GScanStringWideFunction))
+#define G_IS_SCAN_STRING_WIDE_FUNCTION(obj)         (G_TYPE_CHECK_INSTANCE_TYPE((obj), G_TYPE_SCAN_STRING_WIDE_FUNCTION))
+#define G_SCAN_STRING_WIDE_FUNCTION_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST((klass), G_TYPE_SCAN_STRING_WIDE_FUNCTION, GScanStringWideFunctionClass))
+#define G_IS_SCAN_STRING_WIDE_FUNCTION_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), G_TYPE_SCAN_STRING_WIDE_FUNCTION))
+#define G_SCAN_STRING_WIDE_FUNCTION_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS((obj), G_TYPE_SCAN_STRING_WIDE_FUNCTION, GScanStringWideFunctionClass))
+
+
+/* ASCII to UTF-16 text conversion (instance) */
+typedef GScanRegisteredItem GScanStringWideFunction;
+
+/* ASCII to UTF-16 text conversion (class) */
+typedef GScanRegisteredItemClass GScanStringWideFunctionClass;
+
+
+/* Provides the type defined for an ASCII to UTF-16 text conversion. */
+GType g_scan_string_wide_function_get_type(void);
+
+/* Creates an ASCII to UTF-16 text conversion function. */
+GScanRegisteredItem *g_scan_string_wide_function_new(void);
+
+
+
+#endif  /* _ANALYSIS_SCAN_ITEMS_STRING_WIDE_H */
diff --git a/tests/analysis/scan/functions.py b/tests/analysis/scan/functions.py
index 96f029f..e936263 100644
--- a/tests/analysis/scan/functions.py
+++ b/tests/analysis/scan/functions.py
@@ -108,6 +108,7 @@ rule test {
 
         self.check_rule_success(rule)
 
+
         rule = '''
 rule test {
 
@@ -119,6 +120,7 @@ rule test {
 
         self.check_rule_success(rule)
 
+
         rule = '''
 rule test {
 
@@ -134,6 +136,19 @@ rule test {
         self.check_rule_success(rule)
 
 
+        rule = r'''
+rule test {
+
+   condition:
+      "A\x00B\x00C\x00D\x00" endswith string.wide("CD")
+          and "A\x00B\x00C\x00D\x00" contains string.wide("BC")
+
+}
+'''
+
+        self.check_rule_success(rule)
+
+
     def testTime(self):
         """Check current time."""
 
@@ -150,6 +165,7 @@ rule test {
 
         self.check_rule_success(rule)
 
+
         rule = '''
 rule test {
 
-- 
cgit v0.11.2-87-g4458