From eb470f2e5e790ba107171a3ae8c5ed27a72ed8f8 Mon Sep 17 00:00:00 2001
From: Cyrille Bagard <nocbos@gmail.com>
Date: Tue, 9 Mar 2021 22:54:43 +0100
Subject: Implement the Pearson hash method.

---
 plugins/pychrysalide/common/Makefile.am |   3 +-
 plugins/pychrysalide/common/module.c    |   2 +
 plugins/pychrysalide/common/pearson.c   | 179 ++++++++++++++++++++++++++++++++
 plugins/pychrysalide/common/pearson.h   |  39 +++++++
 src/common/Makefile.am                  |   1 +
 src/common/pearson.c                    | 106 +++++++++++++++++++
 src/common/pearson.h                    |  46 ++++++++
 tests/common/pearson.py                 |  50 +++++++++
 8 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 plugins/pychrysalide/common/pearson.c
 create mode 100644 plugins/pychrysalide/common/pearson.h
 create mode 100644 src/common/pearson.c
 create mode 100644 src/common/pearson.h
 create mode 100644 tests/common/pearson.py

diff --git a/plugins/pychrysalide/common/Makefile.am b/plugins/pychrysalide/common/Makefile.am
index 66e7622..5f54fe8 100644
--- a/plugins/pychrysalide/common/Makefile.am
+++ b/plugins/pychrysalide/common/Makefile.am
@@ -7,7 +7,8 @@ libpychrysacommon_la_SOURCES =			\
 	leb128.h leb128.c					\
 	module.h module.c					\
 	packed.h packed.c					\
-	pathname.h pathname.c
+	pathname.h pathname.c				\
+	pearson.h pearson.c
 
 libpychrysacommon_la_LDFLAGS = 
 
diff --git a/plugins/pychrysalide/common/module.c b/plugins/pychrysalide/common/module.c
index 865b7e2..cc50c43 100644
--- a/plugins/pychrysalide/common/module.c
+++ b/plugins/pychrysalide/common/module.c
@@ -30,6 +30,7 @@
 #include "leb128.h"
 #include "packed.h"
 #include "pathname.h"
+#include "pearson.h"
 #include "../helpers.h"
 
 
@@ -98,6 +99,7 @@ bool populate_common_module(void)
     if (result) result = populate_common_module_with_fnv1a();
     if (result) result = populate_common_module_with_leb128();
     if (result) result = populate_common_module_with_pathname();
+    if (result) result = populate_common_module_with_pearson();
 
     if (result) result = ensure_python_bitfield_is_registered();
     if (result) result = ensure_python_packed_buffer_is_registered();
diff --git a/plugins/pychrysalide/common/pearson.c b/plugins/pychrysalide/common/pearson.c
new file mode 100644
index 0000000..99c2d27
--- /dev/null
+++ b/plugins/pychrysalide/common/pearson.c
@@ -0,0 +1,179 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.c - équivalent Python du fichier "common/pearson.c"
+ *
+ * Copyright (C) 2018-2019 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include "pearson.h"
+
+
+#include <pygobject.h>
+
+
+#include <i18n.h>
+#include <common/pearson.h>
+
+
+#include "../access.h"
+#include "../helpers.h"
+
+
+
+/* Fournit les permutations par défaut par Pearson. */
+static PyObject *py_pearson_permutations(PyObject *, PyObject *);
+
+/* Détermine l'empreinte Pearson d'une chaîne de caractères. */
+static PyObject *py_pearson(PyObject *, PyObject *);
+
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : self = NULL car méthode statique.                            *
+*                args = adresse non utilisée ici, en l'absence d'argument.    *
+*                                                                             *
+*  Description : Fournit les permutations par défaut par Pearson.             *
+*                                                                             *
+*  Retour      : Table de valeurs utilisées par défaut.                       *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static PyObject *py_pearson_permutations(PyObject *self, PyObject *args)
+{
+    PyObject *result;                       /* Instance à retourner        */
+    const char *table;                      /* Eventuelle table à utiliser */
+
+#define PEARSON_PERMUTATIONS_METHOD PYTHON_METHOD_DEF   \
+(                                                       \
+    pearson_permutations, "",                           \
+    METH_NOARGS, py,                                    \
+    "Provide the default pseudorandom permutations"     \
+    " for the Pearson hash computation.\n"              \
+    "\n"                                                \
+    "The result is 256-byte value."                     \
+)
+
+    table = get_pearson_permutations();
+
+    result = PyBytes_FromStringAndSize(table, 256);
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : self = NULL car méthode statique.                            *
+*                args = arguments fournis lors de l'appel à la fonction.      *
+*                                                                             *
+*  Description : Détermine l'empreinte Pearson d'une chaîne de caractères.    *
+*                                                                             *
+*  Retour      : Numéro de révision.                                          *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+static PyObject *py_pearson(PyObject *self, PyObject *args)
+{
+    PyObject *result;                       /* Instance à retourner        */
+    const char *str;                        /* Chaîne à traiter.           */
+    PyObject *bytes;                        /* Tableau d'octets            */
+    int ret;                                /* Bilan de lecture des args.  */
+    const char *table;                      /* Eventuelle table à utiliser */
+    uint8_t value;                          /* Empreinte calculée          */
+
+#define PEARSON_METHOD PYTHON_METHOD_DEF                        \
+(                                                               \
+    pearson, "str, /, table",                                   \
+    METH_VARARGS, py,                                           \
+    "Compute the Pearson hash of a given string.\n"             \
+    "\n"                                                        \
+    "The default pseudorandom permutations are used if"         \
+    " no *table* of 256 bytes is provided.\n"                   \
+    "\n"                                                        \
+    "A table of permutations can be created with this call:\n"  \
+    "  bytes(sample(list(range(0, 256)), k=256))\n"             \
+    "\n"                                                        \
+    "The result is 8-bit integer value."                        \
+)
+
+    bytes = NULL;
+
+    ret = PyArg_ParseTuple(args, "s|O!", &str, &PyBytes_Type, &bytes);
+    if (!ret) return NULL;
+
+    if (bytes != NULL)
+    {
+        if (PyBytes_Size(bytes) != 256)
+        {
+            PyErr_SetString(PyExc_ValueError, _("256 bytes are required for the custom table."));
+            return NULL;
+        }
+
+        table = PyBytes_AsString(bytes);
+
+        value = pearson_hash(str, table);
+
+    }
+    else
+        value = pearson_hash(str, NULL);
+
+    result = Py_BuildValue("B", (unsigned char)value);
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : -                                                            *
+*                                                                             *
+*  Description : Définit une extension du module 'common' à compléter.        *
+*                                                                             *
+*  Retour      : Bilan de l'opération.                                        *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+bool populate_common_module_with_pearson(void)
+{
+    bool result;                            /* Bilan à retourner           */
+    PyObject *module;                       /* Module à recompléter        */
+
+    static PyMethodDef py_pearson_methods[] = {
+        PEARSON_PERMUTATIONS_METHOD,
+        PEARSON_METHOD,
+        { NULL }
+    };
+
+    module = get_access_to_python_module("pychrysalide.common");
+
+    result = register_python_module_methods(module, py_pearson_methods);
+
+    return result;
+
+}
diff --git a/plugins/pychrysalide/common/pearson.h b/plugins/pychrysalide/common/pearson.h
new file mode 100644
index 0000000..caff72a
--- /dev/null
+++ b/plugins/pychrysalide/common/pearson.h
@@ -0,0 +1,39 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.h - prototypes pour l'équivalent Python du fichier "common/pearson.c"
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H
+#define _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H
+
+
+#include <Python.h>
+#include <stdbool.h>
+
+
+
+/* Définit une extension du module 'common' à compléter. */
+bool populate_common_module_with_pearson(void);
+
+
+
+#endif  /* _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H */
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index fc313fb..2162d8c 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -21,6 +21,7 @@ libcommon_la_SOURCES =					\
 	net.h net.c							\
 	packed.h packed.c					\
 	pathname.h pathname.c				\
+	pearson.h pearson.c					\
 	shuffle.h shuffle.c					\
 	sort.h sort.c						\
 	sqlite.h sqlite.c					\
diff --git a/src/common/pearson.c b/src/common/pearson.c
new file mode 100644
index 0000000..5607a0c
--- /dev/null
+++ b/src/common/pearson.c
@@ -0,0 +1,106 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.c - implémentaton du calcul rapide d'empreintes de chaînes
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Chrysalide.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "pearson.h"
+
+
+#include <stddef.h>
+
+
+
+/* Permutations proposées */
+static const char _pearson_permutations[256] = {
+
+      1,  87,  49,  12, 176, 178, 102, 166, 121, 193,   6,  84, 249, 230,  44, 163,
+     14, 197, 213, 181, 161,  85, 218,  80,  64, 239,  24, 226, 236, 142,  38, 200,
+    110, 177, 104, 103, 141, 253, 255,  50,  77, 101,  81,  18,  45,  96,  31, 222,
+     25, 107, 190,  70,  86, 237, 240,  34,  72, 242,  20, 214, 244, 227, 149, 235,
+     97, 234,  57,  22,  60, 250,  82, 175, 208,   5, 127, 199, 111,  62, 135, 248,
+    174, 169, 211,  58,  66, 154, 106, 195, 245, 171,  17, 187, 182, 179,   0, 243,
+    132,  56, 148,  75, 128, 133, 158, 100, 130, 126,  91,  13, 153, 246, 216, 219,
+    119,  68, 223,  78,  83,  88, 201,  99, 122,  11,  92,  32, 136, 114,  52,  10,
+    138,  30,  48, 183, 156,  35,  61,  26, 143,  74, 251,  94, 129, 162,  63, 152,
+    170,   7, 115, 167, 241, 206,   3, 150,  55,  59, 151, 220,  90,  53,  23, 131,
+    125, 173,  15, 238,  79,  95,  89,  16, 105, 137, 225, 224, 217, 160,  37, 123,
+    118,  73,   2, 157,  46, 116,   9, 145, 134, 228, 207, 212, 202, 215,  69, 229,
+     27, 188,  67, 124, 168, 252,  42,   4,  29, 108,  21, 247,  19, 205,  39, 203,
+    233,  40, 186, 147, 198, 192, 155,  33, 164, 191,  98, 204, 165, 180, 117,  76,
+    140,  36, 210, 172,  41,  54, 159,   8, 185, 232, 113, 196, 231,  47, 146, 120,
+     51,  65,  28, 144, 254, 221,  93, 189, 194, 139, 112,  43,  71, 109, 184, 209
+
+};
+
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : -                                                            *
+*                                                                             *
+*  Description : Fournit les permutations par défaut par Pearson.             *
+*                                                                             *
+*  Retour      : Table de valeurs utilisées par défaut.                       *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+const char *get_pearson_permutations(void)
+{
+    const char *result;                     /* Table à renvoyer            */
+
+    result = _pearson_permutations;
+
+    return result;
+
+}
+
+
+/******************************************************************************
+*                                                                             *
+*  Paramètres  : str = chaîne de caractères à traiter.                        *
+*                table = permutations à employer, NULL pour celles par défaut.*
+*                                                                             *
+*  Description : Détermine l'empreinte Pearson d'une chaîne de caractères.    *
+*                                                                             *
+*  Retour      : Valeur calculée.                                             *
+*                                                                             *
+*  Remarques   : -                                                            *
+*                                                                             *
+******************************************************************************/
+
+uint8_t pearson_hash(const char *str, const char *table)
+{
+    uint8_t result;                         /* Valeur à retourner          */
+    unsigned char *iter;                    /* Boucle de parcours          */
+
+    if (table == NULL)
+        table = _pearson_permutations;
+
+    result = 0;
+
+    for (iter = (unsigned char *)str; *iter; iter++)
+        result = table[result ^ (*iter)];
+
+    return result;
+
+}
diff --git a/src/common/pearson.h b/src/common/pearson.h
new file mode 100644
index 0000000..09f45cf
--- /dev/null
+++ b/src/common/pearson.h
@@ -0,0 +1,46 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.h - prototypes pour l'implémentaton du calcul rapide d'empreintes de chaînes
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ *  This file is part of Chrysalide.
+ *
+ *  Chrysalide is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Chrysalide is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Chrysalide.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef _COMMON_PEARSON_H
+#define _COMMON_PEARSON_H
+
+
+#include <stdint.h>
+
+
+/**
+ * Plus d'informations avec les liens suivants :
+ *  - https://en.wikipedia.org/wiki/Pearson_hashing
+ *  - https://web.archive.org/web/20120704025921/http://cs.mwsu.edu/~griffin/courses/2133/downloads/Spring11/p677-pearson.pdf
+ */
+
+
+/* Fournit les permutations par défaut par Pearson. */
+const char *get_pearson_permutations(void);
+
+/* Détermine l'empreinte Pearson d'une chaîne de caractères. */
+uint8_t pearson_hash(const char *, const char *);
+
+
+
+#endif  /* _COMMON_PEARSON_H */
diff --git a/tests/common/pearson.py b/tests/common/pearson.py
new file mode 100644
index 0000000..bb4cf42
--- /dev/null
+++ b/tests/common/pearson.py
@@ -0,0 +1,50 @@
+
+from chrysacase import ChrysalideTestCase
+from pychrysalide.common import pearson_permutations, pearson
+
+
+class TestPearsonHash(ChrysalideTestCase):
+    """TestCase for common Pearson hash.*"""
+
+    def testPearsonDefaultPermutations(self):
+        """Check the default permutations for Pearson hashing."""
+
+        table = pearson_permutations()
+
+        self.assertEqual(256, len(table))
+
+        for i in range(256):
+            self.assertTrue(i in table)
+
+
+    def testPearsonPefectHashing(self):
+        """Compute Pearson pefect hashing."""
+
+        perfect_table = bytes([
+             39, 159, 180, 252,  71,   6,  13, 164, 232,  35, 226, 155,  98, 120, 154,  69,
+            157,  24, 137,  29, 147,  78, 121,  85, 112,   8, 248, 130,  55, 117, 190, 160,
+            176, 131, 228,  64, 211, 106,  38,  27, 140,  30,  88, 210, 227, 104,  84,  77,
+             75, 107, 169, 138, 195, 184,  70,  90,  61, 166,   7, 244, 165, 108, 219,  51,
+              9, 139, 209,  40,  31, 202,  58, 179, 116,  33, 207, 146,  76,  60, 242, 124,
+            254, 197,  80, 167, 153, 145, 129, 233, 132,  48, 246,  86, 156, 177,  36, 187,
+             45,   1,  96,  18,  19,  62, 185, 234,  99,  16, 218,  95, 128, 224, 123, 253,
+             42, 109,   4, 247,  72,   5, 151, 136,   0, 152, 148, 127, 204, 133,  17,  14,
+            182, 217,  54, 199, 119, 174,  82,  57, 215,  41, 114, 208, 206, 110, 239,  23,
+            189,  15,   3,  22, 188,  79, 113, 172,  28,   2, 222,  21, 251, 225, 237, 105,
+            102,  32,  56, 181, 126,  83, 230,  53, 158,  52,  59, 213, 118, 100,  67, 142,
+            220, 170, 144, 115, 205,  26, 125, 168, 249,  66, 175,  97, 255,  92, 229,  91,
+            214, 236, 178, 243,  46,  44, 201, 250, 135, 186, 150, 221, 163, 216, 162,  43,
+             11, 101,  34,  37, 194,  25,  50,  12,  87, 198, 173, 240, 193, 171, 143, 231,
+            111, 141, 191, 103,  74, 245, 223,  20, 161, 235, 122,  63,  89, 149,  73, 238,
+            134,  68,  93, 183, 241,  81, 196,  49, 192,  65, 212,  94, 203,  10, 200,  47
+        ])
+
+        perfect_words = [
+            'a', 'and', 'are', 'as', 'at', 'be', 'but', 'by',
+            'for', 'from', 'had', 'have', 'he', 'her', 'his', 'i',
+            'in', 'is', 'it', 'not', 'of', 'on', 'or', 'that',
+            'the', 'this', 'to', 'was', 'which', 'with', 'you'
+        ]
+
+        for i in range(len(perfect_words)):
+            self.assertEqual(i + 1, pearson(perfect_words[i], perfect_table))
-- 
cgit v0.11.2-87-g4458