summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCyrille Bagard <nocbos@gmail.com>2021-03-09 21:54:43 (GMT)
committerCyrille Bagard <nocbos@gmail.com>2021-03-09 21:54:43 (GMT)
commiteb470f2e5e790ba107171a3ae8c5ed27a72ed8f8 (patch)
tree1d629fdb27b2e44062126d4c865f693b0adb305c
parentc728479b9006dde8d377e9029936de9a625c806b (diff)
Implement the Pearson hash method.
-rw-r--r--plugins/pychrysalide/common/Makefile.am3
-rw-r--r--plugins/pychrysalide/common/module.c2
-rw-r--r--plugins/pychrysalide/common/pearson.c179
-rw-r--r--plugins/pychrysalide/common/pearson.h39
-rw-r--r--src/common/Makefile.am1
-rw-r--r--src/common/pearson.c106
-rw-r--r--src/common/pearson.h46
-rw-r--r--tests/common/pearson.py50
8 files changed, 425 insertions, 1 deletions
diff --git a/plugins/pychrysalide/common/Makefile.am b/plugins/pychrysalide/common/Makefile.am
index 66e7622..5f54fe8 100644
--- a/plugins/pychrysalide/common/Makefile.am
+++ b/plugins/pychrysalide/common/Makefile.am
@@ -7,7 +7,8 @@ libpychrysacommon_la_SOURCES = \
leb128.h leb128.c \
module.h module.c \
packed.h packed.c \
- pathname.h pathname.c
+ pathname.h pathname.c \
+ pearson.h pearson.c
libpychrysacommon_la_LDFLAGS =
diff --git a/plugins/pychrysalide/common/module.c b/plugins/pychrysalide/common/module.c
index 865b7e2..cc50c43 100644
--- a/plugins/pychrysalide/common/module.c
+++ b/plugins/pychrysalide/common/module.c
@@ -30,6 +30,7 @@
#include "leb128.h"
#include "packed.h"
#include "pathname.h"
+#include "pearson.h"
#include "../helpers.h"
@@ -98,6 +99,7 @@ bool populate_common_module(void)
if (result) result = populate_common_module_with_fnv1a();
if (result) result = populate_common_module_with_leb128();
if (result) result = populate_common_module_with_pathname();
+ if (result) result = populate_common_module_with_pearson();
if (result) result = ensure_python_bitfield_is_registered();
if (result) result = ensure_python_packed_buffer_is_registered();
diff --git a/plugins/pychrysalide/common/pearson.c b/plugins/pychrysalide/common/pearson.c
new file mode 100644
index 0000000..99c2d27
--- /dev/null
+++ b/plugins/pychrysalide/common/pearson.c
@@ -0,0 +1,179 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.c - équivalent Python du fichier "common/pearson.c"
+ *
+ * Copyright (C) 2018-2019 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "pearson.h"
+
+
+#include <pygobject.h>
+
+
+#include <i18n.h>
+#include <common/pearson.h>
+
+
+#include "../access.h"
+#include "../helpers.h"
+
+
+
+/* Fournit les permutations par défaut par Pearson. */
+static PyObject *py_pearson_permutations(PyObject *, PyObject *);
+
+/* Détermine l'empreinte Pearson d'une chaîne de caractères. */
+static PyObject *py_pearson(PyObject *, PyObject *);
+
+
+
+/******************************************************************************
+* *
+* Paramètres : self = NULL car méthode statique. *
+* args = adresse non utilisée ici, en l'absence d'argument. *
+* *
+* Description : Fournit les permutations par défaut par Pearson. *
+* *
+* Retour : Table de valeurs utilisées par défaut. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+static PyObject *py_pearson_permutations(PyObject *self, PyObject *args)
+{
+ PyObject *result; /* Instance à retourner */
+ const char *table; /* Eventuelle table à utiliser */
+
+#define PEARSON_PERMUTATIONS_METHOD PYTHON_METHOD_DEF \
+( \
+ pearson_permutations, "", \
+ METH_NOARGS, py, \
+ "Provide the default pseudorandom permutations" \
+ " for the Pearson hash computation.\n" \
+ "\n" \
+ "The result is 256-byte value." \
+)
+
+ table = get_pearson_permutations();
+
+ result = PyBytes_FromStringAndSize(table, 256);
+
+ return result;
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : self = NULL car méthode statique. *
+* args = arguments fournis lors de l'appel à la fonction. *
+* *
+* Description : Détermine l'empreinte Pearson d'une chaîne de caractères. *
+* *
+* Retour : Numéro de révision. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+static PyObject *py_pearson(PyObject *self, PyObject *args)
+{
+ PyObject *result; /* Instance à retourner */
+ const char *str; /* Chaîne à traiter. */
+ PyObject *bytes; /* Tableau d'octets */
+ int ret; /* Bilan de lecture des args. */
+ const char *table; /* Eventuelle table à utiliser */
+ uint8_t value; /* Empreinte calculée */
+
+#define PEARSON_METHOD PYTHON_METHOD_DEF \
+( \
+ pearson, "str, /, table", \
+ METH_VARARGS, py, \
+ "Compute the Pearson hash of a given string.\n" \
+ "\n" \
+ "The default pseudorandom permutations are used if" \
+ " no *table* of 256 bytes is provided.\n" \
+ "\n" \
+ "A table of permutations can be created with this call:\n" \
+ " bytes(sample(list(range(0, 256)), k=256))\n" \
+ "\n" \
+ "The result is 8-bit integer value." \
+)
+
+ bytes = NULL;
+
+ ret = PyArg_ParseTuple(args, "s|O!", &str, &PyBytes_Type, &bytes);
+ if (!ret) return NULL;
+
+ if (bytes != NULL)
+ {
+ if (PyBytes_Size(bytes) != 256)
+ {
+ PyErr_SetString(PyExc_ValueError, _("256 bytes are required for the custom table."));
+ return NULL;
+ }
+
+ table = PyBytes_AsString(bytes);
+
+ value = pearson_hash(str, table);
+
+ }
+ else
+ value = pearson_hash(str, NULL);
+
+ result = Py_BuildValue("B", (unsigned char)value);
+
+ return result;
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : - *
+* *
+* Description : Définit une extension du module 'common' à compléter. *
+* *
+* Retour : Bilan de l'opération. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+bool populate_common_module_with_pearson(void)
+{
+ bool result; /* Bilan à retourner */
+ PyObject *module; /* Module à recompléter */
+
+ static PyMethodDef py_pearson_methods[] = {
+ PEARSON_PERMUTATIONS_METHOD,
+ PEARSON_METHOD,
+ { NULL }
+ };
+
+ module = get_access_to_python_module("pychrysalide.common");
+
+ result = register_python_module_methods(module, py_pearson_methods);
+
+ return result;
+
+}
diff --git a/plugins/pychrysalide/common/pearson.h b/plugins/pychrysalide/common/pearson.h
new file mode 100644
index 0000000..caff72a
--- /dev/null
+++ b/plugins/pychrysalide/common/pearson.h
@@ -0,0 +1,39 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.h - prototypes pour l'équivalent Python du fichier "common/pearson.c"
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#ifndef _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H
+#define _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H
+
+
+#include <Python.h>
+#include <stdbool.h>
+
+
+
+/* Définit une extension du module 'common' à compléter. */
+bool populate_common_module_with_pearson(void);
+
+
+
+#endif /* _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H */
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index fc313fb..2162d8c 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -21,6 +21,7 @@ libcommon_la_SOURCES = \
net.h net.c \
packed.h packed.c \
pathname.h pathname.c \
+ pearson.h pearson.c \
shuffle.h shuffle.c \
sort.h sort.c \
sqlite.h sqlite.c \
diff --git a/src/common/pearson.c b/src/common/pearson.c
new file mode 100644
index 0000000..5607a0c
--- /dev/null
+++ b/src/common/pearson.c
@@ -0,0 +1,106 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.c - implémentaton du calcul rapide d'empreintes de chaînes
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "pearson.h"
+
+
+#include <stddef.h>
+
+
+
+/* Permutations proposées */
+static const char _pearson_permutations[256] = {
+
+ 1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
+ 14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
+ 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
+ 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
+ 97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
+ 174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
+ 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
+ 119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
+ 138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
+ 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
+ 125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
+ 118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
+ 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
+ 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
+ 140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
+ 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
+
+};
+
+
+
+/******************************************************************************
+* *
+* Paramètres : - *
+* *
+* Description : Fournit les permutations par défaut par Pearson. *
+* *
+* Retour : Table de valeurs utilisées par défaut. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+const char *get_pearson_permutations(void)
+{
+ const char *result; /* Table à renvoyer */
+
+ result = _pearson_permutations;
+
+ return result;
+
+}
+
+
+/******************************************************************************
+* *
+* Paramètres : str = chaîne de caractères à traiter. *
+* table = permutations à employer, NULL pour celles par défaut.*
+* *
+* Description : Détermine l'empreinte Pearson d'une chaîne de caractères. *
+* *
+* Retour : Valeur calculée. *
+* *
+* Remarques : - *
+* *
+******************************************************************************/
+
+uint8_t pearson_hash(const char *str, const char *table)
+{
+ uint8_t result; /* Valeur à retourner */
+ unsigned char *iter; /* Boucle de parcours */
+
+ if (table == NULL)
+ table = _pearson_permutations;
+
+ result = 0;
+
+ for (iter = (unsigned char *)str; *iter; iter++)
+ result = table[result ^ (*iter)];
+
+ return result;
+
+}
diff --git a/src/common/pearson.h b/src/common/pearson.h
new file mode 100644
index 0000000..09f45cf
--- /dev/null
+++ b/src/common/pearson.h
@@ -0,0 +1,46 @@
+
+/* Chrysalide - Outil d'analyse de fichiers binaires
+ * pearson.h - prototypes pour l'implémentaton du calcul rapide d'empreintes de chaînes
+ *
+ * Copyright (C) 2021 Cyrille Bagard
+ *
+ * This file is part of Chrysalide.
+ *
+ * Chrysalide is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Chrysalide is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Chrysalide. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#ifndef _COMMON_PEARSON_H
+#define _COMMON_PEARSON_H
+
+
+#include <stdint.h>
+
+
+/**
+ * Plus d'informations avec les liens suivants :
+ * - https://en.wikipedia.org/wiki/Pearson_hashing
+ * - https://web.archive.org/web/20120704025921/http://cs.mwsu.edu/~griffin/courses/2133/downloads/Spring11/p677-pearson.pdf
+ */
+
+
+/* Fournit les permutations par défaut par Pearson. */
+const char *get_pearson_permutations(void);
+
+/* Détermine l'empreinte Pearson d'une chaîne de caractères. */
+uint8_t pearson_hash(const char *, const char *);
+
+
+
+#endif /* _COMMON_PEARSON_H */
diff --git a/tests/common/pearson.py b/tests/common/pearson.py
new file mode 100644
index 0000000..bb4cf42
--- /dev/null
+++ b/tests/common/pearson.py
@@ -0,0 +1,50 @@
+
+from chrysacase import ChrysalideTestCase
+from pychrysalide.common import pearson_permutations, pearson
+
+
+class TestPearsonHash(ChrysalideTestCase):
+ """TestCase for common Pearson hash.*"""
+
+ def testPearsonDefaultPermutations(self):
+ """Check the default permutations for Pearson hashing."""
+
+ table = pearson_permutations()
+
+ self.assertEqual(256, len(table))
+
+ for i in range(256):
+ self.assertTrue(i in table)
+
+
+ def testPearsonPefectHashing(self):
+ """Compute Pearson pefect hashing."""
+
+ perfect_table = bytes([
+ 39, 159, 180, 252, 71, 6, 13, 164, 232, 35, 226, 155, 98, 120, 154, 69,
+ 157, 24, 137, 29, 147, 78, 121, 85, 112, 8, 248, 130, 55, 117, 190, 160,
+ 176, 131, 228, 64, 211, 106, 38, 27, 140, 30, 88, 210, 227, 104, 84, 77,
+ 75, 107, 169, 138, 195, 184, 70, 90, 61, 166, 7, 244, 165, 108, 219, 51,
+ 9, 139, 209, 40, 31, 202, 58, 179, 116, 33, 207, 146, 76, 60, 242, 124,
+ 254, 197, 80, 167, 153, 145, 129, 233, 132, 48, 246, 86, 156, 177, 36, 187,
+ 45, 1, 96, 18, 19, 62, 185, 234, 99, 16, 218, 95, 128, 224, 123, 253,
+ 42, 109, 4, 247, 72, 5, 151, 136, 0, 152, 148, 127, 204, 133, 17, 14,
+ 182, 217, 54, 199, 119, 174, 82, 57, 215, 41, 114, 208, 206, 110, 239, 23,
+ 189, 15, 3, 22, 188, 79, 113, 172, 28, 2, 222, 21, 251, 225, 237, 105,
+ 102, 32, 56, 181, 126, 83, 230, 53, 158, 52, 59, 213, 118, 100, 67, 142,
+ 220, 170, 144, 115, 205, 26, 125, 168, 249, 66, 175, 97, 255, 92, 229, 91,
+ 214, 236, 178, 243, 46, 44, 201, 250, 135, 186, 150, 221, 163, 216, 162, 43,
+ 11, 101, 34, 37, 194, 25, 50, 12, 87, 198, 173, 240, 193, 171, 143, 231,
+ 111, 141, 191, 103, 74, 245, 223, 20, 161, 235, 122, 63, 89, 149, 73, 238,
+ 134, 68, 93, 183, 241, 81, 196, 49, 192, 65, 212, 94, 203, 10, 200, 47
+ ])
+
+ perfect_words = [
+ 'a', 'and', 'are', 'as', 'at', 'be', 'but', 'by',
+ 'for', 'from', 'had', 'have', 'he', 'her', 'his', 'i',
+ 'in', 'is', 'it', 'not', 'of', 'on', 'or', 'that',
+ 'the', 'this', 'to', 'was', 'which', 'with', 'you'
+ ]
+
+ for i in range(len(perfect_words)):
+ self.assertEqual(i + 1, pearson(perfect_words[i], perfect_table))