From eb470f2e5e790ba107171a3ae8c5ed27a72ed8f8 Mon Sep 17 00:00:00 2001 From: Cyrille Bagard Date: Tue, 9 Mar 2021 22:54:43 +0100 Subject: Implement the Pearson hash method. --- plugins/pychrysalide/common/Makefile.am | 3 +- plugins/pychrysalide/common/module.c | 2 + plugins/pychrysalide/common/pearson.c | 179 ++++++++++++++++++++++++++++++++ plugins/pychrysalide/common/pearson.h | 39 +++++++ src/common/Makefile.am | 1 + src/common/pearson.c | 106 +++++++++++++++++++ src/common/pearson.h | 46 ++++++++ tests/common/pearson.py | 50 +++++++++ 8 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 plugins/pychrysalide/common/pearson.c create mode 100644 plugins/pychrysalide/common/pearson.h create mode 100644 src/common/pearson.c create mode 100644 src/common/pearson.h create mode 100644 tests/common/pearson.py diff --git a/plugins/pychrysalide/common/Makefile.am b/plugins/pychrysalide/common/Makefile.am index 66e7622..5f54fe8 100644 --- a/plugins/pychrysalide/common/Makefile.am +++ b/plugins/pychrysalide/common/Makefile.am @@ -7,7 +7,8 @@ libpychrysacommon_la_SOURCES = \ leb128.h leb128.c \ module.h module.c \ packed.h packed.c \ - pathname.h pathname.c + pathname.h pathname.c \ + pearson.h pearson.c libpychrysacommon_la_LDFLAGS = diff --git a/plugins/pychrysalide/common/module.c b/plugins/pychrysalide/common/module.c index 865b7e2..cc50c43 100644 --- a/plugins/pychrysalide/common/module.c +++ b/plugins/pychrysalide/common/module.c @@ -30,6 +30,7 @@ #include "leb128.h" #include "packed.h" #include "pathname.h" +#include "pearson.h" #include "../helpers.h" @@ -98,6 +99,7 @@ bool populate_common_module(void) if (result) result = populate_common_module_with_fnv1a(); if (result) result = populate_common_module_with_leb128(); if (result) result = populate_common_module_with_pathname(); + if (result) result = populate_common_module_with_pearson(); if (result) result = ensure_python_bitfield_is_registered(); if (result) result = ensure_python_packed_buffer_is_registered(); diff --git a/plugins/pychrysalide/common/pearson.c b/plugins/pychrysalide/common/pearson.c new file mode 100644 index 0000000..99c2d27 --- /dev/null +++ b/plugins/pychrysalide/common/pearson.c @@ -0,0 +1,179 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * pearson.c - équivalent Python du fichier "common/pearson.c" + * + * Copyright (C) 2018-2019 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "pearson.h" + + +#include + + +#include +#include + + +#include "../access.h" +#include "../helpers.h" + + + +/* Fournit les permutations par défaut par Pearson. */ +static PyObject *py_pearson_permutations(PyObject *, PyObject *); + +/* Détermine l'empreinte Pearson d'une chaîne de caractères. */ +static PyObject *py_pearson(PyObject *, PyObject *); + + + +/****************************************************************************** +* * +* Paramètres : self = NULL car méthode statique. * +* args = adresse non utilisée ici, en l'absence d'argument. * +* * +* Description : Fournit les permutations par défaut par Pearson. * +* * +* Retour : Table de valeurs utilisées par défaut. * +* * +* Remarques : - * +* * +******************************************************************************/ + +static PyObject *py_pearson_permutations(PyObject *self, PyObject *args) +{ + PyObject *result; /* Instance à retourner */ + const char *table; /* Eventuelle table à utiliser */ + +#define PEARSON_PERMUTATIONS_METHOD PYTHON_METHOD_DEF \ +( \ + pearson_permutations, "", \ + METH_NOARGS, py, \ + "Provide the default pseudorandom permutations" \ + " for the Pearson hash computation.\n" \ + "\n" \ + "The result is 256-byte value." \ +) + + table = get_pearson_permutations(); + + result = PyBytes_FromStringAndSize(table, 256); + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : self = NULL car méthode statique. * +* args = arguments fournis lors de l'appel à la fonction. * +* * +* Description : Détermine l'empreinte Pearson d'une chaîne de caractères. * +* * +* Retour : Numéro de révision. * +* * +* Remarques : - * +* * +******************************************************************************/ + +static PyObject *py_pearson(PyObject *self, PyObject *args) +{ + PyObject *result; /* Instance à retourner */ + const char *str; /* Chaîne à traiter. */ + PyObject *bytes; /* Tableau d'octets */ + int ret; /* Bilan de lecture des args. */ + const char *table; /* Eventuelle table à utiliser */ + uint8_t value; /* Empreinte calculée */ + +#define PEARSON_METHOD PYTHON_METHOD_DEF \ +( \ + pearson, "str, /, table", \ + METH_VARARGS, py, \ + "Compute the Pearson hash of a given string.\n" \ + "\n" \ + "The default pseudorandom permutations are used if" \ + " no *table* of 256 bytes is provided.\n" \ + "\n" \ + "A table of permutations can be created with this call:\n" \ + " bytes(sample(list(range(0, 256)), k=256))\n" \ + "\n" \ + "The result is 8-bit integer value." \ +) + + bytes = NULL; + + ret = PyArg_ParseTuple(args, "s|O!", &str, &PyBytes_Type, &bytes); + if (!ret) return NULL; + + if (bytes != NULL) + { + if (PyBytes_Size(bytes) != 256) + { + PyErr_SetString(PyExc_ValueError, _("256 bytes are required for the custom table.")); + return NULL; + } + + table = PyBytes_AsString(bytes); + + value = pearson_hash(str, table); + + } + else + value = pearson_hash(str, NULL); + + result = Py_BuildValue("B", (unsigned char)value); + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : - * +* * +* Description : Définit une extension du module 'common' à compléter. * +* * +* Retour : Bilan de l'opération. * +* * +* Remarques : - * +* * +******************************************************************************/ + +bool populate_common_module_with_pearson(void) +{ + bool result; /* Bilan à retourner */ + PyObject *module; /* Module à recompléter */ + + static PyMethodDef py_pearson_methods[] = { + PEARSON_PERMUTATIONS_METHOD, + PEARSON_METHOD, + { NULL } + }; + + module = get_access_to_python_module("pychrysalide.common"); + + result = register_python_module_methods(module, py_pearson_methods); + + return result; + +} diff --git a/plugins/pychrysalide/common/pearson.h b/plugins/pychrysalide/common/pearson.h new file mode 100644 index 0000000..caff72a --- /dev/null +++ b/plugins/pychrysalide/common/pearson.h @@ -0,0 +1,39 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * pearson.h - prototypes pour l'équivalent Python du fichier "common/pearson.c" + * + * Copyright (C) 2021 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H +#define _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H + + +#include +#include + + + +/* Définit une extension du module 'common' à compléter. */ +bool populate_common_module_with_pearson(void); + + + +#endif /* _PLUGINS_PYCHRYSALIDE_COMMON_PEARSON_H */ diff --git a/src/common/Makefile.am b/src/common/Makefile.am index fc313fb..2162d8c 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -21,6 +21,7 @@ libcommon_la_SOURCES = \ net.h net.c \ packed.h packed.c \ pathname.h pathname.c \ + pearson.h pearson.c \ shuffle.h shuffle.c \ sort.h sort.c \ sqlite.h sqlite.c \ diff --git a/src/common/pearson.c b/src/common/pearson.c new file mode 100644 index 0000000..5607a0c --- /dev/null +++ b/src/common/pearson.c @@ -0,0 +1,106 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * pearson.c - implémentaton du calcul rapide d'empreintes de chaînes + * + * Copyright (C) 2021 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Chrysalide. If not, see . + */ + + +#include "pearson.h" + + +#include + + + +/* Permutations proposées */ +static const char _pearson_permutations[256] = { + + 1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163, + 14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200, + 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222, + 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235, + 97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248, + 174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243, + 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219, + 119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10, + 138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152, + 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131, + 125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123, + 118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229, + 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203, + 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76, + 140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120, + 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209 + +}; + + + +/****************************************************************************** +* * +* Paramètres : - * +* * +* Description : Fournit les permutations par défaut par Pearson. * +* * +* Retour : Table de valeurs utilisées par défaut. * +* * +* Remarques : - * +* * +******************************************************************************/ + +const char *get_pearson_permutations(void) +{ + const char *result; /* Table à renvoyer */ + + result = _pearson_permutations; + + return result; + +} + + +/****************************************************************************** +* * +* Paramètres : str = chaîne de caractères à traiter. * +* table = permutations à employer, NULL pour celles par défaut.* +* * +* Description : Détermine l'empreinte Pearson d'une chaîne de caractères. * +* * +* Retour : Valeur calculée. * +* * +* Remarques : - * +* * +******************************************************************************/ + +uint8_t pearson_hash(const char *str, const char *table) +{ + uint8_t result; /* Valeur à retourner */ + unsigned char *iter; /* Boucle de parcours */ + + if (table == NULL) + table = _pearson_permutations; + + result = 0; + + for (iter = (unsigned char *)str; *iter; iter++) + result = table[result ^ (*iter)]; + + return result; + +} diff --git a/src/common/pearson.h b/src/common/pearson.h new file mode 100644 index 0000000..09f45cf --- /dev/null +++ b/src/common/pearson.h @@ -0,0 +1,46 @@ + +/* Chrysalide - Outil d'analyse de fichiers binaires + * pearson.h - prototypes pour l'implémentaton du calcul rapide d'empreintes de chaînes + * + * Copyright (C) 2021 Cyrille Bagard + * + * This file is part of Chrysalide. + * + * Chrysalide is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Chrysalide is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Chrysalide. If not, see . + */ + + +#ifndef _COMMON_PEARSON_H +#define _COMMON_PEARSON_H + + +#include + + +/** + * Plus d'informations avec les liens suivants : + * - https://en.wikipedia.org/wiki/Pearson_hashing + * - https://web.archive.org/web/20120704025921/http://cs.mwsu.edu/~griffin/courses/2133/downloads/Spring11/p677-pearson.pdf + */ + + +/* Fournit les permutations par défaut par Pearson. */ +const char *get_pearson_permutations(void); + +/* Détermine l'empreinte Pearson d'une chaîne de caractères. */ +uint8_t pearson_hash(const char *, const char *); + + + +#endif /* _COMMON_PEARSON_H */ diff --git a/tests/common/pearson.py b/tests/common/pearson.py new file mode 100644 index 0000000..bb4cf42 --- /dev/null +++ b/tests/common/pearson.py @@ -0,0 +1,50 @@ + +from chrysacase import ChrysalideTestCase +from pychrysalide.common import pearson_permutations, pearson + + +class TestPearsonHash(ChrysalideTestCase): + """TestCase for common Pearson hash.*""" + + def testPearsonDefaultPermutations(self): + """Check the default permutations for Pearson hashing.""" + + table = pearson_permutations() + + self.assertEqual(256, len(table)) + + for i in range(256): + self.assertTrue(i in table) + + + def testPearsonPefectHashing(self): + """Compute Pearson pefect hashing.""" + + perfect_table = bytes([ + 39, 159, 180, 252, 71, 6, 13, 164, 232, 35, 226, 155, 98, 120, 154, 69, + 157, 24, 137, 29, 147, 78, 121, 85, 112, 8, 248, 130, 55, 117, 190, 160, + 176, 131, 228, 64, 211, 106, 38, 27, 140, 30, 88, 210, 227, 104, 84, 77, + 75, 107, 169, 138, 195, 184, 70, 90, 61, 166, 7, 244, 165, 108, 219, 51, + 9, 139, 209, 40, 31, 202, 58, 179, 116, 33, 207, 146, 76, 60, 242, 124, + 254, 197, 80, 167, 153, 145, 129, 233, 132, 48, 246, 86, 156, 177, 36, 187, + 45, 1, 96, 18, 19, 62, 185, 234, 99, 16, 218, 95, 128, 224, 123, 253, + 42, 109, 4, 247, 72, 5, 151, 136, 0, 152, 148, 127, 204, 133, 17, 14, + 182, 217, 54, 199, 119, 174, 82, 57, 215, 41, 114, 208, 206, 110, 239, 23, + 189, 15, 3, 22, 188, 79, 113, 172, 28, 2, 222, 21, 251, 225, 237, 105, + 102, 32, 56, 181, 126, 83, 230, 53, 158, 52, 59, 213, 118, 100, 67, 142, + 220, 170, 144, 115, 205, 26, 125, 168, 249, 66, 175, 97, 255, 92, 229, 91, + 214, 236, 178, 243, 46, 44, 201, 250, 135, 186, 150, 221, 163, 216, 162, 43, + 11, 101, 34, 37, 194, 25, 50, 12, 87, 198, 173, 240, 193, 171, 143, 231, + 111, 141, 191, 103, 74, 245, 223, 20, 161, 235, 122, 63, 89, 149, 73, 238, + 134, 68, 93, 183, 241, 81, 196, 49, 192, 65, 212, 94, 203, 10, 200, 47 + ]) + + perfect_words = [ + 'a', 'and', 'are', 'as', 'at', 'be', 'but', 'by', + 'for', 'from', 'had', 'have', 'he', 'her', 'his', 'i', + 'in', 'is', 'it', 'not', 'of', 'on', 'or', 'that', + 'the', 'this', 'to', 'was', 'which', 'with', 'you' + ] + + for i in range(len(perfect_words)): + self.assertEqual(i + 1, pearson(perfect_words[i], perfect_table)) -- cgit v0.11.2-87-g4458