/* Chrysalide - Outil d'analyse de fichiers binaires
 * entropy.c - calcul de l'entropie d'un contenu binaire
 *
 * Copyright (C) 2024 Cyrille Bagard
 *
 *  This file is part of Chrysalide.
 *
 *  Chrysalide is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Chrysalide is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Chrysalide.  If not, see <http://www.gnu.org/licenses/>.
 */


#include "entropy.h"


#include <math.h>
#include <string.h>



/******************************************************************************
*                                                                             *
*  Parameters  : data = sequence of bytes to process.                         *
*                len  = number of these bytes.                                *
*                bits = use a base-2 logarithm instead of a base-256 one?     *
*                                                                             *
*  Description : Compute the Shannon entropy of a binary content.             *
*                                                                             *
*  Return      : Entropy value of the provided content (0.0 if empty).        *
*                                                                             *
*  Remarks     : -                                                            *
*                                                                             *
******************************************************************************/

double compute_entropy(const bin_t *data, size_t len, bool bits)
{
    double result;                          /* Computed value to return    */
    size_t counters[256];                   /* Occurrences of each value   */
    size_t i;                               /* Loop index                  */
    double log_base;                        /* Logarithm of the base used  */
    double freq;                            /* Frequency of a given value  */

    /**
     * An empty content carries no information. Leaving early also means the
     * data pointer is never touched, so a NULL pointer is fine with len == 0.
     */

    if (len == 0)
        return 0.0;

    result = 0.0;

    /**
     * Counters share the type of len: no counter can exceed len, hence no
     * counter can wrap, whatever the width of unsigned long on the platform.
     */

    memset(counters, 0, sizeof(counters));

    /* NOTE(review): assumes bin_t values always fit in [0, 255] -- confirm */
    for (i = 0; i < len; i++)
        counters[data[i]]++;

    /**
     * Rationale for the choice of the logarithm base:
     * https://stackoverflow.com/questions/990477/how-to-calculate-the-entropy-of-a-file/990646#990646
     */

    log_base = log(bits ? 2.0 : 256.0);

    for (i = 0; i < sizeof(counters) / sizeof(counters[0]); i++)
    {
        /* Values never seen contribute nothing, and log(0) is undefined */
        if (counters[i] == 0)
            continue;

        freq = ((double)counters[i]) / ((double)len);

        /* log(freq) / log_base is the logarithm of freq in the chosen base */
        result -= freq * (log(freq) / log_base);

    }

    return result;

}