/* Chrysalide - Outil d'analyse de fichiers binaires
 * rost.c - fichier d'entrée du centre de collecte
 *
 * Copyright (C) 2023 Cyrille Bagard
 *
 *  This file is part of Chrysalide.
 *
 *  Chrysalide is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Chrysalide is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */


#include <assert.h>
#include <getopt.h>
#include <libgen.h>
#include <locale.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>


#include <i18n.h>


#include "gleak.h"
#include "analysis/contents/file.h"
#include "analysis/scan/core.h"
#include "analysis/scan/options.h"
#include "analysis/scan/scanner.h"
#include "analysis/scan/patterns/backends/acism.h"
#include "analysis/scan/patterns/backends/bitap.h"
#include "analysis/scan/patterns/backends/hyperscan.h"
#include "core/core.h"
#include "core/global.h"
#include "core/logs.h"
#include "core/paths.h"
#include "plugins/pglist.h"



/* Prints usage information for the program. */
static void show_rost_help(const char *);

/* Prints information about the current version of the program. */
static void show_rost_version(void);

/* Reads the content to process from the standard input. */
static void *get_input_data_from_stdin(size_t *);



/******************************************************************************
*                                                                             *
*  Parameters  : name = name of the program, as invoked.                      *
*                                                                             *
*  Description : Prints usage information for the program.                    *
*                                                                             *
*  Return      : -                                                            *
*                                                                             *
*  Remarks     : -                                                            *
*                                                                             *
******************************************************************************/

static void show_rost_help(const char *name)
{
    char *tmp;                              /* Modifiable copy of the name */
    const char *base;                       /* Short version of the name   */

    /* basename() may modify its argument, hence the private copy */
    tmp = strdup(name);

    /* Fall back on the full name if the copy could not be allocated */
    base = (tmp != NULL ? basename(tmp) : name);

    printf("\n");

    printf("Usage: %s [--help] [--version] [--verbosity] [options] <rules file> <file | dir>\n", base);

    printf("\n");

    printf("\t-h --help\t\tShow this help message.\n");
    printf("\t-v --version\t\tDisplay the program version.\n");

    printf("\n");

    /* The advertised default has to match the keyword accepted by -A ("acism") */
    printf("\t-A --algorithm=NAME\tSelect one of the available algorithms for data: acism, bitmap, hyperscan (default: acism).\n");
    printf("\t-C --check-only\t\tValidate the rule syntax without performing a scan (discard the file/dir argument).\n");
    printf("\t-j --print-json\t\tPrint matching strings in JSON format instead of simple text.\n");
    printf("\t-s --print-strings\tPrint matching strings (default text format only).\n");
    printf("\t-S --print-stats\tPrint rules' statistics.\n");
    printf("\t-g --print-tags\t\tPrint tags linked to rules on match (default text format only).\n");
    printf("\t-t --tag=TAG\t\tPrint only matching rules tagged as TAG (default text format only).\n");
    printf("\t-V --verbosity=level\tSet the log level (0 for all messages, %u for none).\n", LMT_COUNT);

    printf("\n");

    printf("\t--dump-modifiers\tList all registered modifiers for string patterns.\n");
    printf("\t--dump-namespaces\tExplore the root namespace with all its functions and sub-namespaces.\n");

    printf("\n");

    free(tmp);

}


/******************************************************************************
*                                                                             *
*  Parameters  : -                                                            *
*                                                                             *
*  Description : Prints the version of the program and its directories.       *
*                                                                             *
*  Return      : -                                                            *
*                                                                             *
*  Remarks     : -                                                            *
*                                                                             *
******************************************************************************/

static void show_rost_version(void)
{
    char *path;                             /* Effective base directory    */

    /* Resolves one directory, prints it with the given format, releases it */
#define PRINT_EFFECTIVE_DIRECTORY(fmt, dir)         \
    do                                              \
    {                                               \
        path = get_effective_directory(dir);        \
        printf(fmt, path);                          \
        free(path);                                 \
    }                                               \
    while (0)

    printf("\n");

    printf("-o-  Chrysalide ROST r%u  -o-\n", REVISION);
    printf(_("Last compiled on %s at %s\n"), __DATE__, __TIME__);

    printf("\n");

    PRINT_EFFECTIVE_DIRECTORY(_("Plugins library directory: %s\n"), PLUGINS_LIB_DIR);

    PRINT_EFFECTIVE_DIRECTORY(_("Plugins data directory: %s\n"), PLUGINS_DATA_DIR);

    PRINT_EFFECTIVE_DIRECTORY(_("Locale directory: %s\n"), LOCALE_DIR);

    printf("\n");

#undef PRINT_EFFECTIVE_DIRECTORY

}


/******************************************************************************
*                                                                             *
*  Parameters  : length = number of bytes read. [OUT]                         *
*                                                                             *
*  Description : Reads all the data available on the standard input.          *
*                                                                             *
*  Return      : Allocated buffer to free(), or NULL on failure.              *
*                                                                             *
*  Remarks     : Reading goes on until end of file; short reads are fine.     *
*                                                                             *
******************************************************************************/

static void *get_input_data_from_stdin(size_t *length)
{
    char *result;                           /* Memory area to return       */
    char *grown;                            /* Enlarged memory area        */
    ssize_t got;                            /* Number of bytes read        */

    result = NULL;

    *length = 0;

#define ALLOC_SIZE 2048

    while (true)
    {
        /* Always keep room for one more full chunk after the current data */
        grown = realloc(result, (*length + ALLOC_SIZE) * sizeof(char));

        /* On failure the previous area is still valid and gets released below */
        if (grown == NULL)
        {
            LOG_ERROR_N("realloc");
            goto exit_with_error;
        }

        result = grown;

        got = read(STDIN_FILENO, result + *length, ALLOC_SIZE);

        if (got == -1)
        {
            LOG_ERROR_N("read");
            goto exit_with_error;
        }

        /**
         * Only a zero-sized read marks the end of the input: pipes and
         * terminals are allowed to deliver fewer bytes than requested
         * while more data is still to come.
         */
        if (got == 0)
            break;

        *length += got;

    }

    return result;

 exit_with_error:

    free(result);

    /* Do not advertise a size for a buffer which does not exist anymore */
    *length = 0;

    return NULL;

}


/******************************************************************************
*                                                                             *
*  Parameters  : argc = number of command line arguments.                     *
*                argv = command line arguments.                               *
*                                                                             *
*  Description : Entry point of the program.                                  *
*                                                                             *
*  Return      : EXIT_SUCCESS if the program ran without any trouble.         *
*                                                                             *
*  Remarks     : -                                                            *
*                                                                             *
******************************************************************************/

int main(int argc, char **argv)
{
    int result;                             /* Exit status of the program  */
    bool show_help;                         /* Help display requested?     */
    bool show_version;                      /* Version display requested?  */
    bool check_only;                        /* Only validate the rules?    */
    LogMessageType verbosity;               /* Message filtering level     */
    bool dump_modifiers;                    /* List the modifiers?         */
    bool dump_namespaces;                   /* List the namespaces?        */
    GScanOptions *options;                  /* Scan options                */
    int ret;                                /* Result of a call            */
    char *edir;                             /* Effective base directory    */
    size_t mod_count;                       /* Number of modifiers         */
    char **modifiers;                       /* List of modifiers           */
    size_t i;                               /* Loop index                  */
    GScanNamespace *root_ns;                /* Root ROST namespace         */
    size_t items_count;                     /* Number of namespace items   */
    char **items;                           /* List of namespace items     */
    char *rules;                            /* Rules definition            */
    char *target;                           /* Provided target             */
    size_t rule_length;                     /* Size of a content           */
    void *rule_content;                     /* Content to translate        */
    GContentScanner *scanner;               /* Scanning driver             */
    GBinContent *content;                   /* Content to analyze          */
    GScanContext *context;                  /* Context for the findings    */
    sized_string_t padding;                 /* Padding for JSON            */
    bool full;                              /* Detailed output?            */

    /* Identifiers for long-only options, kept out of the range of characters */
#define LONG_ID(n) (0x40570000 | (n))

    static struct option long_options[] = {
        { "help",           no_argument,        NULL,   'h' },
        { "version",        no_argument,        NULL,   'v' },
        { "algorithm",      required_argument,  NULL,   'A' },
        { "check-only",     no_argument,        NULL,   'C' },
        { "print-json",     no_argument,        NULL,   'j' },
        { "print-strings",  no_argument,        NULL,   's' },
        { "print-stats",    no_argument,        NULL,   'S' },
        { "print-tags",     no_argument,        NULL,   'g' },
        { "tag",            required_argument,  NULL,   't' },
        { "verbosity",      required_argument,  NULL,   'V' },
        { "dump-modifiers", no_argument,        NULL,   LONG_ID(1) },
        { "dump-namespaces",no_argument,        NULL,   LONG_ID(2) },
        { NULL,             0,                  NULL,   0 }
    };

    result = EXIT_FAILURE;

    /* Options decoding */

    show_help = false;
    show_version = false;

    check_only = false;
    verbosity = LMT_COUNT;
    dump_modifiers = false;
    dump_namespaces = false;

    options = g_scan_options_new();

    g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND);

    while (true)
    {
        /* The index of the matched long option is of no use here */
        ret = getopt_long(argc, argv, "hvA:CjsSgt:V:", long_options, NULL);
        if (ret == -1) break;

        switch (ret)
        {
            case 'h':
                show_help = true;
                break;

            case 'v':
                show_version = true;
                break;

            case 'A':
                /**
                 * NOTE(review): the keyword is "bitmap" while the selected
                 * backend type is named BITAP; the spelling is kept because
                 * the help text advertises the same one -- to be confirmed.
                 */
                if (strcmp(optarg, "acism") == 0)
                    g_scan_options_set_backend_for_data(options, G_TYPE_ACISM_BACKEND);
                else if (strcmp(optarg, "bitmap") == 0)
                    g_scan_options_set_backend_for_data(options, G_TYPE_BITAP_BACKEND);
                else if (strcmp(optarg, "hyperscan") == 0)
                    g_scan_options_set_backend_for_data(options, G_TYPE_HYPERSCAN_BACKEND);
                else
                    /* Unknown name: rejected below, once all options are read */
                    g_scan_options_set_backend_for_data(options, G_TYPE_INVALID);
                break;

            case 'C':
                check_only = true;
                g_scan_options_set_check_only(options, true);
                break;

            case 'j':
                g_scan_options_set_print_json(options, true);
                break;

            case 's':
                g_scan_options_set_print_strings(options, true);
                break;

            case 'S':
                g_scan_options_set_print_stats(options, true);
                break;

            case 'g':
                g_scan_options_set_print_tags(options, true);
                break;

            case 't':
                g_scan_options_select_tag(options, optarg);
                break;

            case 'V':
                verbosity = strtoul(optarg, NULL, 10);
                break;

            case LONG_ID(1):
                dump_modifiers = true;
                break;

            case LONG_ID(2):
                dump_namespaces = true;
                break;

            default:
                /**
                 * Unknown option or missing argument: getopt_long() has
                 * already reported the problem, so only the usage is
                 * recalled before failing, as for an invalid algorithm.
                 */
                show_rost_help(argv[0]);
                goto done;

        }

    }

    /* Basic actions */

    if (show_help)
    {
        show_rost_help(argv[0]);
        result = EXIT_SUCCESS;
        goto done;
    }

    if (show_version)
    {
        show_rost_version();
        result = EXIT_SUCCESS;
        goto done;
    }

    if (g_scan_options_get_backend_for_data(options) == G_TYPE_INVALID)
    {
        show_rost_help(argv[0]);
        goto done;
    }

    /* Starting the serious things */

    setlocale(LC_ALL, "");
    edir = get_effective_directory(LOCALE_DIR);
    bindtextdomain(PACKAGE, edir);
    free(edir);
    textdomain(PACKAGE);

    /* GTK initialization */
    g_set_prgname("ROST");
    //gtk_init(&argc, &argv);

    /* Program initialization */

    set_batch_mode();

    set_log_verbosity(verbosity);

#define CORE_COMPONENTS (ACC_SCAN_FEATURES)

    /* Nothing has to be unloaded if the loading itself failed */
    if (!load_core_components(CORE_COMPONENTS))
        goto done;

    /*
    init_all_plugins(true);
    */

    if (dump_modifiers)
    {
        modifiers = list_all_scan_token_modifiers(&mod_count);

        for (i = 0; i < mod_count; i++)
        {
            printf("%s\n", modifiers[i]);
            free(modifiers[i]);
        }

        free(modifiers);

        result = EXIT_SUCCESS;

    }

    if (dump_namespaces)
    {
        root_ns = get_rost_root_namespace();

        items = g_scan_namespace_explore(root_ns, &items_count);

        for (i = 0; i < items_count; i++)
        {
            printf("%s\n", items[i]);
            free(items[i]);
        }

        free(items);

        result = EXIT_SUCCESS;

        g_object_unref(G_OBJECT(root_ns));

    }

    /**
     * Expected remaining arguments:
     *  - check only: [<rules file>];
     *  - scan:       [<rules file>] <target>.
     *
     * When a dump has been performed, running without such arguments is
     * not an error: the status set by the dump is returned silently.
     */
    if ((check_only && (optind + 0) != argc && (optind + 1) != argc)
        || (!check_only && (optind + 1) != argc && (optind + 2) != argc))
    {
        if (result == EXIT_FAILURE)
            show_rost_help(argv[0]);
        goto unload;
    }

    /* Reset in case of a previous dump... */
    result = EXIT_FAILURE;

    /* Processing of the searches */

    if ((optind + 0) == argc)
    {
        assert(check_only);

        rules = NULL;
        target = NULL;

    }
    else if ((optind + 1) == argc)
    {
        if (check_only)
        {
            rules = argv[optind];
            target = NULL;
        }
        else
        {
            rules = NULL;
            target = argv[optind];
        }
    }
    else
    {
        rules = argv[optind];
        target = argv[optind + 1];

        if (strcmp(rules, "-") == 0 || strcmp(rules, "/dev/stdin") == 0)
            rules = NULL;

    }

    /* Without any rules file, the definitions come from the standard input */
    if (rules == NULL)
    {
        rule_content = get_input_data_from_stdin(&rule_length);

        if (rule_content != NULL)
        {
            scanner = g_content_scanner_new_from_text(rule_content, rule_length);
            free(rule_content);
        }
        else
            scanner = NULL;

    }
    else
        scanner = g_content_scanner_new_from_file(rules);

    if (scanner == NULL)
        goto unload;

    if (check_only)
        result = EXIT_SUCCESS;

    else
    {
        /**
         * Success is only reported once the target has been loaded and
         * scanned: a missing target or a failed analysis is a failure.
         */

        content = g_file_content_new(target);

        if (content != NULL)
        {
            context = g_content_scanner_analyze(scanner, options, content);

            if (context != NULL)
            {
                if (g_scan_options_get_print_json(options))
                {
                    padding.data = "   ";
                    padding.len = 3;

                    g_content_scanner_output_to_json(scanner, context, &padding, 0, STDOUT_FILENO);

                }
                else
                {
                    full = g_scan_options_get_print_strings(options);

                    g_content_scanner_output_to_text(scanner, context, full, STDOUT_FILENO);

                }

                result = EXIT_SUCCESS;

                g_object_unref(G_OBJECT(context));

            }

            g_object_unref(G_OBJECT(content));

        }

    }

    g_clear_object(&scanner);

    /* Exit, with the core components loaded */

 unload:

    /* The options get released before the tracking of the remaining objects */
    g_clear_object(&options);

#ifdef TRACK_GOBJECT_LEAKS
    remember_gtypes_for_leaks();
#endif

    unload_core_components(CORE_COMPONENTS);

#ifdef TRACK_GOBJECT_LEAKS
    dump_remaining_gtypes();
#endif

    //exit_all_plugins();

    /* Exit, whatever the state of the core components */

 done:

    /* No effect if the options have already been released above */
    g_clear_object(&options);

    return result;

}