m2-traitement-benchmark/main.py

from elasticsearch import Elasticsearch
from psycopg2 import connect
from drivers.psql import PSQL_Testing
from drivers.elastic import Elastic_Testing
from sshtunnel import SSHTunnelForwarder
from utils.utils import preprocess_json
from typing_extensions import Dict
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt

def test_psql():
    """Run the PostgreSQL benchmark suite through an SSH tunnel.

    Opens an SSH tunnel to ``orodruin.mordor`` that forwards local port 5432
    to port 5432 on the remote loopback interface, connects to the
    ``postgres`` database through it, and hands the connection to
    ``PSQL_Testing().do_tests``.

    Returns:
        Whatever ``PSQL_Testing().do_tests`` returns. The script entry point
        stores it under ``timings['PSQL']`` and passes it to ``plot``, which
        reads it as a mapping of function name to timing.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving them
    # to configuration or environment variables.
    with SSHTunnelForwarder(
        ssh_address_or_host=("orodruin.mordor", 22),
        ssh_username="postgres",
        ssh_password="postgres",
        remote_bind_address=('127.0.0.1', 5432),
        local_bind_address=('127.0.0.1', 5432),
    ):
        psqlClient = connect(
            database="postgres",
            host="127.0.0.1",
            user="postgres",
            password="postgres",
            port="5432",
        )
        try:
            # Transactions are controlled explicitly rather than per statement.
            psqlClient.autocommit = False
            return PSQL_Testing().do_tests(psqlClient)
        finally:
            # Close the connection before the tunnel is torn down, and also
            # when the tests raise, so it is never left dangling.
            psqlClient.close()


def test_elasticsearch():
    """Run the Elasticsearch benchmark suite against ``orodruin.mordor``.

    Builds an ``Elasticsearch`` client for the HTTPS endpoint on port 9200,
    authenticated with an API key, and hands it to
    ``Elastic_Testing().do_tests``.

    Returns:
        Whatever ``Elastic_Testing().do_tests`` returns. The script entry
        point stores it under ``timings['ES']`` and passes it to ``plot``,
        which reads it as a mapping of function name to timing.
    """
    # NOTE(review): the endpoint and API key are hard-coded in the source.
    client_options = {
        "api_key": 'WjMwVXQ0OEJnUzRTOUVUaVNNVHY6MFh2X3RDcGRRWC1FRVNRZkdhWlYwUQ==',
        # Certificate verification is switched off so that no certificates
        # have to be created; the matching TLS warning is silenced as well.
        "verify_certs": False,
        "ssl_show_warn": False,
    }
    client = Elasticsearch('https://orodruin.mordor:9200/', **client_options)
    suite = Elastic_Testing()
    return suite.do_tests(client)


def _relative_timings(timings, baseline='PSQL', target='ES'):
    """Return ``{function: target_time / baseline_time}`` per benchmarked function.

    ``timings`` has the shape ``{driver: {function: timing}}``. Functions that
    were not timed by both ``baseline`` and ``target``, or whose baseline
    timing is zero, are skipped with a warning because no ratio can be
    computed for them. Function order follows first appearance in ``timings``.
    """
    import warnings  # local import: leaves the file's import block untouched

    # Regroup {driver: {function: timing}} into {function: {driver: timing}}.
    per_function = defaultdict(dict)
    for driver_name, function_timing in timings.items():
        for function_name, timing in function_timing.items():
            per_function[function_name][driver_name] = timing

    relative = {}
    for function_name, by_driver in per_function.items():
        if baseline not in by_driver or target not in by_driver:
            warnings.warn(
                f"skipping {function_name!r}: no timing for both "
                f"{baseline} and {target}"
            )
            continue
        if not by_driver[baseline]:
            warnings.warn(
                f"skipping {function_name!r}: {baseline} timing is zero"
            )
            continue
        relative[function_name] = by_driver[target] / by_driver[baseline]
    return relative


def plot(timings: Dict[str, Dict[str, float]]):
    """Draw a bar chart of Elasticsearch timings relative to PostgreSQL.

    For every function timed by both the ``'PSQL'`` and ``'ES'`` drivers, the
    PSQL bar is drawn at height 1 and the ES bar at ``ES time / PSQL time``.
    Functions missing from either driver, or with a PSQL timing of zero, are
    left out with a warning instead of aborting the plot. The figure is
    written to ``plot.png`` in the current working directory and then
    displayed with ``plt.show()``.

    Args:
        timings: Mapping ``{driver name: {function name: timing}}``; the
            drivers compared are the keys ``'PSQL'`` and ``'ES'``.
    """
    relative_dict = _relative_timings(timings)

    functions = list(relative_dict)
    psql_values = [1] * len(functions)  # PSQL is the baseline, so always 1
    es_values = list(relative_dict.values())

    plt.figure(figsize=(12, 8))
    index = np.arange(len(functions))
    bar_width = 0.35

    # Two bars per function, side by side: PSQL on the left, ES on the right.
    plt.bar(index, psql_values, bar_width, label='PSQL', color='lightblue')
    plt.bar(index + bar_width, es_values, bar_width, label='ES', color='orange')

    plt.xlabel('Functions')
    plt.ylabel('Relative Time')
    plt.title('Performance of ES Relative to PSQL')
    # Centre each tick label between the two bars of its group.
    plt.xticks(index + bar_width / 2, functions, rotation=45, ha="right")
    plt.legend()

    # Print each bar's value just above it.
    for i, v in enumerate(psql_values):
        plt.text(i, v + 0.02, str(v), ha='center', va='bottom')
    for i, v in enumerate(es_values):
        plt.text(i + bar_width, v + 0.02, str(round(v, 2)), ha='center', va='bottom')

    plt.tight_layout()
    plt.savefig("plot.png")
    plt.show()


if __name__ == "__main__":
    # Script entry point: preprocess the JSON input, benchmark both back
    # ends (PostgreSQL first, then Elasticsearch), and plot the comparison.
    print("### Preprocessing JSON ###")
    preprocess_json()

    psql_results = test_psql()
    es_results = test_elasticsearch()

    # The keys must be 'PSQL' and 'ES' because plot() looks them up by name.
    timings = {'PSQL': psql_results, 'ES': es_results}
    plot(timings)