m2-traitement-benchmark/main.py
2024-06-02 23:23:02 +02:00

98 lines
2.8 KiB
Python

import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from elasticsearch import Elasticsearch
from psycopg2 import connect
from sshtunnel import SSHTunnelForwarder
from typing_extensions import Dict

from drivers.elastic import Elastic_Testing
from drivers.psql import PSQL_Testing
from utils.utils import preprocess_json
# --- Connection settings ----------------------------------------------------
# Every setting below can be overridden with an environment variable of the
# same name; the literal is only the fallback used when the variable is unset.
POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "127.0.0.1")
# Kept as a string, exactly as it is handed to psycopg2's connect().
POSTGRES_PORT = os.environ.get("POSTGRES_PORT", "5432")
POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")
POSTGRES_USER = os.environ.get("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "postgres")
ELASTIC_URL = os.environ.get("ELASTIC_URL", "https://localhost:9200")
# NOTE(review): an API key is committed here as the default value. Prefer
# supplying ELASTIC_API_KEY through the environment and rotating this key.
ELASTIC_API_KEY = os.environ.get(
    "ELASTIC_API_KEY",
    'Z18xZzFJOEJXTnUzZ2RiTk5YWkw6ekhiRVpQYnVTZ2FhRjFCR3NVUFB4UQ==',
)

# --- Benchmark parameters ---------------------------------------------------
# One Elasticsearch benchmark run is performed per shard count listed here.
RUN_ELASTIC_WITH_N_SHARDS = [1, 2, 4]
# Passed to preprocess_json() before the benchmarks start.
DOCUMENT_MULTIPLICATOR = 10
def test_psql():
    """Run the PostgreSQL benchmark and return its result.

    Opens a psycopg2 connection using the ``POSTGRES_*`` module constants,
    passes it to ``PSQL_Testing().do_tests`` and returns whatever that call
    returns.  The connection is always closed before this function exits,
    including when the benchmark raises.
    """
    psql_client = connect(
        database=POSTGRES_DB,
        host=POSTGRES_HOST,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        port=POSTGRES_PORT,
    )
    try:
        # Explicitly disable autocommit; presumably the test driver manages
        # its own commit boundaries -- verify against PSQL_Testing.
        psql_client.autocommit = False
        return PSQL_Testing().do_tests(psql_client)
    finally:
        # Release the server-side session even if do_tests() fails.
        psql_client.close()
def test_elasticsearch(shards=1):
    """Run the Elasticsearch benchmark and return its result.

    Args:
        shards: Value forwarded to the ``Elastic_Testing`` constructor
            (the call sites pass the entries of ``RUN_ELASTIC_WITH_N_SHARDS``).

    Returns:
        Whatever ``Elastic_Testing(shards).do_tests`` returns.

    The client is always closed before this function exits, including when
    the benchmark raises.
    """
    es = Elasticsearch(
        ELASTIC_URL,
        api_key=ELASTIC_API_KEY,
        verify_certs=False,  # just to not create certificates
        ssl_show_warn=False,
    )
    try:
        return Elastic_Testing(shards).do_tests(es)
    finally:
        # Close the client's transport so connections are not leaked
        # across the successive per-shard runs.
        es.close()
def _relative_timings(timings, baseline):
    """Return ``(drivers, functions, ratios)`` normalised to *baseline*.

    ``drivers`` lists *baseline* first, followed by the remaining keys of
    *timings* in their original order.  ``ratios[func][i]`` is the timing of
    ``drivers[i]`` for ``func`` divided by the baseline timing for ``func``;
    the baseline's own entry is therefore always ``1``.
    """
    reference = timings[baseline]
    functions = list(reference)
    # Put the baseline first explicitly so that position i in every ratio
    # list always corresponds to drivers[i], whatever the dict order was.
    drivers = [baseline] + [name for name in timings if name != baseline]
    ratios = {
        func: [1] + [timings[name][func] / reference[func] for name in drivers[1:]]
        for func in functions
    }
    return drivers, functions, ratios


def plot(timings: Dict[str, Dict[str, float]], baseline: str = 'PSQL',
         output_path: str = "plot-orodruin-opti.png"):
    """Draw a grouped bar chart of every driver's timings relative to a baseline.

    Args:
        timings: Maps a driver name to a mapping of function name -> timing.
            Every driver must provide a timing for each function the
            baseline provides.
        baseline: Key of ``timings`` used as the reference; its bars are
            drawn at height 1.
        output_path: File the figure is saved to before it is shown.

    Raises:
        KeyError: If ``baseline`` is missing from ``timings`` or a driver
            lacks one of the baseline's functions.
        ZeroDivisionError: If a baseline timing is zero.
    """
    drivers, functions, ratios = _relative_timings(timings, baseline)

    # Groups sit one unit apart on the x axis; share 0.8 of that unit between
    # the drivers so neighbouring groups never overlap (4 drivers -> 0.2).
    bar_width = 0.8 / len(drivers)
    index = np.arange(len(functions))

    fig, ax = plt.subplots(figsize=(12, 8))
    for i, driver in enumerate(drivers):
        positions = index + i * bar_width
        heights = [ratios[func][i] for func in functions]
        ax.bar(positions, heights, bar_width, label=driver)
        # Print each ratio just above its bar.
        for x_pos, height in zip(positions, heights):
            ax.text(x_pos, height + 0.02, str(round(height, 2)),
                    fontsize=6, ha='center', va='bottom')

    ax.set_xlabel('Functions')
    ax.set_ylabel('Relative Time')
    ax.set_title(f'Performance of ES Relative to {baseline}')
    # Centre each tick under its group of bars; the labels are set here, so
    # no separate set_xticklabels() call is needed.
    ax.set_xticks(index + bar_width * (len(drivers) - 1) / 2, functions,
                  rotation=45, ha="right")
    ax.legend()

    fig.tight_layout()
    fig.savefig(output_path)
    plt.show()
if __name__ == "__main__":
    # Prepare the input documents before any benchmark runs.
    print("### Preprocessing JSON ###")
    preprocess_json(DOCUMENT_MULTIPLICATOR)

    # PostgreSQL goes first; it is the reference the plot normalises against.
    timings = {'PSQL': test_psql()}

    # One Elasticsearch run per configured shard count.
    for shard_count in RUN_ELASTIC_WITH_N_SHARDS:
        timings[f'ES{shard_count}shards'] = test_elasticsearch(shard_count)

    # Chart the comparison, then dump the raw numbers.
    plot(timings)
    print(timings)