2024-05-29 02:29:36 +02:00
|
|
|
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from elasticsearch import Elasticsearch
from psycopg2 import connect
from sshtunnel import SSHTunnelForwarder
from typing_extensions import Dict

from drivers.elastic import Elastic_Testing
from drivers.psql import PSQL_Testing
from utils.utils import preprocess_json
|
|
|
|
|
2024-06-02 23:23:02 +02:00
|
|
|
# --- PostgreSQL connection settings (passed to psycopg2.connect in test_psql) ---
POSTGRES_HOST = "127.0.0.1"
POSTGRES_PORT = "5432"
POSTGRES_DB = "postgres"
POSTGRES_USER = "postgres"
# The password can be supplied through the environment; the previous literal
# remains the default so existing local setups keep working unchanged.
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "postgres")

# --- Elasticsearch connection settings (passed to Elasticsearch in test_elasticsearch) ---
ELASTIC_URL = "https://localhost:9200"
# NOTE(review): the default below is a real-looking API key committed to the
# repository. Prefer exporting ELASTIC_API_KEY and rotating/removing this
# literal; it is kept only so the script still runs without configuration.
ELASTIC_API_KEY = os.environ.get(
    "ELASTIC_API_KEY",
    'Z18xZzFJOEJXTnUzZ2RiTk5YWkw6ekhiRVpQYnVTZ2FhRjFCR3NVUFB4UQ==',
)
# One Elasticsearch benchmark run is executed per entry (value = shard count).
RUN_ELASTIC_WITH_N_SHARDS = [1, 2, 4]

# Argument handed to preprocess_json() before the benchmarks start.
DOCUMENT_MULTIPLICATOR = 10
|
|
|
|
|
2024-05-29 02:29:36 +02:00
|
|
|
def test_psql():
    """Run the PostgreSQL test suite and return its result.

    Opens a psycopg2 connection using the module-level ``POSTGRES_*``
    settings, passes it to ``PSQL_Testing().do_tests`` and returns whatever
    that call returns. The connection is closed before returning, including
    when the test run raises.
    """
    psqlClient = connect(
        database=POSTGRES_DB,
        host=POSTGRES_HOST,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        port=POSTGRES_PORT,
    )
    try:
        # Autocommit is switched off explicitly, so statements issued by the
        # test driver run inside transactions.
        psqlClient.autocommit = False
        return PSQL_Testing().do_tests(psqlClient)
    finally:
        # Release the server-side session even if the tests fail.
        psqlClient.close()
|
|
|
|
|
2024-05-31 17:24:17 +02:00
|
|
|
def test_elasticsearch(shards=1):
    """Run the Elasticsearch test suite and return its result.

    Args:
        shards: Value forwarded to the ``Elastic_Testing`` constructor
            (callers in this file pass the shard counts from
            ``RUN_ELASTIC_WITH_N_SHARDS``).

    Returns:
        Whatever ``Elastic_Testing(shards).do_tests`` returns.

    The client is closed before returning, including when the test run
    raises.
    """
    es = Elasticsearch(
        ELASTIC_URL,
        api_key=ELASTIC_API_KEY,
        verify_certs=False,  # just to not create certificates
        ssl_show_warn=False,
    )
    try:
        return Elastic_Testing(shards).do_tests(es)
    finally:
        # Close the client's connections even if the tests fail.
        es.close()
|
2024-05-31 15:53:07 +02:00
|
|
|
|
2024-05-29 02:29:36 +02:00
|
|
|
|
|
|
|
def plot(
    timings: Dict[str, Dict[str, float]],
    *,
    output_path: str = "plot-orodruin-opti.png",
    show: bool = True,
):
    """Draw a grouped bar chart of every driver's timings relative to PSQL.

    For each function measured under ``timings['PSQL']`` the PSQL bar has
    height 1 and every other driver's bar has height
    ``timings[driver][func] / timings['PSQL'][func]``. Each bar is annotated
    with its value rounded to two decimals. The figure is written to
    ``output_path`` and, unless ``show`` is false, displayed with
    ``plt.show()``.

    Args:
        timings: Mapping ``driver name -> {function name -> time}``. Must
            contain the key ``'PSQL'``; its functions define the x axis.
        output_path: File the figure is saved to.
        show: Whether to call ``plt.show()`` after saving.

    Raises:
        KeyError: If ``'PSQL'`` is missing, or a driver lacks a timing for
            one of the PSQL functions.
        ZeroDivisionError: If a PSQL timing used as divisor is zero.
    """
    baseline = timings['PSQL']
    functions = list(baseline)

    # The baseline always comes first, regardless of the dict's insertion
    # order, so each bar series is paired with the right legend label.
    drivers = ['PSQL'] + [name for name in timings if name != 'PSQL']

    # All ratios are computed before any figure is created, so invalid
    # input fails without leaving a half-built figure behind.
    relative = {'PSQL': [1] * len(functions)}
    for driver in drivers[1:]:
        relative[driver] = [
            timings[driver][func] / baseline[func] for func in functions
        ]

    fig, ax = plt.subplots(figsize=(12, 8))
    index = np.arange(len(functions))
    # 0.2 for up to four drivers; narrower beyond that so that one group of
    # bars never spills into the next function's slot.
    bar_width = min(0.2, 0.8 / len(drivers))

    for i, driver in enumerate(drivers):
        positions = index + i * bar_width
        heights = relative[driver]
        ax.bar(positions, heights, bar_width, label=driver)
        # Value label just above each bar.
        for x_pos, height in zip(positions, heights):
            ax.text(
                x_pos,
                height + 0.02,
                str(round(height, 2)),
                fontsize=6,
                ha='center',
                va='bottom',
            )

    ax.set_xlabel('Functions')
    ax.set_ylabel('Relative Time')
    ax.set_title('Performance of ES Relative to PSQL')
    # Centre each tick under its group of bars.
    ax.set_xticks(index + bar_width * (len(drivers) - 1) / 2)
    ax.set_xticklabels(functions, rotation=45, ha="right")
    ax.legend()

    fig.tight_layout()
    fig.savefig(output_path)
    if show:
        plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: preprocess the input data, run the PostgreSQL
    # tests and one Elasticsearch run per configured shard count, then
    # report and plot the collected timings.
    print("### Preprocessing JSON ###")
    preprocess_json(DOCUMENT_MULTIPLICATOR)

    # 'PSQL' is the key plot() uses as its baseline.
    timings = {'PSQL': test_psql()}
    for shard_count in RUN_ELASTIC_WITH_N_SHARDS:
        timings[f'ES{shard_count}shards'] = test_elasticsearch(shard_count)

    # Print the raw numbers before plotting: plot() blocks on plt.show() and
    # may raise, and the benchmark results should not be lost in either case.
    print(timings)
    plot(timings)
|