omg la ram
This commit is contained in:
parent
4addf5448d
commit
3ef595e195
4 changed files with 90 additions and 3 deletions
|
@ -112,4 +112,54 @@ class Elastic_Testing(DB_Testing):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return f"Got {driver.search(index=INDEX, body=query).body['aggregations']['attachment_count']['value']}"
|
return f"Got {driver.search(index=INDEX, body=query).body['aggregations']['attachment_count']['value']}"
|
||||||
|
|
||||||
|
def update_add_replies_per_last_name(self, driver: Elasticsearch) -> str:
|
||||||
|
agg_query = {
|
||||||
|
"size": 0,
|
||||||
|
"aggs": {
|
||||||
|
"reply_count_by_last_name": {
|
||||||
|
"terms": {
|
||||||
|
"script": {
|
||||||
|
"source": "String name = doc['sender_name.keyword'].value; String[] parts = /\\s+/.split(name); return parts[parts.length - 1]",
|
||||||
|
"lang": "painless"
|
||||||
|
},
|
||||||
|
"size": 1000
|
||||||
|
},
|
||||||
|
"aggs": {
|
||||||
|
"total_reply_count": {
|
||||||
|
"sum": {
|
||||||
|
"field": "reply_count"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
agg_results = driver.search(index=INDEX, body=agg_query)
|
||||||
|
|
||||||
|
buckets = agg_results['aggregations']['reply_count_by_last_name']['buckets']
|
||||||
|
reply_counts = {bucket['key']: bucket['total_reply_count']['value'] for bucket in buckets}
|
||||||
|
|
||||||
|
update_query = {
|
||||||
|
"script": {
|
||||||
|
"source": """
|
||||||
|
String name = ctx._source.sender_name;
|
||||||
|
String[] parts = /\\s+/.split(name);
|
||||||
|
String lastName = parts[parts.length - 1];
|
||||||
|
if (params.replyCounts.containsKey(lastName)) {
|
||||||
|
ctx._source.total_reply_count = params.replyCounts.get(lastName);
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
"lang": "painless",
|
||||||
|
"params": {
|
||||||
|
"replyCounts": reply_counts
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"query": {
|
||||||
|
"match_all": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
driver.update_by_query(index=INDEX, body=update_query, conflicts="proceed")
|
|
@ -29,6 +29,8 @@ class PSQL_Thread(Data_Thread):
|
||||||
class PSQL_Testing(DB_Testing):
|
class PSQL_Testing(DB_Testing):
|
||||||
driver_name = "PSQL"
|
driver_name = "PSQL"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.data = [PSQL_Thread(x) for x in Threads]
|
self.data = [PSQL_Thread(x) for x in Threads]
|
||||||
|
@ -109,4 +111,29 @@ class PSQL_Testing(DB_Testing):
|
||||||
cursor.execute(searchQuery)
|
cursor.execute(searchQuery)
|
||||||
driver.commit()
|
driver.commit()
|
||||||
return f"Got {cursor.fetchone()[0]}"
|
return f"Got {cursor.fetchone()[0]}"
|
||||||
|
|
||||||
|
def update_add_replies_per_last_name(self, driver: connection) -> str:
|
||||||
|
cursor = driver.cursor()
|
||||||
|
alterQuery = """
|
||||||
|
ALTER TABLE thread ADD COLUMN last_name_replies INT;
|
||||||
|
"""
|
||||||
|
updateQuery = """
|
||||||
|
WITH last_name_sums AS (
|
||||||
|
SELECT
|
||||||
|
recipient_name,
|
||||||
|
SUM(reply_count) OVER (PARTITION BY SPLIT_PART(recipient_name, ' ', -1)) AS total_replies
|
||||||
|
FROM
|
||||||
|
thread
|
||||||
|
)
|
||||||
|
UPDATE thread AS t
|
||||||
|
SET last_name_replies = ln_sums.total_replies
|
||||||
|
FROM last_name_sums AS ln_sums
|
||||||
|
WHERE t.recipient_name = ln_sums.recipient_name;
|
||||||
|
"""
|
||||||
|
unalterQuery = """
|
||||||
|
ALTER TABLE thread DROP COLUMN last_name_replies;
|
||||||
|
"""
|
||||||
|
cursor.execute(alterQuery)
|
||||||
|
cursor.execute(updateQuery)
|
||||||
|
cursor.execute(unalterQuery)
|
||||||
|
driver.commit()
|
|
@ -5,7 +5,7 @@ from utils.utils import time_func
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
T = TypeVar('T')
|
T = TypeVar('T')
|
||||||
NUM_RUNS = 10000
|
NUM_RUNS = 10
|
||||||
|
|
||||||
class DB_Testing(Generic[T]):
|
class DB_Testing(Generic[T]):
|
||||||
driver_name: str
|
driver_name: str
|
||||||
|
@ -50,6 +50,10 @@ class DB_Testing(Generic[T]):
|
||||||
def get_sum_attachment_less_5(self, driver: T) -> str:
|
def get_sum_attachment_less_5(self, driver: T) -> str:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def update_add_replies_per_last_name(self, driver: T) -> str:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def do_tests(self, driver: T) -> Dict[str, float]:
|
def do_tests(self, driver: T) -> Dict[str, float]:
|
||||||
stats = {}
|
stats = {}
|
||||||
|
@ -63,9 +67,15 @@ class DB_Testing(Generic[T]):
|
||||||
sleep(1) # To wait for any indexing to avoid race conditions somehow
|
sleep(1) # To wait for any indexing to avoid race conditions somehow
|
||||||
|
|
||||||
# Multiple
|
# Multiple
|
||||||
|
|
||||||
|
# Search
|
||||||
stats.update(time_func(f"[{self.driver_name}] Count mails with more than 5 attachments and Mr/Mrs", "attach5_mr_mrs", lambda : self.attach5_mr_mrs(driver), NUM_RUNS))
|
stats.update(time_func(f"[{self.driver_name}] Count mails with more than 5 attachments and Mr/Mrs", "attach5_mr_mrs", lambda : self.attach5_mr_mrs(driver), NUM_RUNS))
|
||||||
stats.update(time_func(f"[{self.driver_name}] Sum the number of attachment from mail that have less than 5 attachments", "get_sum_attachment_less_5", lambda : self.get_sum_attachment_less_5(driver), NUM_RUNS))
|
stats.update(time_func(f"[{self.driver_name}] Sum the number of attachment from mail that have less than 5 attachments", "get_sum_attachment_less_5", lambda : self.get_sum_attachment_less_5(driver), NUM_RUNS))
|
||||||
stats.update(time_func(f"[{self.driver_name}] Count mails that have a lorem sentence", "search_lorem", lambda : self.search_lorem(driver), NUM_RUNS))
|
stats.update(time_func(f"[{self.driver_name}] Count mails that have a lorem sentence", "search_lorem", lambda : self.search_lorem(driver), NUM_RUNS))
|
||||||
stats.update(time_func(f"[{self.driver_name}] Count mails that have a mail that ends with microsoft.com", "search_mails_ends_microsoftcom", lambda : self.search_mails_ends_microsoftcom(driver), NUM_RUNS))
|
stats.update(time_func(f"[{self.driver_name}] Count mails that have a mail that ends with microsoft.com", "search_mails_ends_microsoftcom", lambda : self.search_mails_ends_microsoftcom(driver), NUM_RUNS))
|
||||||
|
|
||||||
|
# Update
|
||||||
|
stats.update(time_func(f"[{self.driver_name}] Update all thread to add a field with the total replies per last name", "update_add_replies_per_last_name", lambda : self.update_add_replies_per_last_name(driver), NUM_RUNS))
|
||||||
|
|
||||||
print("\n")
|
print("\n")
|
||||||
return stats
|
return stats
|
BIN
plot.png
Normal file
BIN
plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 65 KiB |
Loading…
Reference in a new issue