Skip to content

Commit 3830f48

Browse files
committed
Implemented all benchmarks
0 parents  commit 3830f48

23 files changed

+507
-0
lines changed

README.md

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# pysql-benchmark
2+
3+
This repository contains the benchmark of the following solutions for mysql drivers in Python:
4+
5+
- PyMySQL
6+
- PyMySQL within a threadpool
7+
- mysqlclient
8+
- mysqlclient within a threadpool
9+
- aiomysql
10+
- asyncmy
11+
12+
# motivation
13+
14+
Because the asyncmy project page offers a benchmark that is limited to a single query, we cannot really estimate the impact of asyncio on performance.
15+
This is where this project comes to life.
16+
17+
# results
18+
19+
The benchmark has been run on a Macbook Pro M1 Pro 32GB
1.56 KB
Binary file not shown.
1.61 KB
Binary file not shown.

__pycache__/benchmark.cpython-311.pyc

4.4 KB
Binary file not shown.
7.79 KB
Binary file not shown.
1.48 KB
Binary file not shown.
7.41 KB
Binary file not shown.
981 Bytes
Binary file not shown.
Binary file not shown.
977 Bytes
Binary file not shown.
972 Bytes
Binary file not shown.

aiomysql_bench.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import random
2+
import time
3+
4+
import aiomysql
5+
6+
from benchmark_async import AsyncBenchmark
7+
8+
9+
class AIOMySQLBench(AsyncBenchmark):
    """Benchmark runner backed by an aiomysql connection pool."""

    async def run(self):
        """Create the pool, time the four operations, and tear the pool down.

        Returns a dict mapping operation name to elapsed seconds.
        """
        # Pool sized to the batch width so every in-flight query in a batch
        # can hold its own connection.
        self.pool = await aiomysql.create_pool(
            host=self.DB_HOST,
            user=self.DB_USER,
            password=self.DB_PASSWORD,
            db=self.DB_NAME,
            minsize=1,
            maxsize=self.num_batches,
        )

        timings = {}
        for op_name, op in (
            ("insert", self.insert),
            ("select", self.select),
            ("update", self.update),
            ("delete", self.delete),
        ):
            timings[op_name] = await op()

        self.pool.close()
        await self.pool.wait_closed()
        return timings

asyncmy_bench.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import asyncio
2+
import random
3+
import time
4+
from asyncio import Queue
5+
6+
import asyncmy
7+
8+
from benchmark_async import AsyncBenchmark
9+
10+
11+
class AsyncMyBench(AsyncBenchmark):
    """Benchmark runner backed by an asyncmy connection pool."""

    async def run(self):
        """Create the pool, time the four operations, and tear the pool down.

        Returns a dict mapping operation name to elapsed seconds.
        """
        # One connection per concurrent query in a batch.
        self.pool = await asyncmy.create_pool(
            host=self.DB_HOST,
            user=self.DB_USER,
            password=self.DB_PASSWORD,
            db=self.DB_NAME,
            maxsize=self.num_batches,
        )

        insert_time = await self.insert()
        select_time = await self.select()
        update_time = await self.update()
        delete_time = await self.delete()

        self.pool.close()
        await self.pool.wait_closed()

        return {
            "insert": insert_time,
            "select": select_time,
            "update": update_time,
            "delete": delete_time,
        }

benchmark.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import random
2+
import time
3+
4+
from benchmark_base import BenchmarkBase
5+
6+
7+
class Benchmark(BenchmarkBase):
    """Sequential, single-connection benchmark implementation.

    NOTE(review): commit cadence intentionally differs per operation —
    insert/delete commit once at the end, update commits per statement.
    """

    def __init__(self) -> None:
        super().__init__()

    def run(self):
        """Execute all four operations and return their timings in seconds."""
        timings = {}
        timings["insert"] = self.insert()
        timings["select"] = self.select()
        timings["update"] = self.update()
        timings["delete"] = self.delete()
        return timings

    def insert(self):
        """Time NUM_QUERIES single-row inserts (single commit at the end)."""
        connection, cursor = self._connect()

        # Start from a clean table on every run.
        cursor.execute(self.drop_table_query)
        cursor.execute(self.create_table_query)

        started = time.time()
        for _ in range(self.NUM_QUERIES):
            payload = f"Sample data {random.randint(1, 1000)}"
            cursor.execute(self.insert_query, (payload,))
        connection.commit()
        elapsed = time.time() - started

        cursor.close()
        connection.close()
        return elapsed

    def select(self):
        """Time NUM_QUERIES point lookups by primary key."""
        connection, cursor = self._connect()

        started = time.time()
        for row_id in range(1, self.NUM_QUERIES + 1):
            cursor.execute(self.select_query, (row_id,))
            cursor.fetchone()
        elapsed = time.time() - started

        cursor.close()
        connection.close()
        return elapsed

    def update(self):
        """Time NUM_QUERIES single-row updates (commit after each statement)."""
        connection, cursor = self._connect()

        started = time.time()
        for row_id in range(1, self.NUM_QUERIES + 1):
            payload = f"Updated data {random.randint(1, 1000)}"
            cursor.execute(self.update_query, (payload, row_id))
            connection.commit()
        elapsed = time.time() - started

        cursor.close()
        connection.close()
        return elapsed

    def delete(self):
        """Time NUM_QUERIES one-row deletes (single commit at the end)."""
        connection, cursor = self._connect()

        started = time.time()
        for _ in range(self.NUM_QUERIES):
            cursor.execute(self.delete_query)
        connection.commit()
        elapsed = time.time() - started

        cursor.close()
        connection.close()
        return elapsed

benchmark_async.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import asyncio
2+
import random
3+
import time
4+
5+
import asyncmy
6+
7+
from benchmark_base import BenchmarkBase
8+
9+
10+
class AsyncBenchmark(BenchmarkBase):
    """Shared asyncio benchmark logic.

    Subclasses are expected to set ``self.pool`` (an async connection pool
    exposing ``acquire()``) before calling the operation coroutines.
    """

    def __init__(self, num_batches=10):
        super().__init__()
        # How many queries are awaited concurrently in each batch.
        self.num_batches = num_batches

    async def insert(self):
        """Time NUM_QUERIES inserts, executed in concurrent batches."""
        started = time.time()

        async def worker(_):
            async with self.pool.acquire() as conn:
                async with conn.cursor() as cur:
                    payload = f"Sample data {random.randint(1, 1000)}"
                    await cur.execute(self.insert_query, (payload,))
                    await conn.commit()

        await self._run_in_batches(worker)

        return time.time() - started

    async def select(self):
        """Time NUM_QUERIES point lookups, executed in concurrent batches."""
        started = time.time()

        async def worker(row_id):
            async with self.pool.acquire() as conn:
                async with conn.cursor() as cur:
                    await cur.execute(self.select_query, (row_id,))
                    await cur.fetchone()

        await self._run_in_batches(worker)

        return time.time() - started

    async def update(self):
        """Time NUM_QUERIES single-row updates, executed in concurrent batches."""
        started = time.time()

        async def worker(row_id):
            async with self.pool.acquire() as conn:
                async with conn.cursor() as cur:
                    payload = f"Updated data {random.randint(1, 1000)}"
                    await cur.execute(self.update_query, (payload, row_id))
                    await conn.commit()

        await self._run_in_batches(worker)

        return time.time() - started

    async def delete(self):
        """Time NUM_QUERIES one-row deletes, executed in concurrent batches."""
        started = time.time()

        async def worker(_):
            async with self.pool.acquire() as conn:
                async with conn.cursor() as cur:
                    await cur.execute(self.delete_query)
                    await conn.commit()

        await self._run_in_batches(worker)

        return time.time() - started

    async def _run_in_batches(self, query_func):
        """Invoke query_func(i) for i in 1..NUM_QUERIES, num_batches at a time."""
        total = self.NUM_QUERIES
        for offset in range(0, total, self.num_batches):
            ids = range(offset + 1, min(offset + self.num_batches, total) + 1)
            await asyncio.gather(*(query_func(i) for i in ids))

benchmark_base.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
class BenchmarkBase:
    """Shared configuration and SQL statements for every benchmark variant.

    Subclasses implement ``_connect`` and a ``run`` method that returns a
    dict of operation name -> elapsed seconds.
    """

    def __init__(self) -> None:
        # Connection settings for the local MySQL test server.
        self.DB_HOST = "127.0.0.1"
        self.DB_USER = "root"
        self.DB_PASSWORD = ""
        self.DB_NAME = "benchmark_test"
        # Number of statements executed per benchmarked operation.
        self.NUM_QUERIES = 10000

        self.create_table_query = """
            CREATE TABLE IF NOT EXISTS test_table (
                id INT AUTO_INCREMENT PRIMARY KEY,
                data VARCHAR(100)
            )
        """
        self.drop_table_query = "DROP TABLE IF EXISTS test_table"

        self.insert_query = "INSERT INTO test_table (data) VALUES (%s)"
        self.select_query = "SELECT * FROM test_table WHERE id = %s"
        # Deletes exactly one arbitrary row per execution (MySQL LIMIT).
        self.delete_query = "DELETE FROM test_table WHERE 1 LIMIT 1"
        self.update_query = "UPDATE test_table SET data = %s WHERE id = %s"

    def _connect(self):
        """Return a (connection, cursor) pair; implemented by subclasses.

        Bug fix: the original signature ``def _connect():`` was missing
        ``self``, so a subclass falling through to this method would raise
        ``TypeError`` instead of the intended ``NotImplementedError``.
        """
        raise NotImplementedError()

benchmark_threaded.py

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import random
2+
import threading
3+
import time
4+
from concurrent.futures import ThreadPoolExecutor, as_completed
5+
6+
from benchmark_base import BenchmarkBase
7+
8+
9+
class ThreadedBenchmark(BenchmarkBase):
    """Benchmark that fans queries out over a ThreadPoolExecutor.

    Each worker thread gets its own (connection, cursor) pair, cached in
    ``connection_pool`` keyed by thread id.
    """

    def __init__(self):
        super().__init__()
        self.NUM_THREADS = 10
        # thread id -> (connection, cursor), one pair per worker thread.
        self.connection_pool = {}
        # Bug fix: guards the check-then-create on connection_pool. Without
        # it, two threads can both miss the cache for their key; one pair is
        # then overwritten in the dict and its connection is leaked unclosed.
        self._pool_lock = threading.Lock()

    def _disconnect(self):
        """Close every pooled connection and empty the pool."""
        for connection, cursor in self.connection_pool.values():
            cursor.close()
            connection.close()
        # Bug fix: clear the dict so a later run cannot hand out
        # already-closed connections.
        self.connection_pool.clear()

    def _get_connection_from_pool(self):
        """Return the calling thread's (connection, cursor), creating it lazily."""
        thread_id = threading.get_ident()
        with self._pool_lock:
            if thread_id not in self.connection_pool:
                self.connection_pool[thread_id] = self._connect()
            return self.connection_pool[thread_id]

    def run(self):
        """Run all four operations; returns operation name -> elapsed seconds."""
        self._disconnect()  # start from a clean pool
        try:
            return {
                "insert": self.insert(),
                "select": self.select(),
                "update": self.update(),
                "delete": self.delete(),
            }
        finally:
            # Bug fix: the original never closed the per-thread connections
            # opened during the run, leaking them after the benchmark.
            self._disconnect()

    def _run_queries_in_threads(self, query_func):
        """Submit query_func(i) for i in 1..NUM_QUERIES and wait for all."""
        with ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor:
            futures = [
                executor.submit(query_func, i) for i in range(1, self.NUM_QUERIES + 1)
            ]
            for future in as_completed(futures):
                future.result()  # re-raise any exception from a worker

    def insert(self):
        """Time NUM_QUERIES inserts spread across the thread pool."""
        connection, cursor = self._connect()

        # Recreate the table so every run starts empty.
        cursor.execute(self.drop_table_query)
        cursor.execute(self.create_table_query)
        cursor.close()
        connection.close()

        start_time = time.time()

        def insert_query(_):
            connection, cursor = self._get_connection_from_pool()
            data = f"Sample data {random.randint(1, 1000)}"
            cursor.execute(self.insert_query, (data,))
            connection.commit()

        self._run_queries_in_threads(insert_query)
        end_time = time.time()

        return end_time - start_time

    def select(self):
        """Time NUM_QUERIES point lookups spread across the thread pool."""
        # Cleanup: the original opened and immediately closed an unused
        # connection here; removed as dead code.
        start_time = time.time()

        def select_query(i):
            connection, cursor = self._get_connection_from_pool()
            cursor.execute(self.select_query, (i,))
            cursor.fetchone()

        self._run_queries_in_threads(select_query)

        end_time = time.time()
        return end_time - start_time

    def update(self):
        """Time NUM_QUERIES single-row updates spread across the thread pool."""
        start_time = time.time()

        def update_query(i):
            connection, cursor = self._get_connection_from_pool()
            new_data = f"Updated data {random.randint(1, 1000)}"
            cursor.execute(self.update_query, (new_data, i))
            connection.commit()

        self._run_queries_in_threads(update_query)

        end_time = time.time()
        return end_time - start_time

    def delete(self):
        """Time NUM_QUERIES one-row deletes spread across the thread pool."""
        start_time = time.time()

        def delete_query(_):
            connection, cursor = self._get_connection_from_pool()
            cursor.execute(self.delete_query)
            connection.commit()

        self._run_queries_in_threads(delete_query)
        end_time = time.time()

        return end_time - start_time

0 commit comments

Comments
 (0)