Skip to content

Commit 73df309

Browse files
committed
Fixed workload sampling function.
1 parent 796e8b1 commit 73df309

10 files changed

+83
-85
lines changed

conf/postgres_opttune.conf

+15-17
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,13 @@ pg_os_user = postgres # PostgreSQL owner user(OS user)
1818
ssh_port = 22 # ssh port
1919
ssh_password = postgres # pg_os_user's ssh password
2020

21-
[workload-sampling]
22-
workload_sampling_time_second = 300
23-
# Time (in seconds) to sample the workload running on the database in the [PostgreSQL] section
24-
my_workload_save_dir = ./workload_data/
25-
# Workload save database settings
26-
pghost = localhost # PostgreSQL server host
27-
pgport = 5432 # PostgreSQL server port
28-
pguser = postgres # PostgreSQL user name(Database user)
29-
pgpassword = postgres12 # PostgreSQL user password(Database user)
30-
pgdatabase = sampling # PostgreSQL Database
31-
# workload save directory
32-
33-
3421
[turning]
3522
study_name = pgbench_study # study name
3623
required_recovery_time_second = 0
3724
# The maximum recovery time allowed by the user in case of a PostgreSQL crash,
3825
# which is used to estimate the wax_wal_size parameter.
3926
# Note: The default value of 0 does not perform the estimation of the wax_wal_size parameter.
40-
benchmark = pgbench # Benchmark tool name('my_workload' or pgbench' or 'oltpbench' or 'star_schema_benchmark')
27+
benchmark = pgbench # Benchmark tool name('sampled_workload' or 'pgbench' or 'oltpbench' or 'star_schema_benchmark')
4128
parameter_json_dir = ./conf/
4229
number_trail = 100 # Number of benchmarks to run for turning
4330
data_load_interval = 10 # Specify the data load interval by the number of benchmarks
@@ -47,8 +34,8 @@ save_study_history = True # Whether to save study history
4734
load_study_history = True # Whether to load study history if a study name already exists.
4835
history_database_url = sqlite:///study-history.db # Example PostgreSQL. postgresql://postgres@localhost/study_history
4936

50-
[my-workload]
51-
my_workload_save_file = workload_data/2020-07-05_180647.531417-2020-07-05_180657.531661.pkl
37+
[sampled_workload]
38+
sampled_workload_save_file = workload_data/2020-09-13_202209.011708-2020-09-13_202239.011973.pkl
5239
# File saved using workload_sampler.py
5340

5441
[pgbench]
@@ -71,4 +58,15 @@ sql_file_path = ./pgopttune/workload/star_schema_sql/
7158
sql_key = Q1.1, Q2.1, Q3.1
7259
# List of queries to be executed by the client (comma-separated)
7360
# Please specify the name of the file in pgopttune/workload/star_schema_sql/ directory(sql_file_path parameter directory)
74-
# (e.g., Q1.1,Q2,1).
61+
# (e.g., Q1.1, Q2.1).
62+
63+
[workload-sampling]
64+
workload_sampling_time_second = 30
65+
# Time (in seconds) to sample the workload running on the database in the [PostgreSQL] section
66+
my_workload_save_dir = ./workload_data/ # workload save directory
67+
# Database settings to temporarily store workload information
68+
pghost = localhost # PostgreSQL server host
69+
pgport = 5432 # PostgreSQL server port
70+
pguser = postgres # PostgreSQL user name(Database user)
71+
pgpassword = postgres12 # PostgreSQL user password(Database user)
72+
pgdatabase = sampling # PostgreSQL Database

pgopttune/config/my_workload_config.py

-20
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import os
2+
from pgopttune.config.config import Config
3+
4+
5+
class SampledWorkloadConfig(Config):
6+
def __init__(self, conf_path, section='sampled_workload'):
7+
super().__init__(conf_path)
8+
self.conf_path = conf_path
9+
self.config_dict = dict(self.config.items(section))
10+
self._check_is_exist_sampled_workload_save_file()
11+
12+
def _check_is_exist_sampled_workload_save_file(self):
13+
if not os.path.exists(self.get_parameter_value('sampled_workload_save_file')):
14+
raise ValueError("{} does not exist."
15+
"Check the sampled_workload_save_file parameter in {}."
16+
.format(self.get_parameter_value('sampled_workload_save_file'), self.conf_path))
17+
18+
@property
19+
def my_workload_save_file(self):
20+
return self.get_parameter_value('sampled_workload_save_file')

pgopttune/objective/objective_my_workload.py

-19
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from logging import getLogger
2+
from pgopttune.workload.sampled_workload import SampledWorkload
3+
from pgopttune.objective.objective import Objective
4+
from pgopttune.config.postgres_server_config import PostgresServerConfig
5+
from pgopttune.config.tune_config import TuneConfig
6+
from pgopttune.config.sampled_workload_config import SampledWorkloadConfig
7+
8+
logger = getLogger(__name__)
9+
10+
11+
class ObjectiveSampledWorkload(Objective):
12+
13+
def __init__(self,
14+
postgres_server_config: PostgresServerConfig,
15+
tune_config: TuneConfig,
16+
my_workload_config: SampledWorkloadConfig):
17+
super().__init__(postgres_server_config, tune_config)
18+
self.workload = SampledWorkload.load_sampled_workload(my_workload_config.my_workload_save_file,
19+
postgres_server_config=postgres_server_config)

pgopttune/workload/my_transaction.py renamed to pgopttune/workload/sampled_transaction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
logger = getLogger(__name__)
77

88

9-
class MyTransaction:
9+
class SampledTransaction:
1010
def __init__(self, session_id: str, query_start_time: list, statement: list):
1111
self.session_id = session_id
1212
self.query_start_time = query_start_time

pgopttune/workload/my_workload.py renamed to pgopttune/workload/sampled_workload.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
from pgopttune.utils.pg_connect import get_pg_connection
99
from pgopttune.config.postgres_server_config import PostgresServerConfig
1010
from pgopttune.config.workload_sampling_config import WorkloadSamplingConfig
11-
from pgopttune.workload.my_transaction import MyTransaction
11+
from pgopttune.workload.sampled_transaction import SampledTransaction
1212

1313
logger = getLogger(__name__)
1414

1515

16-
class MyWorkload(Workload):
16+
class SampledWorkload(Workload):
1717
def __init__(self,
1818
postgres_server_config: PostgresServerConfig,
1919
workload_sampling_config: WorkloadSamplingConfig,
@@ -68,13 +68,13 @@ def _create_transactions(self, workload_rows):
6868
session_id = row[1]
6969
statement.append(row[2])
7070
else:
71-
my_transaction = MyTransaction(session_id, query_stat_time, statement)
71+
my_transaction = SampledTransaction(session_id, query_stat_time, statement)
7272
self.my_transactions.append(my_transaction)
7373
query_stat_time = [row[0]]
7474
session_id = row[1]
7575
statement = [row[2]]
7676

77-
def save_my_workload(self):
77+
def save_sampled_workload(self):
7878
save_file_name = datetime.datetime.fromtimestamp(self.start_unix_time).strftime("%Y-%m-%d_%H%M%S.%f") + \
7979
"-" \
8080
+ datetime.datetime.fromtimestamp(self.end_unix_time).strftime("%Y-%m-%d_%H%M%S.%f") + ".pkl"
@@ -102,7 +102,7 @@ def run(self):
102102
return elapsed_time
103103

104104
@classmethod
105-
def load_my_workload(cls, load_file_path, postgres_server_config: PostgresServerConfig = None):
105+
def load_sampled_workload(cls, load_file_path, postgres_server_config: PostgresServerConfig = None):
106106
with open(load_file_path, 'rb') as f:
107107
workload = pickle.load(f)
108108
if postgres_server_config is not None:
@@ -112,7 +112,7 @@ def load_my_workload(cls, load_file_path, postgres_server_config: PostgresServer
112112
@staticmethod
113113
def data_load():
114114
# TODO:
115-
logger.warning("At this time, the data loading function to the sampled database is not implemented.")
115+
logger.warning("At the moment, in the sampled workload, The data reload function is not implemented.")
116116

117117
def _run_transaction(self, transaction_index=0):
118118
# logger.debug("Transaction's statement : {}".format(self.my_transactions[transaction_index].statement))
@@ -130,19 +130,19 @@ def _run_transaction(self, transaction_index=0):
130130
conf_path = './conf/postgres_opttune.conf'
131131
postgres_server_config_test = PostgresServerConfig(conf_path) # PostgreSQL Server config
132132
workload_sampling_config_test = WorkloadSamplingConfig(conf_path)
133-
my_workload = MyWorkload(start_unix_time=1593093506.9530554, end_unix_time=1593093567.088895,
134-
workload_sampling_config=workload_sampling_config_test,
135-
postgres_server_config=postgres_server_config_test)
136-
save_file = my_workload.save_my_workload()
133+
sampled_workload = SampledWorkload(start_unix_time=1593093506.9530554, end_unix_time=1593093567.088895,
134+
workload_sampling_config=workload_sampling_config_test,
135+
postgres_server_config=postgres_server_config_test)
136+
save_file = sampled_workload.save_sampled_workload()
137137
logger.debug("run transactions ")
138-
my_workload_elapsed_time = my_workload.run()
139-
logger.debug(my_workload_elapsed_time)
140-
load_workload = MyWorkload.load_my_workload(save_file, postgres_server_config=postgres_server_config_test)
138+
workload_elapsed_time = sampled_workload.run()
139+
logger.debug(workload_elapsed_time)
140+
load_workload = SampledWorkload.load_sampled_workload(save_file, postgres_server_config=postgres_server_config_test)
141141
logger.debug("run transactions using saved file")
142142
load_workload_elapsed_time = load_workload.run()
143143
logger.debug(load_workload_elapsed_time)
144144
logger.debug("finised...")
145-
logger.debug(my_workload_elapsed_time)
145+
logger.debug(workload_elapsed_time)
146146
logger.debug(load_workload_elapsed_time)
147147

148148
# my_workload.extract_workload()

pgopttune/workload/workload_sampler.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pgopttune.log.pg_csv_log import PostgresCsvLog
55
from pgopttune.config.postgres_server_config import PostgresServerConfig
66
from pgopttune.config.workload_sampling_config import WorkloadSamplingConfig
7-
from pgopttune.workload.my_workload import MyWorkload
7+
from pgopttune.workload.sampled_workload import SampledWorkload
88

99
logger = getLogger(__name__)
1010

@@ -31,10 +31,10 @@ def save(self):
3131
logger.debug("Start importing the CSV file(saved executed SQL) into the table.")
3232
self.csv_log.load_csv_to_database(copy_dir=self.workload_sampling_config.my_workload_save_dir,
3333
dsn=self.workload_sampling_config.dsn)
34-
my_workload = MyWorkload(start_unix_time=csv_log_start_time, end_unix_time=csv_log_end_time,
35-
postgres_server_config=self.postgres_server_config,
36-
workload_sampling_config=self.workload_sampling_config)
37-
save_file = my_workload.save_my_workload()
34+
workload = SampledWorkload(start_unix_time=csv_log_start_time, end_unix_time=csv_log_end_time,
35+
postgres_server_config=self.postgres_server_config,
36+
workload_sampling_config=self.workload_sampling_config)
37+
save_file = workload.save_sampled_workload()
3838
logger.info("The workload has been recorded in '{}'".format(save_file))
3939
return save_file
4040

sampling_workload.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ def main(
2828
logger.info(
2929
"You can automatically tune the saved workload by setting the following in'./conf/postgres_opttune.conf'.\n"
3030
"[turning]\n"
31-
"benchmark = my_workload \n"
31+
"benchmark = sampled_workload \n"
3232
":\n"
33-
"[my-workload]\n"
34-
"my_workload_save_file = {}".format(workload_save_file_path))
33+
"[sampled_workload]\n"
34+
"sampled_workload_save_file = {}".format(workload_save_file_path))
3535

3636

3737
if __name__ == "__main__":

tune.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pgopttune.objective.objective_pgbench import ObjectivePgbench
1111
from pgopttune.objective.objective_oltpbench import ObjectiveOltpbench
1212
from pgopttune.objective.objective_star_schema_benchmark import ObjectiveStarSchemaBenchmark
13-
from pgopttune.objective.objective_my_workload import ObjectiveMyWorkload
13+
from pgopttune.objective.objective_sampled_workload import ObjectiveSampledWorkload
1414
from pgopttune.parameter.reset import reset_postgres_param
1515
from pgopttune.parameter.pg_tune_parameter import PostgresTuneParameter
1616
from pgopttune.recovery.pg_recovery import Recovery
@@ -19,7 +19,7 @@
1919
from pgopttune.config.pgbench_config import PgbenchConfig
2020
from pgopttune.config.oltpbench_config import OltpbenchConfig
2121
from pgopttune.config.star_schema_benchmark_config import StarSchemaBenchmarkConfig
22-
from pgopttune.config.my_workload_config import MyWorkloadConfig
22+
from pgopttune.config.sampled_workload_config import SampledWorkloadConfig
2323

2424

2525
def main(
@@ -49,9 +49,9 @@ def main(
4949
star_schema_benchmark_config = StarSchemaBenchmarkConfig(conf_path) # star schema benchmark config
5050
objective = ObjectiveStarSchemaBenchmark(postgres_server_config, tune_config, star_schema_benchmark_config)
5151
# my workload (save using sampling_workload.py)
52-
elif tune_config.benchmark == 'my_workload':
53-
my_workload_config = MyWorkloadConfig(conf_path) # my worklod config config
54-
objective = ObjectiveMyWorkload(postgres_server_config, tune_config, my_workload_config)
52+
elif tune_config.benchmark == 'sampled_workload':
53+
sampled_workload_config = SampledWorkloadConfig(conf_path) # my workload sampled config
54+
objective = ObjectiveSampledWorkload(postgres_server_config, tune_config, sampled_workload_config)
5555
else:
5656
raise NotImplementedError('This benchmark tool is not supported at this time.')
5757

@@ -81,7 +81,7 @@ def main(
8181
# tuning using optuna
8282
try:
8383
sampler = get_sampler(tune_config.sample_mode) # sampler setting
84-
if tune_config.benchmark == 'my_workload':
84+
if tune_config.benchmark == 'sampled_workload':
8585
logger.info("The purpose of optimization is to minimize the total SQL execution time")
8686
study = create_study(study_name=tune_config.study_name, # create study
8787
sampler=sampler,

0 commit comments

Comments
 (0)