[Feature] Add new system model #336

Merged · 6 commits · Aug 20, 2022
Changes from 5 commits
2 changes: 1 addition & 1 deletion benchmark/FedHPOB/README.md
@@ -12,8 +12,8 @@ We highly recommend running FedHPO-B with conda.

```bash
git clone https://github.com/alibaba/FederatedScope.git
git checkout a653102c6b8d5421d2874077594df0b2e29401a1
cd FederatedScope
git checkout a653102c6b8d5421d2874077594df0b2e29401a1
pip install -e .
```

17 changes: 17 additions & 0 deletions benchmark/FedHPOB/example.py
@@ -0,0 +1,17 @@
from fedhpob.config import fhb_cfg
from fedhpob.benchmarks import TabularBenchmark

benchmark = TabularBenchmark('cnn', 'femnist', 'avg')

# get hyperparameter space
config_space = benchmark.get_configuration_space(CS=True)

# get fidelity space
fidelity_space = benchmark.get_fidelity_space(CS=True)

# get results
res = benchmark(config_space.sample_configuration(),
fidelity_space.sample_configuration(),
fhb_cfg=fhb_cfg,
seed=12345)
print(res)
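For illustration, the example above can be extended to query several random samples in a loop — a minimal sketch that reuses only the objects created in `example.py` (the seeds and number of trials are arbitrary):

```python
# Continues from the example above: evaluate a few random
# (configuration, fidelity) pairs and print each result.
for seed in (1, 2, 3):
    cfg_sample = config_space.sample_configuration()
    fid_sample = fidelity_space.sample_configuration()
    res = benchmark(cfg_sample, fid_sample, fhb_cfg=fhb_cfg, seed=seed)
    print(seed, res)
```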
51 changes: 49 additions & 2 deletions benchmark/FedHPOB/fedhpob/benchmarks/base_benchmark.py
@@ -1,14 +1,17 @@
import abc
import os
import pickle
import datetime
import numpy as np
from federatedscope.core.configs.config import global_cfg
from federatedscope.core.auxiliaries.data_builder import get_data
from fedhpob.utils.tabular_dataloader import load_data
from fedhpob.utils.util import disable_fs_logger
from fedhpob.utils.cost_model import get_cost_model


class BaseBenchmark(abc.ABC):
def __init__(self, model, dname, algo, rng=None, **kwargs):
def __init__(self, model, dname, algo, cost_mode, rng=None, **kwargs):
"""

:param rng:
@@ -20,7 +23,8 @@ def __init__(self, model, dname, algo, rng=None, **kwargs):
self.rng = np.random.RandomState()
self.configuration_space = self.get_configuration_space()
self.fidelity_space = self.get_fidelity_space()

self.model, self.dname, self.algo, self.cost_mode = model, dname, \
algo, cost_mode
# Load data and modify cfg of FS.
self.cfg = global_cfg.clone()
self.cfg.set_new_allowed(True)
@@ -29,6 +33,12 @@ def __init__(self, model, dname, algo, rng=None, **kwargs):
self.cfg.data.type = dname
self.data, modified_cfg = get_data(config=self.cfg.clone())
self.cfg.merge_from_other_cfg(modified_cfg)
# Try load time data
try:
datadir = os.path.join('data', 'tabular_data')
self.table, _ = load_data(datadir, model, dname, algo)
except:
self.table = None
disable_fs_logger(self.cfg, True)

def __call__(self, configuration, fidelity, seed=1, **kwargs):
@@ -40,7 +50,44 @@ def __call__(self, configuration, fidelity, seed=1, **kwargs):
def _check(self, configuration, fidelity):
pass

def _search(self, configuration, fidelity):
# For configuration
mask = np.array([True] * self.table.shape[0])
for col in configuration.keys():
mask *= (self.table[col].values == configuration[col])
idx = np.where(mask)
result = self.table.iloc[idx]

# For fidelity
mask = np.array([True] * result.shape[0])
for col in fidelity.keys():
if col == 'round':
continue
mask *= (result[col].values == fidelity[col])
idx = np.where(mask)
result = result.iloc[idx]["result"]
return result

def get_lamba_from_df(self, configuration, fidelity):
if self.table is not None:
client_num = self.cfg.federate.client_num * \
self.cfg.federate.sample_client_rate
result = self._search({'seed': 0, **configuration}, fidelity)
index = list(result.keys())
filterd_result = eval(result[index[0]])
c = np.mean(filterd_result['train_time']) + np.mean(
filterd_result['eval_time'])
return c.total_seconds() / float(client_num)
else:
from fedhpob.config import fhb_cfg
return fhb_cfg.cost.c

def _cost(self, configuration, fidelity, **kwargs):
try:
kwargs['const'] = self.get_lamba_from_df(configuration, fidelity)
except:
from fedhpob.config import fhb_cfg
kwargs['const'] = fhb_cfg.cost.c
cost_model = get_cost_model(mode=self.cost_mode)
t = cost_model(self.cfg, configuration, fidelity, self.data, **kwargs)
return t
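For context, `_search` above is an exact-match filter over the pre-computed results table: it builds a boolean mask per configuration and fidelity column, then indexes the matching rows. A self-contained sketch of the same masking idea on a toy DataFrame (the column names and values here are hypothetical, not the real table schema):

```python
import numpy as np
import pandas as pd

# Toy stand-in for the tabular results (hypothetical columns and values).
table = pd.DataFrame({
    'lr': [0.01, 0.01, 0.1],
    'wd': [0.0, 0.5, 0.0],
    'sample_rate': [0.2, 0.2, 1.0],
    'result': ['{"val_loss": 0.9}', '{"val_loss": 1.1}', '{"val_loss": 0.7}'],
})
configuration = {'lr': 0.01, 'wd': 0.0}
fidelity = {'sample_rate': 0.2, 'round': 250}  # 'round' is skipped, as in `_search`

mask = np.array([True] * table.shape[0])
for col in configuration:
    mask *= (table[col].values == configuration[col])
for col in fidelity:
    if col == 'round':
        continue
    mask *= (table[col].values == fidelity[col])

matched = table.iloc[np.where(mask)[0]]["result"]
print(matched)  # rows whose hyperparameters and fidelity match exactly
```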
5 changes: 2 additions & 3 deletions benchmark/FedHPOB/fedhpob/benchmarks/raw_benchmark.py
@@ -18,10 +18,9 @@ def __init__(self,
rng=None,
cost_mode='estimated',
**kwargs):
self.model, self.dname, self.algo, self.cost_mode = model, dname, \
algo, cost_mode
super(RawBenchmark, self).__init__(model, dname, algo, cost_mode, rng,
**kwargs)
self.device = kwargs['device']
super(RawBenchmark, self).__init__(model, dname, algo, rng, **kwargs)

def _run_fl(self, configuration, fidelity, key='val_avg_loss', seed=1):
init_cfg = self.cfg.clone()
4 changes: 2 additions & 2 deletions benchmark/FedHPOB/fedhpob/benchmarks/surrogate_benchmark.py
@@ -25,8 +25,8 @@ def __init__(self,
else:
self.surrogate_models, self.meta_info, self.X, self.Y = \
load_surrogate_model(modeldir, model, dname, algo)
super(SurrogateBenchmark, self).__init__(model, dname, algo, rng,
**kwargs)
super(SurrogateBenchmark, self).__init__(model, dname, algo, cost_mode,
rng, **kwargs)

def _check(self, configuration, fidelity):
for key in configuration:
24 changes: 2 additions & 22 deletions benchmark/FedHPOB/fedhpob/benchmarks/tabular_benchmark.py
@@ -15,12 +15,10 @@ def __init__(self,
rng=None,
cost_mode='estimated',
**kwargs):
self.model, self.dname, self.algo, self.cost_mode = model, dname, \
algo, cost_mode
self.table, self.meta_info = load_data(datadir, model, dname, algo)
self.eval_freq = self.meta_info['eval_freq']
super(TabularBenchmark, self).__init__(model, dname, algo, rng,
**kwargs)
super(TabularBenchmark, self).__init__(model, dname, algo, cost_mode,
rng, **kwargs)

def _check(self, configuration, fidelity):
for key, value in configuration.items():
@@ -31,24 +29,6 @@ def _check(self, configuration, fidelity):
assert value in self.fidelity_space[
key], 'fidelity invalid, check `fidelity_space` for help.'

def _search(self, configuration, fidelity):
# For configuration
mask = np.array([True] * self.table.shape[0])
for col in configuration.keys():
mask *= (self.table[col].values == configuration[col])
idx = np.where(mask)
result = self.table.iloc[idx]

# For fidelity
mask = np.array([True] * result.shape[0])
for col in fidelity.keys():
if col == 'round':
continue
mask *= (result[col].values == fidelity[col])
idx = np.where(mask)
result = result.iloc[idx]["result"]
return result

def objective_function(self,
configuration,
fidelity,
45 changes: 32 additions & 13 deletions benchmark/FedHPOB/fedhpob/utils/cost_model.py
@@ -40,12 +40,13 @@ def get_cost_model(mode='estimated'):
"""
cost_dict = {
'raw': raw_cost,
'estimated': estimated_cost,
'estimated': cs_cost,
'cross_deivce': cd_cost
}
return cost_dict[mode]


def communication_cost(cfg, model_size, fhb_cfg):
def communication_csilo_cost(cfg, model_size, fhb_cfg):
t_up = model_size / fhb_cfg.cost.bandwidth.client_up
t_down = max(
cfg.federate.client_num * cfg.federate.sample_client_rate *
@@ -54,7 +55,13 @@ def communication_cost(cfg, model_size, fhb_cfg):
return t_up + t_down


def computation_cost(cfg, fhb_cfg):
def communication_cdevice_cost(cfg, model_size, fhb_cfg):
t_up = model_size / fhb_cfg.cost.bandwidth.client_up
t_down = model_size / fhb_cfg.cost.bandwidth.client_down
return t_up + t_down


def computation_cost(cfg, fhb_cfg, const):
"""
Assume the time is exponential distribution with c,
return the expected maximum of M iid random variables plus server time.
@@ -63,26 +70,38 @@ def computation_cost(cfg, fhb_cfg):
1.0 / i for i in range(
1,
int(cfg.federate.client_num * cfg.federate.sample_client_rate) + 1)
]) * fhb_cfg.cost.c
]) * const
return t_client + fhb_cfg.cost.time_server


def raw_cost(**kwargs):
return None


def estimated_cost(cfg, configuration, fidelity, data, **kwargs):
def get_info(cfg, configuration, fidelity, data):
cfg = merge_cfg(cfg, configuration, fidelity)
model = get_model(cfg.model, list(data.values())[0])
model_size = sum([param.nelement() for param in model.parameters()])
return cfg, model_size


def cs_cost(cfg, configuration, fidelity, data, **kwargs):
"""
Works on raw, tabular and surrogate mode.
Works on raw, tabular and surrogate mode, cross-silo
"""
def get_info(cfg, configuration, fidelity, data):
cfg = merge_cfg(cfg, configuration, fidelity)
model = get_model(cfg.model, list(data.values())[0])
model_size = sum([param.nelement() for param in model.parameters()])
return cfg, model_size
cfg, num_param = get_info(cfg, configuration, fidelity, data)
t_comm = communication_csilo_cost(cfg, num_param, kwargs['fhb_cfg'])
t_comp = computation_cost(cfg, kwargs['fhb_cfg'], kwargs['const'])
t_round = t_comm + t_comp
return t_round * cfg.federate.total_round_num


def cd_cost(cfg, configuration, fidelity, data, **kwargs):
"""
Works on raw, tabular and surrogate mode, cross-device
"""
cfg, num_param = get_info(cfg, configuration, fidelity, data)
t_comm = communication_cost(cfg, num_param, kwargs['fhb_cfg'])
t_comp = computation_cost(cfg, kwargs['fhb_cfg'])
t_comm = communication_cdevice_cost(cfg, num_param, kwargs['fhb_cfg'])
t_comp = computation_cost(cfg, kwargs['fhb_cfg'], kwargs['const'])
t_round = t_comm + t_comp
return t_round * cfg.federate.total_round_num
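
As context for the harmonic sum in `computation_cost`: it models each sampled client's local time as an exponential random variable with mean `const`, and the expected maximum of M i.i.d. exponentials with mean c is c · (1 + 1/2 + … + 1/M), which is exactly what the code computes before adding the server time. A minimal sketch of that quantity in isolation (function and variable names here are illustrative, not part of the benchmark API):

```python
import numpy as np

def expected_max_client_time(num_sampled_clients: int, mean_client_time: float) -> float:
    # E[max of M i.i.d. Exp(mean=c)] = c * (1 + 1/2 + ... + 1/M)
    harmonic = np.sum([1.0 / i for i in range(1, num_sampled_clients + 1)])
    return mean_client_time * harmonic

# e.g. 5 sampled clients, each averaging 2.0 s of local computation per round
print(expected_max_client_time(5, 2.0))  # ~4.57 s, before adding the server time
```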