Add README for FS-G #279

Merged · 13 commits · Aug 3, 2022
2 changes: 1 addition & 1 deletion LICENSE
@@ -465,7 +465,7 @@ SOFTWARE.

 The function partition_by_category and subgraphing in
 federatedscope/gfl/dataset/recsys.py
-is borrow from https://github.com/FedML-AI/FedGraphNN
+are borrowed from https://github.com/FedML-AI/FedGraphNN
 
 Copyright [FedML] [Chaoyang He, Salman Avestimehr]

21 changes: 9 additions & 12 deletions federatedscope/attack/trainer/benign_trainer.py
@@ -36,10 +36,10 @@ def hook_on_fit_end_test_poison(ctx):
     Evaluate metrics of poisoning attacks.
     """
 
-    ctx['poison_' + ctx.cur_split +
-        '_loader'] = ctx.data['poison_' + ctx.cur_split]
-    ctx['poison_' + ctx.cur_split +
-        '_data'] = ctx.data['poison_' + ctx.cur_split].dataset
+    ctx['poison_' + ctx.cur_split + '_loader'] = ctx.data['poison_' +
+                                                          ctx.cur_split]
+    ctx['poison_' + ctx.cur_split + '_data'] = ctx.data['poison_' +
+                                                        ctx.cur_split].dataset
     ctx['num_poison_' + ctx.cur_split + '_data'] = len(
         ctx.data['poison_' + ctx.cur_split].dataset)
     setattr(ctx, "poison_{}_y_true".format(ctx.cur_split), [])
@@ -67,16 +67,13 @@ def hook_on_fit_end_test_poison(ctx):
ctx.get("poison_num_samples_{}".format(ctx.cur_split)) +
ctx.poison_batch_size)

setattr(
ctx, "poison_{}_y_true".format(ctx.cur_split),
np.concatenate(ctx.get("poison_{}_y_true".format(ctx.cur_split))))
setattr(
ctx, "poison_{}_y_prob".format(ctx.cur_split),
np.concatenate(ctx.get("poison_{}_y_prob".format(ctx.cur_split))))
setattr(ctx, "poison_{}_y_true".format(ctx.cur_split),
np.concatenate(ctx.get("poison_{}_y_true".format(ctx.cur_split))))
setattr(ctx, "poison_{}_y_prob".format(ctx.cur_split),
np.concatenate(ctx.get("poison_{}_y_prob".format(ctx.cur_split))))

logger.info('the {} poisoning samples: {:d}'.format(
ctx.cur_split,
ctx.get("poison_num_samples_{}".format(ctx.cur_split))))
ctx.cur_split, ctx.get("poison_num_samples_{}".format(ctx.cur_split))))

poison_true = ctx['poison_' + ctx.cur_split + '_y_true']
poison_prob = ctx['poison_' + ctx.cur_split + '_y_prob']
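The hook above addresses all of its state through split-dependent key names. A small illustration (not FederatedScope code) of the naming scheme for `ctx.cur_split == 'test'`; since `Context` is a dict with attribute access (see context.py below), `setattr(ctx, name, value)` and `ctx[name] = value` are interchangeable:

```python
# Illustration only: the ctx entries touched above when cur_split == 'test'.
cur_split = 'test'
for key in ['poison_{}_loader',      # the poisoned DataLoader
            'poison_{}_data',        # its underlying dataset
            'num_poison_{}_data',    # number of poisoned examples
            'poison_{}_y_true',      # labels, concatenated across batches
            'poison_{}_y_prob']:     # model outputs, same layout
    print(key.format(cur_split))
```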
1 change: 0 additions & 1 deletion federatedscope/contrib/data/example.py
@@ -3,7 +3,6 @@

 def MyData(config):
     r"""
-
     Returns:
         data:
             {
2 changes: 1 addition & 1 deletion federatedscope/core/auxiliaries/model_builder.py
@@ -29,7 +29,7 @@ def get_shape_from_data(data, model_config, backend='torch'):
     ) == 'vmfnet' else data['train'].n_row
     elif model_config.type.lower() in [
             'gcn', 'sage', 'gpr', 'gat', 'gin', 'mpnn'
-    ]:
+    ] or model_config.type.startswith('gnn_'):
         num_label = data['num_label'] if 'num_label' in data else None
         num_edge_features = data[
             'num_edge_features'] if model_config.type == 'mpnn' else None
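The widened condition routes not only the built-in GNN names but any model type declared with a `gnn_` prefix (e.g. contributed models) into the graph-shape branch. A standalone sketch of the predicate; `gnn_my_gcn` is a made-up type name, not one shipped with FederatedScope:

```python
# Mirrors the updated condition above; 'gnn_my_gcn' is hypothetical.
def takes_gnn_branch(model_type):
    return model_type.lower() in [
        'gcn', 'sage', 'gpr', 'gat', 'gin', 'mpnn'
    ] or model_type.startswith('gnn_')

for name in ['gcn', 'GAT', 'gnn_my_gcn', 'lr']:
    print(name, takes_gnn_branch(name))  # True, True, True, False
```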
13 changes: 7 additions & 6 deletions federatedscope/core/auxiliaries/utils.py
@@ -502,27 +502,28 @@ def calculate_time_cost(instance_number,
     return comp_cost, comm_cost
 
 
-
 def calculate_batch_epoch_num(steps, batch_or_epoch, num_data, batch_size,
                               drop_last):
     num_batch_per_epoch = num_data // batch_size + int(
         not drop_last and bool(num_data % batch_size))
     if num_batch_per_epoch == 0:
         raise RuntimeError(
-            "The number of batch is 0, please check 'batch_size' or set 'drop_last' as False"
-        )
+            "The number of batch is 0, please check 'batch_size' or set "
+            "'drop_last' as False")
     elif batch_or_epoch == "epoch":
         num_epoch = steps
         num_batch_last_epoch = num_batch_per_epoch
         num_total_batch = steps * num_batch_per_epoch
     else:
         num_epoch = math.ceil(steps / num_batch_per_epoch)
-        num_batch_last_epoch = steps % num_batch_per_epoch or num_batch_per_epoch
+        num_batch_last_epoch = steps % num_batch_per_epoch or \
+            num_batch_per_epoch
         num_total_batch = steps
-    return num_batch_per_epoch, num_batch_last_epoch, num_epoch, num_total_batch
+    return num_batch_per_epoch, num_batch_last_epoch, num_epoch, \
+        num_total_batch
 
 
 def merge_param_dict(raw_param, filtered_param):
     for key in filtered_param.keys():
         raw_param[key] = filtered_param[key]
     return raw_param
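For reference, a quick sanity check of what `calculate_batch_epoch_num` returns, assuming it is imported from the module touched above:

```python
from federatedscope.core.auxiliaries.utils import calculate_batch_epoch_num

# 50 samples, batch_size 16, keep the last partial batch:
# 50 // 16 + 1 = 4 batches per epoch.
print(calculate_batch_epoch_num(steps=10, batch_or_epoch='batch',
                                num_data=50, batch_size=16,
                                drop_last=False))
# -> (4, 2, 3, 10): 10 batches span ceil(10 / 4) = 3 epochs,
#    and the last epoch runs 10 % 4 = 2 batches.

print(calculate_batch_epoch_num(steps=2, batch_or_epoch='epoch',
                                num_data=50, batch_size=16,
                                drop_last=False))
# -> (4, 4, 2, 8): 2 full epochs of 4 batches each.
```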

54 changes: 35 additions & 19 deletions federatedscope/core/trainers/context.py
@@ -55,32 +55,45 @@ class Context(LifecycleDict):
         init_dict (dict): a dict used to initialize the instance of Context
         init_attr (bool): if set up the static variables
     Note:
-        - The variables within an instance of class `Context` can be set/get as an attribute.
+        - The variables within an instance of class `Context`
+        can be set/get as an attribute.
         ```
         ctx.${NAME_VARIABLE} = ${VALUE_VARIABLE}
         ```
-        where `${NAME_VARIABLE}` and `${VALUE_VARIABLE}` is the name and value of the variable.
+        where `${NAME_VARIABLE}` and `${VALUE_VARIABLE}`
+        is the name and value of the variable.
 
-        - To achieve automatically lifecycle management, you can wrap the variable with `CtxVar` and a lifecycle parameter
+        - To achieve automatically lifecycle management, you can
+        wrap the variable with `CtxVar` and a lifecycle parameter
         as follows
         ```
         ctx.${NAME_VARIABLE} = CtxVar(${VALUE_VARIABLE}, ${LFECYCLE})
         ```
-        The parameter `${LFECYCLE}` can be chosen from `LIFECYCLE.BATCH`, `LIFECYCLE.EPOCH` and `LIFECYCLE.ROUTINE`.
-        Then the variable `ctx.${NAME_VARIABLE}` will be deleted at the end of the corresponding stage
-            - `LIFECYCLE.BATCH`: the variables will be deleted after running a batch
-            - `LIFECYCLE.EPOCH`: the variables will be deleted after running a epoch
-            - `LIFECYCLE.ROUTINE`: the variables will be deleted after running a routine
-        More details please refer to our [tutorial](https://federatedscope.io/docs/trainer/).
-
-        - Context also maintains some special variables across different routines, like
+        The parameter `${LFECYCLE}` can be chosen from `LIFECYCLE.BATCH`,
+        `LIFECYCLE.EPOCH` and `LIFECYCLE.ROUTINE`.
+        Then the variable `ctx.${NAME_VARIABLE}` will be deleted at
+        the end of the corresponding stage
+            - `LIFECYCLE.BATCH`: the variables will
+            be deleted after running a batch
+            - `LIFECYCLE.EPOCH`: the variables will be
+            deleted after running a epoch
+            - `LIFECYCLE.ROUTINE`: the variables will be
+            deleted after running a routine
+        More details please refer to our
+        [tutorial](https://federatedscope.io/docs/trainer/).
+
+        - Context also maintains some special variables across
+        different routines, like
            - cfg
            - model
            - data
            - device
-           - ${split}_data: the dataset object of data split named `${split}`
-           - ${split}_loader: the data loader object of data split named `${split}`
-           - num_${split}_data: the number of examples within the dataset named `${split}`
+           - ${split}_data: the dataset object of data split
+           named `${split}`
+           - ${split}_loader: the data loader object of data
+           split named `${split}`
+           - num_${split}_data: the number of examples within
+           the dataset named `${split}`
     """
     def __init__(self,
                  model,
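For readers of the reflowed docstring, a minimal usage sketch of the `CtxVar` convention it describes. This is not FederatedScope code, and the `enums` import path is an assumption inferred from this PR's use of `LIFECYCLE.ROUTINE` alongside `CtxVar` (see torch_trainer.py below):

```python
# A minimal sketch; the enums import path is an assumption.
from federatedscope.core.trainers.context import CtxVar
from federatedscope.core.trainers.enums import LIFECYCLE

def _hook_on_fit_start(ctx):
    # Plain attribute: persists until explicitly overwritten or deleted.
    ctx.best_acc = 0.
    # Wrapped attribute: deleted automatically when the routine ends.
    ctx.loss_total = CtxVar(0., LIFECYCLE.ROUTINE)

def _hook_on_batch_end(ctx):
    # Wrapped attribute: deleted automatically when the batch ends.
    ctx.batch_seen = CtxVar(True, LIFECYCLE.BATCH)
```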
@@ -127,10 +140,12 @@ def setup_vars(self):
                 'train_loader', None) is not None:
             # Calculate the number of update steps during training given the
             # local_update_steps
-            self.num_train_batch, self.num_train_batch_last_epoch, self.num_train_epoch, self.num_total_train_batch = calculate_batch_epoch_num(
-                self.cfg.train.local_update_steps,
-                self.cfg.train.batch_or_epoch, self.num_train_data,
-                self.cfg.data.batch_size, self.cfg.data.drop_last)
+            self.num_train_batch, self.num_train_batch_last_epoch, \
+                self.num_train_epoch, self.num_total_train_batch = \
+                calculate_batch_epoch_num(
+                    self.cfg.train.local_update_steps,
+                    self.cfg.train.batch_or_epoch, self.num_train_data,
+                    self.cfg.data.batch_size, self.cfg.data.drop_last)
 
         # Process evaluation data
         for mode in ["val", "test"]:
@@ -210,7 +225,8 @@ def __init__(self, obj, lifecycle=None):


 def lifecycle(lifecycle):
-    """Manage the lifecycle of the variables within context, and blind these operations from user.
+    """Manage the lifecycle of the variables within context,
+    and blind these operations from user.
     Args:
         lifecycle: the type of lifecycle, choose from "batch/epoch/routine"
     """
9 changes: 7 additions & 2 deletions federatedscope/core/trainers/torch_trainer.py
@@ -33,12 +33,15 @@ def get_model_para(self):
             share_local_model else self.ctx.model.cpu().state_dict())
 
     def parse_data(self, data):
-        """Populate "${split}_data", "${split}_loader" and "num_${split}_data" for different data splits
+        """Populate "${split}_data", "${split}_loader" and "num_${
+        split}_data" for different data splits
 
         """
         init_dict = dict()
         if isinstance(data, dict):
             for split in data.keys():
+                if split not in ['train', 'val', 'test']:
+                    continue
                 init_dict["{}_data".format(split)] = None
                 init_dict["{}_loader".format(split)] = None
                 init_dict["num_{}_data".format(split)] = 0
@@ -143,7 +146,9 @@ def _hook_on_fit_start_init(self, ctx):
         ctx.scheduler = get_scheduler(ctx.optimizer,
                                       **ctx.cfg[ctx.cur_mode].scheduler)
 
-        # TODO: the number of batch and epoch is decided by the current mode and data split, so the number of batch and epoch should be initialized at the beginning of the routine
+        # TODO: the number of batch and epoch is decided by the current mode
+        # and data split, so the number of batch and epoch should be
+        # initialized at the beginning of the routine
 
         # prepare statistics
         ctx.loss_batch_total = CtxVar(0., LIFECYCLE.ROUTINE)
6 changes: 4 additions & 2 deletions federatedscope/core/trainers/trainer.py
@@ -247,8 +247,10 @@ def _run_routine(self, mode, hooks_set, dataset_name=None):
         Arguments:
             mode: running mode of client, chosen from train/val/test
         Note:
-            Considering evaluation could be in ```hooks_set["on_epoch_end"]```, there could be two data loaders in
-            self.ctx, we must tell the running hooks which data_loader to call and which num_samples to count
+            Considering evaluation could be in ```hooks_set["on_epoch_end"]```,
+            there could be two data loaders in self.ctx, we must tell the
+            running hooks which data_loader to call and which
+            num_samples to count
         """
         for hook in hooks_set["on_fit_start"]:
             hook(self.ctx)
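A hedged sketch of the disambiguation the note describes: a hook resolves the loader and sample count from the active split name rather than hard-coding one. `ctx.cur_split` and the `${split}_loader`/`num_${split}_data` naming come from elsewhere in this PR; combining them this way is an assumption, not the exact API:

```python
# Sketch only: resolve per-split state by name inside an evaluation hook.
def _hook_on_epoch_end_eval(ctx):
    loader = ctx.get('{}_loader'.format(ctx.cur_split))
    num_samples = ctx.get('num_{}_data'.format(ctx.cur_split))
    # ... iterate `loader` here and accumulate metrics ...
    print('evaluating {} ({} examples)'.format(ctx.cur_split, num_samples))
```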