Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions discojs/discojs-core/src/aggregator/base.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { client, Task, tf, AsyncInformant } from '..'
import { client, Task, AsyncInformant } from '..'
import { Model } from '../training/model'

import { EventEmitter } from 'events'

Expand Down Expand Up @@ -61,7 +62,7 @@ export abstract class Base<T> {
/**
* The TF.js model whose weights are updated on aggregation.
*/
protected _model?: tf.LayersModel,
protected _model?: Model,
/**
* The round cut-off for contributions.
*/
Expand Down Expand Up @@ -141,7 +142,7 @@ export abstract class Base<T> {
* Sets the aggregator's TF.js model.
* @param model The new TF.js model
*/
setModel (model: tf.LayersModel): void {
setModel (model: Model): void {
this._model = model
}

Expand Down Expand Up @@ -267,7 +268,7 @@ export abstract class Base<T> {
/**
* The aggregator's current model.
*/
get model (): tf.LayersModel | undefined {
get model (): Model | undefined {
return this._model
}

Expand Down
7 changes: 4 additions & 3 deletions discojs/discojs-core/src/aggregator/mean.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { Map } from 'immutable'

import { AggregationStep, Base as Aggregator } from './base'
import { Task, WeightsContainer, aggregation, tf, client } from '..'
import { Task, WeightsContainer, aggregation, client } from '..'
import { Model } from '../training/model'

/**
* Mean aggregator whose aggregation step consists in computing the mean of the received weights.
Expand All @@ -16,7 +17,7 @@ export class MeanAggregator extends Aggregator<WeightsContainer> {

constructor (
task: Task,
model?: tf.LayersModel,
model?: Model,
roundCutoff = 0,
threshold = 1
) {
Expand Down Expand Up @@ -67,7 +68,7 @@ export class MeanAggregator extends Aggregator<WeightsContainer> {
aggregate (): void {
this.log(AggregationStep.AGGREGATE)
const result = aggregation.avg(this.contributions.get(0)?.values() as Iterable<WeightsContainer>)
this.model?.setWeights(result.weights)
this.model?.toTfjs().setWeights(result.weights)
this.emit(result)
}

Expand Down
5 changes: 3 additions & 2 deletions discojs/discojs-core/src/aggregator/secure.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { AggregationStep, Base as Aggregator } from './base'
import { tf, aggregation, Task, WeightsContainer, client } from '..'
import { Model } from '../training/model'

import * as crypto from 'crypto'

Expand All @@ -19,7 +20,7 @@ export class SecureAggregator extends Aggregator<WeightsContainer> {

constructor (
task: Task,
model?: tf.LayersModel
model?: Model
) {
super(task, model, 0, 2)

Expand All @@ -35,7 +36,7 @@ export class SecureAggregator extends Aggregator<WeightsContainer> {
} else if (this.communicationRound === 1) {
// Average the received partial sums
const result = aggregation.avg(this.contributions.get(1)?.values() as Iterable<WeightsContainer>)
this.model?.setWeights(result.weights)
this.model?.toTfjs().setWeights(result.weights)
this.emit(result)
} else {
throw new Error('communication round is out of bounds')
Expand Down
13 changes: 13 additions & 0 deletions discojs/discojs-core/src/dataset/data/data_split.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Data } from './data'
import { Dataset } from '../dataset'

/**
* Train-validation split of Disco data.
Expand All @@ -7,3 +8,15 @@ export interface DataSplit {
train: Data
validation?: Data
}

/**
* Extracts the training and validation sets from a data split object.
* The returned datasets are preprocessed and batched.
* @param data The data split
* @returns A tuple object containing the training and validation datasets
*/
export function extract (data: DataSplit): {training: Dataset, validation: Dataset} {
const training = data.train.preprocess().batch().dataset
const validation = data.validation?.preprocess().batch().dataset ?? training
return { training, validation }
}
2 changes: 1 addition & 1 deletion discojs/discojs-core/src/dataset/data/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export { DataSplit } from './data_split'
export * as tuple from './data_split'
export { Data } from './data'
export { ImageData } from './image_data'
export { TabularData } from './tabular_data'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Task, tf } from '../../..'
import { PreprocessingFunction } from './base'

import GPT3Tokenizer from 'gpt3-tokenizer'
import { GPTLMHeadModel } from 'gpt-tfjs'
import { List } from 'immutable'

/**
Expand All @@ -22,7 +22,7 @@ interface TokenizedEntry extends tf.TensorContainerObject {
ys: tf.Tensor1D
}

const gpt3Tokenizer = new GPT3Tokenizer({ type: 'gpt3' })
const minGptTokenizer = GPTLMHeadModel.tokenizer

const padding: PreprocessingFunction = {
type: TextPreprocessing.Padding,
Expand Down Expand Up @@ -52,7 +52,7 @@ const tokenize: PreprocessingFunction = {

let tokenized: number[]
if (tokenizer === undefined) {
tokenized = gpt3Tokenizer.encode(xs[0]).bpe
tokenized = minGptTokenizer.encode(xs[0]).bpe
} else {
throw new Error('tokenizer not implemented')
}
Expand Down
2 changes: 1 addition & 1 deletion discojs/discojs-core/src/dataset/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ export { Dataset } from './dataset'
export { DatasetBuilder } from './dataset_builder'
export { ImageLoader, TabularLoader, DataLoader } from './data_loader'
export {
DataSplit, Data, TabularData, ImageData, TextData,
tuple, Data, TabularData, ImageData, TextData,
ImagePreprocessing, TabularPreprocessing, TextPreprocessing,
IMAGE_PREPROCESSING, TABULAR_PREPROCESSING, TEXT_PREPROCESSING
} from './data'
8 changes: 4 additions & 4 deletions discojs/discojs-core/src/default_tasks/cifar10.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, data, TaskProvider } from '..'
import { tf, Task, data, TaskProvider, training } from '..'

export const cifar10: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -42,7 +42,7 @@ export const cifar10: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const mobilenet = await tf.loadLayersModel(
'https://storage.googleapis.com/tfjs-models/tfjs/mobilenet_v1_0.25_224/model.json'
)
Expand All @@ -51,10 +51,10 @@ export const cifar10: TaskProvider = {
.dense({ units: 10, activation: 'softmax', name: 'denseModified' })
.apply(x.output) as tf.SymbolicTensor

return tf.model({
return new training.model.TFJSModel(this.getTask(), tf.model({
inputs: mobilenet.input,
outputs: predictions,
name: 'modelModified'
})
}))
}
}
8 changes: 4 additions & 4 deletions discojs/discojs-core/src/default_tasks/geotags.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, data, TaskProvider } from '..'
import { tf, data, training, Task, TaskProvider } from '..'
import { Range } from 'immutable'
import { LabelTypeEnum } from '../task/label_type'

Expand Down Expand Up @@ -46,7 +46,7 @@ export const geotags: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const pretrainedModel = await tf.loadLayersModel(
'https://storage.googleapis.com/deai-313515.appspot.com/models/geotags/model.json'
)
Expand All @@ -56,13 +56,13 @@ export const geotags: TaskProvider = {
pretrainedModel.layers.forEach(layer => { layer.trainable = false })
pretrainedModel.layers[numLayers - 1].trainable = true

const model = tf.sequential({
const model = new training.model.TFJSModel(this.getTask(), tf.sequential({
layers: [
tf.layers.inputLayer({ inputShape: [224, 224, 3] }),
tf.layers.rescaling({ scale: 1 / 127.5, offset: -1 }), // Rescaling input between -1 and 1
pretrainedModel
]
})
}))

return model
}
Expand Down
6 changes: 3 additions & 3 deletions discojs/discojs-core/src/default_tasks/lus_covid.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, data, TaskProvider } from '..'
import { tf, data, training, Task, TaskProvider } from '..'

export const lusCovid: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -43,7 +43,7 @@ export const lusCovid: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const imageHeight = 100
const imageWidth = 100
const imageChannels = 3
Expand Down Expand Up @@ -90,6 +90,6 @@ export const lusCovid: TaskProvider = {
activation: 'softmax'
}))

return model
return new training.model.TFJSModel(this.getTask(), model)
}
}
6 changes: 3 additions & 3 deletions discojs/discojs-core/src/default_tasks/mnist.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, TaskProvider } from '..'
import { tf, training, Task, TaskProvider } from '..'

export const mnist: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -42,7 +42,7 @@ export const mnist: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const model = tf.sequential()

model.add(
Expand All @@ -65,6 +65,6 @@ export const mnist: TaskProvider = {
model.add(tf.layers.dense({ units: 64, activation: 'relu' }))
model.add(tf.layers.dense({ units: 10, activation: 'softmax' }))

return model
return new training.model.TFJSModel(this.getTask(), model)
}
}
4 changes: 2 additions & 2 deletions discojs/discojs-core/src/default_tasks/simple_face.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, data, TaskProvider } from '..'
import { data, training, Task, TaskProvider } from '..'

export const simpleFace: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -41,7 +41,7 @@ export const simpleFace: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
throw new Error('Not implemented')
}
}
7 changes: 3 additions & 4 deletions discojs/discojs-core/src/default_tasks/skin_mnist.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { Task, TaskProvider } from '..'
import { tf, data } from '..'
import { tf, data, training, Task, TaskProvider } from '..'

export const skinMnist: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -51,7 +50,7 @@ export const skinMnist: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const numClasses = 7
const size = 28

Expand Down Expand Up @@ -96,6 +95,6 @@ export const skinMnist: TaskProvider = {
model.add(tf.layers.dense({ units: 32 }))
model.add(tf.layers.dense({ units: numClasses, activation: 'softmax' }))

return model
return new training.model.TFJSModel(this.getTask(), model)
}
}
6 changes: 3 additions & 3 deletions discojs/discojs-core/src/default_tasks/titanic.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { tf, Task, TaskProvider } from '..'
import { tf, training, Task, TaskProvider } from '..'

export const titanic: TaskProvider = {
getTask (): Task {
Expand Down Expand Up @@ -73,7 +73,7 @@ export const titanic: TaskProvider = {
}
},

async getModel (): Promise<tf.LayersModel> {
async getModel (): Promise<training.model.Model> {
const model = tf.sequential()

model.add(
Expand All @@ -88,6 +88,6 @@ export const titanic: TaskProvider = {
model.add(tf.layers.dense({ units: 32, activation: 'relu' }))
model.add(tf.layers.dense({ units: 1, activation: 'sigmoid' }))

return model
return new training.model.TFJSModel(this.getTask(), model)
}
}
2 changes: 1 addition & 1 deletion discojs/discojs-core/src/index.browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export { WeightsContainer, aggregation } from './weights'
export { AsyncInformant } from './async_informant'
export { Logger, ConsoleLogger, TrainerLog } from './logging'
export { Memory, ModelType, ModelInfo, Path, ModelSource, Empty as EmptyMemory } from './memory'
export { Disco, TrainingSchemes, TrainingFunction, fitModelFunctions } from './training'
export { Disco, TrainingSchemes } from './training'
export { Validator } from './validation'

export * from './task'
Expand Down
2 changes: 1 addition & 1 deletion discojs/discojs-core/src/index.node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export { WeightsContainer, aggregation } from './weights'
export { AsyncInformant } from './async_informant'
export { Logger, ConsoleLogger, TrainerLog } from './logging'
export { Memory, ModelType, ModelInfo, Path, ModelSource, Empty as EmptyMemory } from './memory'
export { Disco, TrainingSchemes, TrainingFunction, fitModelFunctions } from './training'
export { Disco, TrainingSchemes } from './training'
export { Validator } from './validation'

export * from './task'
Expand Down
4 changes: 2 additions & 2 deletions discojs/discojs-core/src/task/task_provider.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { tf, Task } from '..'
import { Task, training } from '..'

export interface TaskProvider {
getTask: () => Task
getModel: () => Promise<tf.LayersModel>
getModel: () => Promise<training.model.Model>
}

export function isTaskProvider (obj: any): obj is TaskProvider {
Expand Down
11 changes: 8 additions & 3 deletions discojs/discojs-core/src/task/training_information.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ export function isTrainingInformation (raw: unknown): raw is TrainingInformation
'LABEL_LIST' |
'noiseScale' |
'clippingRadius'|
'aggregator'
'aggregator' |
'vocabSize'

const {
dataType,
Expand All @@ -57,7 +58,8 @@ export function isTrainingInformation (raw: unknown): raw is TrainingInformation
LABEL_LIST,
noiseScale,
clippingRadius,
aggregator
aggregator,
vocabSize
} = raw as Record<Fields, unknown | undefined>

if (
Expand All @@ -74,7 +76,8 @@ export function isTrainingInformation (raw: unknown): raw is TrainingInformation
(decentralizedSecure !== undefined && typeof decentralizedSecure !== 'boolean') ||
(maxShareValue !== undefined && typeof maxShareValue !== 'number') ||
(minimumReadyPeers !== undefined && typeof minimumReadyPeers !== 'number') ||
(aggregator !== undefined && typeof aggregator !== 'number')
(aggregator !== undefined && typeof aggregator !== 'number') ||
(vocabSize !== undefined && typeof vocabSize !== 'string')
) {
return false
}
Expand Down Expand Up @@ -185,4 +188,6 @@ export interface TrainingInformation {
// aggregator: aggregator to be used by the server for federated learning, or by the peers for decentralized learning
// default is 'average', other options include for instance 'bandit'
aggregator?: AggregatorChoice
// TODO: for LLMs
vocabSize?: number
}
Loading