Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Turn on using tasks in dataloader #1530

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add f for async preproc
  • Loading branch information
Dhairya Gandhi committed Mar 9, 2021
commit aea5b40bfc68ebd2896d5835b1cd7bb7e31dcf7c
40 changes: 21 additions & 19 deletions src/data/dataloader.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ Usage example:
@assert size(datum.labels) == (2,)
end
"""
struct DataLoader{F, T, D,S,L}
struct DataLoader{P, F, T, D,S,L}
f::P
channel::F
# task::T
data::D
Expand All @@ -66,7 +67,7 @@ struct DataLoader{F, T, D,S,L}
end

# X :: tuple of args to loss
function DataLoader(
function DataLoader(f,
args::Tuple;
batchsize = 1, shuffle = false,
partial = true, batchdim = ndims,
Expand All @@ -83,16 +84,16 @@ function DataLoader(
for i in iterator
fullbatch = length(i) == batchsize
if fullbatch
put!(ch, getobs(fs, i, batchdim))
put!(ch, f(getobs(fs, i, batchdim)))
elseif partial
put!(ch, getobs(fs, i, batchdim))
put!(ch, f(getobs(fs, i, batchdim)))
close(ch)
else
close(ch)
end
end)
schedule(t)
DataLoader(ch, args, iterator, batchsize, batchdim, partial)
DataLoader(f, ch, args, iterator, batchsize, batchdim, partial)
end

function validate_kwargs(shuffle, dataset_size, batchsize)
Expand All @@ -111,7 +112,7 @@ end
# batchdim is a function to suggest which dim is the actual
# batch dimension - saying `4` isn't helpful if you have a
# 4 dimensional feature array but a matrix label set
function DataLoader(
function DataLoader(f,
args::NTuple{N,AbstractArray};
batchsize = 1, shuffle = true,
partial = true, batchdim = ndims,
Expand All @@ -131,9 +132,9 @@ function DataLoader(
# sleep(1)
fullbatch = length(i) == batchsize
if fullbatch
put!(ch, getobs(fs, i, batchdim))
put!(ch, f(getobs(fs, i, batchdim)))
elseif partial
put!(ch, getobs(fs, i, batchdim))
put!(ch, f(getobs(fs, i, batchdim)))
close(ch)
else
close(ch)
Expand All @@ -142,19 +143,20 @@ function DataLoader(
end)
schedule(t)
# partial = false -> drop the last iteration of iterator
DataLoader(ch, t, fs, iterator, batchsize, batchdim, partial)
DataLoader(f, ch, t, fs, iterator, batchsize, batchdim, partial)
end
# Convenience method for calls that do not supply a preprocessing function:
# forwards to the `DataLoader(f, args; kwargs...)` method with an `f` that
# broadcasts `identity` over whatever it is handed, passing all keyword
# arguments through unchanged.
#
# `N` must be introduced with a `where` clause; without it the signature
# refers to a global `N` and the definition fails with an `UndefVarError`.
DataLoader(args::NTuple{N,AbstractArray}; kwargs...) where {N} =
  DataLoader(x -> identity.(x), args; kwargs...)

# function DataLoader(args::Tuple;
# batchsize = 1, shuffle = true,
# partial = true, batchdim = ndims,
# epochs = 1) where N
# DataLoader(args,
# batchsize = batchsize,
# shuffle = shuffle,
# partial = partial,
# batchdim = batchdim)
# end
# Fallback for calls that omit the preprocessing function `f`: forwards to the
# two-argument `DataLoader(f, args; ...)` method with an `f` that broadcasts
# `identity` over its input.
#
# `epochs` is accepted so that callers passing it keep working, but it is not
# forwarded by this method.
# NOTE(review): confirm whether the two-argument methods accept `epochs`
# before threading it through.
#
# The signature has no static parameters, so it carries no `where` clause
# (declaring an unused type variable makes Julia warn at definition time).
function DataLoader(args;
                    batchsize = 1, shuffle = true,
                    partial = true, batchdim = ndims,
                    epochs = 1)
  return DataLoader(x -> identity.(x), args;
                    batchsize = batchsize,
                    shuffle = shuffle,
                    partial = partial,
                    batchdim = batchdim)
end

function getobs(data::AbstractArray, ix, bd)
getindex(data,
Expand Down