Add some transforms #122

Changes from all commits: 83d6a46, af2695f, bb37eca, 7ee1d38, 5d5289a

torchaudio/transforms.py
@@ -5,6 +5,14 @@
from . import functional as F


def _check_audio(tensor):
    if not isinstance(tensor, torch.Tensor):
        raise TypeError('tensor should be a torch tensor')
    if len(tensor.size()) > 2:
        raise TypeError('tensor representing audio should be at most '
                        '2-dimensional')


class Compose(object):
    """Composes several transforms together.
@@ -444,3 +452,154 @@ def __call__(self, x_mu):

    def __repr__(self):
        return self.__class__.__name__ + '()'


class Pad(object):
Review comment: This class seems very similar to PadTrim (audio/torchaudio/transforms.py, line 75 in 7ee1d38). Is there any difference?

Reply: Does not look like it. They both end up using the same
    """Pad the given tensor on the left and/or right with a specified fill value.

    Args:
        padding (int or tuple): Padding on each border. If a single int is
            provided this is used to pad both borders. If a tuple of length 2
            is provided this is the padding on left/right.
        fill: fill value.
        channel_first (bool): Channel is first and time second. Default: `True`
    """
    def __init__(self, padding, fill=0, channel_first=True):
        self.padding = padding
        self.fill = fill
        self.ch_dim = int(not channel_first)

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Audio of size (Samples x Channels) or (C x S)

        Returns:
            tensor (Tensor): A tensor padded right and/or left with the fill value
        """
        # ConstantPad1d pads the last dimension, so move time to the last
        # dimension when channels are last, then move it back afterwards.
        if self.ch_dim == 1:
            tensor = tensor.transpose(0, 1)

        tensor = torch.nn.ConstantPad1d(self.padding, self.fill)(tensor)

        if self.ch_dim == 1:
            tensor = tensor.transpose(0, 1)

        return tensor
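
For illustration, a minimal usage sketch of Pad, assuming the module is importable as torchaudio.transforms and the waveform uses the default (channels, samples) layout with channel_first=True:

import torch
from torchaudio import transforms

waveform = torch.randn(1, 16000)          # (channels, samples)
pad = transforms.Pad(padding=(100, 200))  # 100 samples on the left, 200 on the right
padded = pad(waveform)
print(padded.shape)                       # expected: torch.Size([1, 16300])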


class RandomCrop(object):
    """Randomly crops a piece of the tensor.

    Args:
        size (int): size of the crop to retrieve
        channel_first (bool): Channel is first and time second. Default: `True`
    """
    def __init__(self, size, channel_first=True):
        self.size = size
        self.ch_dim = int(not channel_first)

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Audio of size (S x C) or (C x S)

        Returns:
            Tensor: A random crop of length `size` along the sample axis.
        """
        return F.random_crop(tensor, self.size, self.ch_dim)
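
A short sketch of RandomCrop, assuming a (channels, samples) input and that F.random_crop returns a contiguous slice of the requested length along the sample axis:

import torch
from torchaudio import transforms

waveform = torch.randn(2, 16000)        # stereo, (channels, samples)
crop = transforms.RandomCrop(size=8000)
cropped = crop(waveform)
print(cropped.shape)                    # expected: torch.Size([2, 8000])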


class RandomStretch(object):
    """Randomly stretch or shrink the audio along the sample axis.

    Args:
        max_factor (float): Maximum stretching factor of the audio
        interpolate (str): Interpolation mode for the generated audio
            points ('linear' or 'nearest')
        channel_first (bool): Channel is first and time second. Default: `True`
    """
    def __init__(self, max_factor=1.3, interpolate='Linear', channel_first=True):
        self.max_factor = max_factor
        self.interpolate = interpolate
        self.ch_dim = int(not channel_first)

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Audio of size (Samples x Channels) or (C x S)

        Returns:
            Tensor: A tensor randomly stretched by a factor along the sample axis.
        """
        return F.random_stretch(tensor,
                                self.max_factor,
                                self.interpolate,
                                self.ch_dim)
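
A hedged sketch of RandomStretch; the exact output length depends on the factor drawn inside F.random_stretch, so only the channel dimension is assumed to stay fixed:

import torch
from torchaudio import transforms

waveform = torch.randn(1, 16000)
stretch = transforms.RandomStretch(max_factor=1.2, interpolate='Linear')
stretched = stretch(waveform)
# The sample axis length differs from 16000 by a random factor bounded
# by max_factor; the channel dimension is unchanged.
print(stretched.shape)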


class RandomOpposite(object):
    """Randomly retrieve the opposite (negated) values of `tensor`.

    Args:
        tensor (Tensor): signal tensor with shape (size, channels)
        probability (float): Probability for a flip to happen.
    """
    def __init__(self, probability=0.5):
        self.probability = probability

    def __call__(self, tensor):
        return F.random_opposite(tensor, self.probability)
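
To illustrate what "opposite values" means here, a small sketch; it assumes that probability=1.0 always triggers the sign flip:

import torch
from torchaudio import transforms

waveform = torch.tensor([[0.5, -0.25, 1.0]])
flip = transforms.RandomOpposite(probability=1.0)  # always flip, for demonstration
print(flip(waveform))                              # expected: the negated values [[-0.5, 0.25, -1.0]]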


class AddChannelDimension(object):
    """Add a channel dimension if missing. This results in a two-dimensional
    tensor.

    Args:
        tensor (Tensor): signal tensor with shape (size, channels)
        channel_first (bool): Channel is first and time second. Default: `True`
    """
    def __init__(self, channel_first=True):
        self.ch_dim = int(not channel_first)

    def __call__(self, tensor):
        if len(tensor.shape) == 1:
            tensor = tensor.unsqueeze(self.ch_dim)
        return tensor


class AddDimension(object):
    """Add a dimension to a tensor to fit the desired model,
    e.g. add a dimension to fit 2D and 3D convolutions.

    Args:
        tensor (Tensor): signal tensor with shape (size, channels)
        dimension (int): The dimension to create
    """
    def __init__(self, dimension):
        self.dim = int(dimension)

    def __call__(self, tensor):
        tensor = tensor.unsqueeze(self.dim)
        return tensor
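
A shape-oriented sketch of the two dimension helpers above, assuming channel_first=True for AddChannelDimension:

import torch
from torchaudio import transforms

mono = torch.randn(16000)                                    # 1D signal, no channel dimension
with_channel = transforms.AddChannelDimension(channel_first=True)(mono)
print(with_channel.shape)                                    # expected: torch.Size([1, 16000])

# Add a leading dimension, e.g. to match the input rank of a 2D convolution.
batched = transforms.AddDimension(dimension=0)(with_channel)
print(batched.shape)                                         # expected: torch.Size([1, 1, 16000])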


class ToTensor(object):
    """Convert a ``numpy.ndarray`` to a tensor.
    """

    def __call__(self, array):
        """
        Args:
            array: a numpy array or array-like to be converted

        Returns:
            Tensor: Converted sound.
        """
        return torch.tensor(array)

    def __repr__(self):
        return self.__class__.__name__ + '()'
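
Finally, a hedged end-to-end sketch combining the new transforms with the existing Compose; it assumes a mono numpy waveform as input and that the stretched signal stays longer than the crop size:

import numpy as np
from torchaudio import transforms

pipeline = transforms.Compose([
    transforms.ToTensor(),                      # numpy array -> torch tensor, shape (16000,)
    transforms.AddChannelDimension(True),       # (16000,) -> (1, 16000)
    transforms.RandomStretch(max_factor=1.2),   # random length change along the sample axis
    transforms.RandomCrop(size=8000),           # fix the length again for batching
    transforms.RandomOpposite(probability=0.5), # random sign flip
    transforms.Pad((100, 100)),                 # 100 samples of padding on each side
])

waveform = np.random.randn(16000).astype(np.float32)
out = pipeline(waveform)
print(out.shape)                                # expected: torch.Size([1, 8200])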
Review comment: Is this similar to Resample or phase_vocoder?