【Hackathon 5th No.31】Add column_stack / row_stack / dstack / hstack / vstack APIs to Paddle -part #59127

Merged (20 commits, Dec 12, 2023)
Changes from 7 commits
10 changes: 10 additions & 0 deletions python/paddle/__init__.py

@@ -228,6 +228,11 @@
    squeeze,
    squeeze_,
    stack,
    hstack,
    vstack,
    dstack,
    column_stack,
    row_stack,
    strided_slice,
    unique,
    unique_consecutive,
@@ -833,6 +838,11 @@
    'disable_signal_handler',
    'expand_as',
    'stack',
    'hstack',
    'vstack',
    'dstack',
    'column_stack',
    'row_stack',
    'sqrt',
    'randperm',
    'linspace',
5 changes: 5 additions & 0 deletions python/paddle/tensor/__init__.py

@@ -141,6 +141,11 @@
from .manipulation import squeeze # noqa: F401
from .manipulation import squeeze_ # noqa: F401
from .manipulation import stack # noqa: F401
from .manipulation import hstack # noqa: F401
from .manipulation import vstack # noqa: F401
from .manipulation import dstack # noqa: F401
from .manipulation import column_stack # noqa: F401
from .manipulation import row_stack # noqa: F401
from .manipulation import strided_slice # noqa: F401
from .manipulation import unique # noqa: F401
from .manipulation import unique_consecutive # noqa: F401
295 changes: 295 additions & 0 deletions python/paddle/tensor/manipulation.py

@@ -2033,6 +2033,301 @@ def stack(x, axis=0, name=None):
    return out


def hstack(x, name=None):
    """
    Stacks all the input tensors ``x`` along the horizontal axis.
    All tensors must be of the same dtype.

    Args:
        x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors. The tensors in ``x`` must have the same
            dtype, and their shapes may differ only along the horizontal axis. Supported data types: ``float16``, ``float32``, ``float64``, ``int8``, ``int32``, ``int64`` or ``bfloat16``.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the stacked tensor with the same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # hstack with 0-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor(2.0)
            >>> out = paddle.hstack((x1, x2))
            >>> print(out)
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [1., 2.])

            >>> # hstack with 1-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0])
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.hstack((x1, x2))
            >>> print(out)
            Tensor(shape=[5], dtype=float32, place=Place(cpu), stop_gradient=True,
            [1., 2., 3., 4., 5.])

            >>> # hstack with a mix of 0-D and 1-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.hstack((x1, x2))
            >>> print(out)
            Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [1., 3., 4., 5.])

            >>> # hstack with 2-D tensors
            >>> x1 = paddle.to_tensor([[1.0, 2.0]])
            >>> x2 = paddle.to_tensor([[3.0, 4.0, 5.0]])
            >>> out = paddle.hstack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3., 4., 5.]])

    """
    # Promote 0-D inputs to 1-D; atleast_1d returns a bare tensor for a
    # single input and a list of tensors for multiple inputs.
    arrays = paddle.atleast_1d(*x)
    if not isinstance(arrays, list):
        arrays = [arrays]

    # 1-D tensors are joined end to end (axis 0); higher-rank tensors are
    # concatenated along their second axis.
    if arrays and arrays[0].ndim == 1:
        return paddle.concat(arrays, axis=0, name=name)
    else:
        return paddle.concat(arrays, axis=1, name=name)


def vstack(x, name=None):
    """
    Stacks all the input tensors ``x`` along the vertical axis.
    All tensors must be of the same dtype.

    Args:
        x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors. The tensors in ``x`` must have the same
            dtype, and their shapes may differ only along the vertical axis. Supported data types: ``float16``, ``float32``, ``float64``, ``int8``, ``int32``, ``int64`` or ``bfloat16``.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the stacked tensor with the same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # vstack with 0-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor(2.0)
            >>> out = paddle.vstack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1.],
             [2.]])

            >>> # vstack with 1-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0, 3.0])
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.vstack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

            >>> # vstack with a mix of 1-D and 2-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0, 3.0])
            >>> x2 = paddle.to_tensor([[3.0, 4.0, 5.0]])
            >>> out = paddle.vstack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

            >>> # vstack with 2-D tensors
            >>> x1 = paddle.to_tensor([[1.0, 2.0, 3.0]])
            >>> x2 = paddle.to_tensor([[3.0, 4.0, 5.0]])
            >>> out = paddle.vstack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

    """
    # Promote 0-D and 1-D inputs to 2-D, then stack row-wise.
    arrays = paddle.atleast_2d(*x)
    if not isinstance(arrays, list):
        arrays = [arrays]

    return paddle.concat(arrays, axis=0, name=name)


def dstack(x, name=None):
    """
    Stacks all the input tensors ``x`` along the depth axis.
    All tensors must be of the same dtype.

    Args:
        x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors. The tensors in ``x`` must have the same
            dtype, and their shapes may differ only along the depth axis. Supported data types: ``float16``, ``float32``, ``float64``, ``int8``, ``int32``, ``int64`` or ``bfloat16``.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the stacked tensor with the same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # dstack with 0-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor(2.0)
            >>> out = paddle.dstack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[1., 2.]]])

            >>> # dstack with 1-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0, 3.0])
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.dstack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[1., 3.],
              [2., 4.],
              [3., 5.]]])

            >>> # dstack with 3-D tensors
            >>> x1 = paddle.to_tensor([[[1.0, 2.0], [3.0, 4.0]]])
            >>> x2 = paddle.to_tensor([[[3.0, 4.0], [5.0, 6.0]]])
            >>> out = paddle.dstack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 2, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[1., 2., 3., 4.],
              [3., 4., 5., 6.]]])

    """
    # Promote inputs to at least 3-D, then stack along the third axis.
    arrays = paddle.atleast_3d(*x)
    if not isinstance(arrays, list):
        arrays = [arrays]

    return paddle.concat(arrays, axis=2, name=name)


def column_stack(x, name=None):
    """
    Stacks all the input tensors ``x`` along the horizontal axis. Each tensor in ``x`` with ``tensor.ndim < 2``
    is first reshaped into ``(tensor.numel(), 1)`` before being stacked.
    All tensors must be of the same dtype.

    Args:
        x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors. The tensors in ``x`` must have the same
            dtype, and after any reshaping they must have the same number of rows. Supported data types: ``float16``, ``float32``, ``float64``, ``int32``, ``int64`` or ``bfloat16``.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the stacked tensor with the same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # column_stack with 0-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor(2.0)
            >>> out = paddle.column_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2.]])

            >>> # column_stack with a mix of 1-D and 2-D tensors
            >>> x1 = paddle.to_tensor([[1.0], [2.0], [3.0]])
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.column_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 3.],
             [2., 4.],
             [3., 5.]])

            >>> # column_stack with 3-D tensors
            >>> x1 = paddle.to_tensor([[[1.0, 2.0], [3.0, 4.0]]])
            >>> x2 = paddle.to_tensor([[[3.0, 4.0], [5.0, 6.0]]])
            >>> out = paddle.column_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[1., 2.],
              [3., 4.],
              [3., 4.],
              [5., 6.]]])

    """
    arrays = []

    # Turn 0-D and 1-D tensors into column vectors; keep the rest as-is.
    for tensor in x:
        if tensor.ndim < 2:
            arrays.append(tensor.reshape((tensor.numel(), 1)))
        else:
            arrays.append(tensor)

    return paddle.hstack(arrays, name=name)
Contributor:

Does this compute the same result as torch?

Contributor Author:

Borrowing the official torch examples:

In [18]: import paddle

In [19]: import torch

In [20]: a = torch.tensor([1, 2, 3])
    ...: b = torch.tensor([4, 5, 6])
    ...: torch.column_stack((a, b))
Out[20]: 
tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [21]: a = torch.arange(5)
    ...: b = torch.arange(10).reshape(5, 2)
    ...: torch.column_stack((a, b, b))
Out[21]: 
tensor([[0, 0, 1, 0, 1],
        [1, 2, 3, 2, 3],
        [2, 4, 5, 4, 5],
        [3, 6, 7, 6, 7],
        [4, 8, 9, 8, 9]])

In [22]: x = paddle.to_tensor([1, 2, 3])
    ...: y = paddle.to_tensor([4, 5, 6])
    ...: paddle.column_stack((x, y))
Out[22]: 
Tensor(shape=[3, 2], dtype=int64, place=Place(gpu:0), stop_gradient=True,
       [[1, 4],
        [2, 5],
        [3, 6]])

In [23]: x = paddle.arange(5)
    ...: y = paddle.arange(10).reshape((5, 2))
    ...: paddle.column_stack((x, y, y))
Out[23]: 
Tensor(shape=[5, 5], dtype=int64, place=Place(gpu:0), stop_gradient=True,
       [[0, 0, 1, 0, 1],
        [1, 2, 3, 2, 3],
        [2, 4, 5, 4, 5],
        [3, 6, 7, 6, 7],
        [4, 8, 9, 8, 9]])

They should be the same ~

stack does not have that atleast_xd-style input issue; stack takes only a single input argument 🤗 ~
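
(Editor's note: a minimal sketch, assuming the documented behavior of paddle.atleast_1d, of the input issue mentioned above. With a single input the atleast_xd functions return a bare Tensor, while with several inputs they return a list, which is why the implementations in this PR normalize the result before concatenating. This snippet is an illustration, not part of the PR diff.)

import paddle

# One input: atleast_1d returns a bare tensor.
single = paddle.atleast_1d(paddle.to_tensor(1.0))
# Several inputs: atleast_1d returns a list of tensors.
many = paddle.atleast_1d(paddle.to_tensor(1.0), paddle.to_tensor(2.0))

print(type(single))  # a paddle.Tensor
print(type(many))    # a list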

Contributor:

I ask because your computation logic looks slightly different from torch's:

def column_stack(tensors: TensorSequenceType) -> TensorLikeType:
    aligned_tensors = tuple(
        x if x.ndim > 1 else x.reshape((x.numel(), 1)) for x in tensors
    )
    return cat(aligned_tensors, 1)

If the computation logic differs, the rationale for the difference needs to be explained.

Contributor Author (@megemini, Nov 22, 2023):

What I mean here:

torch's second line is return cat(aligned_tensors, 1),

while I use return paddle.hstack(arrays, name=name).

hstack does special-case ndim = 0, but the inputs here are guaranteed to have ndim > 0, so this is equivalent to return cat(aligned_tensors, 1) ~

Then again, I'll just change it ... ... 😅


p.s. Now I remember why: I used hstack instead of concat because column_stack and row_stack are the mirror implementations of hstack and vstack; row_stack is built on vstack, so column_stack was built on hstack ~ Still, going through hstack could indeed cost some performance, so it has been changed ~ 👍
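
(Editor's note: for reference, a short sketch of the concat-based column_stack described above, mirroring torch's reference logic. This is an illustration under that assumption, and the name column_stack_sketch is hypothetical; it is not necessarily the exact code merged in this PR.)

import paddle

def column_stack_sketch(x, name=None):
    # Reshape 0-D and 1-D tensors into columns of shape (numel, 1);
    # tensors that are already at least 2-D pass through unchanged.
    aligned = [t if t.ndim > 1 else t.reshape((t.numel(), 1)) for t in x]
    # Concatenate along axis 1 directly instead of dispatching through
    # paddle.hstack, avoiding the potential overhead mentioned above.
    return paddle.concat(aligned, axis=1, name=name)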



def row_stack(x, name=None):
    """
    Alias of ``paddle.vstack()``.
    Stacks all the input tensors ``x`` along the vertical axis.
    All tensors must be of the same dtype.

    Args:
        x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors. The tensors in ``x`` must have the same
            dtype, and their shapes may differ only along the vertical axis. Supported data types: ``float16``, ``float32``, ``float64``, ``int8``, ``int32``, ``int64`` or ``bfloat16``.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, the stacked tensor with the same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> # row_stack with 0-D tensors
            >>> x1 = paddle.to_tensor(1.0)
            >>> x2 = paddle.to_tensor(2.0)
            >>> out = paddle.row_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1.],
             [2.]])

            >>> # row_stack with 1-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0, 3.0])
            >>> x2 = paddle.to_tensor([3.0, 4.0, 5.0])
            >>> out = paddle.row_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

            >>> # row_stack with a mix of 1-D and 2-D tensors
            >>> x1 = paddle.to_tensor([1.0, 2.0, 3.0])
            >>> x2 = paddle.to_tensor([[3.0, 4.0, 5.0]])
            >>> out = paddle.row_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

            >>> # row_stack with 2-D tensors
            >>> x1 = paddle.to_tensor([[1.0, 2.0, 3.0]])
            >>> x2 = paddle.to_tensor([[3.0, 4.0, 5.0]])
            >>> out = paddle.row_stack((x1, x2))
            >>> print(out)
            Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1., 2., 3.],
             [3., 4., 5.]])

    """
    return paddle.vstack(x, name=name)


def split(x, num_or_sections, axis=0, name=None):
"""
Split the input tensor into multiple sub-Tensors.
Expand Down
Loading