
Commit cbd3d9b

Merge branch 'main' of github.com:pytorch/vision into stanford_cars

2 parents: af25d72 + 5e56575

37 files changed: +1476 −443 lines

CMakeLists.txt

Lines changed: 11 additions & 2 deletions

@@ -4,6 +4,7 @@ set(CMAKE_CXX_STANDARD 14)
 file(STRINGS version.txt TORCHVISION_VERSION)
 
 option(WITH_CUDA "Enable CUDA support" OFF)
+option(USE_PYTHON "Link to Python when building" OFF)
 
 if(WITH_CUDA)
   enable_language(CUDA)
@@ -17,7 +18,10 @@ if(WITH_CUDA)
   endif()
 endif()
 
-find_package(Python3 COMPONENTS Development)
+if (USE_PYTHON)
+  add_definitions(-DUSE_PYTHON)
+  find_package(Python3 REQUIRED COMPONENTS Development)
+endif()
 
 find_package(Torch REQUIRED)
 find_package(PNG REQUIRED)
@@ -76,7 +80,12 @@ FOREACH(DIR ${ALLOW_LISTED})
 ENDFOREACH()
 
 add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES})
-target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} ${PNG_LIBRARY} ${JPEG_LIBRARIES} Python3::Python)
+target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} ${PNG_LIBRARY} ${JPEG_LIBRARIES})
+
+if (USE_PYTHON)
+  target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python)
+endif()
+
 set_target_properties(${PROJECT_NAME} PROPERTIES
   EXPORT_NAME TorchVision
   INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib)

README.rst

Lines changed: 4 additions & 0 deletions

@@ -157,6 +157,10 @@ so make sure that it is also available to cmake via the ``CMAKE_PREFIX_PATH``.
 
 For an example setup, take a look at ``examples/cpp/hello_world``.
 
+Python linking is disabled by default when compiling TorchVision with CMake; this allows you to run models without any Python
+dependency. In the special cases where TorchVision's operators are used from Python code, you may need to link to Python. This
+can be done by passing ``-DUSE_PYTHON=on`` to CMake.
+
 TorchVision Operators
 ---------------------
 In order to get the torchvision operators registered with torch (eg. for the JIT), all you need to do is to ensure that you

cmake/TorchVisionConfig.cmake.in

Lines changed: 4 additions & 2 deletions

@@ -28,8 +28,10 @@ include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
 if(NOT TARGET torch_library)
   find_package(Torch REQUIRED)
 endif()
-if(NOT TARGET Python3::Python)
-  find_package(Python3 COMPONENTS Development)
+if (@USE_PYTHON@)
+  if(NOT TARGET Python3::Python)
+    find_package(Python3 COMPONENTS Development)
+  endif()
 endif()
 
 set_target_properties(TorchVision::TorchVision PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${${PN}_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "torch;Python3::Python" )

docs/source/datasets.rst

Lines changed: 3 additions & 0 deletions

@@ -38,16 +38,19 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     Cityscapes
     CocoCaptions
     CocoDetection
+    Country211
     DTD
     EMNIST
     FakeData
     FashionMNIST
     FER2013
     Flickr8k
     Flickr30k
+    Flowers102
     FlyingChairs
     FlyingThings3D
     Food101
+    FGVCAircraft
     GTSRB
     HD1K
     HMDB51

docs/source/models.rst

Lines changed: 28 additions & 0 deletions

@@ -40,6 +40,7 @@ architectures for image classification:
 - `MNASNet`_
 - `EfficientNet`_
 - `RegNet`_
+- `VisionTransformer`_
 
 You can construct a model with random weights by calling its constructor:
 
@@ -75,13 +76,18 @@ You can construct a model with random weights by calling its constructor:
     regnet_y_8gf = models.regnet_y_8gf()
     regnet_y_16gf = models.regnet_y_16gf()
     regnet_y_32gf = models.regnet_y_32gf()
+    regnet_y_128gf = models.regnet_y_128gf()
     regnet_x_400mf = models.regnet_x_400mf()
     regnet_x_800mf = models.regnet_x_800mf()
     regnet_x_1_6gf = models.regnet_x_1_6gf()
     regnet_x_3_2gf = models.regnet_x_3_2gf()
     regnet_x_8gf = models.regnet_x_8gf()
     regnet_x_16gf = models.regnet_x_16gf()
     regnet_x_32gf = models.regnet_x_32gf()
+    vit_b_16 = models.vit_b_16()
+    vit_b_32 = models.vit_b_32()
+    vit_l_16 = models.vit_l_16()
+    vit_l_32 = models.vit_l_32()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -125,6 +131,10 @@ These can be constructed by passing ``pretrained=True``:
     regnet_x_8gf = models.regnet_x_8gf(pretrained=True)
     regnet_x_16gf = models.regnet_x_16gf(pretrained=True)
     regnet_x_32gf = models.regnet_x_32gf(pretrained=True)
+    vit_b_16 = models.vit_b_16(pretrained=True)
+    vit_b_32 = models.vit_b_32(pretrained=True)
+    vit_l_16 = models.vit_l_16(pretrained=True)
+    vit_l_32 = models.vit_l_32(pretrained=True)
 
 Instancing a pre-trained model will download its weights to a cache directory.
 This directory can be set using the `TORCH_HOME` environment variable. See
@@ -233,6 +243,10 @@ regnet_y_3_2gf 78.948 94.576
 regnet_y_8gf                     80.032        95.048
 regnet_y_16gf                    80.424        95.240
 regnet_y_32gf                    80.878        95.340
+vit_b_16                         81.072        95.318
+vit_b_32                         75.912        92.466
+vit_l_16                         79.662        94.638
+vit_l_32                         76.972        93.070
 ================================ ============= =============
 
 
@@ -250,6 +264,7 @@ regnet_y_32gf 80.878 95.340
 .. _MNASNet: https://arxiv.org/abs/1807.11626
 .. _EfficientNet: https://arxiv.org/abs/1905.11946
 .. _RegNet: https://arxiv.org/abs/2003.13678
+.. _VisionTransformer: https://arxiv.org/abs/2010.11929
 
 .. currentmodule:: torchvision.models
 
@@ -425,6 +440,7 @@ RegNet
     regnet_y_8gf
     regnet_y_16gf
     regnet_y_32gf
+    regnet_y_128gf
     regnet_x_400mf
     regnet_x_800mf
     regnet_x_1_6gf
@@ -433,6 +449,18 @@ RegNet
     regnet_x_16gf
     regnet_x_32gf
 
+VisionTransformer
+-----------------
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    vit_b_16
+    vit_b_32
+    vit_l_16
+    vit_l_32
+
 Quantized Models
 ----------------
 
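
The new ViT variants behave like the other classification models documented here. A minimal inference sketch follows; the 224x224 input size for vit_b_16 and the 1000-class ImageNet head are assumptions consistent with the accuracy table above:

    import torch
    from torchvision import models

    # One of the Vision Transformer variants added in this commit.
    model = models.vit_b_16(pretrained=True)  # downloads weights on first use
    model.eval()

    # A random tensor stands in for a real, normalized 224x224 RGB image.
    batch = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(batch)

    print(logits.shape)  # expected: torch.Size([1, 1000]) -- ImageNet class scores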

examples/cpp/hello_world/CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -6,6 +6,10 @@ project(hello-world)
 # so there is no need to also add `find_package(Torch)` here.
 find_package(TorchVision REQUIRED)
 
+# This is needed because the LibTorch used here is the version included in the
+# Python package, which links against Python.
+find_package(Python3 COMPONENTS Development)
+
 add_executable(hello-world main.cpp)
 
 # We now need to link the TorchVision library to our executable.

hubconf.py

Lines changed: 7 additions & 3 deletions

@@ -1,7 +1,6 @@
 # Optional list of dependencies required by the package
 dependencies = ["torch"]
 
-# classification
 from torchvision.models.alexnet import alexnet
 from torchvision.models.densenet import densenet121, densenet169, densenet201, densenet161
 from torchvision.models.efficientnet import (
@@ -28,6 +27,7 @@
     regnet_y_8gf,
     regnet_y_16gf,
     regnet_y_32gf,
+    regnet_y_128gf,
     regnet_x_400mf,
     regnet_x_800mf,
     regnet_x_1_6gf,
@@ -47,8 +47,6 @@
     wide_resnet50_2,
     wide_resnet101_2,
 )
-
-# segmentation
 from torchvision.models.segmentation import (
     fcn_resnet50,
     fcn_resnet101,
@@ -60,3 +58,9 @@
 from torchvision.models.shufflenetv2 import shufflenet_v2_x0_5, shufflenet_v2_x1_0
 from torchvision.models.squeezenet import squeezenet1_0, squeezenet1_1
 from torchvision.models.vgg import vgg11, vgg13, vgg16, vgg19, vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn
+from torchvision.models.vision_transformer import (
+    vit_b_16,
+    vit_b_32,
+    vit_l_16,
+    vit_l_32,
+)
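
Since hubconf.py now re-exports the ViT constructors, they should also be reachable through torch.hub. A small sketch, assuming network access for the repository and weight downloads:

    import torch

    # The entry point name matches the constructor exported in hubconf.py;
    # keyword arguments are forwarded to it.
    model = torch.hub.load("pytorch/vision", "vit_b_16", pretrained=True)
    model.eval()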

setup.py

Lines changed: 4 additions & 1 deletion

@@ -201,7 +201,7 @@ def get_extensions():
 
     if sys.platform == "win32":
         define_macros += [("torchvision_EXPORTS", None)]
-
+        define_macros += [("USE_PYTHON", None)]
         extra_compile_args["cxx"].append("/MP")
 
     debug_mode = os.getenv("DEBUG", "0") == "1"
@@ -254,6 +254,9 @@ def get_extensions():
     image_library = []
     image_link_flags = []
 
+    if sys.platform == "win32":
+        image_macros += [("USE_PYTHON", None)]
+
     # Locating libPNG
     libpng = distutils.spawn.find_executable("libpng-config")
     pngfix = distutils.spawn.find_executable("pngfix")
Binary file not shown.

test/test_datasets.py

Lines changed: 114 additions & 0 deletions

@@ -2206,6 +2206,57 @@ def inject_fake_data(self, tmpdir: str, config):
         return len(sampled_classes * n_samples_per_class)
 
 
+class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.FGVCAircraft
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+        split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer")
+    )
+
+    def inject_fake_data(self, tmpdir: str, config):
+        split = config["split"]
+        annotation_level = config["annotation_level"]
+        annotation_level_to_file = {
+            "variant": "variants.txt",
+            "family": "families.txt",
+            "manufacturer": "manufacturers.txt",
+        }
+
+        root_folder = pathlib.Path(tmpdir) / "fgvc-aircraft-2013b"
+        data_folder = root_folder / "data"
+
+        classes = ["707-320", "Hawk T1", "Tornado"]
+        num_images_per_class = 5
+
+        datasets_utils.create_image_folder(
+            data_folder,
+            "images",
+            file_name_fn=lambda idx: f"{idx}.jpg",
+            num_examples=num_images_per_class * len(classes),
+        )
+
+        annotation_file = data_folder / annotation_level_to_file[annotation_level]
+        with open(annotation_file, "w") as file:
+            file.write("\n".join(classes))
+
+        num_samples_per_class = 4 if split == "trainval" else 2
+        images_classes = []
+        for i in range(len(classes)):
+            images_classes.extend(
+                [
+                    f"{idx} {classes[i]}"
+                    for idx in random.sample(
+                        range(i * num_images_per_class, (i + 1) * num_images_per_class), num_samples_per_class
+                    )
+                ]
+            )
+
+        images_annotation_file = data_folder / f"images_{annotation_level}_{split}.txt"
+        with open(images_annotation_file, "w") as file:
+            file.write("\n".join(images_classes))
+
+        return len(classes * num_samples_per_class)
+
+
 class SUN397TestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.SUN397
 
@@ -2517,5 +2568,68 @@ def _inject_fake_data(self, tmpdir, config):
         return num_examples
 
 
+class Country211TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Country211
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test"))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        split_folder = pathlib.Path(tmpdir) / "country211" / config["split"]
+        split_folder.mkdir(parents=True, exist_ok=True)
+
+        num_examples = {
+            "train": 3,
+            "valid": 4,
+            "test": 5,
+        }[config["split"]]
+
+        classes = ("AD", "BS", "GR")
+        for cls in classes:
+            datasets_utils.create_image_folder(
+                split_folder,
+                name=cls,
+                file_name_fn=lambda idx: f"{idx}.jpg",
+                num_examples=num_examples,
+            )
+
+        return num_examples * len(classes)
+
+
+class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Flowers102
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
+    REQUIRED_PACKAGES = ("scipy",)
+
+    def inject_fake_data(self, tmpdir: str, config):
+        base_folder = pathlib.Path(tmpdir) / "flowers-102"
+
+        num_classes = 3
+        num_images_per_split = dict(train=5, val=4, test=3)
+        num_images_total = sum(num_images_per_split.values())
+        datasets_utils.create_image_folder(
+            base_folder,
+            "jpg",
+            file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg",
+            num_examples=num_images_total,
+        )
+
+        label_dict = dict(
+            labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8),
+        )
+        datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict)
+
+        setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16)
+        np.random.shuffle(setid_mat)
+        setid_dict = dict(
+            trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1),
+            valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1),
+            tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1),
+        )
+        datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict)
+
+        return num_images_per_split[config["split"]]
+
+
 if __name__ == "__main__":
     unittest.main()
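
The fake-data tests above mirror the on-disk layout each new dataset expects (for example, fgvc-aircraft-2013b/data for FGVCAircraft). A minimal usage sketch follows; the constructor arguments match the test configs, and download=True is assumed to behave as it does for other torchvision datasets:

    from torchvision import datasets

    root = "./data"  # hypothetical local data directory

    aircraft = datasets.FGVCAircraft(root, split="trainval", annotation_level="variant", download=True)
    country = datasets.Country211(root, split="train", download=True)
    flowers = datasets.Flowers102(root, split="train", download=True)

    img, label = aircraft[0]  # PIL image and integer class index
    print(len(aircraft), len(country), len(flowers))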
