diff --git a/chapter_computer-vision/fcn.md b/chapter_computer-vision/fcn.md
index cdbb98bb0..00b37ef84 100644
--- a/chapter_computer-vision/fcn.md
+++ b/chapter_computer-vision/fcn.md
@@ -300,11 +300,31 @@ net.transpose_conv.weight.set_value(W);
 Specify the shape of the randomly cropped output image as $320\times 480$: both the height and the width are divisible by $32$.

 ```{.python .input}
-#@tab all
+#@tab mxnet, pytorch
 batch_size, crop_size = 32, (320, 480)
 train_iter, test_iter = d2l.load_data_voc(batch_size, crop_size)
 ```

+```{.python .input}
+#@tab paddle
+import os
+def load_data_voc(batch_size, crop_size):
+    """Load the VOC semantic segmentation dataset
+    Defined in :numref:`sec_semantic_segmentation`"""
+    voc_dir = d2l.download_extract('voc2012', os.path.join(
+        'VOCdevkit', 'VOC2012'))
+    train_iter = paddle.io.DataLoader(
+        d2l.VOCSegDataset(True, crop_size, voc_dir), batch_size=batch_size,
+        shuffle=True, return_list=True, drop_last=True, num_workers=0)
+    test_iter = paddle.io.DataLoader(
+        d2l.VOCSegDataset(False, crop_size, voc_dir), batch_size=batch_size,
+        drop_last=True, return_list=True, num_workers=0)
+    return train_iter, test_iter
+
+batch_size, crop_size = 32, (320, 480)
+train_iter, test_iter = load_data_voc(batch_size, crop_size)
+```
+
 ## [**Training**]

 Now we can train the fully convolutional network.
@@ -337,7 +357,7 @@ def loss(inputs, targets):
 num_epochs, lr, wd, devices = 5, 0.001, 1e-3, d2l.try_all_gpus()
 trainer = paddle.optimizer.SGD(learning_rate=lr, parameters=net.parameters(),
                                weight_decay=wd)
-d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)
+d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices[:1])
 ```

 ## [**Prediction**]
diff --git a/chapter_computer-vision/object-detection-dataset.md b/chapter_computer-vision/object-detection-dataset.md
index f2c2f18ce..59f4311cc 100644
--- a/chapter_computer-vision/object-detection-dataset.md
+++ b/chapter_computer-vision/object-detection-dataset.md
@@ -203,9 +203,9 @@ def load_data_bananas(batch_size):
 def load_data_bananas(batch_size):
     """Load the banana detection dataset"""
     train_iter = paddle.io.DataLoader(BananasDataset(is_train=True),
-                                      batch_size=batch_size, shuffle=True)
+                                      batch_size=batch_size, return_list=True, shuffle=True)
     val_iter = paddle.io.DataLoader(BananasDataset(is_train=False),
-                                    batch_size=batch_size)
+                                    batch_size=batch_size, return_list=True)
     return train_iter, val_iter
 ```
diff --git a/chapter_computer-vision/semantic-segmentation-and-dataset.md b/chapter_computer-vision/semantic-segmentation-and-dataset.md
index 7ebac06d9..4a0749faa 100644
--- a/chapter_computer-vision/semantic-segmentation-and-dataset.md
+++ b/chapter_computer-vision/semantic-segmentation-and-dataset.md
@@ -472,6 +472,7 @@ for X, Y in train_iter:
 batch_size = 64
 train_iter = paddle.io.DataLoader(voc_train, batch_size=batch_size,
                                   shuffle=True, drop_last=True,
+                                  return_list=True,
                                   num_workers=d2l.get_dataloader_workers())
 for X, Y in train_iter:
     print(X.shape)
@@ -527,10 +528,10 @@ def load_data_voc(batch_size, crop_size):
     num_workers = d2l.get_dataloader_workers()
     train_iter = paddle.io.DataLoader(
         VOCSegDataset(True, crop_size, voc_dir), batch_size=batch_size,
-        shuffle=True, drop_last=True, num_workers=num_workers)
+        shuffle=True, return_list=True, drop_last=True, num_workers=num_workers)
     test_iter = paddle.io.DataLoader(
         VOCSegDataset(False, crop_size, voc_dir), batch_size=batch_size,
-        drop_last=True, num_workers=num_workers)
+        drop_last=True, return_list=True, num_workers=num_workers)
     return train_iter, test_iter
 ```
diff --git a/chapter_linear-networks/linear-regression-concise.md b/chapter_linear-networks/linear-regression-concise.md
index e068fc3b6..98c8dd3b6 100644
--- a/chapter_linear-networks/linear-regression-concise.md
+++ b/chapter_linear-networks/linear-regression-concise.md
@@ -94,7 +94,8 @@ def load_array(data_arrays, batch_size, is_train=True):
     """Construct a Paddle data iterator"""
     dataset = paddle.io.TensorDataset(data_arrays)
     return paddle.io.DataLoader(dataset, batch_size=batch_size,
-                                shuffle=is_train)
+                                shuffle=is_train,
+                                return_list=True)
 ```

 ```{.python .input}
diff --git a/chapter_natural-language-processing-applications/natural-language-inference-attention.md b/chapter_natural-language-processing-applications/natural-language-inference-attention.md
index c1461dde7..f11bec0ae 100644
--- a/chapter_natural-language-processing-applications/natural-language-inference-attention.md
+++ b/chapter_natural-language-processing-applications/natural-language-inference-attention.md
@@ -373,11 +373,37 @@ class DecomposableAttention(nn.Layer):
 We download and read the SNLI dataset using the function defined in :numref:`sec_natural-language-inference-and-dataset`. The batch size and sequence length are set to $256$ and $50$, respectively.

 ```{.python .input}
-#@tab all
+#@tab mxnet, pytorch
 batch_size, num_steps = 256, 50
 train_iter, test_iter, vocab = d2l.load_data_snli(batch_size, num_steps)
 ```

+```{.python .input}
+#@tab paddle
+def load_data_snli(batch_size, num_steps=50):
+    """Download the SNLI dataset and return data iterators and the vocabulary
+
+    Defined in :numref:`sec_natural-language-inference-and-dataset`"""
+    data_dir = d2l.download_extract('SNLI')
+    train_data = d2l.read_snli(data_dir, True)
+    test_data = d2l.read_snli(data_dir, False)
+    train_set = d2l.SNLIDataset(train_data, num_steps)
+    test_set = d2l.SNLIDataset(test_data, num_steps, train_set.vocab)
+    train_iter = paddle.io.DataLoader(train_set, batch_size=batch_size,
+                                      shuffle=True,
+                                      num_workers=0,
+                                      return_list=True)
+
+    test_iter = paddle.io.DataLoader(test_set, batch_size=batch_size,
+                                     shuffle=False,
+                                     num_workers=0,
+                                     return_list=True)
+    return train_iter, test_iter, train_set.vocab
+
+batch_size, num_steps = 256, 50
+train_iter, test_iter, vocab = load_data_snli(batch_size, num_steps)
+```
+
 ### Creating the Model

 We use the pretrained 100-dimensional GloVe embeddings to represent the input tokens. We predefine the dimension of the vectors $\mathbf{a}_i$ and $\mathbf{b}_j$ in :eqref:`eq_nli_e` as 100. The output dimension of the functions $f$ in :eqref:`eq_nli_e` and $g$ in :eqref:`eq_nli_v_ab` is set to 200. Then we create a model instance, initialize its parameters, and load the GloVe embeddings to initialize the vectors of the input tokens.
@@ -447,7 +473,7 @@ lr, num_epochs = 0.001, 4
 trainer = paddle.optimizer.Adam(learning_rate=lr, parameters=net.parameters())
 loss = nn.CrossEntropyLoss(reduction="none")
 d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs,
-               devices)
+               devices[:1])
 ```

 ### Using the Model
diff --git a/chapter_natural-language-processing-pretraining/bert-dataset.md b/chapter_natural-language-processing-pretraining/bert-dataset.md
index e955ab40d..93cd7bae1 100644
--- a/chapter_natural-language-processing-pretraining/bert-dataset.md
+++ b/chapter_natural-language-processing-pretraining/bert-dataset.md
@@ -399,7 +399,7 @@ def load_data_wiki(batch_size, max_len):
     data_dir = d2l.download_extract('wikitext-2', 'wikitext-2')
     paragraphs = _read_wiki(data_dir)
     train_set = _WikiTextDataset(paragraphs, max_len)
-    train_iter = paddle.io.DataLoader(dataset=train_set, batch_size=batch_size,
+    train_iter = paddle.io.DataLoader(dataset=train_set, batch_size=batch_size, return_list=True,
                                       shuffle=True, num_workers=num_workers)
     return train_iter, train_set.vocab
 ```
diff --git a/chapter_natural-language-processing-pretraining/bert-pretraining.md b/chapter_natural-language-processing-pretraining/bert-pretraining.md
index 329cf5d48..2ca31d10e 100644
--- a/chapter_natural-language-processing-pretraining/bert-pretraining.md
+++ b/chapter_natural-language-processing-pretraining/bert-pretraining.md
@@ -29,11 +29,28 @@ from d2l import paddle as d2l
 First, we load the WikiText-2 dataset as minibatches of pretraining examples for masked language modeling and next sentence prediction. The batch size is 512 and the maximum length of a BERT input sequence is 64. Note that in the original BERT model, the maximum length is 512.

 ```{.python .input}
-#@tab all
+#@tab mxnet, pytorch
 batch_size, max_len = 512, 64
 train_iter, vocab = d2l.load_data_wiki(batch_size, max_len)
 ```

+```{.python .input}
+#@tab paddle
+def load_data_wiki(batch_size, max_len):
+    """Load the WikiText-2 dataset
+
+    Defined in :numref:`subsec_prepare_mlm_data`"""
+    data_dir = d2l.download_extract('wikitext-2', 'wikitext-2')
+    paragraphs = d2l._read_wiki(data_dir)
+    train_set = d2l._WikiTextDataset(paragraphs, max_len)
+    train_iter = paddle.io.DataLoader(dataset=train_set, batch_size=batch_size, return_list=True,
+                                      shuffle=True, num_workers=0)
+    return train_iter, train_set.vocab
+
+batch_size, max_len = 512, 64
+train_iter, vocab = load_data_wiki(batch_size, max_len)
+```
+
 ## Pretraining BERT

 The original BERT :cite:`Devlin.Chang.Lee.ea.2018` comes in two versions of different model sizes. The base model ($\text{BERT}_{\text{BASE}}$) uses 12 layers (Transformer encoder blocks) with 768 hidden units (the hidden size) and 12 self-attention heads. The large model ($\text{BERT}_{\text{LARGE}}$) uses 24 layers with 1024 hidden units and 16 self-attention heads. Notably, the former has 110 million parameters while the latter has 340 million parameters. For ease of demonstration, we define a small BERT, using 2 layers, 128 hidden units, and 2 self-attention heads.
@@ -257,10 +274,15 @@ def train_bert(train_iter, net, loss, vocab_size, devices, num_steps):
 During pretraining, we can plot both the masked language modeling loss and the next sentence prediction loss.

 ```{.python .input}
-#@tab all
+#@tab mxnet, pytorch
 train_bert(train_iter, net, loss, len(vocab), devices, 50)
 ```

+```{.python .input}
+#@tab paddle
+train_bert(train_iter, net, loss, len(vocab), devices[:1], 50)
+```
+
 ## Representing Text with BERT

 After pretraining BERT, we can use it to represent single text, text pairs, or any token in them. The following function returns the BERT (`net`) representations for all tokens in `tokens_a` and `tokens_b`.
diff --git a/chapter_natural-language-processing-pretraining/word-embedding-dataset.md b/chapter_natural-language-processing-pretraining/word-embedding-dataset.md
index 7568a9778..5c6bd5a3b 100644
--- a/chapter_natural-language-processing-pretraining/word-embedding-dataset.md
+++ b/chapter_natural-language-processing-pretraining/word-embedding-dataset.md
@@ -366,7 +366,7 @@ def load_data_ptb(batch_size, max_window_size, num_noise_words):
     dataset = PTBDataset(all_centers, all_contexts, all_negatives)
     data_iter = paddle.io.DataLoader(
-        dataset, batch_size=batch_size, shuffle=True,
+        dataset, batch_size=batch_size, shuffle=True, return_list=True,
         collate_fn=batchify, num_workers=num_workers)
     return data_iter, vocab
 ```
diff --git a/d2l/paddle.py b/d2l/paddle.py
index d302186df..7846fec82 100644
--- a/d2l/paddle.py
+++ b/d2l/paddle.py
@@ -156,7 +156,8 @@ def load_array(data_arrays, batch_size, is_train=True):
     Defined in :numref:`sec_linear_concise`"""
     dataset = paddle.io.TensorDataset(data_arrays)
     return paddle.io.DataLoader(dataset, batch_size=batch_size,
-                                shuffle=is_train)
+                                shuffle=is_train,
+                                return_list=True)

 def get_fashion_mnist_labels(labels):
     """Return the text labels of the Fashion-MNIST dataset
@@ -207,9 +208,11 @@ def load_data_fashion_mnist(batch_size, resize=None):
     return (paddle.io.DataLoader(dataset=mnist_train,
                                  batch_size=batch_size,
                                  shuffle=True,
+                                 return_list=True,
                                  num_workers=get_dataloader_workers()),
             paddle.io.DataLoader(dataset=mnist_test,
                                  batch_size=batch_size,
+                                 return_list=True,
                                  shuffle=True,
                                  num_workers=get_dataloader_workers()))
@@ -1852,9 +1855,9 @@ def load_data_bananas(batch_size):

     Defined in :numref:`sec_object-detection-dataset`"""
     train_iter = paddle.io.DataLoader(BananasDataset(is_train=True),
-                                      batch_size=batch_size, shuffle=True)
+                                      batch_size=batch_size, return_list=True, shuffle=True)
     val_iter = paddle.io.DataLoader(BananasDataset(is_train=False),
-                                    batch_size=batch_size)
+                                    batch_size=batch_size, return_list=True)
     return train_iter, val_iter

 d2l.DATA_HUB['voc2012'] = (d2l.DATA_URL + 'VOCtrainval_11-May-2012.tar',
@@ -1964,10 +1967,10 @@ def load_data_voc(batch_size, crop_size):
     num_workers = d2l.get_dataloader_workers()
     train_iter = paddle.io.DataLoader(
         VOCSegDataset(True, crop_size, voc_dir), batch_size=batch_size,
-        shuffle=True, drop_last=True, num_workers=num_workers)
+        shuffle=True, return_list=True, drop_last=True, num_workers=num_workers)
     test_iter = paddle.io.DataLoader(
         VOCSegDataset(False, crop_size, voc_dir), batch_size=batch_size,
-        drop_last=True, num_workers=num_workers)
+        drop_last=True, return_list=True, num_workers=num_workers)
     return train_iter, test_iter

 d2l.DATA_HUB['cifar10_tiny'] = (d2l.DATA_URL + 'kaggle_cifar10_tiny.zip',
@@ -2162,7 +2165,7 @@ def __len__(self):
     dataset = PTBDataset(all_centers, all_contexts, all_negatives)
     data_iter = paddle.io.DataLoader(
-        dataset, batch_size=batch_size, shuffle=True,
+        dataset, batch_size=batch_size, shuffle=True, return_list=True,
         collate_fn=batchify, num_workers=num_workers)
     return data_iter, vocab
@@ -2474,7 +2477,7 @@ def load_data_wiki(batch_size, max_len):
     data_dir = d2l.download_extract('wikitext-2', 'wikitext-2')
     paragraphs = _read_wiki(data_dir)
     train_set = _WikiTextDataset(paragraphs, max_len)
-    train_iter = paddle.io.DataLoader(dataset=train_set, batch_size=batch_size,
+    train_iter = paddle.io.DataLoader(dataset=train_set, batch_size=batch_size, return_list=True,
                                       shuffle=True, num_workers=num_workers)
     return train_iter, train_set.vocab
diff --git a/static/build.yml b/static/build.yml
index 68aef4342..d317b0981 100644
--- a/static/build.yml
+++ b/static/build.yml
@@ -11,6 +11,6 @@ dependencies:
   - -f https://download.pytorch.org/whl/torch_stable.html
   - tensorflow==2.9.1
   - tensorflow-probability==0.17.0
-  - paddlepaddle-gpu==2.3.1.post112
+  - paddlepaddle-gpu==2.3.2.post112
   - -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
   - opencv-python==4.6.0.66
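The change these files share is passing `return_list=True` to every `paddle.io.DataLoader`, so each batch is yielded as a list of tensors that the surrounding training loops can unpack directly as `X, y`. Below is a minimal sketch of that pattern, modeled on the updated `load_array`; the synthetic data, shapes, and batch size are illustrative assumptions, not part of the patch.

```python
# Minimal sketch: build a paddle.io.DataLoader the way the patched load_array
# does, with return_list=True so that every batch comes back as a list of
# tensors. The synthetic data below is only for illustration.
import numpy as np
import paddle

features = paddle.to_tensor(np.random.normal(size=(100, 2)).astype('float32'))
labels = paddle.to_tensor(np.random.normal(size=(100, 1)).astype('float32'))

dataset = paddle.io.TensorDataset([features, labels])
data_iter = paddle.io.DataLoader(dataset, batch_size=10, shuffle=True,
                                 return_list=True)

for X, y in data_iter:
    # Each batch is a list of tensors, so it unpacks directly into X and y.
    print(X.shape, y.shape)
    break
```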