From 5bef476d8dd0f8899223adbcc0b34134f88f9c57 Mon Sep 17 00:00:00 2001 From: Zillur Rahman Date: Tue, 25 Apr 2023 20:10:58 -0700 Subject: [PATCH] resa-classification --- configs/resa/resa18_tusimple.py | 21 +++++++--- lanedet/datasets/base_dataset.py | 4 +- lanedet/datasets/tusimple.py | 69 +++++++++++++++++++++++++++--- lanedet/engine/runner.py | 72 +++++++++++++++++++++++++++----- lanedet/models/heads/busd.py | 4 +- lanedet/models/heads/lane_seg.py | 69 ++++++++++++++++++++++++++---- 6 files changed, 204 insertions(+), 35 deletions(-) diff --git a/configs/resa/resa18_tusimple.py b/configs/resa/resa18_tusimple.py index 8c11505..2c937d3 100644 --- a/configs/resa/resa18_tusimple.py +++ b/configs/resa/resa18_tusimple.py @@ -11,6 +11,9 @@ ) featuremap_out_channel = 128 featuremap_out_stride = 8 +num_classes = 3 +num_lanes = 6 + 1 +classification = True aggregator = dict( type='RESA', @@ -26,6 +29,7 @@ decoder=dict(type='BUSD'), thr=0.6, sample_y=sample_y, + cat_dim = (num_lanes - 1, num_classes) ) optimizer = dict( @@ -36,7 +40,7 @@ ) -epochs = 5 +epochs = 15 batch_size = 4 total_iter = (3216 // batch_size + 1) * epochs import math @@ -67,6 +71,12 @@ ] val_process = [ + dict(type='Resize', size=(img_width, img_height)), + dict(type='Normalize', img_norm=img_norm), + dict(type='ToTensor'), +] + +infer_process = [ dict(type='Resize', size=(img_width, img_height)), dict(type='Normalize', img_norm=img_norm), dict(type='ToTensor', keys=['img']), @@ -83,7 +93,7 @@ val=dict( type='TuSimple', data_root=dataset_path, - split='test', + split='val', processes=val_process, ), test=dict( @@ -95,12 +105,11 @@ ) -batch_size = 8 workers = 8 -num_classes = 6 + 1 ignore_label = 255 -log_interval = 100 +log_interval = 200 eval_ep = 1 save_ep = epochs -test_json_file='data/tusimple/label_data_0601.json' +#test_json_file='data/tusimple/label_data_0531_small.json' +test_json_file='data/tusimple/label_data_0531.json' lr_update_by_epoch = False \ No newline at end of file diff --git 
a/lanedet/datasets/base_dataset.py b/lanedet/datasets/base_dataset.py index c2718ae..1a1a405 100644 --- a/lanedet/datasets/base_dataset.py +++ b/lanedet/datasets/base_dataset.py @@ -63,9 +63,9 @@ def __getitem__(self, idx): meta = DC(meta, cpu_only=True) sample.update({'meta': meta}) #generate one dict with img, img_path, lane pixels, seg_img - #category = data_info['categories'] + category = data_info['categories'] #category = [0 if np.all(sample['cls_label'][:,i].numpy() == 100) else category[i] for i in range(6)] - #sample['category'] = torch.LongTensor(category) + sample['category'] = torch.LongTensor(category) #print(sample.keys()) return sample \ No newline at end of file diff --git a/lanedet/datasets/tusimple.py b/lanedet/datasets/tusimple.py index e3171aa..a801cd0 100644 --- a/lanedet/datasets/tusimple.py +++ b/lanedet/datasets/tusimple.py @@ -3,18 +3,24 @@ import cv2 import os import json +import torch import torchvision from .base_dataset import BaseDataset from lanedet.utils.tusimple_metric import LaneEval from .registry import DATASETS import logging import random +import torch.nn.functional as F +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +from sklearn.metrics import confusion_matrix SPLIT_FILES = { #'trainval': ['label_data_0313.json', 'label_data_0601.json', 'label_data_0531.json'], - 'trainval': ['label_data_0313.json', 'label_data_0531.json'], - 'val': ['label_data_0601.json'], - 'test': ['label_data_0601.json'], + 'trainval': ['label_data_0313.json', 'label_data_0601.json'], + 'val': ['label_data_0531.json'], + 'test': ['label_data_0531.json'], } @@ -30,6 +36,7 @@ def load_annotations(self): self.logger.info('Loading TuSimple annotations...') self.data_infos = [] max_lanes = 0 + df = {0:0, 1:1, 2:1, 3:2, 4:2, 5:2, 6:1, 7:0} for anno_file in self.anno_files: anno_file = osp.join(self.data_root, anno_file) with open(anno_file, 'r') as anno_obj: @@ -38,17 +45,20 @@ def load_annotations(self): data = json.loads(line) 
y_samples = data['h_samples'] gt_lanes = data['lanes'] + category = data['categories'] + category = list(map(df.get,category)) + mask_path = data['raw_file'].replace('clips', 'seg_label')[:-3] + 'png' lanes = [[(x, y) for (x, y) in zip(lane, y_samples) if x >= 0] for lane in gt_lanes] lanes = [lane for lane in lanes if len(lane) > 0] max_lanes = max(max_lanes, len(lanes)) self.data_infos.append({ - 'img_path': osp.join(self.data_root, data['raw_file']), + 'img_path': osp.join(self.data_root, data['raw_file']), #append all the samples in all the json files 'img_name': data['raw_file'], 'mask_path': osp.join(self.data_root, mask_path), 'lanes': lanes, + 'categories':category }) - if self.training: random.shuffle(self.data_infos) self.max_lanes = max_lanes @@ -82,9 +92,56 @@ def save_tusimple_predictions(self, predictions, filename, runtimes=None): with open(filename, 'w') as output_file: output_file.write('\n'.join(lines)) - def evaluate(self, predictions, output_basedir, runtimes=None): + def evaluate_detection(self, predictions, output_basedir, runtimes=None): pred_filename = os.path.join(output_basedir, 'tusimple_predictions.json') self.save_tusimple_predictions(predictions, pred_filename, runtimes) result, acc = LaneEval.bench_one_submit(pred_filename, self.cfg.test_json_file) self.logger.info(result) return acc + + # Calculate accuracy (a classification metric) + def accuracy_fn(self, y_true, y_pred): + """Calculates accuracy between truth labels and predictions. + Args: + y_true (torch.Tensor): Truth labels for predictions. + y_pred (torch.Tensor): Predictions to be compared to truth labels. + Returns: + [torch.float]: Accuracy value between y_true and y_pred, e.g. 
0.7845 + """ + correct = torch.eq(y_true, y_pred).sum().item() + acc = (correct / torch.numel(y_pred)) + return acc + + def evaluate_classification(self, predictions, ground_truth): + score = F.softmax(predictions, dim=2) + y_pred = score.argmax(dim=2) + return self.accuracy_fn(ground_truth, y_pred) + + def plot_confusion_matrix(self, y_true, y_pred): + + cf_matrix = confusion_matrix(y_true, y_pred) + class_names = ('background','solid-yellow', 'solid-white', 'dashed', 'double-dashed','botts\'-dots', 'double-solid-yellow', 'unknown') + + # Create pandas dataframe + dataframe = pd.DataFrame(cf_matrix, index=class_names, columns=class_names) + + # compute metrics from the confusion matrix + FP = cf_matrix.sum(axis=0) - np.diag(cf_matrix) + FN = cf_matrix.sum(axis=1) - np.diag(cf_matrix) + TP = np.diag(cf_matrix) + TN = cf_matrix.sum() - (FP + FN + TP) + + # Overall accuracy + ACC = (TP+TN)/(TP+FP+FN+TN) + + # plot the confusion matrix + plt.figure(figsize=(8, 6)) + + # Create heatmap + sns.heatmap(dataframe, annot=True, cbar=None,cmap="YlGnBu",fmt="d") + + plt.title("Confusion Matrix"), plt.tight_layout() + + plt.ylabel("True Class"), + plt.xlabel("Predicted Class") + plt.show() diff --git a/lanedet/engine/runner.py b/lanedet/engine/runner.py index 126f34c..0c81596 100644 --- a/lanedet/engine/runner.py +++ b/lanedet/engine/runner.py @@ -38,8 +38,10 @@ def __init__(self, cfg): if self.cfg.optimizer.type == 'SGD': self.warmup_scheduler = warmup.LinearWarmup( self.optimizer, warmup_period=5000) - self.metric = 0. + self.detection_metric = 0. + self.classification_metric = 0. 
self.val_loader = None + self.test_loader = None def resume(self): if not self.cfg.load_from and not self.cfg.finetune_from: @@ -105,23 +107,71 @@ def validate(self): if not self.val_loader: self.val_loader = build_dataloader(self.cfg.dataset.val, self.cfg, is_train=False) self.net.eval() - predictions = [] + detection_predictions = [] + classification_acc = 0 for i, data in enumerate(tqdm(self.val_loader, desc=f'Validate')): data = self.to_cuda(data) with torch.no_grad(): output = self.net(data) - output = self.net.module.get_lanes(output) - predictions.extend(output) + detection_output = self.net.module.get_lanes(output)['lane_output'] + detection_predictions.extend(detection_output) + if self.cfg.classification: + classification_acc += self.val_loader.dataset.evaluate_classification(output['category'].cuda(), data['category'].cuda()) + if self.cfg.view: - self.val_loader.dataset.view(output, data['meta']) + self.val_loader.dataset.view(detection_output, data['meta']) - out = self.val_loader.dataset.evaluate(predictions, self.cfg.work_dir) - self.recorder.logger.info(out) - metric = out - if metric > self.metric: - self.metric = metric + detection_out = self.val_loader.dataset.evaluate_detection(detection_predictions, self.cfg.work_dir) + detection_metric = detection_out + if detection_metric > self.detection_metric: + self.detection_metric = detection_metric self.save_ckpt(is_best=True) - self.recorder.logger.info('Best metric: ' + str(self.metric)) + + if self.cfg.classification: + classification_acc /= len(self.val_loader) + self.recorder.logger.info("Detection: " +str(detection_out) + " "+ "classification accuracy: " + str(classification_acc)) + classification_metric = classification_acc + if classification_metric > self.classification_metric: + self.classification_metric = classification_metric + #self.save_ckpt(is_best=True) + self.recorder.logger.info('Best detection metric: ' + str(self.detection_metric) + " " + 'Best classification metric: ' + 
str(self.classification_metric)) + else: + self.recorder.logger.info("Detection: " +str(detection_out)) + self.recorder.logger.info('Best detection metric: ' + str(self.detection_metric)) + + def test(self): + if not self.test_loader: + self.test_loader = build_dataloader(self.cfg.dataset.test, self.cfg, is_train=False) + self.recorder.logger.info('Start testing...') + classification_acc = 0 + y_true = [] + y_pred = [] + self.net.eval() + detection_predictions = [] + for i, data in enumerate(tqdm(self.test_loader, desc=f'test')): + data = self.to_cuda(data) + with torch.no_grad(): + output = self.net(data) + detection_output = self.net.module.get_lanes(output)['lane_output'] + detection_predictions.extend(detection_output) + + if self.cfg.classification: + y_true.extend((data['category'].cpu().numpy()).flatten('C').tolist()) + score = F.softmax(output['category'].cuda(), dim=2) + score = score.argmax(dim=2) + y_pred.extend((score.cpu().numpy()).flatten('C').tolist()) + + classification_acc += self.test_loader.dataset.evaluate_classification(output['category'].cuda(), data['category'].cuda()) + + detection_out = self.test_loader.dataset.evaluate_detection(detection_predictions, self.cfg.work_dir) + + if self.cfg.classification: + classification_acc /= len(self.test_loader) + self.recorder.logger.info("Detection: " +str(detection_out) + " "+ "classification accuracy: " + str(classification_acc)) + self.test_loader.dataset.plot_confusion_matrix(y_true, y_pred) + else: + self.recorder.logger.info("Detection: " +str(detection_out)) + def save_ckpt(self, is_best=False): save_model(self.net, self.optimizer, self.scheduler, diff --git a/lanedet/models/heads/busd.py b/lanedet/models/heads/busd.py index c41e9b7..acab131 100644 --- a/lanedet/models/heads/busd.py +++ b/lanedet/models/heads/busd.py @@ -99,7 +99,7 @@ def __init__(self, cfg): super().__init__() img_height = cfg.img_height img_width = cfg.img_width - num_classes = cfg.num_classes + num_lanes = cfg.num_lanes 
self.layers = nn.ModuleList() @@ -110,7 +110,7 @@ def __init__(self, cfg): self.layers.append(UpsamplerBlock(ninput=32, noutput=16, up_height=int(img_height)//1, up_width=int(img_width)//1)) - self.output_conv = conv1x1(16, num_classes) + self.output_conv = conv1x1(16, num_lanes) def forward(self, input): output = input diff --git a/lanedet/models/heads/lane_seg.py b/lanedet/models/heads/lane_seg.py index 5a2c189..a0efd56 100644 --- a/lanedet/models/heads/lane_seg.py +++ b/lanedet/models/heads/lane_seg.py @@ -9,39 +9,73 @@ @HEADS.register_module class LaneSeg(nn.Module): - def __init__(self, decoder, exist=None, thr=0.6, - sample_y=None, cfg=None): + def __init__(self, decoder, exist=None, thr=0.6, + sample_y=None, cat_dim = None, cfg=None, in_channels=6, out_channels=6): super(LaneSeg, self).__init__() self.cfg = cfg self.thr = thr self.sample_y = sample_y + self.cat_dim = cat_dim self.decoder = build_head(decoder, cfg) self.exist = build_head(exist, cfg) if exist else None + if self.cfg.classification: + + self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.conv1 = torch.nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False + ) + self.bn1 = torch.nn.BatchNorm2d(out_channels) + self.relu = torch.nn.ReLU(inplace=True) + + self.category = torch.nn.Sequential( + torch.nn.Dropout(p=0.2), + torch.nn.Linear(353280, 512), + torch.nn.BatchNorm1d(512), + torch.nn.ReLU(), + torch.nn.Linear(512, 100), + torch.nn.ReLU(), + torch.nn.Linear(100, np.prod(self.cat_dim)) + ) def get_lanes(self, output): segs = output['seg'] segs = F.softmax(segs, dim=1) segs = segs.detach().cpu().numpy() + #print(segs.shape) + #print("------------") if 'exist' in output: exists = output['exist'] exists = exists.detach().cpu().numpy() exists = exists > 0.5 else: exists = [None for _ in segs] + ret= {} - ret = [] + lane_output = [] + lane_indexes = [] for seg, exist in zip(segs, exists): - lanes = self.probmap2lane(seg, exist) - 
ret.append(lanes) + #print(seg.shape) + lanes, lane_indx = self.probmap2lane(seg, exist) + lane_output.append(lanes) + lane_indexes.append(lane_indx) + ret.update({'lane_output': lane_output, 'lane indexes': lane_indexes}) return ret def probmap2lane(self, probmaps, exists=None): lanes = [] probmaps = probmaps[1:, ...] + #print(probmaps.shape) if exists is None: exists = [True for _ in probmaps] - for probmap, exist in zip(probmaps, exists): + + lane_indx = [] + for i, (probmap, exist) in enumerate(zip(probmaps, exists)): if exist == 0: continue probmap = cv2.blur(probmap, (9, 9), borderType=cv2.BORDER_REPLICATE) @@ -65,11 +99,12 @@ def probmap2lane(self, probmaps, exists=None): coord[:, 0] /= self.cfg.ori_img_w coord[:, 1] /= self.cfg.ori_img_h lanes.append(Lane(coord)) + lane_indx.append(i) - return lanes + return lanes, lane_indx def loss(self, output, batch): - weights = torch.ones(self.cfg.num_classes) + weights = torch.ones(self.cfg.num_lanes) weights[0] = self.cfg.bg_weight weights = weights.cuda() criterion = torch.nn.NLLLoss(ignore_index=self.cfg.ignore_label, @@ -81,6 +116,15 @@ def loss(self, output, batch): output['seg'], dim=1), batch['mask'].long()) loss += seg_loss loss_stats.update({'seg_loss': seg_loss}) + + if self.cfg.classification: + loss_fn = torch.nn.CrossEntropyLoss() + classification_output = output['category'].reshape(self.cfg.batch_size*(self.cfg.num_lanes - 1), self.cfg.num_classes) + targets = batch['category'].reshape(self.cfg.batch_size*(self.cfg.num_lanes - 1)) + + cat_loss = loss_fn(classification_output, targets) + loss += cat_loss + loss_stats.update({'cls_loss': cat_loss}) if 'exist' in output: exist_loss = 0.1 * \ @@ -98,5 +142,14 @@ def forward(self, x, **kwargs): output.update(self.decoder(x)) if self.exist: output.update(self.exist(x)) + + if self.cfg.classification: + x= output['seg'][:,1:, ...] 
+ x = self.maxpool(x) + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x).view(-1, 353280) + category = self.category(x).view(-1, *self.cat_dim) + output.update({'category': category}) return output