
Inference speed issue #9951

@bgyooPtr

Description

Search before asking

  • I have searched the YOLOv5 issues and found no similar bug report.

YOLOv5 Component

Detection, Integrations

Bug

When running real-time prediction with a Realsense D415, inference speed varies heavily with the camera FPS: the higher the camera FPS, the slower the inference. I don't understand why this happens, since frame grabbing runs in its own thread.

  • fps: 30
    avg time: 10.963139772415161 ms
  • fps: 60
    avg time: 24.456851720809937 ms
  • fps: 90
    avg time: 35.522178649902344 ms
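
Note that the timing loop below measures wall-clock time around a host-side call, so GPU work and GIL contention with the grabber thread both land in the same number. A minimal sketch that isolates the forward pass by synchronizing before and after (timed_forward is a hypothetical helper, not part of YOLOv5):

import time
import torch

def timed_forward(model, im):
    # Synchronize so the GPU has actually finished before reading the clock;
    # without this, the measurement also absorbs whatever else blocks the host.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    t0 = time.time()
    pred = model(im)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return pred, (time.time() - t0) * 1000  # ms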

Environment

  • detect: weights=['runs/train/exp2/weights/best.pt'], data=data/coco128.yaml, imgsz=[500, 500], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
  • YOLOv5 🚀 v6.2-211-g32a9218 Python-3.7.13 torch-1.8.1 CUDA:0 (NVIDIA TITAN RTX, 24217MiB)

Fusing layers...
Model summary: 157 layers, 1760518 parameters, 0 gradients, 4.1 GFLOPs
WARNING ⚠️ --img-size [500, 500] must be multiple of max stride 32, updating to [512, 512]
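
The update from [500, 500] to [512, 512] comes from rounding each dimension up to the nearest multiple of the model's max stride (32), as YOLOv5's make_divisible does; a minimal sketch of that rounding:

import math

def make_divisible(x, divisor=32):
    # round x up to the nearest multiple of divisor (the model's max stride)
    return math.ceil(x / divisor) * divisor

print([make_divisible(s) for s in (500, 500)])  # -> [512, 512]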

Minimal Reproducible Example

import threading
import time

import cv2
import numpy as np
import pyrealsense2 as rs
import torch

from models.common import DetectMultiBackend
from utils.augmentations import letterbox
from utils.general import LOGGER, check_img_size, non_max_suppression
from utils.torch_utils import select_device


class LoadStreams1:
    # YOLOv5-style stream loader adapted for a Realsense D415 color + depth stream
    def __init__(
        self,
        img_size=640,
        stride=32,
        auto=True,
        transforms=None,
        vid_stride=1,
        fps=30,
    ):
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.img_size = img_size
        self.stride = stride
        self.vid_stride = vid_stride  # video frame-rate stride
        self.img, self.depth, self.fps, self.frame, self.thread = (
            None,
            None,
            0,
            0,
            None,
        )

        self.pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.color, 848, 480, rs.format.rgb8, fps)
        config.enable_stream(rs.stream.depth, 848, 480, rs.format.z16, fps)
        self.align_to_color = rs.align(rs.stream.color)

        self.prof = self.pipeline.start(config)
        device = self.prof.get_device().first_depth_sensor()
        preset_range = device.get_option_range(rs.option.visual_preset)
        for i in range(int(preset_range.max)):
            visual_preset = device.get_option_value_description(
                rs.option.visual_preset, i
            )
            print("%02d: %s" % (i, visual_preset))
            if visual_preset == "High Accuracy":
                device.set_option(rs.option.visual_preset, i)
                print(":: set preset to High Accuracy")

        # warm-up: loop until one valid frame has seeded self.img and self.depth
        for i in range(60):
            # ret, frames = self.pipeline.wait_for_frames()
            ret, frames = self.pipeline.try_wait_for_frames()
            if not ret:
                print("warm up failed")
                continue
            aligned_frames = self.align_to_color.process(frames)
            color_frame = aligned_frames.get_color_frame()
            rgb_image = np.asanyarray(color_frame.get_data())
            bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
            self.img = bgr_image

            depth_frame = aligned_frames.get_depth_frame()
            depth_data = np.array(depth_frame.data)
            self.depth = depth_data

            break  # one good frame is enough

        self.thread = threading.Thread(
            target=self.update, args=(), daemon=True
        )
        self.thread.start()
        while True:
            if self.thread.is_alive():
                break
            time.sleep(0.1)

        # check for common shapes
        s = np.stack(
            [
                letterbox(self.img, img_size, stride=stride, auto=auto)[0].shape
            ]
        )
        self.rect = (
            np.unique(s, axis=0).shape[0] == 1
        )  # rect inference if all shapes equal
        self.auto = auto and self.rect
        self.transforms = transforms  # optional
        if not self.rect:
            LOGGER.warning(
                "WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams."
            )

    def update(self):
        while True:
            ret, frames = self.pipeline.try_wait_for_frames()
            if not ret:
                LOGGER.warning(
                    "WARNING ⚠️ Video stream unresponsive, please check your IP camera connection."
                )
                break
            aligned_frames = self.align_to_color.process(frames)
            color_frame = aligned_frames.get_color_frame()
            rgb_image = np.asanyarray(color_frame.get_data())
            bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)

            depth_frame = aligned_frames.get_depth_frame()
            depth_data = np.array(depth_frame.data)

            self.img = bgr_image
            self.depth = depth_data
            time.sleep(0.0)  # sleep(0) yields the GIL only momentarily

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if not self.thread.is_alive() or cv2.waitKey(1) == ord(
            "q"
        ):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        im0 = self.img.copy()
        depthu = None
        if isinstance(self.depth, np.ndarray):
            depthu = self.depth.copy()
        im = np.stack(
            [
                letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]
            ]
        )  # resize
        im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        im = np.ascontiguousarray(im)  # contiguous

        return im, im0, depthu

def run(
    weights="runs/train/exp2/weights/best.pt",  # arguments from the Environment section above
    data="data/coco128.yaml",
    imgsz=(500, 500),
    conf_thres=0.25,
    iou_thres=0.45,
    max_det=1000,
    device="",
    classes=None,
    agnostic_nms=False,
    augment=False,
    half=False,
    dnn=False,
    vid_stride=1,
):
    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    dataset = LoadStreams1(
        img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride, fps=90
    )
    times = []
    for _, im0s, depth in dataset:
        if len(times) == 1000:
            break
        s_time = time.time()
        img0 = letterbox(im0s, imgsz, stride=stride, auto=pt)[0]
        img = img0.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        im = torch.from_numpy(img).to(model.device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim

        # Inference
        pred = model(im, augment=augment, visualize=False)

        # NMS
        pred = non_max_suppression(
            pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det
        )

        e_time = (time.time() - s_time) * 1000  # ms
        times.append(e_time)
        print("elapsed time (ms): ", e_time)
    print("avg time (ms): ", np.mean(times))

Additional

No response

Are you willing to submit a PR?

  • Yes I'd like to help by submitting a PR!
