Table of Contents
- 1. Purpose of the Modification
- 2. Demo Introduction
- 2.1 Code
- 2.2 Results
- 2.3 Bottleneck in Detail
 
 
1. Purpose of the Modification
After the original YOLO11 model was trained, the detection mAP was low and the results on video were poor, which motivated improving the network. This article covers modifying the backbone network.
2. Demo Introduction
2.1 Code
# @File: 21.YOLO11修改主干网络.py
# @Author: chen_song
# @Time: 2025-02-28 21:29
import torch
import torch.nn as nn
import torchvision.models as models


class YOLO11Backbone(nn.Module):
    def __init__(self, num_classes=80):
        super(YOLO11Backbone, self).__init__()
        # Use a pretrained ResNet50 as the backbone network
        # (pretrained=True still works but is deprecated since torchvision 0.13; see the note below)
        self.backbone = models.resnet50(pretrained=True)
        # Replace the last fully connected layer to fit the desired YOLO output size
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        return x


# A simple test case
if __name__ == "__main__":
    model = YOLO11Backbone(num_classes=80)
    print(model)
    # Create a random input tensor
    input_tensor = torch.randn(1, 3, 224, 224)
    output = model(input_tensor)
    print(output.shape)
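A note on the pretrained argument: as the warnings in the run log under 2.2 show, torchvision 0.13+ deprecates pretrained=True in favor of the weights enum. A minimal sketch of the equivalent call, assuming torchvision >= 0.13:

from torchvision.models import resnet50, ResNet50_Weights

# Equivalent to pretrained=True: explicitly load the ImageNet-1K V1 weights
backbone = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
# Or take whatever weights torchvision currently recommends:
# backbone = resnet50(weights=ResNet50_Weights.DEFAULT)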
2.2 Results
D:\anaconda3\envs\yolov5_cuda12.4\python.exe E:\PROJ\yolo11\ultralytics\ultralytics\demo\21.YOLO11修改主干网络.py
 D:\anaconda3\envs\yolov5_cuda12.4\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
 warnings.warn(
 D:\anaconda3\envs\yolov5_cuda12.4\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or None for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing weights=ResNet50_Weights.IMAGENET1K_V1. You can also use weights=ResNet50_Weights.DEFAULT to get the most up-to-date weights.
 warnings.warn(msg)
 Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\PC/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
 100%|██████████| 97.8M/97.8M [02:45<00:00, 619kB/s]
 YOLO11Backbone(
 (backbone): ResNet(
 (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
 (layer1): Sequential(
 (0): Bottleneck(
 (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (downsample): Sequential(
 (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 )
 (1): Bottleneck(
 (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (2): Bottleneck(
 (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 )
 (layer2): Sequential(
 (0): Bottleneck(
 (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (downsample): Sequential(
 (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
 (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 )
 (1): Bottleneck(
 (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (2): Bottleneck(
 (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (3): Bottleneck(
 (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 )
 (layer3): Sequential(
 (0): Bottleneck(
 (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (downsample): Sequential(
 (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
 (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 )
 (1): Bottleneck(
 (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (2): Bottleneck(
 (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (3): Bottleneck(
 (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (4): Bottleneck(
 (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (5): Bottleneck(
 (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 )
 (layer4): Sequential(
 (0): Bottleneck(
 (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (downsample): Sequential(
 (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
 (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 )
 (1): Bottleneck(
 (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 (2): Bottleneck(
 (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
 (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 )
 )
 (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
 (fc): Linear(in_features=2048, out_features=80, bias=True)
 )
 )
 torch.Size([1, 80])
Process finished with exit code 0
2.3 Bottleneck in Detail
As the printout above shows, ResNet50 is built from stacked Bottleneck blocks: a 1x1 convolution reduces the channel count, a 3x3 convolution extracts spatial features (with stride 2 in the first block of layer2 through layer4 for downsampling), and a second 1x1 convolution expands the channels four-fold; the block's input is then added back through an identity shortcut (or a 1x1 downsample branch when the shape changes) before the final ReLU. Because these residual connections let ResNet train much deeper networks without degradation, its feature extraction is expected to be stronger than that of the original YOLO11 backbone. A minimal sketch of such a block is shown below.
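To make the structure concrete, here is a simplified sketch of a Bottleneck block written against the layer shapes in the printout above; it is illustrative rather than the exact torchvision implementation, and argument names such as mid_channels are my own.

import torch
import torch.nn as nn


class Bottleneck(nn.Module):
    """Sketch of a ResNet Bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand (x4), plus a residual shortcut."""
    expansion = 4

    def __init__(self, in_channels, mid_channels, stride=1):
        super().__init__()
        out_channels = mid_channels * self.expansion
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # The shortcut needs a 1x1 conv when the spatial size or channel count changes,
        # which is the "downsample" branch visible in the printout above.
        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        identity = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return self.relu(out + identity)  # residual addition before the final ReLU


# e.g. the first block of layer1 in the printout: 64 -> 256 channels, stride 1
block = Bottleneck(in_channels=64, mid_channels=64, stride=1)
print(block(torch.randn(1, 64, 56, 56)).shape)  # torch.Size([1, 256, 56, 56])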