Sparse4Dv3 代码学习(Ⅰ)模型定义和初始化
本文梳理 Sparse4Dv3 的模型定义与初始化流程:backbone/neck(ResNet50 + FPN)、Sparse4DHead 及其各子模块(InstanceBank、anchor 编码、采样器、box 解码后处理、损失函数等)。其中 anchor_handler 为 SparseBox3DKeyPointsGenerator,anchor 初值从 npy 文件加载。
从这里开始模型定义:
model = build_detector(cfg.model, test_cfg=cfg.get("test_cfg"))
A.初始化模型的backbone和neck:
if pretrained is not None:
backbone.pretrained = pretrained
self.img_backbone = build_backbone(img_backbone)
if img_neck is not None:
self.img_neck = build_neck(img_neck)
对应的配置文件(使用 ResNet50 和 FPN):
img_backbone=dict(
type="ResNet",
depth=50,
num_stages=4,
frozen_stages=-1,
norm_eval=False,
style="pytorch",
with_cp=True,
out_indices=(0, 1, 2, 3),
norm_cfg=dict(type="BN", requires_grad=True),
pretrained="ckpt/resnet50-19c8e357.pth",
),
img_neck=dict(
type="FPN",
num_outs=num_levels,
start_level=0,
out_channels=embed_dims,
add_extra_convs="on_output",
relu_before_extra_convs=True,
in_channels=[256, 512, 1024, 2048],
),
B.Sparse4DHead的初始化
self.head = build_head(head)
self.num_decoder = num_decoder # 6
self.num_single_frame_decoder = num_single_frame_decoder # 1
self.gt_cls_key = gt_cls_key # 'gt_labels_3d'
self.gt_reg_key = gt_reg_key # 'gt_bboxes_3d'
self.cls_threshold_to_reg = cls_threshold_to_reg # 0.05
self.dn_loss_weight = dn_loss_weight # 5.0
self.decouple_attn = decouple_attn # True
①self.instance_bank (InstanceBank)
self.instance_bank = build(instance_bank, PLUGIN_LAYERS)
self.embed_dims = embed_dims # 256
self.num_temp_instances = num_temp_instances # 600
self.default_time_interval = default_time_interval # 0.5
self.confidence_decay = confidence_decay # 0.6
self.max_time_interval = max_time_interval # 2
if anchor_handler is not None:
anchor_handler = build_from_cfg(anchor_handler, PLUGIN_LAYERS)
assert hasattr(anchor_handler, "anchor_projection")
self.anchor_handler = anchor_handler
里面的anchor_handler是 SparseBox3DKeyPointsGenerator
@PLUGIN_LAYERS.register_module()
class SparseBox3DKeyPointsGenerator(BaseModule):
    """Generate sampling key points for sparse 3D boxes.

    Each anchor yields ``num_pts`` key points: a fixed set of scale
    offsets (``fix_scale``) plus, optionally, extra offsets produced by a
    learnable linear head.
    """

    def __init__(
        self,
        embed_dims=256,
        num_learnable_pts=0,
        fix_scale=None,
    ):
        super().__init__()
        self.embed_dims = embed_dims  # 256 in the reference config
        self.num_learnable_pts = num_learnable_pts  # 0 here
        # Default: a single key point at the box center.
        fix_scale = ((0.0, 0.0, 0.0),) if fix_scale is None else fix_scale
        # Frozen parameter so the offsets follow the module's device/dtype.
        self.fix_scale = nn.Parameter(
            torch.tensor(fix_scale), requires_grad=False
        )
        self.num_pts = len(self.fix_scale) + num_learnable_pts  # 1 here
        if num_learnable_pts > 0:  # branch skipped in this config
            self.learnable_fc = Linear(self.embed_dims, num_learnable_pts * 3)
加载anchor:(应该是那个npy文件)
if isinstance(anchor, str):
anchor = np.load(anchor)
初始化anchor和实例特征:
self.anchor = nn.Parameter(
torch.tensor(anchor, dtype=torch.float32),
requires_grad=anchor_grad,
)
self.anchor_init = anchor
self.instance_feature = nn.Parameter(
torch.zeros([self.anchor.shape[0], self.embed_dims]),
requires_grad=feat_grad,
)
②anchor的编码( SparseBox3DEncoder)
self.anchor_encoder = build(anchor_encoder, POSITIONAL_ENCODING)
对位置,大小、方向、速度都要编码,用的是Linear线性映射 :
@POSITIONAL_ENCODING.register_module()
class SparseBox3DEncoder(BaseModule):
    """Encode 3D box anchors (position / size / yaw / velocity) as features.

    Each box component is embedded by its own small MLP built from
    ``linear_relu_ln``; per-branch widths come from ``embed_dims`` (a
    scalar is broadcast to all five slots).
    """

    def __init__(
        self,
        embed_dims,
        vel_dims=3,
        mode="add",
        output_fc=True,
        in_loops=1,
        out_loops=2,
    ):
        super().__init__()
        assert mode in ("add", "cat")
        # Keep the raw argument (may be a scalar or a per-branch list).
        self.embed_dims = embed_dims  # [128, 32, 32, 64] in the config
        self.vel_dims = vel_dims  # 3
        self.mode = mode  # 'cat' in the config

        def embedding_layer(input_dims, output_dims):
            # Small MLP stack (Linear/ReLU/LayerNorm) from the project helper.
            return nn.Sequential(
                *linear_relu_ln(output_dims, in_loops, out_loops, input_dims)
            )

        # Broadcast a scalar width to the five branch slots.
        if not isinstance(embed_dims, (list, tuple)):
            embed_dims = [embed_dims] * 5
        self.pos_fc = embedding_layer(3, embed_dims[0])   # x, y, z
        self.size_fc = embedding_layer(3, embed_dims[1])  # w, l, h
        self.yaw_fc = embedding_layer(2, embed_dims[2])   # sin/cos of yaw
        if vel_dims > 0:  # branch taken in this config
            self.vel_fc = embedding_layer(self.vel_dims, embed_dims[3])
        # Optional fusion head; None disables it (the config uses mode='cat'
        # with output_fc=False, so this stays None there).
        self.output_fc = (
            embedding_layer(embed_dims[-1], embed_dims[-1]) if output_fc else None
        )
③self.sampler(SparseBox3DTarget)
self.sampler = build(sampler, BBOX_SAMPLERS)
class BaseTargetWithDenoising(ABC):
    """Base class for target assigners that support denoising training.

    Records the number of (temporal) denoising groups and holds a slot
    for denoising metadata carried between frames.
    """

    def __init__(self, num_dn_groups=0, num_temp_dn_groups=0):
        super().__init__()
        self.num_dn_groups = num_dn_groups  # 5 in the config
        self.num_temp_dn_groups = num_temp_dn_groups  # 3 in the config
        # Denoising state; populated lazily during training.
        self.dn_metas = None
好像是关于box的一些定义的:
@BBOX_SAMPLERS.register_module()
class SparseBox3DTarget(BaseTargetWithDenoising):
    """Target assigner for sparse 3D boxes with denoising support.

    Holds the matching-cost weights (focal-style classification cost plus
    a weighted box-regression cost) and the denoising-group settings.
    """

    def __init__(
        self,
        cls_weight=2.0,
        alpha=0.25,
        gamma=2,
        eps=1e-12,
        box_weight=0.25,
        reg_weights=None,
        cls_wise_reg_weights=None,
        num_dn_groups=0,
        dn_noise_scale=0.5,
        max_dn_gt=32,
        add_neg_dn=True,
        num_temp_dn_groups=0,
    ):
        super().__init__(num_dn_groups, num_temp_dn_groups)
        # Matching-cost weights (focal-loss style for classification).
        self.cls_weight = cls_weight  # 2.0
        self.box_weight = box_weight  # 0.25
        self.alpha = alpha  # 0.25
        self.gamma = gamma  # 2
        self.eps = eps  # 1e-12
        # Per-dimension regression weights; by default only the first 8
        # state dims (position/size/yaw) contribute, velocities are zeroed.
        self.reg_weights = (
            [1.0] * 8 + [0.0] * 2 if reg_weights is None else reg_weights
        )
        # Optional per-class overrides of the regression weights.
        self.cls_wise_reg_weights = cls_wise_reg_weights
        # Denoising configuration.
        self.dn_noise_scale = dn_noise_scale
        self.max_dn_gt = max_dn_gt  # 32: cap on GT boxes used per group
        self.add_neg_dn = add_neg_dn  # True: also add negative dn samples
④self.decoder(SparseBox3DDecoder)
self.decoder = build(decoder, BBOX_CODERS)
应该是关于box解码后处理相关的
@BBOX_CODERS.register_module()
class SparseBox3DDecoder(object):
    """Post-processing decoder that selects the final predicted boxes.

    Keeps the top ``num_output`` boxes, optionally filtered by
    ``score_threshold``, returned in score order when ``sorted`` is set.
    """

    def __init__(
        self,
        num_output: int = 300,
        score_threshold: Optional[float] = None,
        sorted: bool = True,
    ):
        super().__init__()
        self.num_output = num_output  # 300
        self.score_threshold = score_threshold  # None: keep all scores
        # NOTE: `sorted` shadows the builtin, but the parameter name is
        # part of the public config interface and must stay.
        self.sorted = sorted  # True
⑤self.loss_cls(FocalLoss)
self.loss_cls = build(loss_cls, LOSSES)
⑥self.loss_reg(SparseBox3DLoss)
self.loss_reg = build(loss_reg, LOSSES)
应该是关于怎么算各项损失的:
class SparseBox3DLoss(nn.Module):
    """Bundle of regression-side losses for sparse 3D boxes.

    Wraps the box loss plus optional centerness and yaw-ness quality
    losses; any sub-loss configured as None is simply disabled.
    """

    def __init__(
        self,
        loss_box,
        loss_centerness=None,
        loss_yawness=None,
        cls_allow_reverse=None,
    ):
        super().__init__()

        def _build_optional(cfg, registry):
            # A missing config disables the corresponding sub-loss.
            return None if cfg is None else build_from_cfg(cfg, registry)

        self.loss_box = _build_optional(loss_box, LOSSES)  # e.g. L1Loss
        self.loss_cns = _build_optional(loss_centerness, LOSSES)  # e.g. CrossEntropyLoss
        self.loss_yns = _build_optional(loss_yawness, LOSSES)  # e.g. GaussianFocalLoss
        # Class ids whose yaw may be flipped 180 degrees without penalty.
        self.cls_allow_reverse = cls_allow_reverse  # [5] in the config
⑦然后是循环里面的一堆操作:
self.op_config_map = {
"temp_gnn": [temp_graph_model, ATTENTION],
"gnn": [graph_model, ATTENTION],
"norm": [norm_layer, NORM_LAYERS],
"ffn": [ffn, FEEDFORWARD_NETWORK],
"deformable": [deformable_model, ATTENTION],
"refine": [refine_layer, PLUGIN_LAYERS],
}
self.layers = nn.ModuleList(
[
build(*self.op_config_map.get(op, [None, None]))
for op in self.operation_order
]
)
DeformableFeatureAggregation
又出现了一个anchor编码的:
@PLUGIN_LAYERS.register_module()
class SparseBox3DKeyPointsGenerator(BaseModule):
    """Generate sampling key points for sparse 3D boxes.

    NOTE(review): this listing duplicates the class shown earlier in the
    post; here it is reportedly instantiated with num_learnable_pts=6 and
    a 7-entry fix_scale (box center plus the six face centers) — TODO
    confirm against the actual config.
    """

    def __init__(
        self,
        embed_dims=256,
        num_learnable_pts=0,
        fix_scale=None,
    ):
        super(SparseBox3DKeyPointsGenerator, self).__init__()
        self.embed_dims = embed_dims  # 256
        self.num_learnable_pts = num_learnable_pts  # 0 by default
        if fix_scale is None:  # taken only when no scales are configured
            fix_scale = ((0.0, 0.0, 0.0),)
        # Frozen (non-trainable) parameter holding the fixed offsets.
        self.fix_scale = nn.Parameter(
            torch.tensor(fix_scale), requires_grad=False
        )
        # Total key points = fixed offsets + learnable offsets.
        self.num_pts = len(self.fix_scale) + num_learnable_pts
        if num_learnable_pts > 0:  # skipped when num_learnable_pts == 0
            self.learnable_fc = Linear(self.embed_dims, num_learnable_pts * 3)
self.kps_generator = build_from_cfg(kps_generator, PLUGIN_LAYERS)
self.kps_generator {'type': 'SparseBox3DKeyPointsGenerator', 'num_learnable_pts': 6, 'fix_scale': [[...], [...], [...], [...], [...], [...], [...]], 'embed_dims': 256}
这次 fix_scale 有 7 组,对应 7 个固定关键点:box 中心点和 6 个面的中心点:
camera嵌入的编码:
if use_camera_embed: # 进入
self.camera_encoder = Sequential(
*linear_relu_ln(embed_dims, 1, 2, 12)
)
self.weights_fc = Linear(
embed_dims, num_groups * num_levels * self.num_pts
)
AsymmetricFFN
refine(SparseBox3DRefinementModule)
基本都是一些特征预测输出:
@PLUGIN_LAYERS.register_module()
class SparseBox3DRefinementModule(BaseModule):
    """Predict per-instance refinements from instance features.

    Outputs box state deltas, optional class logits and optional quality
    (centerness / yaw-ness) estimates.
    """

    def __init__(
        self,
        embed_dims=256,
        output_dim=11,
        num_cls=10,
        normalize_yaw=False,
        refine_yaw=False,
        with_cls_branch=True,
        with_quality_estimation=False,
    ):
        super().__init__()
        self.embed_dims = embed_dims  # 256
        self.output_dim = output_dim  # 11
        self.num_cls = num_cls  # 10
        self.normalize_yaw = normalize_yaw  # False
        self.refine_yaw = refine_yaw  # True in the config

        # Indices of the anchor-state entries this module refines:
        # position + size, plus yaw (sin/cos) when refine_yaw is set.
        self.refine_state = [X, Y, Z, W, L, H] + (
            [SIN_YAW, COS_YAW] if self.refine_yaw else []
        )

        # Regression branch: MLP -> linear head -> learnable per-dim scale.
        self.layers = nn.Sequential(
            *linear_relu_ln(embed_dims, 2, 2),
            Linear(self.embed_dims, self.output_dim),
            Scale([1.0] * self.output_dim),
        )

        self.with_cls_branch = with_cls_branch
        if with_cls_branch:
            # Classification head producing num_cls logits.
            self.cls_layers = nn.Sequential(
                *linear_relu_ln(embed_dims, 1, 2),
                Linear(self.embed_dims, self.num_cls),
            )
        self.with_quality_estimation = with_quality_estimation
        if with_quality_estimation:
            # Two quality scores (centerness and yaw-ness).
            self.quality_layers = nn.Sequential(
                *linear_relu_ln(embed_dims, 1, 2),
                Linear(self.embed_dims, 2),
            )
C.深度头
if depth_branch is not None: # 进入
self.depth_branch = build_from_cfg(depth_branch, PLUGIN_LAYERS)
@PLUGIN_LAYERS.register_module()
class DenseDepthNet(BaseModule):
    """Dense depth-prediction heads: one single-channel 1x1 conv per
    feature level, used as an auxiliary depth branch.
    """

    def __init__(
        self,
        embed_dims=256,
        num_depth_layers=1,
        equal_focal=100,
        max_depth=60,
        loss_weight=1.0,
    ):
        super().__init__()
        self.embed_dims = embed_dims  # 256
        self.equal_focal = equal_focal  # 100: reference focal length
        self.num_depth_layers = num_depth_layers  # 3 in the config
        self.max_depth = max_depth  # 60
        self.loss_weight = loss_weight  # 0.2 in the config
        # One 1x1 conv head (embed_dims -> 1 channel) per feature level.
        self.depth_layers = nn.ModuleList(
            nn.Conv2d(embed_dims, 1, kernel_size=1, stride=1, padding=0)
            for _ in range(num_depth_layers)
        )
更多推荐
所有评论(0)