手把手教你用PyTorch复现PointGPT:从点块排序到双重掩码的完整实现指南
用PyTorch从零构建PointGPT：深入解析点云自回归预训练技术

在3D视觉领域，点云数据因其直接反映物体空间结构的特性而备受关注。然而，点云的无序性和稀疏性给深度学习模型的设计带来了独特挑战。本文将带您深入探索PointGPT这一创新架构，通过PyTorch代码实现从点块处理到双重掩码Transformer的完整流程。

1. 点云序列化模块实现

点云序列化是PointGPT处理原始数据的第一步，其核心是将无序的点云转化为有序的序列。我们首先实现点块划分和排序的关键组件。

1.1 点块划分与Morton排序

点块划分采用最远点采样(FPS)和K近邻(KNN)的组合策略。以下是PyTorch实现代码：

```python
import torch
import torch.nn.functional as F
from torch_cluster import knn

def fps(points, n_samples):
    # 最远点采样实现
    device = points.device
    B, N, _ = points.shape
    centroids = torch.zeros(B, n_samples, dtype=torch.long, device=device)
    distance = torch.ones(B, N, device=device) * 1e10
    farthest = torch.randint(0, N, (B,), dtype=torch.long, device=device)
    batch_indices = torch.arange(B, device=device)
    for i in range(n_samples):
        centroids[:, i] = farthest
        centroid = points[batch_indices, farthest, :].view(B, 1, 3)
        dist = torch.sum((points - centroid) ** 2, -1)
        mask = dist < distance
        distance[mask] = dist[mask]
        farthest = torch.max(distance, -1)[1]
    return centroids

def point_patch_partition(points, n_patches, k_neighbors):
    # 点块划分完整实现
    centroids = fps(points, n_patches)
    centroids_points = points.gather(1, centroids.unsqueeze(-1).expand(-1, -1, 3))
    # KNN搜索
    idx = knn(points.view(-1, 3), centroids_points.view(-1, 3), k_neighbors)
    idx = idx[1].view(points.size(0), n_patches, k_neighbors)
    # 组织点块
    patches = points.gather(1, idx.unsqueeze(-1).expand(-1, -1, -1, 3))
    return patches, centroids_points
```

Morton排序将3D空间中的点映射到1维Z形曲线：

```python
def morton_sort(points):
    # 将坐标转换为二进制并交错位
    coords = (points * 1024).int()
    coords = torch.clamp(coords, 0, 1023)
    x = coords[..., 0]
    y = coords[..., 1]
    z = coords[..., 2]
    # Morton编码计算
    morton_code = torch.zeros_like(x)
    for i in range(10):
        morton_code |= ((x & (1 << i)) << (2 * i)) | \
                       ((y & (1 << i)) << (2 * i + 1)) | \
                       ((z & (1 << i)) << (2 * i + 2))
    # 获取排序索引
    sorted_indices = torch.argsort(morton_code, dim=-1)
    return sorted_indices
```

1.2 点块嵌入与归一化

使用轻量级PointNet提取点块特征：

```python
class PointNetEmbedding(nn.Module):
    def __init__(self, embed_dim=256):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Conv1d(3, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128, embed_dim, 1),
            nn.BatchNorm1d(embed_dim),
            nn.ReLU()
        )

    def forward(self, patches):
        # 归一化到局部坐标系
        centroids = patches.mean(dim=2, keepdim=True)
        normalized = patches - centroids
        # 特征提取
        B, N, K, _ = normalized.shape
        patches = normalized.permute(0, 1, 3, 2).reshape(B * N, 3, K)
        features = self.mlp(patches).max(dim=-1)[0]
        return features.view(B, N, -1), centroids.squeeze(2)
```

2. 双重掩码Transformer架构

PointGPT的核心创新在于其独特的双重掩码策略和提取器-生成器架构设计。

2.1 基础Transformer解码器实现

首先构建基础的Transformer解码器块：

```python
class TransformerDecoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src, mask=None, key_padding_mask=None):
        # 自注意力层
        src2 = self.self_attn(src, src, src, attn_mask=mask,
                              key_padding_mask=key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # 前馈网络
        src2 = self.linear2(self.dropout(F.relu(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
```

2.2 双重掩码策略实现

双重掩码包含标准因果掩码和随机组掩码：

```python
def create_dual_masks(seq_len, group_size=4, mask_ratio=0.3):
    # 标准因果掩码
    causal_mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
    # 随机组掩码
    group_mask = torch.zeros(seq_len, seq_len)
    groups = torch.arange(seq_len) // group_size
    for i in range(seq_len):
        same_group = (groups == groups[i])
        to_mask = torch.randperm(int(same_group.sum()))[:int(mask_ratio * same_group.sum())]
        mask_indices = torch.where(same_group)[0][to_mask]
        group_mask[i, mask_indices] = 1
    # 组合两种掩码
    combined_mask = causal_mask | group_mask.bool()
    return combined_mask.to(device)
```

2.3 提取器-生成器架构

完整实现提取器和生成器的级联结构：

```python
class PointGPT(nn.Module):
    def __init__(self, n_layers=6, n_heads=8, d_model=256,
                 generator_layers=2, dropout=0.1):
        super().__init__()
        #
 提取器：完整Transformer
        self.extractor = nn.ModuleList([
            TransformerDecoderLayer(d_model, n_heads, d_model * 4, dropout)
            for _ in range(n_layers)
        ])
        # 生成器：浅层Transformer
        self.generator = nn.ModuleList([
            TransformerDecoderLayer(d_model, n_heads, d_model * 4, dropout)
            for _ in range(generator_layers)
        ])
        # 位置编码
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        # 预测头
        self.pred_head = nn.Sequential(
            nn.Linear(d_model, d_model * 2),
            nn.ReLU(),
            nn.Linear(d_model * 2, 3)  # 预测点坐标
        )

    def forward(self, x, centroids):
        # 位置编码
        x = self.pos_encoder(x)
        # 双重掩码
        mask = create_dual_masks(x.size(0))
        # 提取器处理
        for layer in self.extractor:
            x = layer(x, mask)
        # 生成器处理
        tg = x.clone()
        for layer in self.generator:
            tg = layer(tg, mask)
        # 预测点块
        pred_points = self.pred_head(tg)
        # 添加方向提示
        direction = centroids[1:] - centroids[:-1]
        direction = F.pad(direction, (0, 0, 0, 1))
        pred_points = pred_points + direction.unsqueeze(1)
        return x, pred_points
```

3. 训练策略与损失函数

PointGPT采用自回归预测和Chamfer距离损失进行预训练。

3.1 Chamfer距离实现

实现L1和L2两种形式的Chamfer距离：

```python
def chamfer_distance(pred, target, l2=False):
    # 计算两组点云之间的最近邻距离
    dist = torch.cdist(pred, target)
    min_dist1, _ = dist.min(dim=2)
    min_dist2, _ = dist.min(dim=1)
    if l2:
        loss = (min_dist1.mean(dim=1) + min_dist2.mean(dim=1)) / 2
    else:
        loss = (min_dist1.sum(dim=1) + min_dist2.sum(dim=1)) / 2
    return loss.mean()
```

3.2 自回归训练流程

实现点块的自回归预测训练循环：

```python
def train_step(model, optimizer, points, n_patches=32, k_neighbors=32):
    model.train()
    # 点块划分和排序
    patches, centroids = point_patch_partition(points, n_patches, k_neighbors)
    sorted_idx = morton_sort(centroids)
    patches = patches.gather(1, sorted_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, k_neighbors, 3))
    centroids = centroids.gather(1, sorted_idx.unsqueeze(-1).expand(-1, -1, 3))
    # 点块嵌入
    embeddings, _ = PointNetEmbedding()(patches)
    # 自回归预测
    total_loss = 0
    for i in range(1, n_patches):
        # 输入序列
        input_seq = embeddings[:, :i]
        current_centroids = centroids[:, :i]
        # 模型预测
        _, pred_points = model(input_seq, current_centroids)
        # 计算损失
        target_patch = patches[:, i]
        loss = chamfer_distance(pred_points[:, -1], target_patch)
        loss += chamfer_distance(pred_points[:, -1],
                                 target_patch, l2=True)
        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / n_patches
```

4. 模型优化与扩展实践

在实际应用中，我们还需要考虑以下关键优化点：

4.1 高效训练技巧

- 梯度累积：在显存有限时，通过多次前向传播累积梯度再更新参数
- 混合精度训练：使用AMP(自动混合精度)加速训练过程
- 学习率预热：逐步提高学习率，避免早期训练不稳定

```python
from torch.cuda.amp import autocast, GradScaler

def train_with_amp(model, train_loader, epochs=100):
    scaler = GradScaler()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = get_linear_schedule_with_warmup(optimizer, 1000, len(train_loader) * epochs)
    for epoch in range(epochs):
        for batch in train_loader:
            with autocast():
                loss = train_step(model, optimizer, batch)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
```

4.2 下游任务迁移

预训练完成后，提取器可用于各类下游任务：

```python
class DownstreamClassifier(nn.Module):
    def __init__(self, extractor, num_classes):
        super().__init__()
        self.extractor = extractor
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, points):
        # 提取全局特征
        patches, _ = point_patch_partition(points, 32, 32)
        embeddings, _ = PointNetEmbedding()(patches)
        # 通过提取器
        features = embeddings
        for layer in self.extractor.extractor:
            features = layer(features)
        # 全局平均池化
        global_feature = features.mean(dim=1)
        return self.classifier(global_feature)
```

在实际项目中，PointGPT的预训练表示可以显著提升小样本学习场景下的性能。例如，在ModelNet40分类任务中，仅用10%标注数据就能达到85%以上的准确率，这验证了自回归预训练在点云理解中的有效性。
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2557101.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!