EfficientViT-模型结构
EfficientViT 模型的文本结构输出
EfficientVit(
(stem): Stem(
(in_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(res0): ResidualBlock(
(pre_norm): Identity()
(main): DSConv(
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
(norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
(stages): Sequential(
(0): EfficientVitStage(
(blocks): Sequential(
(0): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
)
(1): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
)
(1): EfficientVitStage(
(blocks): Sequential(
(0): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
)
(1): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
)
(2): EfficientVitStage(
(blocks): Sequential(
(0): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
)
(1): EfficientVitBlock(
(context_module): ResidualBlock(
(pre_norm): Identity()
(main): LiteMLA(
(qkv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): Identity()
(act): Identity()
)
(aggreg): ModuleList(
(0): Sequential(
(0): Conv2d(192, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=192, bias=False)
(1): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1), groups=12, bias=False)
)
)
(kernel_func): ReLU()
(proj): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
(local_module): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
(2): EfficientVitBlock(
(context_module): ResidualBlock(
(pre_norm): Identity()
(main): LiteMLA(
(qkv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): Identity()
(act): Identity()
)
(aggreg): ModuleList(
(0): Sequential(
(0): Conv2d(192, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=192, bias=False)
(1): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1), groups=12, bias=False)
)
)
(kernel_func): ReLU()
(proj): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
(local_module): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
)
)
(3): EfficientVitStage(
(blocks): Sequential(
(0): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
)
(1): EfficientVitBlock(
(context_module): ResidualBlock(
(pre_norm): Identity()
(main): LiteMLA(
(qkv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): Identity()
(act): Identity()
)
(aggreg): ModuleList(
(0): Sequential(
(0): Conv2d(384, 384, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=384, bias=False)
(1): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), groups=24, bias=False)
)
)
(kernel_func): ReLU()
(proj): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
(local_module): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
(2): EfficientVitBlock(
(context_module): ResidualBlock(
(pre_norm): Identity()
(main): LiteMLA(
(qkv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): Identity()
(act): Identity()
)
(aggreg): ModuleList(
(0): Sequential(
(0): Conv2d(384, 384, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=384, bias=False)
(1): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), groups=24, bias=False)
)
)
(kernel_func): ReLU()
(proj): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
(local_module): ResidualBlock(
(pre_norm): Identity()
(main): MBConv(
(inverted_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): Hardswish()
)
(depth_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
(norm): Identity()
(act): Hardswish()
)
(point_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
)
(shortcut): Identity()
)
)
)
)
)
(head): ClassifierHead(
(in_conv): ConvNormAct(
(dropout): Dropout(p=0.0, inplace=False)
(conv): Conv2d(128, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Hardswish()
)
(global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
(classifier): Sequential(
(0): Linear(in_features=1024, out_features=1280, bias=False)
(1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
(2): Hardswish()
(3): Dropout(p=0.0, inplace=False)
(4): Linear(in_features=1280, out_features=1000, bias=True)
)
)
)

评论