====== efficientvit-模型结构 ====== ===== efficientvit 模型的文本结构输出 ===== baseline版本: EfficientVit(   (stem): Stem(     (in_conv): ConvNormAct(       (dropout): Dropout(p=0.0, inplace=False)       (conv): Conv2d(3, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)       (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)       (act): Hardswish()     )     (res0): ResidualBlock(       (pre_norm): Identity()       (main): DSConv(         (depth_conv): ConvNormAct(           (dropout): Dropout(p=0.0, inplace=False)           (conv): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)           (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)           (act): Hardswish()         )         (point_conv): ConvNormAct(           (dropout): Dropout(p=0.0, inplace=False)           (conv): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)           (norm): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)           (act): Identity()         )       )       (shortcut): Identity()     )   )   (stages): Sequential(     (0): EfficientVitStage(       (blocks): Sequential(         (0): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)               (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )         )         (1): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)               (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )           (shortcut): Identity()         )       )     )     (1): EfficientVitStage(       (blocks): Sequential(         (0): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)               (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )         )         (1): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)               (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )           (shortcut): Identity()         )       )     )     (2): EfficientVitStage(       (blocks): Sequential(         (0): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))               (norm): Identity()               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)               (norm): Identity()               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )         )         (1): EfficientVitBlock(           (context_module): ResidualBlock(             (pre_norm): Identity()             (main): LiteMLA(               (qkv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): Identity()                 (act): Identity()               )               (aggreg): ModuleList(                 (0): Sequential(                   (0): Conv2d(192, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=192, bias=False)                   (1): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1), groups=12, bias=False)                 )               )               (kernel_func): ReLU()               (proj): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )           (local_module): ResidualBlock(             (pre_norm): Identity()             (main): MBConv(               (inverted_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))                 (norm): Identity()                 (act): Hardswish()               )               (depth_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)                 (norm): Identity()                 (act): Hardswish()               )               (point_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )         )         (2): EfficientVitBlock(           (context_module): ResidualBlock(             (pre_norm): Identity()             (main): LiteMLA(               (qkv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): Identity()                 (act): Identity()               )               (aggreg): ModuleList(                 (0): Sequential(                   (0): Conv2d(192, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=192, bias=False)                   (1): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1), groups=12, bias=False)                 )               )               (kernel_func): ReLU()               (proj): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )           (local_module): ResidualBlock(             (pre_norm): Identity()             (main): MBConv(               (inverted_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))                 (norm): Identity()                 (act): Hardswish()               )               (depth_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)                 (norm): Identity()                 (act): Hardswish()               )               (point_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )         )       )     )     (3): EfficientVitStage(       (blocks): Sequential(         (0): ResidualBlock(           (pre_norm): Identity()           (main): MBConv(             (inverted_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))               (norm): Identity()               (act): Hardswish()             )             (depth_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)               (norm): Identity()               (act): Hardswish()             )             (point_conv): ConvNormAct(               (dropout): Dropout(p=0.0, inplace=False)               (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)               (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)               (act): Identity()             )           )         )         (1): EfficientVitBlock(           (context_module): ResidualBlock(             (pre_norm): Identity()             (main): LiteMLA(               (qkv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): Identity()                 (act): Identity()               )               (aggreg): ModuleList(                 (0): Sequential(                   (0): Conv2d(384, 384, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=384, bias=False)                   (1): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), groups=24, bias=False)                 )               )               (kernel_func): ReLU()               (proj): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )           (local_module): ResidualBlock(             (pre_norm): Identity()             (main): MBConv(               (inverted_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))                 (norm): Identity()                 (act): Hardswish()               )               (depth_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)                 (norm): Identity()                 (act): Hardswish()               )               (point_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )         )         (2): EfficientVitBlock(           (context_module): ResidualBlock(             (pre_norm): Identity()             (main): LiteMLA(               (qkv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): Identity()                 (act): Identity()               )               (aggreg): ModuleList(                 (0): Sequential(                   (0): Conv2d(384, 384, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=384, bias=False)                   (1): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), groups=24, bias=False)                 )               )               (kernel_func): ReLU()               (proj): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )           (local_module): ResidualBlock(             (pre_norm): Identity()             (main): MBConv(               (inverted_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))                 (norm): Identity()                 (act): Hardswish()               )               (depth_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)                 (norm): Identity()                 (act): Hardswish()               )               (point_conv): ConvNormAct(                 (dropout): Dropout(p=0.0, inplace=False)                 (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)                 (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)                 (act): Identity()               )             )             (shortcut): Identity()           )         )       )     )   )   (head): ClassifierHead(     (in_conv): ConvNormAct(       (dropout): Dropout(p=0.0, inplace=False)       (conv): Conv2d(128, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)       (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)       (act): Hardswish()     )     (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))     (classifier): Sequential(       (0): Linear(in_features=1024, out_features=1280, bias=False)       (1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)       (2): Hardswish()       (3): Dropout(p=0.0, inplace=False)       (4): Linear(in_features=1280, out_features=1000, bias=True)     )   ) ) large版本模型 EfficientVitLarge( (stem): Stem( (in_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (res0): ResidualBlock( (pre_norm): Identity() (main): ConvBlock( (conv1): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (conv2): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (stages): Sequential( (0): EfficientVitLargeStage( (blocks): Sequential( (0): ResidualBlock( (pre_norm): Identity() (main): FusedMBConv( (spatial_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(32, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(64, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) ) (1): ResidualBlock( (pre_norm): Identity() (main): FusedMBConv( (spatial_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(64, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) ) (1): EfficientVitLargeStage( (blocks): Sequential( (0): ResidualBlock( (pre_norm): Identity() (main): FusedMBConv( (spatial_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(64, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (norm): BatchNorm2d(1024, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(128, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) ) (1): ResidualBlock( (pre_norm): Identity() (main): FusedMBConv( (spatial_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(128, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(128, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) ) (2): EfficientVitLargeStage( (blocks): Sequential( (0): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(128, 2048, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=2048) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) ) (1): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (2): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (3): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (4): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (5): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (6): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(256, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) ) (3): EfficientVitLargeStage( (blocks): Sequential( (0): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(256, 6144, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(6144, 6144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=6144) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(6144, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) ) (1): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (2): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (3): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (4): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (5): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) (6): EfficientVitBlock( (context_module): ResidualBlock( (pre_norm): Identity() (main): LiteMLA( (qkv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): Identity() (act): Identity() ) (aggreg): ModuleList( (0): Sequential( (0): Conv2d(1536, 1536, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1536, bias=False) (1): Conv2d(1536, 1536, kernel_size=(1, 1), stride=(1, 1), groups=48, bias=False) ) ) (kernel_func): ReLU() (proj): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) (local_module): ResidualBlock( (pre_norm): Identity() (main): MBConv( (inverted_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1)) (norm): Identity() (act): GELUTanh() ) (depth_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 3072, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3072) (norm): Identity() (act): GELUTanh() ) (point_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(3072, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True) (act): Identity() ) ) (shortcut): Identity() ) ) ) ) ) (head): ClassifierHead( (in_conv): ConvNormAct( (dropout): Dropout(p=0.0, inplace=False) (conv): Conv2d(512, 3072, kernel_size=(1, 1), stride=(1, 1), bias=False) (norm): BatchNorm2d(3072, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (act): GELUTanh() ) (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1)) (classifier): Sequential( (0): Linear(in_features=3072, out_features=3200, bias=False) (1): LayerNorm((3200,), eps=1e-07, elementwise_affine=True) (2): GELUTanh() (3): Dropout(p=0.0, inplace=False) (4): Linear(in_features=3200, out_features=1000, bias=True) ) ) ) ===== efficientvit 模型的模型PNG格式 ===== {{:人工智能:torch:常用模型结构:efficientvit结构图.png}}