Submit source code

pull/15/head
wangziyang 3 years ago
parent 4f82bcdf00
commit 761126b726

@@ -0,0 +1,20 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import optimizer
from .arch import *
from .optimizer import *
from .data import *
from .utils import *

@@ -0,0 +1,134 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import importlib
import paddle.nn as nn
from paddle.jit import to_static
from paddle.static import InputSpec
from . import backbone, gears
from .backbone import *
from .gears import build_gear
from .utils import *
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils import logger
from ppcls.utils.save_load import load_dygraph_pretrain
from ppcls.arch.slim import prune_model, quantize_model
__all__ = ["build_model", "RecModel", "DistillationModel"]
def build_model(config):
arch_config = copy.deepcopy(config["Arch"])
model_type = arch_config.pop("name")
mod = importlib.import_module(__name__)
arch = getattr(mod, model_type)(**arch_config)
if isinstance(arch, TheseusLayer):
prune_model(config, arch)
quantize_model(config, arch)
return arch
def apply_to_static(config, model):
support_to_static = config['Global'].get('to_static', False)
if support_to_static:
specs = None
if 'image_shape' in config['Global']:
specs = [InputSpec([None] + config['Global']['image_shape'])]
model = to_static(model, input_spec=specs)
logger.info("Successfully to apply @to_static with specs: {}".format(
specs))
return model
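# Illustrative usage (a minimal sketch, not part of the library; the model name
# and shape below are example values): build_model() consumes the "Arch"
# section of a config dict, and apply_to_static() reads the "Global" section.
#
#   config = {
#       "Arch": {"name": "ResNet50", "class_num": 1000},
#       "Global": {"to_static": True, "image_shape": [3, 224, 224]},
#   }
#   model = build_model(config)
#   model = apply_to_static(config, model)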
class RecModel(TheseusLayer):
def __init__(self, **config):
super().__init__()
backbone_config = config["Backbone"]
backbone_name = backbone_config.pop("name")
self.backbone = eval(backbone_name)(**backbone_config)
if "BackboneStopLayer" in config:
backbone_stop_layer = config["BackboneStopLayer"]["name"]
self.backbone.stop_after(backbone_stop_layer)
if "Neck" in config:
self.neck = build_gear(config["Neck"])
else:
self.neck = None
if "Head" in config:
self.head = build_gear(config["Head"])
else:
self.head = None
def forward(self, x, label=None):
out = dict()
x = self.backbone(x)
out["backbone"] = x
if self.neck is not None:
x = self.neck(x)
out["neck"] = x
out["features"] = x
if self.head is not None:
y = self.head(x, label)
out["logits"] = y
return out
class DistillationModel(nn.Layer):
def __init__(self,
models=None,
pretrained_list=None,
freeze_params_list=None,
**kargs):
super().__init__()
assert isinstance(models, list)
self.model_list = []
self.model_name_list = []
if pretrained_list is not None:
assert len(pretrained_list) == len(models)
if freeze_params_list is None:
freeze_params_list = [False] * len(models)
assert len(freeze_params_list) == len(models)
for idx, model_config in enumerate(models):
assert len(model_config) == 1
key = list(model_config.keys())[0]
model_config = model_config[key]
model_name = model_config.pop("name")
model = eval(model_name)(**model_config)
if freeze_params_list[idx]:
for param in model.parameters():
param.trainable = False
self.model_list.append(self.add_sublayer(key, model))
self.model_name_list.append(key)
if pretrained_list is not None:
for idx, pretrained in enumerate(pretrained_list):
if pretrained is not None:
                    load_dygraph_pretrain(
                        self.model_list[idx], path=pretrained)
def forward(self, x, label=None):
result_dict = dict()
for idx, model_name in enumerate(self.model_name_list):
if label is None:
result_dict[model_name] = self.model_list[idx](x)
else:
result_dict[model_name] = self.model_list[idx](x, label)
return result_dict
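# Illustrative usage (a sketch; model names and keys are example values):
# DistillationModel expects 'models' as a list of single-key dicts whose "name"
# values resolve to classes visible in this module.
#
#   model = DistillationModel(
#       models=[
#           {"Teacher": {"name": "ResNet50", "class_num": 1000}},
#           {"Student": {"name": "MobileNetV3_large_x1_0", "class_num": 1000}},
#       ],
#       freeze_params_list=[True, False])
#   # model(x) -> {"Teacher": teacher_out, "Student": student_out}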

@@ -0,0 +1,83 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import inspect
from ppcls.arch.backbone.legendary_models.mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
from ppcls.arch.backbone.legendary_models.mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
from ppcls.arch.backbone.legendary_models.resnet import ResNet18, ResNet18_vd, ResNet34, ResNet34_vd, ResNet50, ResNet50_vd, ResNet101, ResNet101_vd, ResNet152, ResNet152_vd, ResNet200_vd
from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x0_25, PPLCNet_x0_35, PPLCNet_x0_5, PPLCNet_x0_75, PPLCNet_x1_0, PPLCNet_x1_5, PPLCNet_x2_0, PPLCNet_x2_5
from ppcls.arch.backbone.legendary_models.esnet import ESNet_x0_25, ESNet_x0_5, ESNet_x0_75, ESNet_x1_0
from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SENet154_vd
from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
from ppcls.arch.backbone.model_zoo.densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264
from ppcls.arch.backbone.model_zoo.efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, EfficientNetB0_small
from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeSt50, ResNeSt101
from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet
from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
from ppcls.arch.backbone.model_zoo.alexnet import AlexNet
from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4
from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab
from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl
from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large
from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5
from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3g4
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh
# help the whl package get all the models' APIs (class type) and components' APIs (function type)
def get_apis():
current_func = sys._getframe().f_code.co_name
current_module = sys.modules[__name__]
api = []
for _, obj in inspect.getmembers(current_module,
inspect.isclass) + inspect.getmembers(
current_module, inspect.isfunction):
api.append(obj.__name__)
api.remove(current_func)
return api
__all__ = get_apis()
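# Illustrative note (not part of the file): since '__all__' is built by
# get_apis(), a star import exposes every backbone entry point listed above:
#   from ppcls.arch.backbone import *
#   model = ResNet50(class_num=1000)  # class_num assumed, forwarded via **kwargs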

@@ -0,0 +1,301 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Tuple, List, Dict, Union, Callable, Any
from paddle import nn
from ppcls.utils import logger
class Identity(nn.Layer):
def __init__(self):
super(Identity, self).__init__()
def forward(self, inputs):
return inputs
class TheseusLayer(nn.Layer):
def __init__(self, *args, **kwargs):
super(TheseusLayer, self).__init__()
self.res_dict = {}
self.res_name = self.full_name()
self.pruner = None
self.quanter = None
def _return_dict_hook(self, layer, input, output):
res_dict = {"output": output}
        # 'list' is needed to avoid the error raised by popping from self.res_dict while iterating over it
for res_key in list(self.res_dict):
# clear the res_dict because the forward process may change according to input
res_dict[res_key] = self.res_dict.pop(res_key)
return res_dict
def init_res(self,
stages_pattern,
return_patterns=None,
return_stages=None):
if return_patterns and return_stages:
msg = f"The 'return_patterns' would be ignored when 'return_stages' is set."
logger.warning(msg)
return_stages = None
if return_stages is True:
return_patterns = stages_pattern
        # return_stages is an int or a list of int
if type(return_stages) is int:
return_stages = [return_stages]
if isinstance(return_stages, list):
            if max(return_stages) >= len(stages_pattern) or min(
                    return_stages) < 0:
                msg = f"Invalid value(s) in 'return_stages' have been ignored. The stages' pattern list is {stages_pattern}."
logger.warning(msg)
return_stages = [
val for val in return_stages
if val >= 0 and val < len(stages_pattern)
]
return_patterns = [stages_pattern[i] for i in return_stages]
if return_patterns:
self.update_res(return_patterns)
def replace_sub(self, *args, **kwargs) -> None:
msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead."
logger.error(DeprecationWarning(msg))
raise DeprecationWarning(msg)
def upgrade_sublayer(self,
layer_name_pattern: Union[str, List[str]],
handle_func: Callable[[nn.Layer, str], nn.Layer]
                         ) -> List[str]:
"""use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'.
Args:
layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'.
            handle_func (Callable[[nn.Layer, str], nn.Layer]): The function used to modify the target layer specified by 'layer_name_pattern'. Its formal parameters are the layer (nn.Layer) and the pattern (str) that matched it (a member of 'layer_name_pattern' when that is a list), and it returns the processed layer.
        Returns:
            List[str]: The patterns in 'layer_name_pattern' that were matched and replaced successfully.
Examples:
from paddle import nn
import paddleclas
def rep_func(layer: nn.Layer, pattern: str):
new_layer = nn.Conv2D(
in_channels=layer._in_channels,
out_channels=layer._out_channels,
kernel_size=5,
padding=2
)
return new_layer
net = paddleclas.MobileNetV1()
            res = net.upgrade_sublayer(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func)
            print(res)
            # ['blocks[11].depthwise_conv.conv', 'blocks[12].depthwise_conv.conv']
"""
if not isinstance(layer_name_pattern, list):
layer_name_pattern = [layer_name_pattern]
hit_layer_pattern_list = []
for pattern in layer_name_pattern:
# parse pattern to find target layer and its parent
layer_list = parse_pattern_str(pattern=pattern, parent_layer=self)
if not layer_list:
continue
sub_layer_parent = layer_list[-2]["layer"] if len(
layer_list) > 1 else self
sub_layer = layer_list[-1]["layer"]
sub_layer_name = layer_list[-1]["name"]
sub_layer_index = layer_list[-1]["index"]
new_sub_layer = handle_func(sub_layer, pattern)
if sub_layer_index:
getattr(sub_layer_parent,
sub_layer_name)[sub_layer_index] = new_sub_layer
else:
setattr(sub_layer_parent, sub_layer_name, new_sub_layer)
hit_layer_pattern_list.append(pattern)
return hit_layer_pattern_list
def stop_after(self, stop_layer_name: str) -> bool:
"""stop forward and backward after 'stop_layer_name'.
Args:
            stop_layer_name (str): The name of the layer after which forward and backward computation stops.
Returns:
bool: 'True' if successful, 'False' otherwise.
"""
layer_list = parse_pattern_str(stop_layer_name, self)
if not layer_list:
return False
parent_layer = self
for layer_dict in layer_list:
name, index = layer_dict["name"], layer_dict["index"]
if not set_identity(parent_layer, name, index):
msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'."
logger.warning(msg)
return False
parent_layer = layer_dict["layer"]
return True
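    # Illustrative example (a sketch; the pattern below is borrowed from the
    # upgrade_sublayer() docstring and assumed valid for MobileNetV1):
    #   net = paddleclas.MobileNetV1()
    #   net.stop_after(stop_layer_name="blocks[11].depthwise_conv.conv")
    #   # every sublayer after blocks[11].depthwise_conv.conv is now Identity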
    def update_res(
            self,
            return_patterns: Union[str, List[str]]) -> List[str]:
        """update the result(s) to be returned.
        Args:
            return_patterns (Union[str, List[str]]): The pattern(s) of the layer(s) whose output(s) should be returned.
        Returns:
            List[str]: The patterns that were matched and registered successfully.
        """
# clear res_dict that could have been set
self.res_dict = {}
class Handler(object):
def __init__(self, res_dict):
# res_dict is a reference
self.res_dict = res_dict
def __call__(self, layer, pattern):
layer.res_dict = self.res_dict
layer.res_name = pattern
if hasattr(layer, "hook_remove_helper"):
layer.hook_remove_helper.remove()
layer.hook_remove_helper = layer.register_forward_post_hook(
save_sub_res_hook)
return layer
handle_func = Handler(self.res_dict)
hit_layer_pattern_list = self.upgrade_sublayer(
return_patterns, handle_func=handle_func)
if hasattr(self, "hook_remove_helper"):
self.hook_remove_helper.remove()
self.hook_remove_helper = self.register_forward_post_hook(
self._return_dict_hook)
return hit_layer_pattern_list
def save_sub_res_hook(layer, input, output):
layer.res_dict[layer.res_name] = output
def set_identity(parent_layer: nn.Layer,
layer_name: str,
layer_index: str=None) -> bool:
"""set the layer specified by layer_name and layer_index to Indentity.
Args:
parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index.
layer_name (str): The name of target layer to be set to Indentity.
layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None.
Returns:
bool: True if successfully, False otherwise.
"""
stop_after = False
for sub_layer_name in parent_layer._sub_layers:
if stop_after:
parent_layer._sub_layers[sub_layer_name] = Identity()
continue
if sub_layer_name == layer_name:
stop_after = True
if layer_index and stop_after:
stop_after = False
for sub_layer_index in parent_layer._sub_layers[
layer_name]._sub_layers:
if stop_after:
parent_layer._sub_layers[layer_name][
sub_layer_index] = Identity()
continue
if layer_index == sub_layer_index:
stop_after = True
return stop_after
def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[
None, List[Dict[str, Union[nn.Layer, str, None]]]]:
"""parse the string type pattern.
Args:
        pattern (str): The pattern that describes the layer.
parent_layer (nn.Layer): The root layer relative to the pattern.
Returns:
        Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None on failure. On success, the members are the layers parsed in order:
[
{"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist},
{"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist},
...
]
"""
pattern_list = pattern.split(".")
if not pattern_list:
msg = f"The pattern('{pattern}') is illegal. Please check and retry."
logger.warning(msg)
return None
layer_list = []
while len(pattern_list) > 0:
if '[' in pattern_list[0]:
target_layer_name = pattern_list[0].split('[')[0]
target_layer_index = pattern_list[0].split('[')[1].split(']')[0]
else:
target_layer_name = pattern_list[0]
target_layer_index = None
target_layer = getattr(parent_layer, target_layer_name, None)
if target_layer is None:
msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')."
logger.warning(msg)
return None
if target_layer_index and target_layer:
if int(target_layer_index) < 0 or int(target_layer_index) >= len(
target_layer):
msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0."
logger.warning(msg)
return None
target_layer = target_layer[target_layer_index]
layer_list.append({
"layer": target_layer,
"name": target_layer_name,
"index": target_layer_index
})
pattern_list = pattern_list[1:]
parent_layer = target_layer
return layer_list
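# Illustrative example (a sketch with an assumed layer pattern): parsing
# "blocks[3].conv" against a net would, on success, return entries in order:
#   parse_pattern_str("blocks[3].conv", net)
#   # [{"layer": net.blocks[3], "name": "blocks", "index": "3"},
#   #  {"layer": net.blocks[3].conv, "name": "conv", "index": None}]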

@@ -0,0 +1,6 @@
from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd
from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W64_C
from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
from .inception_v3 import InceptionV3
from .vgg import VGG11, VGG13, VGG16, VGG19

@@ -0,0 +1,369 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import math
import paddle
from paddle import ParamAttr, reshape, transpose, concat, split
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ESNet_x0_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_25_pretrained.pdparams",
"ESNet_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_5_pretrained.pdparams",
"ESNet_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_75_pretrained.pdparams",
"ESNet_x1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x1_0_pretrained.pdparams",
}
MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]}
__all__ = list(MODEL_URLS.keys())
def channel_shuffle(x, groups):
batch_size, num_channels, height, width = x.shape[0:4]
channels_per_group = num_channels // groups
x = reshape(
x=x, shape=[batch_size, groups, channels_per_group, height, width])
x = transpose(x=x, perm=[0, 2, 1, 3, 4])
x = reshape(x=x, shape=[batch_size, num_channels, height, width])
return x
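# Illustrative shape walk-through (not part of the model): for a tensor of
# shape [N, 8, H, W] with groups=2, the reshape gives [N, 2, 4, H, W], the
# transpose swaps the group/channel axes to [N, 4, 2, H, W], and the final
# reshape interleaves channels from different groups: 0,4,1,5,2,6,3,7.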
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
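# Worked example (illustrative): with the default divisor of 8, values are
# rounded to the nearest multiple of 8, then bumped up one step if rounding
# dropped more than 10% of the original value. For the ESNet_x0_25 channels:
#   make_divisible(116 * 0.25)  # 29  -> 32
#   make_divisible(232 * 0.25)  # 58  -> 56
#   make_divisible(464 * 0.25)  # 116 -> 120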
class ConvBNLayer(TheseusLayer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
if_act=True):
super().__init__()
self.conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = BatchNorm(
out_channels,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.if_act = if_act
self.hardswish = nn.Hardswish()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
x = self.hardswish(x)
return x
class SEModule(TheseusLayer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = nn.Hardsigmoid()
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = paddle.multiply(x=identity, y=x)
return x
class ESBlock1(TheseusLayer):
def __init__(self, in_channels, out_channels):
super().__init__()
self.pw_1_1 = ConvBNLayer(
in_channels=in_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1)
self.dw_1 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=1,
groups=out_channels // 2,
if_act=False)
self.se = SEModule(out_channels)
self.pw_1_2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1)
def forward(self, x):
x1, x2 = split(
x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1)
x2 = self.pw_1_1(x2)
x3 = self.dw_1(x2)
x3 = concat([x2, x3], axis=1)
x3 = self.se(x3)
x3 = self.pw_1_2(x3)
x = concat([x1, x3], axis=1)
return channel_shuffle(x, 2)
class ESBlock2(TheseusLayer):
def __init__(self, in_channels, out_channels):
super().__init__()
# branch1
self.dw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=2,
groups=in_channels,
if_act=False)
self.pw_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1)
# branch2
self.pw_2_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1)
self.dw_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=2,
groups=out_channels // 2,
if_act=False)
self.se = SEModule(out_channels // 2)
self.pw_2_2 = ConvBNLayer(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1)
self.concat_dw = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
groups=out_channels)
self.concat_pw = ConvBNLayer(
in_channels=out_channels, out_channels=out_channels, kernel_size=1)
def forward(self, x):
x1 = self.dw_1(x)
x1 = self.pw_1(x1)
x2 = self.pw_2_1(x)
x2 = self.dw_2(x2)
x2 = self.se(x2)
x2 = self.pw_2_2(x2)
x = concat([x1, x2], axis=1)
x = self.concat_dw(x)
x = self.concat_pw(x)
return x
class ESNet(TheseusLayer):
def __init__(self,
stages_pattern,
class_num=1000,
scale=1.0,
dropout_prob=0.2,
class_expand=1280,
return_patterns=None,
return_stages=None):
super().__init__()
self.scale = scale
self.class_num = class_num
self.class_expand = class_expand
stage_repeats = [3, 7, 3]
stage_out_channels = [
-1, 24, make_divisible(116 * scale), make_divisible(232 * scale),
make_divisible(464 * scale), 1024
]
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2)
self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
block_list = []
for stage_id, num_repeat in enumerate(stage_repeats):
for i in range(num_repeat):
if i == 0:
block = ESBlock2(
in_channels=stage_out_channels[stage_id + 1],
out_channels=stage_out_channels[stage_id + 2])
else:
block = ESBlock1(
in_channels=stage_out_channels[stage_id + 2],
out_channels=stage_out_channels[stage_id + 2])
block_list.append(block)
self.blocks = nn.Sequential(*block_list)
self.conv2 = ConvBNLayer(
in_channels=stage_out_channels[-2],
out_channels=stage_out_channels[-1],
kernel_size=1)
self.avg_pool = AdaptiveAvgPool2D(1)
self.last_conv = Conv2D(
in_channels=stage_out_channels[-1],
out_channels=self.class_expand,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.hardswish = nn.Hardswish()
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.fc = Linear(self.class_expand, self.class_num)
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.conv1(x)
x = self.max_pool(x)
x = self.blocks(x)
x = self.conv2(x)
x = self.avg_pool(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.dropout(x)
x = self.flatten(x)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "The type of `pretrained` is not supported. Please use `str` or `bool`."
        )
def ESNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
"""
ESNet_x0_25
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ESNet_x0_25` model depends on args.
"""
model = ESNet(
scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_25"], use_ssld)
return model
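# Illustrative usage (a sketch): the TheseusLayer machinery set up in
# ESNet.__init__ lets callers request intermediate stage outputs, e.g.:
#   model = ESNet_x0_25(return_stages=True)
#   out = model(paddle.rand([1, 3, 224, 224]))
#   # out is a dict: {"output": logits, "blocks[2]": ..., "blocks[9]": ..., "blocks[12]": ...}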
def ESNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
"""
ESNet_x0_5
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ESNet_x0_5` model depends on args.
"""
model = ESNet(
scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_5"], use_ssld)
return model
def ESNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
"""
ESNet_x0_75
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ESNet_x0_75` model depends on args.
"""
model = ESNet(
scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_75"], use_ssld)
return model
def ESNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
"""
ESNet_x1_0
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ESNet_x1_0` model depends on args.
"""
model = ESNet(
scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x1_0"], use_ssld)
return model

@@ -0,0 +1,794 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
from paddle import nn
from paddle import ParamAttr
from paddle.nn.functional import upsample
from paddle.nn.initializer import Uniform
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer, Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"HRNet_W18_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W18_C_pretrained.pdparams",
"HRNet_W30_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W30_C_pretrained.pdparams",
"HRNet_W32_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W32_C_pretrained.pdparams",
"HRNet_W40_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W40_C_pretrained.pdparams",
"HRNet_W44_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W44_C_pretrained.pdparams",
"HRNet_W48_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W48_C_pretrained.pdparams",
"HRNet_W64_C":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W64_C_pretrained.pdparams"
}
MODEL_STAGES_PATTERN = {"HRNet": ["st4"]}
__all__ = list(MODEL_URLS.keys())
def _create_act(act):
if act == "hardswish":
return nn.Hardswish()
elif act == "relu":
return nn.ReLU()
elif act is None:
return Identity()
else:
raise RuntimeError(
"The activation function is not supported: {}".format(act))
class ConvBNLayer(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act="relu"):
super().__init__()
self.conv = nn.Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
bias_attr=False)
self.bn = nn.BatchNorm(num_filters, act=None)
self.act = _create_act(act)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.act(x)
return x
class BottleneckBlock(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False):
super().__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu")
self.conv3 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if self.downsample:
self.conv_down = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16)
self.relu = nn.ReLU()
def forward(self, x, res_dict=None):
residual = x
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
if self.downsample:
residual = self.conv_down(residual)
if self.has_se:
x = self.se(x)
x = paddle.add(x=residual, y=x)
x = self.relu(x)
return x
class BasicBlock(nn.Layer):
def __init__(self, num_channels, num_filters, has_se=False):
super().__init__()
self.has_se = has_se
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=1,
act="relu")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=1,
act=None)
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16)
self.relu = nn.ReLU()
def forward(self, x):
residual = x
x = self.conv1(x)
x = self.conv2(x)
if self.has_se:
x = self.se(x)
x = paddle.add(x=residual, y=x)
x = self.relu(x)
return x
class SELayer(TheseusLayer):
def __init__(self, num_channels, num_filters, reduction_ratio):
super().__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.fc_squeeze = nn.Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
self.relu = nn.ReLU()
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.fc_excitation = nn.Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
self.sigmoid = nn.Sigmoid()
def forward(self, x, res_dict=None):
residual = x
x = self.avg_pool(x)
x = paddle.squeeze(x, axis=[2, 3])
x = self.fc_squeeze(x)
x = self.relu(x)
x = self.fc_excitation(x)
x = self.sigmoid(x)
x = paddle.unsqueeze(x, axis=[2, 3])
x = residual * x
return x
class Stage(TheseusLayer):
def __init__(self, num_modules, num_filters, has_se=False):
super().__init__()
self._num_modules = num_modules
self.stage_func_list = nn.LayerList()
for i in range(num_modules):
self.stage_func_list.append(
HighResolutionModule(
num_filters=num_filters, has_se=has_se))
def forward(self, x, res_dict=None):
for idx in range(self._num_modules):
x = self.stage_func_list[idx](x)
return x
class HighResolutionModule(TheseusLayer):
def __init__(self, num_filters, has_se=False):
super().__init__()
self.basic_block_list = nn.LayerList()
for i in range(len(num_filters)):
self.basic_block_list.append(
nn.Sequential(* [
BasicBlock(
num_channels=num_filters[i],
num_filters=num_filters[i],
has_se=has_se) for j in range(4)
]))
self.fuse_func = FuseLayers(
in_channels=num_filters, out_channels=num_filters)
def forward(self, x, res_dict=None):
out = []
for idx, xi in enumerate(x):
basic_block_list = self.basic_block_list[idx]
for basic_block_func in basic_block_list:
xi = basic_block_func(xi)
out.append(xi)
out = self.fuse_func(out)
return out
class FuseLayers(TheseusLayer):
def __init__(self, in_channels, out_channels):
super().__init__()
self._actual_ch = len(in_channels)
self._in_channels = in_channels
self.residual_func_list = nn.LayerList()
self.relu = nn.ReLU()
for i in range(len(in_channels)):
for j in range(len(in_channels)):
if j > i:
self.residual_func_list.append(
ConvBNLayer(
num_channels=in_channels[j],
num_filters=out_channels[i],
filter_size=1,
stride=1,
act=None))
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
self.residual_func_list.append(
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[i],
filter_size=3,
stride=2,
act=None))
pre_num_filters = out_channels[i]
else:
self.residual_func_list.append(
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[j],
filter_size=3,
stride=2,
act="relu"))
pre_num_filters = out_channels[j]
def forward(self, x, res_dict=None):
out = []
residual_func_idx = 0
for i in range(len(self._in_channels)):
residual = x[i]
for j in range(len(self._in_channels)):
if j > i:
xj = self.residual_func_list[residual_func_idx](x[j])
residual_func_idx += 1
xj = upsample(xj, scale_factor=2**(j - i), mode="nearest")
residual = paddle.add(x=residual, y=xj)
elif j < i:
xj = x[j]
for k in range(i - j):
xj = self.residual_func_list[residual_func_idx](xj)
residual_func_idx += 1
residual = paddle.add(x=residual, y=xj)
residual = self.relu(residual)
out.append(residual)
return out
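    # Illustrative note: for branch i, every higher-resolution branch j < i is
    # downsampled with strided 3x3 convs, and every lower-resolution branch
    # j > i is channel-projected then nearest-neighbor upsampled by 2**(j - i),
    # before all branches are summed into out[i].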
class LastClsOut(TheseusLayer):
def __init__(self,
num_channel_list,
has_se,
num_filters_list=[32, 64, 128, 256]):
super().__init__()
self.func_list = nn.LayerList()
for idx in range(len(num_channel_list)):
self.func_list.append(
BottleneckBlock(
num_channels=num_channel_list[idx],
num_filters=num_filters_list[idx],
has_se=has_se,
downsample=True))
def forward(self, x, res_dict=None):
out = []
for idx, xi in enumerate(x):
xi = self.func_list[idx](xi)
out.append(xi)
return out
class HRNet(TheseusLayer):
"""
HRNet
Args:
width: int=18. Base channel number of HRNet.
has_se: bool=False. If 'True', add se module to HRNet.
class_num: int=1000. Output num of last fc layer.
Returns:
model: nn.Layer. Specific HRNet model depends on args.
"""
def __init__(self,
stages_pattern,
width=18,
has_se=False,
class_num=1000,
return_patterns=None,
return_stages=None):
super().__init__()
self.width = width
self.has_se = has_se
self._class_num = class_num
channels_2 = [self.width, self.width * 2]
channels_3 = [self.width, self.width * 2, self.width * 4]
channels_4 = [
self.width, self.width * 2, self.width * 4, self.width * 8
]
self.conv_layer1_1 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act="relu")
self.conv_layer1_2 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=2,
act="relu")
self.layer1 = nn.Sequential(* [
BottleneckBlock(
num_channels=64 if i == 0 else 256,
num_filters=64,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False) for i in range(4)
])
self.conv_tr1_1 = ConvBNLayer(
num_channels=256, num_filters=width, filter_size=3)
self.conv_tr1_2 = ConvBNLayer(
num_channels=256, num_filters=width * 2, filter_size=3, stride=2)
self.st2 = Stage(
num_modules=1, num_filters=channels_2, has_se=self.has_se)
self.conv_tr2 = ConvBNLayer(
num_channels=width * 2,
num_filters=width * 4,
filter_size=3,
stride=2)
self.st3 = Stage(
num_modules=4, num_filters=channels_3, has_se=self.has_se)
self.conv_tr3 = ConvBNLayer(
num_channels=width * 4,
num_filters=width * 8,
filter_size=3,
stride=2)
self.st4 = Stage(
num_modules=3, num_filters=channels_4, has_se=self.has_se)
# classification
num_filters_list = [32, 64, 128, 256]
self.last_cls = LastClsOut(
num_channel_list=channels_4,
has_se=self.has_se,
num_filters_list=num_filters_list)
last_num_filters = [256, 512, 1024]
self.cls_head_conv_list = nn.LayerList()
for idx in range(3):
self.cls_head_conv_list.append(
ConvBNLayer(
num_channels=num_filters_list[idx] * 4,
num_filters=last_num_filters[idx],
filter_size=3,
stride=2))
self.conv_last = ConvBNLayer(
num_channels=1024, num_filters=2048, filter_size=1, stride=1)
self.avg_pool = nn.AdaptiveAvgPool2D(1)
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.fc = nn.Linear(
2048,
class_num,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.conv_layer1_1(x)
x = self.conv_layer1_2(x)
x = self.layer1(x)
tr1_1 = self.conv_tr1_1(x)
tr1_2 = self.conv_tr1_2(x)
x = self.st2([tr1_1, tr1_2])
tr2 = self.conv_tr2(x[-1])
x.append(tr2)
x = self.st3(x)
tr3 = self.conv_tr3(x[-1])
x.append(tr3)
x = self.st4(x)
x = self.last_cls(x)
y = x[0]
for idx in range(3):
y = paddle.add(x[idx + 1], self.cls_head_conv_list[idx](y))
y = self.conv_last(y)
y = self.avg_pool(y)
y = paddle.reshape(y, shape=[-1, y.shape[1]])
y = self.fc(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "The type of `pretrained` is not supported. Please use `str` or `bool`."
        )
def HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W18_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W18_C` model depends on args.
"""
model = HRNet(
width=18, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld)
return model
def HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W30_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W30_C` model depends on args.
"""
model = HRNet(
width=30, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld)
return model
def HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W32_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W32_C` model depends on args.
"""
model = HRNet(
width=32, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld)
return model
def HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W40_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W40_C` model depends on args.
"""
model = HRNet(
width=40, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld)
return model
def HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W44_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W44_C` model depends on args.
"""
model = HRNet(
width=44, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld)
return model
def HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W48_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W48_C` model depends on args.
"""
model = HRNet(
width=48, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld)
return model
def HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W60_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W60_C` model depends on args.
"""
model = HRNet(
width=60, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W60_C"], use_ssld)
return model
def HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
"""
HRNet_W64_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `HRNet_W64_C` model depends on args.
"""
model = HRNet(
width=64, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld)
return model
def SE_HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W18_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W18_C` model depends on args.
"""
model = HRNet(
width=18,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W18_C"], use_ssld)
return model
def SE_HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W30_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W30_C` model depends on args.
"""
model = HRNet(
width=30,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W30_C"], use_ssld)
return model
def SE_HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W32_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W32_C` model depends on args.
"""
model = HRNet(
width=32,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W32_C"], use_ssld)
return model
def SE_HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W40_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W40_C` model depends on args.
"""
model = HRNet(
width=40,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W40_C"], use_ssld)
return model
def SE_HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W44_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W44_C` model depends on args.
"""
model = HRNet(
width=44,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W44_C"], use_ssld)
return model
def SE_HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W48_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W48_C` model depends on args.
"""
model = HRNet(
width=48,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W48_C"], use_ssld)
return model
def SE_HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W60_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W60_C` model depends on args.
"""
model = HRNet(
width=60,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W60_C"], use_ssld)
return model
def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
"""
SE_HRNet_W64_C
Args:
        pretrained: bool=False or str. If `True`, load pretrained parameters; `False` otherwise.
                    If str, it is the path of the pretrained model.
        use_ssld: bool=False. Whether to use the distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `SE_HRNet_W64_C` model depends on args.
"""
model = HRNet(
width=64,
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
has_se=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld)
return model

@@ -0,0 +1,557 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"InceptionV3":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/InceptionV3_pretrained.pdparams"
}
MODEL_STAGES_PATTERN = {
"InceptionV3": [
"inception_block_list[2]", "inception_block_list[3]",
"inception_block_list[7]", "inception_block_list[8]",
"inception_block_list[10]"
]
}
__all__ = list(MODEL_URLS.keys())
'''
InceptionV3 config: dict.
key: the Inception block type used in InceptionV3.
values: the channel settings used to construct the blocks of that type.
'''
NET_CONFIG = {
"inception_a": [[192, 256, 288], [32, 64, 64]],
"inception_b": [288],
"inception_c": [[768, 768, 768, 768], [128, 160, 160, 192]],
"inception_d": [768],
"inception_e": [1280, 2048]
}
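# Illustrative reading of NET_CONFIG (an assumption based on the block classes
# below): "inception_a" pairs the input channels [192, 256, 288] with the
# pool_features [32, 64, 64] of three InceptionA blocks, "inception_c" pairs
# input channels with channels_7x7, and the single-list entries give the input
# channels of the InceptionB/D/E blocks.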
class ConvBNLayer(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
padding=0,
groups=1,
act="relu"):
super().__init__()
self.act = act
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
bias_attr=False)
self.bn = BatchNorm(num_filters)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.act:
x = self.relu(x)
return x
class InceptionStem(TheseusLayer):
def __init__(self):
super().__init__()
self.conv_1a_3x3 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act="relu")
self.conv_2a_3x3 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act="relu")
self.conv_2b_3x3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
padding=1,
act="relu")
self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
self.conv_3b_1x1 = ConvBNLayer(
num_channels=64, num_filters=80, filter_size=1, act="relu")
self.conv_4a_3x3 = ConvBNLayer(
num_channels=80, num_filters=192, filter_size=3, act="relu")
def forward(self, x):
x = self.conv_1a_3x3(x)
x = self.conv_2a_3x3(x)
x = self.conv_2b_3x3(x)
x = self.max_pool(x)
x = self.conv_3b_1x1(x)
x = self.conv_4a_3x3(x)
x = self.max_pool(x)
return x
class InceptionA(TheseusLayer):
def __init__(self, num_channels, pool_features):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch5x5_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=48,
filter_size=1,
act="relu")
self.branch5x5_2 = ConvBNLayer(
num_channels=48,
num_filters=64,
filter_size=5,
padding=2,
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=64,
num_filters=96,
filter_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3 = ConvBNLayer(
num_channels=96,
num_filters=96,
filter_size=3,
padding=1,
act="relu")
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=pool_features,
filter_size=1,
act="relu")
def forward(self, x):
branch1x1 = self.branch1x1(x)
branch5x5 = self.branch5x5_1(x)
branch5x5 = self.branch5x5_2(branch5x5)
branch3x3dbl = self.branch3x3dbl_1(x)
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
branch_pool = self.branch_pool(x)
branch_pool = self.branch_pool_conv(branch_pool)
x = paddle.concat(
[branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)
return x
class InceptionB(TheseusLayer):
def __init__(self, num_channels):
super().__init__()
self.branch3x3 = ConvBNLayer(
num_channels=num_channels,
num_filters=384,
filter_size=3,
stride=2,
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=64,
num_filters=96,
filter_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3 = ConvBNLayer(
num_channels=96,
num_filters=96,
filter_size=3,
stride=2,
act="relu")
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
def forward(self, x):
branch3x3 = self.branch3x3(x)
branch3x3dbl = self.branch3x3dbl_1(x)
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
branch_pool = self.branch_pool(x)
x = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
return x
class InceptionC(TheseusLayer):
def __init__(self, num_channels, channels_7x7):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch7x7_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=channels_7x7,
filter_size=1,
stride=1,
act="relu")
self.branch7x7_2 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(1, 7),
stride=1,
padding=(0, 3),
act="relu")
self.branch7x7_3 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=192,
filter_size=(7, 1),
stride=1,
padding=(3, 0),
act="relu")
self.branch7x7dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=channels_7x7,
filter_size=1,
act="relu")
self.branch7x7dbl_2 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7dbl_3 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(1, 7),
padding=(0, 3),
act="relu")
self.branch7x7dbl_4 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7dbl_5 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=192,
filter_size=(1, 7),
padding=(0, 3),
act="relu")
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
def forward(self, x):
branch1x1 = self.branch1x1(x)
branch7x7 = self.branch7x7_1(x)
branch7x7 = self.branch7x7_2(branch7x7)
branch7x7 = self.branch7x7_3(branch7x7)
branch7x7dbl = self.branch7x7dbl_1(x)
branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
branch_pool = self.branch_pool(x)
branch_pool = self.branch_pool_conv(branch_pool)
x = paddle.concat(
[branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)
return x
class InceptionD(TheseusLayer):
def __init__(self, num_channels):
super().__init__()
self.branch3x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch3x3_2 = ConvBNLayer(
num_channels=192,
num_filters=320,
filter_size=3,
stride=2,
act="relu")
self.branch7x7x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch7x7x3_2 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=(1, 7),
padding=(0, 3),
act="relu")
self.branch7x7x3_3 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7x3_4 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=3,
stride=2,
act="relu")
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
def forward(self, x):
branch3x3 = self.branch3x3_1(x)
branch3x3 = self.branch3x3_2(branch3x3)
branch7x7x3 = self.branch7x7x3_1(x)
branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
branch_pool = self.branch_pool(x)
x = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
return x
class InceptionE(TheseusLayer):
def __init__(self, num_channels):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=320,
filter_size=1,
act="relu")
self.branch3x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=384,
filter_size=1,
act="relu")
self.branch3x3_2a = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(1, 3),
padding=(0, 1),
act="relu")
self.branch3x3_2b = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(3, 1),
padding=(1, 0),
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=448,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=448,
num_filters=384,
filter_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3a = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(1, 3),
padding=(0, 1),
act="relu")
self.branch3x3dbl_3b = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(3, 1),
padding=(1, 0),
act="relu")
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
def forward(self, x):
branch1x1 = self.branch1x1(x)
branch3x3 = self.branch3x3_1(x)
branch3x3 = [
self.branch3x3_2a(branch3x3),
self.branch3x3_2b(branch3x3),
]
branch3x3 = paddle.concat(branch3x3, axis=1)
branch3x3dbl = self.branch3x3dbl_1(x)
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
branch3x3dbl = [
self.branch3x3dbl_3a(branch3x3dbl),
self.branch3x3dbl_3b(branch3x3dbl),
]
branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
branch_pool = self.branch_pool(x)
branch_pool = self.branch_pool_conv(branch_pool)
x = paddle.concat(
[branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)
return x
class Inception_V3(TheseusLayer):
"""
Inception_V3
Args:
config: dict. config of Inception_V3.
class_num: int=1000. The number of classes.
pretrained: (True or False) or path of pretrained_model. Whether to load the pretrained model.
Returns:
model: nn.Layer. Specific Inception_V3 model depends on args.
"""
def __init__(self,
config,
stages_pattern,
class_num=1000,
return_patterns=None,
return_stages=None):
super().__init__()
self.inception_a_list = config["inception_a"]
self.inception_c_list = config["inception_c"]
self.inception_b_list = config["inception_b"]
self.inception_d_list = config["inception_d"]
self.inception_e_list = config["inception_e"]
self.inception_stem = InceptionStem()
self.inception_block_list = nn.LayerList()
for i in range(len(self.inception_a_list[0])):
inception_a = InceptionA(self.inception_a_list[0][i],
self.inception_a_list[1][i])
self.inception_block_list.append(inception_a)
for i in range(len(self.inception_b_list)):
inception_b = InceptionB(self.inception_b_list[i])
self.inception_block_list.append(inception_b)
for i in range(len(self.inception_c_list[0])):
inception_c = InceptionC(self.inception_c_list[0][i],
self.inception_c_list[1][i])
self.inception_block_list.append(inception_c)
for i in range(len(self.inception_d_list)):
inception_d = InceptionD(self.inception_d_list[i])
self.inception_block_list.append(inception_d)
for i in range(len(self.inception_e_list)):
inception_e = InceptionE(self.inception_e_list[i])
self.inception_block_list.append(inception_e)
self.avg_pool = AdaptiveAvgPool2D(1)
self.dropout = Dropout(p=0.2, mode="downscale_in_infer")
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.fc = Linear(
2048,
class_num,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr())
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.inception_stem(x)
for inception_block in self.inception_block_list:
x = inception_block(x)
x = self.avg_pool(x)
x = paddle.reshape(x, shape=[-1, 2048])
x = self.dropout(x)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV3(pretrained=False, use_ssld=False, **kwargs):
"""
InceptionV3
Args:
        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
                    If str, means the path of the pretrained model.
        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
    Returns:
        model: nn.Layer. Specific `InceptionV3` model depends on args.
"""
model = Inception_V3(
NET_CONFIG,
stages_pattern=MODEL_STAGES_PATTERN["InceptionV3"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld)
return model
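if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch): 299x299 follows the original
    # InceptionV3 paper, but any input large enough for the stem works
    # because the head uses adaptive average pooling.
    model = InceptionV3(pretrained=False)
    x = paddle.rand([1, 3, 299, 299])
    print(model(x).shape)  # [1, 1000]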

@ -0,0 +1,257 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, ReLU, Flatten
from paddle.nn import AdaptiveAvgPool2D
from paddle.nn.initializer import KaimingNormal
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"MobileNetV1_x0_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_25_pretrained.pdparams",
"MobileNetV1_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_5_pretrained.pdparams",
"MobileNetV1_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_75_pretrained.pdparams",
"MobileNetV1":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_pretrained.pdparams"
}
MODEL_STAGES_PATTERN = {
"MobileNetV1": ["blocks[0]", "blocks[2]", "blocks[4]", "blocks[10]"]
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(TheseusLayer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
num_groups=1):
super().__init__()
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = BatchNorm(num_filters)
self.relu = ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class DepthwiseSeparable(TheseusLayer):
def __init__(self, num_channels, num_filters1, num_filters2, num_groups,
stride, scale):
super().__init__()
self.depthwise_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=int(num_filters1 * scale),
filter_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups * scale))
self.pointwise_conv = ConvBNLayer(
num_channels=int(num_filters1 * scale),
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0)
def forward(self, x):
x = self.depthwise_conv(x)
x = self.pointwise_conv(x)
return x
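# Note (added for clarity): DepthwiseSeparable factorizes a standard k x k
# convolution into a per-channel k x k depthwise convolution
# (num_groups == num_channels) followed by a 1x1 pointwise convolution; for
# k = 3 this cuts the multiply-accumulate cost roughly by a factor of
# 1 / num_filters2 + 1 / 9, which is the core idea of MobileNetV1.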
class MobileNet(TheseusLayer):
"""
MobileNet
Args:
scale: float=1.0. The coefficient that controls the size of network parameters.
class_num: int=1000. The number of classes.
Returns:
model: nn.Layer. Specific MobileNet model depends on args.
"""
def __init__(self,
stages_pattern,
scale=1.0,
class_num=1000,
return_patterns=None,
return_stages=None):
super().__init__()
self.scale = scale
self.conv = ConvBNLayer(
num_channels=3,
filter_size=3,
num_filters=int(32 * scale),
stride=2,
padding=1)
#num_channels, num_filters1, num_filters2, num_groups, stride
self.cfg = [[int(32 * scale), 32, 64, 32, 1],
[int(64 * scale), 64, 128, 64, 2],
[int(128 * scale), 128, 128, 128, 1],
[int(128 * scale), 128, 256, 128, 2],
[int(256 * scale), 256, 256, 256, 1],
[int(256 * scale), 256, 512, 256, 2],
[int(512 * scale), 512, 512, 512, 1],
[int(512 * scale), 512, 512, 512, 1],
[int(512 * scale), 512, 512, 512, 1],
[int(512 * scale), 512, 512, 512, 1],
[int(512 * scale), 512, 512, 512, 1],
[int(512 * scale), 512, 1024, 512, 2],
[int(1024 * scale), 1024, 1024, 1024, 1]]
self.blocks = nn.Sequential(* [
DepthwiseSeparable(
num_channels=params[0],
num_filters1=params[1],
num_filters2=params[2],
num_groups=params[3],
stride=params[4],
scale=scale) for params in self.cfg
])
self.avg_pool = AdaptiveAvgPool2D(1)
self.flatten = Flatten(start_axis=1, stop_axis=-1)
self.fc = Linear(
int(1024 * scale),
class_num,
weight_attr=ParamAttr(initializer=KaimingNormal()))
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.conv(x)
x = self.blocks(x)
x = self.avg_pool(x)
x = self.flatten(x)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV1_x0_25
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV1_x0_25` model depends on args.
"""
model = MobileNet(
scale=0.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"],
use_ssld)
return model
def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV1_x0_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV1_x0_5` model depends on args.
"""
model = MobileNet(
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"],
use_ssld)
return model
def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV1_x0_75
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV1_x0_75` model depends on args.
"""
model = MobileNet(
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"],
use_ssld)
return model
def MobileNetV1(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV1
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV1` model depends on args.
"""
model = MobileNet(
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld)
return model
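if __name__ == "__main__":
    # Width-multiplier sketch (illustrative): `scale` thins every layer, e.g.
    # MobileNetV1_x0_25 uses int(32 * 0.25) = 8 stem filters and a final
    # feature width of int(1024 * 0.25) = 256, so the parameter count drops
    # roughly quadratically with `scale`.
    for build in (MobileNetV1_x0_25, MobileNetV1):
        model = build(pretrained=False)
        print(build.__name__, sum(p.size for p in model.parameters()))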

@ -0,0 +1,586 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddle.regularizer import L2Decay
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"MobileNetV3_small_x0_35":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_35_pretrained.pdparams",
"MobileNetV3_small_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_5_pretrained.pdparams",
"MobileNetV3_small_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_75_pretrained.pdparams",
"MobileNetV3_small_x1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_0_pretrained.pdparams",
"MobileNetV3_small_x1_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_25_pretrained.pdparams",
"MobileNetV3_large_x0_35":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_35_pretrained.pdparams",
"MobileNetV3_large_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_5_pretrained.pdparams",
"MobileNetV3_large_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_75_pretrained.pdparams",
"MobileNetV3_large_x1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_0_pretrained.pdparams",
"MobileNetV3_large_x1_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_25_pretrained.pdparams",
}
MODEL_STAGES_PATTERN = {
"MobileNetV3_small":
["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
"MobileNetV3_large":
["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
}
__all__ = list(MODEL_URLS.keys())
# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
# k: kernel_size
# exp: middle channel number in depthwise block
# c: output channel number in depthwise block
# se: whether to use SE block
# act: which activation to use
# s: stride in depthwise block
NET_CONFIG = {
"large": [
# k, exp, c, se, act, s
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1],
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1],
[3, 240, 80, False, "hardswish", 2],
[3, 200, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 184, 80, False, "hardswish", 1],
[3, 480, 112, True, "hardswish", 1],
[3, 672, 112, True, "hardswish", 1],
[5, 672, 160, True, "hardswish", 2],
[5, 960, 160, True, "hardswish", 1],
[5, 960, 160, True, "hardswish", 1],
],
"small": [
# k, exp, c, se, act, s
[3, 16, 16, True, "relu", 2],
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1],
[5, 96, 40, True, "hardswish", 2],
[5, 240, 40, True, "hardswish", 1],
[5, 240, 40, True, "hardswish", 1],
[5, 120, 48, True, "hardswish", 1],
[5, 144, 48, True, "hardswish", 1],
[5, 288, 96, True, "hardswish", 2],
[5, 576, 96, True, "hardswish", 1],
[5, 576, 96, True, "hardswish", 1],
]
}
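# Illustrative reading of one row (added for clarity): with scale=1.0 the
# first "large" entry [3, 16, 16, False, "relu", 1] becomes a ResidualUnit
# with a 3x3 depthwise conv, 16 expanded (middle) channels, 16 output
# channels, no SE block, ReLU activation and stride 1; its input width is
# the stem output, _make_divisible(STEM_CONV_NUMBER * scale) = 16.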
# first conv output channel number in MobileNetV3
STEM_CONV_NUMBER = 16
# last second conv output channel for "small"
LAST_SECOND_CONV_SMALL = 576
# last second conv output channel for "large"
LAST_SECOND_CONV_LARGE = 960
# last conv output channel number for "large" and "small"
LAST_CONV = 1280
def _make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
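# Worked examples (illustrative): _make_divisible rounds a channel count to
# the nearest multiple of `divisor` (default 8), never going below the
# divisor and never dropping more than 10% below the requested value:
#
#     _make_divisible(16 * 0.35)   # 5.6  -> 8   (floored at the divisor)
#     _make_divisible(240 * 0.5)   # 120  -> 120 (already a multiple of 8)
#     _make_divisible(112 * 1.25)  # 140  -> 144 (rounded to a multiple of 8)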
def _create_act(act):
if act == "hardswish":
return nn.Hardswish()
elif act == "relu":
return nn.ReLU()
elif act is None:
return None
else:
raise RuntimeError(
"The activation function is not supported: {}".format(act))
class MobileNetV3(TheseusLayer):
"""
MobileNetV3
Args:
config: list. MobileNetV3 depthwise blocks config.
scale: float=1.0. The coefficient that controls the size of network parameters.
class_num: int=1000. The number of classes.
inplanes: int=16. The output channel number of first convolution layer.
class_squeeze: int=960. The output channel number of penultimate convolution layer.
class_expand: int=1280. The output channel number of last convolution layer.
dropout_prob: float=0.2. Probability of setting units to zero.
Returns:
model: nn.Layer. Specific MobileNetV3 model depends on args.
"""
def __init__(self,
config,
stages_pattern,
scale=1.0,
class_num=1000,
inplanes=STEM_CONV_NUMBER,
class_squeeze=LAST_SECOND_CONV_LARGE,
class_expand=LAST_CONV,
dropout_prob=0.2,
return_patterns=None,
return_stages=None):
super().__init__()
self.cfg = config
self.scale = scale
self.inplanes = inplanes
self.class_squeeze = class_squeeze
self.class_expand = class_expand
self.class_num = class_num
self.conv = ConvBNLayer(
in_c=3,
out_c=_make_divisible(self.inplanes * self.scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
if_act=True,
act="hardswish")
self.blocks = nn.Sequential(* [
ResidualUnit(
in_c=_make_divisible(self.inplanes * self.scale if i == 0 else
self.cfg[i - 1][2] * self.scale),
mid_c=_make_divisible(self.scale * exp),
out_c=_make_divisible(self.scale * c),
filter_size=k,
stride=s,
use_se=se,
act=act) for i, (k, exp, c, se, act, s) in enumerate(self.cfg)
])
self.last_second_conv = ConvBNLayer(
in_c=_make_divisible(self.cfg[-1][2] * self.scale),
out_c=_make_divisible(self.scale * self.class_squeeze),
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
act="hardswish")
self.avg_pool = AdaptiveAvgPool2D(1)
self.last_conv = Conv2D(
in_channels=_make_divisible(self.scale * self.class_squeeze),
out_channels=self.class_expand,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.hardswish = nn.Hardswish()
if dropout_prob is not None:
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
else:
self.dropout = None
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.fc = Linear(self.class_expand, class_num)
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.conv(x)
x = self.blocks(x)
x = self.last_second_conv(x)
x = self.avg_pool(x)
x = self.last_conv(x)
x = self.hardswish(x)
if self.dropout is not None:
x = self.dropout(x)
x = self.flatten(x)
x = self.fc(x)
return x
class ConvBNLayer(TheseusLayer):
def __init__(self,
in_c,
out_c,
filter_size,
stride,
padding,
num_groups=1,
if_act=True,
act=None):
super().__init__()
self.conv = Conv2D(
in_channels=in_c,
out_channels=out_c,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
bias_attr=False)
self.bn = BatchNorm(
num_channels=out_c,
act=None,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.if_act = if_act
self.act = _create_act(act)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
x = self.act(x)
return x
class ResidualUnit(TheseusLayer):
def __init__(self,
in_c,
mid_c,
out_c,
filter_size,
stride,
use_se,
act=None):
super().__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.if_se = use_se
self.expand_conv = ConvBNLayer(
in_c=in_c,
out_c=mid_c,
filter_size=1,
stride=1,
padding=0,
if_act=True,
act=act)
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding=int((filter_size - 1) // 2),
num_groups=mid_c,
if_act=True,
act=act)
if self.if_se:
self.mid_se = SEModule(mid_c)
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
filter_size=1,
stride=1,
padding=0,
if_act=False,
act=None)
def forward(self, x):
identity = x
x = self.expand_conv(x)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = paddle.add(identity, x)
return x
# nn.Hardsigmoid does not expose the "slope" and "offset" arguments of
# nn.functional.hardsigmoid, so a thin wrapper is defined here.
class Hardsigmoid(TheseusLayer):
def __init__(self, slope=0.2, offset=0.5):
super().__init__()
self.slope = slope
self.offset = offset
def forward(self, x):
return nn.functional.hardsigmoid(
x, slope=self.slope, offset=self.offset)
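# Equivalence sketch (illustrative): with the default slope/offset this layer
# computes clip(0.2 * x + 0.5, 0, 1), e.g.
#
#     m = Hardsigmoid(slope=0.2, offset=0.5)
#     m(paddle.to_tensor([-3.0, 0.0, 3.0]))  # -> [0.0, 0.5, 1.0]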
class SEModule(TheseusLayer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
return paddle.multiply(x=identity, y=x)
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_small_x0_35
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_small_x0_35` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.35,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_SMALL,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"],
use_ssld)
return model
def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_small_x0_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_small_x0_5` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_SMALL,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"],
use_ssld)
return model
def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_small_x0_75
Args:
        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
                    If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_small_x0_75` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_SMALL,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"],
use_ssld)
return model
def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_small_x1_0
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_small_x1_0` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_SMALL,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"],
use_ssld)
return model
def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_small_x1_25
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_small_x1_25` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["small"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_SMALL,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"],
use_ssld)
return model
def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_large_x0_35
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_large_x0_35` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.35,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
class_squeeze=LAST_SECOND_CONV_LARGE,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"],
use_ssld)
return model
def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_large_x0_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_large_x0_5` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.5,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
class_squeeze=LAST_SECOND_CONV_LARGE,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"],
use_ssld)
return model
def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_large_x0_75
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_large_x0_75` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=0.75,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
class_squeeze=LAST_SECOND_CONV_LARGE,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"],
use_ssld)
return model
def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_large_x1_0
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_large_x1_0` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.0,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
class_squeeze=LAST_SECOND_CONV_LARGE,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"],
use_ssld)
return model
def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs):
"""
MobileNetV3_large_x1_25
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `MobileNetV3_large_x1_25` model depends on args.
"""
model = MobileNetV3(
config=NET_CONFIG["large"],
scale=1.25,
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
class_squeeze=LAST_SECOND_CONV_LARGE,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"],
use_ssld)
return model

@ -0,0 +1,419 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"PPLCNet_x0_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams",
"PPLCNet_x0_35":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams",
"PPLCNet_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams",
"PPLCNet_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams",
"PPLCNet_x1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams",
"PPLCNet_x1_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams",
"PPLCNet_x2_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams",
"PPLCNet_x2_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams"
}
MODEL_STAGES_PATTERN = {
"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
}
__all__ = list(MODEL_URLS.keys())
# Each element (a list) represents a depthwise block composed of k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block
NET_CONFIG = {
"blocks2":
#k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
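# Illustrative note (added for clarity): together with the stride-2 stem
# conv, the config above downsamples by a factor of 32 in total (one
# stride-2 block in each of blocks3..blocks6), so a 224x224 input reaches
# blocks6 as a 7x7 feature map with make_divisible(512 * scale) channels.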
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNLayer(TheseusLayer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
num_groups=1):
super().__init__()
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = BatchNorm(
num_filters,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.hardswish = nn.Hardswish()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.hardswish(x)
return x
class DepthwiseSeparable(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
stride,
dw_size=3,
use_se=False):
super().__init__()
self.use_se = use_se
self.dw_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_channels,
filter_size=dw_size,
stride=stride,
num_groups=num_channels)
if use_se:
self.se = SEModule(num_channels)
self.pw_conv = ConvBNLayer(
num_channels=num_channels,
filter_size=1,
num_filters=num_filters,
stride=1)
def forward(self, x):
x = self.dw_conv(x)
if self.use_se:
x = self.se(x)
x = self.pw_conv(x)
return x
class SEModule(TheseusLayer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = nn.Hardsigmoid()
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = paddle.multiply(x=identity, y=x)
return x
class PPLCNet(TheseusLayer):
def __init__(self,
stages_pattern,
scale=1.0,
class_num=1000,
dropout_prob=0.2,
class_expand=1280,
return_patterns=None,
return_stages=None):
super().__init__()
self.scale = scale
self.class_expand = class_expand
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
num_filters=make_divisible(16 * scale),
stride=2)
self.blocks2 = nn.Sequential(* [
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(* [
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
])
self.blocks4 = nn.Sequential(* [
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
])
self.blocks5 = nn.Sequential(* [
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
])
self.blocks6 = nn.Sequential(* [
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
])
self.avg_pool = AdaptiveAvgPool2D(1)
self.last_conv = Conv2D(
in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
out_channels=self.class_expand,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.hardswish = nn.Hardswish()
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.fc = Linear(self.class_expand, class_num)
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
x = self.conv1(x)
x = self.blocks2(x)
x = self.blocks3(x)
x = self.blocks4(x)
x = self.blocks5(x)
x = self.blocks6(x)
x = self.avg_pool(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.dropout(x)
x = self.flatten(x)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def PPLCNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x0_25
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x0_25` model depends on args.
"""
model = PPLCNet(
scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_25"], use_ssld)
return model
def PPLCNet_x0_35(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x0_35
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x0_35` model depends on args.
"""
model = PPLCNet(
scale=0.35, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_35"], use_ssld)
return model
def PPLCNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x0_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x0_5` model depends on args.
"""
model = PPLCNet(
scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_5"], use_ssld)
return model
def PPLCNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x0_75
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x0_75` model depends on args.
"""
model = PPLCNet(
scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_75"], use_ssld)
return model
def PPLCNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x1_0
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x1_0` model depends on args.
"""
model = PPLCNet(
scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_0"], use_ssld)
return model
def PPLCNet_x1_5(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x1_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x1_5` model depends on args.
"""
model = PPLCNet(
scale=1.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_5"], use_ssld)
return model
def PPLCNet_x2_0(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x2_0
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x2_0` model depends on args.
"""
model = PPLCNet(
scale=2.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_0"], use_ssld)
return model
def PPLCNet_x2_5(pretrained=False, use_ssld=False, **kwargs):
"""
PPLCNet_x2_5
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `PPLCNet_x2_5` model depends on args.
"""
model = PPLCNet(
scale=2.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_5"], use_ssld)
return model

@ -0,0 +1,591 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNet18":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_pretrained.pdparams",
"ResNet18_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_vd_pretrained.pdparams",
"ResNet34":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_pretrained.pdparams",
"ResNet34_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_pretrained.pdparams",
"ResNet50":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_pretrained.pdparams",
"ResNet50_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_pretrained.pdparams",
"ResNet101":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_pretrained.pdparams",
"ResNet101_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_pretrained.pdparams",
"ResNet152":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_pretrained.pdparams",
"ResNet152_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_vd_pretrained.pdparams",
"ResNet200_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet200_vd_pretrained.pdparams",
}
MODEL_STAGES_PATTERN = {
"ResNet18": ["blocks[1]", "blocks[3]", "blocks[5]", "blocks[7]"],
"ResNet34": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
"ResNet50": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
"ResNet101": ["blocks[2]", "blocks[6]", "blocks[29]", "blocks[32]"],
"ResNet152": ["blocks[2]", "blocks[10]", "blocks[46]", "blocks[49]"],
"ResNet200": ["blocks[2]", "blocks[14]", "blocks[62]", "blocks[65]"]
}
__all__ = list(MODEL_URLS.keys())
'''
ResNet config: dict.
    key: depth of ResNet.
    values: config dict of the specific model.
        keys:
            block_type: which of the two ResNet blocks to use; BasicBlock and BottleneckBlock are the options.
            block_depth: the number of blocks in each of the four stages.
            num_channels: the number of channels entering each stage.
'''
NET_CONFIG = {
"18": {
"block_type": "BasicBlock",
"block_depth": [2, 2, 2, 2],
"num_channels": [64, 64, 128, 256]
},
"34": {
"block_type": "BasicBlock",
"block_depth": [3, 4, 6, 3],
"num_channels": [64, 64, 128, 256]
},
"50": {
"block_type": "BottleneckBlock",
"block_depth": [3, 4, 6, 3],
"num_channels": [64, 256, 512, 1024]
},
"101": {
"block_type": "BottleneckBlock",
"block_depth": [3, 4, 23, 3],
"num_channels": [64, 256, 512, 1024]
},
"152": {
"block_type": "BottleneckBlock",
"block_depth": [3, 8, 36, 3],
"num_channels": [64, 256, 512, 1024]
},
"200": {
"block_type": "BottleneckBlock",
"block_depth": [3, 12, 48, 3],
"num_channels": [64, 256, 512, 1024]
},
}
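# Illustrative sanity check (added for clarity, not part of the upstream
# file): the block depths match the stage boundaries in MODEL_STAGES_PATTERN,
# e.g. ResNet50 builds 3 + 4 + 6 + 3 = 16 blocks (indices 0..15), so its
# pattern ends at "blocks[15]".
assert sum(NET_CONFIG["50"]["block_depth"]) == 16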
class ConvBNLayer(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
lr_mult=1.0,
data_format="NCHW"):
super().__init__()
self.is_vd_mode = is_vd_mode
self.act = act
self.avg_pool = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(learning_rate=lr_mult),
bias_attr=False,
data_format=data_format)
self.bn = BatchNorm(
num_filters,
param_attr=ParamAttr(learning_rate=lr_mult),
bias_attr=ParamAttr(learning_rate=lr_mult),
data_layout=data_format)
self.relu = nn.ReLU()
def forward(self, x):
if self.is_vd_mode:
x = self.avg_pool(x)
x = self.conv(x)
x = self.bn(x)
if self.act:
x = self.relu(x)
return x
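# Note (added for clarity): `is_vd_mode` implements the ResNet-vd shortcut
# trick: instead of downsampling with a stride-2 1x1 convolution, the input
# is first reduced with a 2x2 average pool and then convolved with stride 1,
# so the strided 1x1 kernel no longer skips three quarters of the activations.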
class BottleneckBlock(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
lr_mult=1.0,
data_format="NCHW"):
super().__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu",
lr_mult=lr_mult,
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
lr_mult=lr_mult,
data_format=data_format)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
lr_mult=lr_mult,
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride if if_first else 1,
is_vd_mode=False if if_first else True,
lr_mult=lr_mult,
data_format=data_format)
self.relu = nn.ReLU()
self.shortcut = shortcut
def forward(self, x):
identity = x
x = self.conv0(x)
x = self.conv1(x)
x = self.conv2(x)
if self.shortcut:
short = identity
else:
short = self.short(identity)
x = paddle.add(x=x, y=short)
x = self.relu(x)
return x
class BasicBlock(TheseusLayer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
lr_mult=1.0,
data_format="NCHW"):
super().__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
lr_mult=lr_mult,
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
lr_mult=lr_mult,
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=stride if if_first else 1,
is_vd_mode=False if if_first else True,
lr_mult=lr_mult,
data_format=data_format)
self.shortcut = shortcut
self.relu = nn.ReLU()
def forward(self, x):
identity = x
x = self.conv0(x)
x = self.conv1(x)
if self.shortcut:
short = identity
else:
short = self.short(identity)
x = paddle.add(x=x, y=short)
x = self.relu(x)
return x
class ResNet(TheseusLayer):
"""
ResNet
Args:
config: dict. config of ResNet.
version: str="vb". Different version of ResNet, version vd can perform better.
class_num: int=1000. The number of classes.
lr_mult_list: list. Control the learning rate of different stages.
Returns:
model: nn.Layer. Specific ResNet model depends on args.
"""
def __init__(self,
config,
stages_pattern,
version="vb",
class_num=1000,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
data_format="NCHW",
input_image_channel=3,
return_patterns=None,
return_stages=None):
super().__init__()
self.cfg = config
self.lr_mult_list = lr_mult_list
self.is_vd_mode = version == "vd"
self.class_num = class_num
self.num_filters = [64, 128, 256, 512]
self.block_depth = self.cfg["block_depth"]
self.block_type = self.cfg["block_type"]
self.num_channels = self.cfg["num_channels"]
self.channels_mult = 1 if self.num_channels[-1] == 256 else 4
        assert isinstance(self.lr_mult_list, (
            list, tuple
        )), "lr_mult_list should be a list or tuple but got {}".format(
            type(self.lr_mult_list))
        assert len(self.lr_mult_list
                   ) == 5, "lr_mult_list length should be 5 but got {}".format(
                       len(self.lr_mult_list))
self.stem_cfg = {
#num_channels, num_filters, filter_size, stride
"vb": [[input_image_channel, 64, 7, 2]],
"vd":
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
}
self.stem = nn.Sequential(* [
ConvBNLayer(
num_channels=in_c,
num_filters=out_c,
filter_size=k,
stride=s,
act="relu",
lr_mult=self.lr_mult_list[0],
data_format=data_format)
for in_c, out_c, k, s in self.stem_cfg[version]
])
self.max_pool = MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=data_format)
block_list = []
for block_idx in range(len(self.block_depth)):
shortcut = False
for i in range(self.block_depth[block_idx]):
block_list.append(globals()[self.block_type](
num_channels=self.num_channels[block_idx] if i == 0 else
self.num_filters[block_idx] * self.channels_mult,
num_filters=self.num_filters[block_idx],
stride=2 if i == 0 and block_idx != 0 else 1,
shortcut=shortcut,
if_first=block_idx == i == 0 if version == "vd" else True,
lr_mult=self.lr_mult_list[block_idx + 1],
data_format=data_format))
shortcut = True
self.blocks = nn.Sequential(*block_list)
self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format)
self.flatten = nn.Flatten()
self.avg_pool_channels = self.num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0)
self.fc = Linear(
self.avg_pool_channels,
self.class_num,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
self.data_format = data_format
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, x):
with paddle.static.amp.fp16_guard():
if self.data_format == "NHWC":
x = paddle.transpose(x, [0, 2, 3, 1])
x.stop_gradient = True
x = self.stem(x)
x = self.max_pool(x)
x = self.blocks(x)
x = self.avg_pool(x)
x = self.flatten(x)
x = self.fc(x)
return x
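# Usage sketch for the stage-wise learning-rate multipliers (illustrative):
# entry 0 of lr_mult_list scales the stem and entries 1..4 scale the four
# block stages; a multiplier of 0.0 effectively freezes that stage, e.g.
#
#     model = ResNet50(pretrained=True,
#                      lr_mult_list=[0.0, 0.1, 0.1, 0.2, 1.0])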
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def ResNet18(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet18
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ResNet18` model depends on args.
"""
model = ResNet(
config=NET_CONFIG["18"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
version="vb",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet18"], use_ssld)
return model
def ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet18_vd
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ResNet18_vd` model depends on args.
"""
model = ResNet(
config=NET_CONFIG["18"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet18_vd"], use_ssld)
return model
def ResNet34(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet34
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ResNet34` model depends on args.
"""
model = ResNet(
config=NET_CONFIG["34"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
version="vb",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet34"], use_ssld)
return model
def ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet34_vd
Args:
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
Returns:
model: nn.Layer. Specific `ResNet34_vd` model depends on args.
"""
model = ResNet(
config=NET_CONFIG["34"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet34_vd"], use_ssld)
return model
def ResNet50(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet50
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet50` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["50"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
version="vb",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld)
return model
def ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet50_vd
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet50_vd` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["50"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vd"], use_ssld)
return model
def ResNet101(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet101
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet101` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["101"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
version="vb",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet101"], use_ssld)
return model
def ResNet101_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet101_vd
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet101_vd` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["101"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet101_vd"], use_ssld)
return model
def ResNet152(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet152
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet152` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["152"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
version="vb",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet152"], use_ssld)
return model
def ResNet152_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet152_vd
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet152_vd` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["152"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet152_vd"], use_ssld)
return model
def ResNet200_vd(pretrained=False, use_ssld=False, **kwargs):
"""
ResNet200_vd
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `ResNet200_vd` model built according to the args.
"""
model = ResNet(
config=NET_CONFIG["200"],
stages_pattern=MODEL_STAGES_PATTERN["ResNet200"],
version="vd",
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["ResNet200_vd"], use_ssld)
return model
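# --- Added usage sketch (not in the original commit) ---
# All builders above share one calling convention: `pretrained` may be False
# (random init), True (download weights from MODEL_URLS), or a local path
# string; `use_ssld=True` selects the SSLD-distilled weights. The "_vd"
# variants differ only in the deep stem and avg-pool downsample shortcut.
# A minimal smoke test, assuming a working paddle install:
if __name__ == "__main__":
    import paddle
    net = ResNet50_vd(pretrained=False, class_num=1000)
    logits = net(paddle.rand([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 1000]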

@ -0,0 +1,259 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import MaxPool2D
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"VGG11":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG11_pretrained.pdparams",
"VGG13":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG13_pretrained.pdparams",
"VGG16":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams",
"VGG19":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams",
}
MODEL_STAGES_PATTERN = {
"VGG": [
"conv_block_1", "conv_block_2", "conv_block_3", "conv_block_4",
"conv_block_5"
]
}
__all__ = list(MODEL_URLS.keys())
# VGG config
# key: VGG network depth
# value: conv num in different blocks
NET_CONFIG = {
11: [1, 1, 2, 2, 2],
13: [2, 2, 2, 2, 2],
16: [2, 2, 3, 3, 3],
19: [2, 2, 4, 4, 4]
}
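# Added worked example: NET_CONFIG[16] = [2, 2, 3, 3, 3] means 2+2+3+3+3 = 13
# conv layers across the five blocks; with the three FC layers in VGGNet this
# gives the "16" in VGG16 (likewise 8 convs + 3 FC = VGG11).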
class ConvBlock(TheseusLayer):
def __init__(self, input_channels, output_channels, groups):
super().__init__()
self.groups = groups
self.conv1 = Conv2D(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
        if groups in (2, 3, 4):
self.conv2 = Conv2D(
in_channels=output_channels,
out_channels=output_channels,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
        if groups in (3, 4):
self.conv3 = Conv2D(
in_channels=output_channels,
out_channels=output_channels,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
if groups == 4:
self.conv4 = Conv2D(
in_channels=output_channels,
out_channels=output_channels,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)
self.max_pool = MaxPool2D(kernel_size=2, stride=2, padding=0)
self.relu = nn.ReLU()
def forward(self, inputs):
x = self.conv1(inputs)
x = self.relu(x)
        if self.groups in (2, 3, 4):
x = self.conv2(x)
x = self.relu(x)
        if self.groups in (3, 4):
x = self.conv3(x)
x = self.relu(x)
if self.groups == 4:
x = self.conv4(x)
x = self.relu(x)
x = self.max_pool(x)
return x
class VGGNet(TheseusLayer):
"""
VGGNet
Args:
config: list. VGGNet config.
        stop_grad_layers: int=0. Parameters of the first `stop_grad_layers` conv blocks
                          are frozen (`param.trainable` is set to False).
        class_num: int=1000. The number of classes.
    Returns:
        model: nn.Layer. A VGG model built according to the args.
"""
def __init__(self,
config,
stages_pattern,
stop_grad_layers=0,
class_num=1000,
return_patterns=None,
return_stages=None):
super().__init__()
self.stop_grad_layers = stop_grad_layers
self.conv_block_1 = ConvBlock(3, 64, config[0])
self.conv_block_2 = ConvBlock(64, 128, config[1])
self.conv_block_3 = ConvBlock(128, 256, config[2])
self.conv_block_4 = ConvBlock(256, 512, config[3])
self.conv_block_5 = ConvBlock(512, 512, config[4])
self.relu = nn.ReLU()
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
for idx, block in enumerate([
self.conv_block_1, self.conv_block_2, self.conv_block_3,
self.conv_block_4, self.conv_block_5
]):
if self.stop_grad_layers >= idx + 1:
for param in block.parameters():
param.trainable = False
self.drop = Dropout(p=0.5, mode="downscale_in_infer")
self.fc1 = Linear(7 * 7 * 512, 4096)
self.fc2 = Linear(4096, 4096)
self.fc3 = Linear(4096, class_num)
super().init_res(
stages_pattern,
return_patterns=return_patterns,
return_stages=return_stages)
def forward(self, inputs):
x = self.conv_block_1(inputs)
x = self.conv_block_2(x)
x = self.conv_block_3(x)
x = self.conv_block_4(x)
x = self.conv_block_5(x)
x = self.flatten(x)
x = self.fc1(x)
x = self.relu(x)
x = self.drop(x)
x = self.fc2(x)
x = self.relu(x)
x = self.drop(x)
x = self.fc3(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def VGG11(pretrained=False, use_ssld=False, **kwargs):
"""
VGG11
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `VGG11` model built according to the args.
"""
model = VGGNet(
config=NET_CONFIG[11],
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["VGG11"], use_ssld)
return model
def VGG13(pretrained=False, use_ssld=False, **kwargs):
"""
VGG13
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `VGG13` model built according to the args.
"""
model = VGGNet(
config=NET_CONFIG[13],
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["VGG13"], use_ssld)
return model
def VGG16(pretrained=False, use_ssld=False, **kwargs):
"""
VGG16
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `VGG16` model built according to the args.
"""
model = VGGNet(
config=NET_CONFIG[16],
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["VGG16"], use_ssld)
return model
def VGG19(pretrained=False, use_ssld=False, **kwargs):
"""
VGG19
Args:
        pretrained: bool=False or str. If `True`, load the pretrained parameters;
                    if a str, it is the local path of the pretrained model.
        use_ssld: bool=False. Whether to use the SSLD distillation-pretrained model
                  when pretrained=True.
    Returns:
        model: nn.Layer. A `VGG19` model built according to the args.
"""
model = VGGNet(
config=NET_CONFIG[19],
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["VGG19"], use_ssld)
return model
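# --- Added usage sketch (not in the original commit) ---
# `stop_grad_layers` freezes the first N conv blocks, which is handy for
# fine-tuning. A quick check, assuming a working paddle install:
if __name__ == "__main__":
    import paddle
    net = VGG16(pretrained=False, stop_grad_layers=2)
    print(net(paddle.rand([1, 3, 224, 224])).shape)  # expected: [1, 1000]
    # The first two blocks are frozen, the rest remain trainable.
    print(any(p.trainable for p in net.conv_block_2.parameters()))  # False
    print(all(p.trainable for p in net.conv_block_3.parameters()))  # True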

@ -0,0 +1,168 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"AlexNet":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class ConvPoolLayer(nn.Layer):
def __init__(self,
input_channels,
output_channels,
filter_size,
stride,
padding,
stdv,
groups=1,
act=None,
name=None):
super(ConvPoolLayer, self).__init__()
self.relu = ReLU() if act == "relu" else None
self._conv = Conv2D(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(
name=name + "_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name=name + "_offset", initializer=Uniform(-stdv, stdv)))
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
def forward(self, inputs):
x = self._conv(inputs)
if self.relu is not None:
x = self.relu(x)
x = self._pool(x)
return x
class AlexNetDY(nn.Layer):
def __init__(self, class_num=1000):
super(AlexNetDY, self).__init__()
stdv = 1.0 / math.sqrt(3 * 11 * 11)
self._conv1 = ConvPoolLayer(
3, 64, 11, 4, 2, stdv, act="relu", name="conv1")
stdv = 1.0 / math.sqrt(64 * 5 * 5)
self._conv2 = ConvPoolLayer(
64, 192, 5, 1, 2, stdv, act="relu", name="conv2")
stdv = 1.0 / math.sqrt(192 * 3 * 3)
self._conv3 = Conv2D(
192,
384,
3,
stride=1,
padding=1,
weight_attr=ParamAttr(
name="conv3_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name="conv3_offset", initializer=Uniform(-stdv, stdv)))
stdv = 1.0 / math.sqrt(384 * 3 * 3)
self._conv4 = Conv2D(
384,
256,
3,
stride=1,
padding=1,
weight_attr=ParamAttr(
name="conv4_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name="conv4_offset", initializer=Uniform(-stdv, stdv)))
stdv = 1.0 / math.sqrt(256 * 3 * 3)
self._conv5 = ConvPoolLayer(
256, 256, 3, 1, 1, stdv, act="relu", name="conv5")
stdv = 1.0 / math.sqrt(256 * 6 * 6)
self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
self._fc6 = Linear(
in_features=256 * 6 * 6,
out_features=4096,
weight_attr=ParamAttr(
name="fc6_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name="fc6_offset", initializer=Uniform(-stdv, stdv)))
        # recompute stdv with the fan-in (4096) of the following FC layers
        stdv = 1.0 / math.sqrt(4096 * 1.0)
        self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
self._fc7 = Linear(
in_features=4096,
out_features=4096,
weight_attr=ParamAttr(
name="fc7_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name="fc7_offset", initializer=Uniform(-stdv, stdv)))
        stdv = 1.0 / math.sqrt(4096 * 1.0)
        self._fc8 = Linear(
in_features=4096,
out_features=class_num,
weight_attr=ParamAttr(
name="fc8_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
name="fc8_offset", initializer=Uniform(-stdv, stdv)))
def forward(self, inputs):
x = self._conv1(inputs)
x = self._conv2(x)
x = self._conv3(x)
x = F.relu(x)
x = self._conv4(x)
x = F.relu(x)
x = self._conv5(x)
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
x = self._drop1(x)
x = self._fc6(x)
x = F.relu(x)
x = self._drop2(x)
x = self._fc7(x)
x = F.relu(x)
x = self._fc8(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def AlexNet(pretrained=False, use_ssld=False, **kwargs):
model = AlexNetDY(**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
return model
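# --- Added usage sketch (not in the original commit) ---
# AlexNet expects 224x224 inputs: the conv/pool stages reduce them to the
# 256*6*6 feature map that `_fc6` was sized for. Assuming a working paddle
# install:
if __name__ == "__main__":
    import paddle
    net = AlexNet(pretrained=False, class_num=1000)
    print(net(paddle.rand([1, 3, 224, 224])).shape)  # expected: [1, 1000]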

@ -0,0 +1,376 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was heavily based on https://github.com/rwightman/pytorch-image-models
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"CSPDarkNet53":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams"
}
MODEL_CFGS = {
"CSPDarkNet53": dict(
stem=dict(
out_chs=32, kernel_size=3, stride=1, pool=''),
stage=dict(
out_chs=(64, 128, 256, 512, 1024),
depth=(1, 2, 8, 8, 4),
stride=(2, ) * 5,
exp_ratio=(2., ) + (1., ) * 4,
bottle_ratio=(0.5, ) + (1.0, ) * 4,
block_ratio=(1., ) + (0.5, ) * 4,
down_growth=True, ))
}
__all__ = ['CSPDarkNet53']  # model_registry will add each entrypoint fn to this
class ConvBnAct(nn.Layer):
def __init__(self,
input_channels,
output_channels,
kernel_size=1,
stride=1,
padding=None,
dilation=1,
groups=1,
act_layer=nn.LeakyReLU,
norm_layer=nn.BatchNorm2D):
super().__init__()
if padding is None:
padding = (kernel_size - 1) // 2
self.conv = nn.Conv2D(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
weight_attr=ParamAttr(),
bias_attr=False)
self.bn = norm_layer(num_features=output_channels)
self.act = act_layer()
def forward(self, inputs):
x = self.conv(inputs)
x = self.bn(x)
if self.act is not None:
x = self.act(x)
return x
def create_stem(in_chans=3,
out_chs=32,
kernel_size=3,
stride=2,
pool='',
act_layer=None,
norm_layer=None):
stem = nn.Sequential()
if not isinstance(out_chs, (tuple, list)):
out_chs = [out_chs]
assert len(out_chs)
in_c = in_chans
for i, out_c in enumerate(out_chs):
conv_name = f'conv{i + 1}'
stem.add_sublayer(
conv_name,
ConvBnAct(
in_c,
out_c,
kernel_size,
stride=stride if i == 0 else 1,
act_layer=act_layer,
norm_layer=norm_layer))
in_c = out_c
last_conv = conv_name
if pool:
stem.add_sublayer(
'pool', nn.MaxPool2D(
kernel_size=3, stride=2, padding=1))
return stem, dict(
num_chs=in_c, reduction=stride, module='.'.join(['stem', last_conv]))
class DarkBlock(nn.Layer):
def __init__(self,
in_chs,
out_chs,
dilation=1,
bottle_ratio=0.5,
groups=1,
act_layer=nn.ReLU,
norm_layer=nn.BatchNorm2D,
attn_layer=None,
drop_block=None):
super(DarkBlock, self).__init__()
mid_chs = int(round(out_chs * bottle_ratio))
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
self.conv1 = ConvBnAct(in_chs, mid_chs, kernel_size=1, **ckwargs)
self.conv2 = ConvBnAct(
mid_chs,
out_chs,
kernel_size=3,
dilation=dilation,
groups=groups,
**ckwargs)
def forward(self, x):
shortcut = x
x = self.conv1(x)
x = self.conv2(x)
x = x + shortcut
return x
class CrossStage(nn.Layer):
def __init__(self,
in_chs,
out_chs,
stride,
dilation,
depth,
block_ratio=1.,
bottle_ratio=1.,
exp_ratio=1.,
groups=1,
first_dilation=None,
down_growth=False,
cross_linear=False,
block_dpr=None,
block_fn=DarkBlock,
**block_kwargs):
super(CrossStage, self).__init__()
first_dilation = first_dilation or dilation
down_chs = out_chs if down_growth else in_chs
exp_chs = int(round(out_chs * exp_ratio))
block_out_chs = int(round(out_chs * block_ratio))
conv_kwargs = dict(
act_layer=block_kwargs.get('act_layer'),
norm_layer=block_kwargs.get('norm_layer'))
if stride != 1 or first_dilation != dilation:
self.conv_down = ConvBnAct(
in_chs,
down_chs,
kernel_size=3,
stride=stride,
dilation=first_dilation,
groups=groups,
**conv_kwargs)
prev_chs = down_chs
else:
self.conv_down = None
prev_chs = in_chs
self.conv_exp = ConvBnAct(
prev_chs, exp_chs, kernel_size=1, **conv_kwargs)
prev_chs = exp_chs // 2 # output of conv_exp is always split in two
self.blocks = nn.Sequential()
for i in range(depth):
self.blocks.add_sublayer(
str(i),
block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
groups, **block_kwargs))
prev_chs = block_out_chs
# transition convs
self.conv_transition_b = ConvBnAct(
prev_chs, exp_chs // 2, kernel_size=1, **conv_kwargs)
self.conv_transition = ConvBnAct(
exp_chs, out_chs, kernel_size=1, **conv_kwargs)
def forward(self, x):
if self.conv_down is not None:
x = self.conv_down(x)
x = self.conv_exp(x)
split = x.shape[1] // 2
xs, xb = x[:, :split], x[:, split:]
xb = self.blocks(xb)
xb = self.conv_transition_b(xb)
out = self.conv_transition(paddle.concat([xs, xb], axis=1))
return out
class DarkStage(nn.Layer):
def __init__(self,
in_chs,
out_chs,
stride,
dilation,
depth,
block_ratio=1.,
bottle_ratio=1.,
groups=1,
first_dilation=None,
block_fn=DarkBlock,
block_dpr=None,
**block_kwargs):
super().__init__()
first_dilation = first_dilation or dilation
self.conv_down = ConvBnAct(
in_chs,
out_chs,
kernel_size=3,
stride=stride,
dilation=first_dilation,
groups=groups,
act_layer=block_kwargs.get('act_layer'),
norm_layer=block_kwargs.get('norm_layer'))
prev_chs = out_chs
block_out_chs = int(round(out_chs * block_ratio))
self.blocks = nn.Sequential()
for i in range(depth):
self.blocks.add_sublayer(
str(i),
block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
groups, **block_kwargs))
prev_chs = block_out_chs
def forward(self, x):
x = self.conv_down(x)
x = self.blocks(x)
return x
def _cfg_to_stage_args(cfg, curr_stride=2, output_stride=32):
    # Build per-stage args for each stage and its blocks; adjust strides and dilations to meet the target output_stride.
num_stages = len(cfg['depth'])
if 'groups' not in cfg:
cfg['groups'] = (1, ) * num_stages
if 'down_growth' in cfg and not isinstance(cfg['down_growth'],
(list, tuple)):
cfg['down_growth'] = (cfg['down_growth'], ) * num_stages
stage_strides = []
stage_dilations = []
stage_first_dilations = []
dilation = 1
for cfg_stride in cfg['stride']:
stage_first_dilations.append(dilation)
if curr_stride >= output_stride:
dilation *= cfg_stride
stride = 1
else:
stride = cfg_stride
curr_stride *= stride
stage_strides.append(stride)
stage_dilations.append(dilation)
cfg['stride'] = stage_strides
cfg['dilation'] = stage_dilations
cfg['first_dilation'] = stage_first_dilations
stage_args = [
dict(zip(cfg.keys(), values)) for values in zip(*cfg.values())
]
return stage_args
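# Added worked example: for CSPDarkNet53 (stem reduction 1, five stages of
# stride 2) with output_stride=8, the loop above yields stage strides
# [2, 2, 2, 1, 1] and dilations [1, 1, 1, 2, 4] -- once the accumulated
# stride reaches the target, further downsampling is converted to dilation.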
class CSPNet(nn.Layer):
def __init__(self,
cfg,
in_chans=3,
class_num=1000,
output_stride=32,
global_pool='avg',
drop_rate=0.,
act_layer=nn.LeakyReLU,
norm_layer=nn.BatchNorm2D,
zero_init_last_bn=True,
stage_fn=CrossStage,
block_fn=DarkBlock):
super().__init__()
self.class_num = class_num
self.drop_rate = drop_rate
assert output_stride in (8, 16, 32)
layer_args = dict(act_layer=act_layer, norm_layer=norm_layer)
# Construct the stem
self.stem, stem_feat_info = create_stem(in_chans, **cfg['stem'],
**layer_args)
self.feature_info = [stem_feat_info]
prev_chs = stem_feat_info['num_chs']
curr_stride = stem_feat_info[
'reduction'] # reduction does not include pool
if cfg['stem']['pool']:
curr_stride *= 2
# Construct the stages
per_stage_args = _cfg_to_stage_args(
cfg['stage'], curr_stride=curr_stride, output_stride=output_stride)
self.stages = nn.LayerList()
for i, sa in enumerate(per_stage_args):
self.stages.add_sublayer(
str(i),
stage_fn(
prev_chs, **sa, **layer_args, block_fn=block_fn))
prev_chs = sa['out_chs']
curr_stride *= sa['stride']
self.feature_info += [
dict(
num_chs=prev_chs,
reduction=curr_stride,
module=f'stages.{i}')
]
# Construct the head
self.num_features = prev_chs
self.pool = nn.AdaptiveAvgPool2D(1)
self.flatten = nn.Flatten(1)
self.fc = nn.Linear(
prev_chs,
class_num,
weight_attr=ParamAttr(),
bias_attr=ParamAttr())
def forward(self, x):
x = self.stem(x)
for stage in self.stages:
x = stage(x)
x = self.pool(x)
x = self.flatten(x)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def CSPDarkNet53(pretrained=False, use_ssld=False, **kwargs):
model = CSPNet(MODEL_CFGS["CSPDarkNet53"], block_fn=DarkBlock, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["CSPDarkNet53"], use_ssld=use_ssld)
return model
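# --- Added usage sketch (not in the original commit) ---
# `feature_info` records per-stage channels and reductions for FPN-style
# consumers. Assuming a working paddle install:
if __name__ == "__main__":
    import paddle
    net = CSPDarkNet53(pretrained=False)
    for info in net.feature_info:
        print(info["num_chs"], info["reduction"], info["module"])
    print(net(paddle.rand([1, 3, 256, 256])).shape)  # expected: [1, 1000]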

@ -0,0 +1,197 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DarkNet53":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
input_channels,
output_channels,
filter_size,
stride,
padding,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=filter_size,
stride=stride,
padding=padding,
weight_attr=ParamAttr(name=name + ".conv.weights"),
bias_attr=False)
bn_name = name + ".bn"
self._bn = BatchNorm(
num_channels=output_channels,
act="relu",
param_attr=ParamAttr(name=bn_name + ".scale"),
bias_attr=ParamAttr(name=bn_name + ".offset"),
moving_mean_name=bn_name + ".mean",
moving_variance_name=bn_name + ".var")
def forward(self, inputs):
x = self._conv(inputs)
x = self._bn(x)
return x
class BasicBlock(nn.Layer):
def __init__(self, input_channels, output_channels, name=None):
super(BasicBlock, self).__init__()
self._conv1 = ConvBNLayer(
input_channels, output_channels, 1, 1, 0, name=name + ".0")
self._conv2 = ConvBNLayer(
output_channels, output_channels * 2, 3, 1, 1, name=name + ".1")
def forward(self, inputs):
x = self._conv1(inputs)
x = self._conv2(x)
return paddle.add(x=inputs, y=x)
class DarkNet(nn.Layer):
def __init__(self, class_num=1000):
super(DarkNet, self).__init__()
self.stages = [1, 2, 8, 8, 4]
self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input")
self._conv2 = ConvBNLayer(
32, 64, 3, 2, 1, name="yolo_input.downsample")
self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0")
self._downsample_0 = ConvBNLayer(
64, 128, 3, 2, 1, name="stage.0.downsample")
self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0")
self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1")
self._downsample_1 = ConvBNLayer(
128, 256, 3, 2, 1, name="stage.1.downsample")
self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0")
self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1")
self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2")
self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3")
self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4")
self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5")
self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6")
self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7")
self._downsample_2 = ConvBNLayer(
256, 512, 3, 2, 1, name="stage.2.downsample")
self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0")
self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1")
self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2")
self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3")
self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4")
self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5")
self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6")
self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7")
self._downsample_3 = ConvBNLayer(
512, 1024, 3, 2, 1, name="stage.3.downsample")
self._basic_block_41 = BasicBlock(1024, 512, name="stage.4.0")
self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1")
self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2")
self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3")
self._pool = AdaptiveAvgPool2D(1)
stdv = 1.0 / math.sqrt(1024.0)
self._out = Linear(
1024,
class_num,
weight_attr=ParamAttr(
name="fc_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, inputs):
x = self._conv1(inputs)
x = self._conv2(x)
x = self._basic_block_01(x)
x = self._downsample_0(x)
x = self._basic_block_11(x)
x = self._basic_block_12(x)
x = self._downsample_1(x)
x = self._basic_block_21(x)
x = self._basic_block_22(x)
x = self._basic_block_23(x)
x = self._basic_block_24(x)
x = self._basic_block_25(x)
x = self._basic_block_26(x)
x = self._basic_block_27(x)
x = self._basic_block_28(x)
x = self._downsample_2(x)
x = self._basic_block_31(x)
x = self._basic_block_32(x)
x = self._basic_block_33(x)
x = self._basic_block_34(x)
x = self._basic_block_35(x)
x = self._basic_block_36(x)
x = self._basic_block_37(x)
x = self._basic_block_38(x)
x = self._downsample_3(x)
x = self._basic_block_41(x)
x = self._basic_block_42(x)
x = self._basic_block_43(x)
x = self._basic_block_44(x)
x = self._pool(x)
x = paddle.squeeze(x, axis=[2, 3])
x = self._out(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
model = DarkNet(**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
return model
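# --- Added usage sketch (not in the original commit) ---
# DarkNet53 stacks 1+2+8+8+4 residual BasicBlocks between five stride-2
# downsamples, for an overall reduction of 32. Assuming a working paddle
# install:
if __name__ == "__main__":
    import paddle
    net = DarkNet53(pretrained=False)
    print(net(paddle.rand([1, 3, 256, 256])).shape)  # expected: [1, 1000]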

@ -0,0 +1,344 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DenseNet121":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
"DenseNet161":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
"DenseNet169":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
"DenseNet201":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
"DenseNet264":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class BNACConvLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
pad=0,
groups=1,
act="relu",
name=None):
super(BNACConvLayer, self).__init__()
self._batch_norm = BatchNorm(
num_channels,
act=act,
param_attr=ParamAttr(name=name + '_bn_scale'),
bias_attr=ParamAttr(name + '_bn_offset'),
moving_mean_name=name + '_bn_mean',
moving_variance_name=name + '_bn_variance')
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=pad,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
def forward(self, input):
y = self._batch_norm(input)
y = self._conv(y)
return y
class DenseLayer(nn.Layer):
def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None):
super(DenseLayer, self).__init__()
self.dropout = dropout
self.bn_ac_func1 = BNACConvLayer(
num_channels=num_channels,
num_filters=bn_size * growth_rate,
filter_size=1,
pad=0,
stride=1,
name=name + "_x1")
self.bn_ac_func2 = BNACConvLayer(
num_channels=bn_size * growth_rate,
num_filters=growth_rate,
filter_size=3,
pad=1,
stride=1,
name=name + "_x2")
if dropout:
self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer")
def forward(self, input):
conv = self.bn_ac_func1(input)
conv = self.bn_ac_func2(conv)
if self.dropout:
conv = self.dropout_func(conv)
conv = paddle.concat([input, conv], axis=1)
return conv
class DenseBlock(nn.Layer):
def __init__(self,
num_channels,
num_layers,
bn_size,
growth_rate,
dropout,
name=None):
super(DenseBlock, self).__init__()
self.dropout = dropout
self.dense_layer_func = []
pre_channel = num_channels
for layer in range(num_layers):
self.dense_layer_func.append(
self.add_sublayer(
"{}_{}".format(name, layer + 1),
DenseLayer(
num_channels=pre_channel,
growth_rate=growth_rate,
bn_size=bn_size,
dropout=dropout,
name=name + '_' + str(layer + 1))))
pre_channel = pre_channel + growth_rate
def forward(self, input):
conv = input
for func in self.dense_layer_func:
conv = func(conv)
return conv
class TransitionLayer(nn.Layer):
def __init__(self, num_channels, num_output_features, name=None):
super(TransitionLayer, self).__init__()
self.conv_ac_func = BNACConvLayer(
num_channels=num_channels,
num_filters=num_output_features,
filter_size=1,
pad=0,
stride=1,
name=name)
self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0)
def forward(self, input):
y = self.conv_ac_func(input)
y = self.pool2d_avg(y)
return y
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
pad=0,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=pad,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=name + '_bn_scale'),
bias_attr=ParamAttr(name + '_bn_offset'),
moving_mean_name=name + '_bn_mean',
moving_variance_name=name + '_bn_variance')
def forward(self, input):
y = self._conv(input)
y = self._batch_norm(y)
return y
class DenseNet(nn.Layer):
    def __init__(self, layers=121, bn_size=4, dropout=0, class_num=1000):
super(DenseNet, self).__init__()
supported_layers = [121, 161, 169, 201, 264]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
densenet_spec = {
121: (64, 32, [6, 12, 24, 16]),
161: (96, 48, [6, 12, 36, 24]),
169: (64, 32, [6, 12, 32, 32]),
201: (64, 32, [6, 12, 48, 32]),
264: (64, 32, [6, 12, 64, 48])
}
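        # Worked example (DenseNet121): num_init_features=64, growth_rate=32.
        # Block 1: 64 + 6*32 = 256, transition halves it to 128; block 2:
        # 128 + 12*32 = 512 -> 256; block 3: 256 + 24*32 = 1024 -> 512;
        # block 4: 512 + 16*32 = 1024, the final `num_features` fed to the
        # classifier.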
num_init_features, growth_rate, block_config = densenet_spec[layers]
self.conv1_func = ConvBNLayer(
num_channels=3,
num_filters=num_init_features,
filter_size=7,
stride=2,
pad=3,
act='relu',
name="conv1")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_config = block_config
self.dense_block_func_list = []
self.transition_func_list = []
pre_num_channels = num_init_features
num_features = num_init_features
for i, num_layers in enumerate(block_config):
self.dense_block_func_list.append(
self.add_sublayer(
"db_conv_{}".format(i + 2),
DenseBlock(
num_channels=pre_num_channels,
num_layers=num_layers,
bn_size=bn_size,
growth_rate=growth_rate,
dropout=dropout,
name='conv' + str(i + 2))))
num_features = num_features + num_layers * growth_rate
pre_num_channels = num_features
if i != len(block_config) - 1:
self.transition_func_list.append(
self.add_sublayer(
"tr_conv{}_blk".format(i + 2),
TransitionLayer(
num_channels=pre_num_channels,
num_output_features=num_features // 2,
name='conv' + str(i + 2) + "_blk")))
pre_num_channels = num_features // 2
num_features = num_features // 2
self.batch_norm = BatchNorm(
num_features,
act="relu",
param_attr=ParamAttr(name='conv5_blk_bn_scale'),
bias_attr=ParamAttr(name='conv5_blk_bn_offset'),
moving_mean_name='conv5_blk_bn_mean',
moving_variance_name='conv5_blk_bn_variance')
self.pool2d_avg = AdaptiveAvgPool2D(1)
stdv = 1.0 / math.sqrt(num_features * 1.0)
self.out = Linear(
num_features,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, input):
conv = self.conv1_func(input)
conv = self.pool2d_max(conv)
for i, num_layers in enumerate(self.block_config):
conv = self.dense_block_func_list[i](conv)
if i != len(self.block_config) - 1:
conv = self.transition_func_list[i](conv)
conv = self.batch_norm(conv)
y = self.pool2d_avg(conv)
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=121, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
return model
def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=161, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
return model
def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=169, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
return model
def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=201, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
return model
def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=264, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
return model
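# --- Added usage sketch (not in the original commit) ---
# `dropout` > 0 inserts a Dropout after every DenseLayer. Assuming a working
# paddle install:
if __name__ == "__main__":
    import paddle
    net = DenseNet121(pretrained=False, dropout=0.1)
    print(net(paddle.rand([1, 3, 224, 224])).shape)  # expected: [1, 1000]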

@ -0,0 +1,272 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was heavily based on https://github.com/facebookresearch/deit
import paddle
import paddle.nn as nn
from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DeiT_tiny_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
"DeiT_small_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
"DeiT_tiny_distilled_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
"DeiT_small_distilled_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_distilled_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_384":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
"DeiT_base_distilled_patch16_384":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class DistilledVisionTransformer(VisionTransformer):
def __init__(self,
img_size=224,
patch_size=16,
class_num=1000,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=False,
norm_layer='nn.LayerNorm',
epsilon=1e-5,
**kwargs):
super().__init__(
img_size=img_size,
patch_size=patch_size,
class_num=class_num,
embed_dim=embed_dim,
depth=depth,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
norm_layer=norm_layer,
epsilon=epsilon,
**kwargs)
self.pos_embed = self.create_parameter(
shape=(1, self.patch_embed.num_patches + 2, self.embed_dim),
default_initializer=zeros_)
self.add_parameter("pos_embed", self.pos_embed)
self.dist_token = self.create_parameter(
shape=(1, 1, self.embed_dim), default_initializer=zeros_)
self.add_parameter("cls_token", self.cls_token)
self.head_dist = nn.Linear(
self.embed_dim,
self.class_num) if self.class_num > 0 else Identity()
trunc_normal_(self.dist_token)
trunc_normal_(self.pos_embed)
self.head_dist.apply(self._init_weights)
def forward_features(self, x):
B = paddle.shape(x)[0]
x = self.patch_embed(x)
cls_tokens = self.cls_token.expand((B, -1, -1))
dist_token = self.dist_token.expand((B, -1, -1))
x = paddle.concat((cls_tokens, dist_token, x), axis=1)
x = x + self.pos_embed
x = self.pos_drop(x)
for blk in self.blocks:
x = blk(x)
x = self.norm(x)
return x[:, 0], x[:, 1]
def forward(self, x):
x, x_dist = self.forward_features(x)
x = self.head(x)
x_dist = self.head_dist(x_dist)
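        # Added note: at inference the class-token head and the distillation
        # head are averaged; the reference DeiT training recipe instead
        # supervises the two heads with separate losses.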
return (x + x_dist) / 2
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=192,
depth=12,
num_heads=3,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_tiny_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_small_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_base_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False,
**kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=192,
depth=12,
num_heads=3,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_tiny_distilled_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_small_distilled_patch16_224(pretrained=False,
use_ssld=False,
**kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=384,
depth=12,
num_heads=6,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_small_distilled_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False,
**kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_base_distilled_patch16_224"],
use_ssld=use_ssld)
return model
def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
img_size=384,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_base_patch16_384"],
use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False,
**kwargs):
model = DistilledVisionTransformer(
img_size=384,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["DeiT_base_distilled_patch16_384"],
use_ssld=use_ssld)
return model
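# --- Added usage sketch (not in the original commit) ---
# Distilled variants carry an extra distillation token, so pos_embed has
# num_patches + 2 entries. Assuming a working paddle install:
if __name__ == "__main__":
    import paddle
    net = DeiT_base_distilled_patch16_224(pretrained=False)
    print(net(paddle.rand([1, 3, 224, 224])).shape)  # expected: [1, 1000]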

@ -0,0 +1,528 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/ucbdrive/dla
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DLA34":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
"DLA46_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams",
"DLA46x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams",
"DLA60":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams",
"DLA60x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams",
"DLA60x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams",
"DLA102":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams",
"DLA102x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams",
"DLA102x2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams",
"DLA169":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
class DlaBasic(nn.Layer):
def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
super(DlaBasic, self).__init__()
self.conv1 = nn.Conv2D(
inplanes,
planes,
kernel_size=3,
stride=stride,
padding=dilation,
bias_attr=False,
dilation=dilation)
self.bn1 = nn.BatchNorm2D(planes)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2D(
planes,
planes,
kernel_size=3,
stride=1,
padding=dilation,
bias_attr=False,
dilation=dilation)
self.bn2 = nn.BatchNorm2D(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class DlaBottleneck(nn.Layer):
expansion = 2
def __init__(self,
inplanes,
outplanes,
stride=1,
dilation=1,
cardinality=1,
base_width=64):
super(DlaBottleneck, self).__init__()
self.stride = stride
mid_planes = int(
math.floor(outplanes * (base_width / 64)) * cardinality)
mid_planes = mid_planes // self.expansion
self.conv1 = nn.Conv2D(
inplanes, mid_planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm2D(mid_planes)
self.conv2 = nn.Conv2D(
mid_planes,
mid_planes,
kernel_size=3,
stride=stride,
padding=dilation,
bias_attr=False,
dilation=dilation,
groups=cardinality)
self.bn2 = nn.BatchNorm2D(mid_planes)
self.conv3 = nn.Conv2D(
mid_planes, outplanes, kernel_size=1, bias_attr=False)
self.bn3 = nn.BatchNorm2D(outplanes)
self.relu = nn.ReLU()
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class DlaRoot(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(DlaRoot, self).__init__()
self.conv = nn.Conv2D(
in_channels,
out_channels,
1,
stride=1,
bias_attr=False,
padding=(kernel_size - 1) // 2)
self.bn = nn.BatchNorm2D(out_channels)
self.relu = nn.ReLU()
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(paddle.concat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class DlaTree(nn.Layer):
def __init__(self,
levels,
block,
in_channels,
out_channels,
stride=1,
dilation=1,
cardinality=1,
base_width=64,
level_root=False,
root_dim=0,
root_kernel_size=1,
root_residual=False):
super(DlaTree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
self.downsample = nn.MaxPool2D(
stride, stride=stride) if stride > 1 else Identity()
self.project = Identity()
cargs = dict(
dilation=dilation, cardinality=cardinality, base_width=base_width)
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride, **cargs)
self.tree2 = block(out_channels, out_channels, 1, **cargs)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2D(
in_channels,
out_channels,
kernel_size=1,
stride=1,
bias_attr=False),
nn.BatchNorm2D(out_channels))
else:
cargs.update(
dict(
root_kernel_size=root_kernel_size,
root_residual=root_residual))
self.tree1 = DlaTree(
levels - 1,
block,
in_channels,
out_channels,
stride,
root_dim=0,
**cargs)
self.tree2 = DlaTree(
levels - 1,
block,
out_channels,
out_channels,
root_dim=root_dim + out_channels,
**cargs)
if levels == 1:
self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.levels = levels
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x)
residual = self.project(bottom)
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Layer):
def __init__(self,
levels,
channels,
in_chans=3,
cardinality=1,
base_width=64,
block=DlaBottleneck,
residual_root=False,
drop_rate=0.0,
class_num=1000,
with_pool=True):
super(DLA, self).__init__()
self.channels = channels
self.class_num = class_num
self.with_pool = with_pool
self.cardinality = cardinality
self.base_width = base_width
self.drop_rate = drop_rate
self.base_layer = nn.Sequential(
nn.Conv2D(
in_chans,
channels[0],
kernel_size=7,
stride=1,
padding=3,
bias_attr=False),
nn.BatchNorm2D(channels[0]),
nn.ReLU())
self.level0 = self._make_conv_level(channels[0], channels[0],
levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
cargs = dict(
cardinality=cardinality,
base_width=base_width,
root_residual=residual_root)
self.level2 = DlaTree(
levels[2],
block,
channels[1],
channels[2],
2,
level_root=False,
**cargs)
self.level3 = DlaTree(
levels[3],
block,
channels[2],
channels[3],
2,
level_root=True,
**cargs)
self.level4 = DlaTree(
levels[4],
block,
channels[3],
channels[4],
2,
level_root=True,
**cargs)
self.level5 = DlaTree(
levels[5],
block,
channels[4],
channels[5],
2,
level_root=True,
**cargs)
self.feature_info = [
# rare to have a meaningful stride 1 level
dict(
num_chs=channels[0], reduction=1, module='level0'),
dict(
num_chs=channels[1], reduction=2, module='level1'),
dict(
num_chs=channels[2], reduction=4, module='level2'),
dict(
num_chs=channels[3], reduction=8, module='level3'),
dict(
num_chs=channels[4], reduction=16, module='level4'),
dict(
num_chs=channels[5], reduction=32, module='level5'),
]
self.num_features = channels[-1]
if with_pool:
self.global_pool = nn.AdaptiveAvgPool2D(1)
if class_num > 0:
self.fc = nn.Conv2D(self.num_features, class_num, 1)
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
normal_(m.weight)
elif isinstance(m, nn.BatchNorm2D):
ones_(m.weight)
zeros_(m.bias)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2D(
inplanes,
planes,
kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation,
bias_attr=False,
dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU()
])
inplanes = planes
return nn.Sequential(*modules)
def forward_features(self, x):
x = self.base_layer(x)
x = self.level0(x)
x = self.level1(x)
x = self.level2(x)
x = self.level3(x)
x = self.level4(x)
x = self.level5(x)
return x
def forward(self, x):
x = self.forward_features(x)
if self.with_pool:
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
if self.class_num > 0:
x = self.fc(x)
x = x.flatten(1)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DLA34(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 128, 256, 512),
block=DlaBasic,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
return model
def DLA46_c(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
return model
def DLA46x_c(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
return model
def DLA60(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
return model
def DLA60x(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
return model
def DLA60x_c(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
return model
def DLA102(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
return model
def DLA102x(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
residual_root=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
return model
def DLA102x2(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=64,
base_width=4,
residual_root=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
return model
def DLA169(pretrained=False, **kwargs):
model = DLA(levels=(1, 1, 2, 3, 5, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
return model
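# --- Added usage sketch (not in the original commit) ---
# With `with_pool=True` and `class_num > 0`, the head is a 1x1 conv followed
# by flatten, so the output is the usual logits tensor. Assuming a working
# paddle install:
if __name__ == "__main__":
    import paddle
    net = DLA34(pretrained=False)
    print(net(paddle.rand([1, 3, 224, 224])).shape)  # expected: [1, 1000]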

@ -0,0 +1,451 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import sys
import paddle
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DPN68":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
"DPN92":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
"DPN98":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
"DPN107":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
"DPN131":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
pad=0,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=pad,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=name + '_bn_scale'),
bias_attr=ParamAttr(name + '_bn_offset'),
moving_mean_name=name + '_bn_mean',
moving_variance_name=name + '_bn_variance')
def forward(self, input):
y = self._conv(input)
y = self._batch_norm(y)
return y
class BNACConvLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
pad=0,
groups=1,
act="relu",
name=None):
super(BNACConvLayer, self).__init__()
self.num_channels = num_channels
self._batch_norm = BatchNorm(
num_channels,
act=act,
param_attr=ParamAttr(name=name + '_bn_scale'),
bias_attr=ParamAttr(name + '_bn_offset'),
moving_mean_name=name + '_bn_mean',
moving_variance_name=name + '_bn_variance')
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=pad,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
def forward(self, input):
y = self._batch_norm(input)
y = self._conv(y)
return y
class DualPathFactory(nn.Layer):
def __init__(self,
num_channels,
num_1x1_a,
num_3x3_b,
num_1x1_c,
inc,
G,
_type='normal',
name=None):
super(DualPathFactory, self).__init__()
self.num_1x1_c = num_1x1_c
self.inc = inc
self.name = name
kw = 3
kh = 3
pw = (kw - 1) // 2
ph = (kh - 1) // 2
# type
if _type == 'proj':
key_stride = 1
self.has_proj = True
elif _type == 'down':
key_stride = 2
self.has_proj = True
elif _type == 'normal':
key_stride = 1
self.has_proj = False
else:
print("not implemented now!!!")
sys.exit(1)
data_in_ch = sum(num_channels) if isinstance(num_channels,
list) else num_channels
if self.has_proj:
self.c1x1_w_func = BNACConvLayer(
num_channels=data_in_ch,
num_filters=num_1x1_c + 2 * inc,
filter_size=(1, 1),
pad=(0, 0),
stride=(key_stride, key_stride),
name=name + "_match")
self.c1x1_a_func = BNACConvLayer(
num_channels=data_in_ch,
num_filters=num_1x1_a,
filter_size=(1, 1),
pad=(0, 0),
name=name + "_conv1")
self.c3x3_b_func = BNACConvLayer(
num_channels=num_1x1_a,
num_filters=num_3x3_b,
filter_size=(kw, kh),
pad=(pw, ph),
stride=(key_stride, key_stride),
groups=G,
name=name + "_conv2")
self.c1x1_c_func = BNACConvLayer(
num_channels=num_3x3_b,
num_filters=num_1x1_c + inc,
filter_size=(1, 1),
pad=(0, 0),
name=name + "_conv3")
def forward(self, input):
# PROJ
if isinstance(input, list):
data_in = paddle.concat([input[0], input[1]], axis=1)
else:
data_in = input
if self.has_proj:
c1x1_w = self.c1x1_w_func(data_in)
data_o1, data_o2 = paddle.split(
c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1)
else:
data_o1 = input[0]
data_o2 = input[1]
c1x1_a = self.c1x1_a_func(data_in)
c3x3_b = self.c3x3_b_func(c1x1_a)
c1x1_c = self.c1x1_c_func(c3x3_b)
c1x1_c1, c1x1_c2 = paddle.split(
c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1)
# OUTPUTS
summ = paddle.add(x=data_o1, y=c1x1_c1)
dense = paddle.concat([data_o2, c1x1_c2], axis=1)
# tensor, channels
return [summ, dense]
class DPN(nn.Layer):
def __init__(self, layers=68, class_num=1000):
super(DPN, self).__init__()
self._class_num = class_num
args = self.get_net_args(layers)
bws = args['bw']
inc_sec = args['inc_sec']
rs = args['r']
k_r = args['k_r']
k_sec = args['k_sec']
G = args['G']
init_num_filter = args['init_num_filter']
init_filter_size = args['init_filter_size']
init_padding = args['init_padding']
self.k_sec = k_sec
self.conv1_x_1_func = ConvBNLayer(
num_channels=3,
num_filters=init_num_filter,
filter_size=init_filter_size,
stride=2,
pad=init_padding,
act='relu',
name="conv1")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
num_channel_dpn = init_num_filter
self.dpn_func_list = []
#conv2 - conv5
match_list, num = [], 0
for gc in range(4):
bw = bws[gc]
inc = inc_sec[gc]
R = (k_r * bw) // rs[gc]
if gc == 0:
_type1 = 'proj'
_type2 = 'normal'
match = 1
else:
_type1 = 'down'
_type2 = 'normal'
match = match + k_sec[gc - 1]
match_list.append(match)
self.dpn_func_list.append(
self.add_sublayer(
"dpn{}".format(match),
DualPathFactory(
num_channels=num_channel_dpn,
num_1x1_a=R,
num_3x3_b=R,
num_1x1_c=bw,
inc=inc,
G=G,
_type=_type1,
name="dpn" + str(match))))
num_channel_dpn = [bw, 3 * inc]
for i_ly in range(2, k_sec[gc] + 1):
num += 1
if num in match_list:
num += 1
self.dpn_func_list.append(
self.add_sublayer(
"dpn{}".format(num),
DualPathFactory(
num_channels=num_channel_dpn,
num_1x1_a=R,
num_3x3_b=R,
num_1x1_c=bw,
inc=inc,
G=G,
_type=_type2,
name="dpn" + str(num))))
num_channel_dpn = [
num_channel_dpn[0], num_channel_dpn[1] + inc
]
out_channel = sum(num_channel_dpn)
self.conv5_x_x_bn = BatchNorm(
num_channels=sum(num_channel_dpn),
act="relu",
param_attr=ParamAttr(name='final_concat_bn_scale'),
bias_attr=ParamAttr('final_concat_bn_offset'),
moving_mean_name='final_concat_bn_mean',
moving_variance_name='final_concat_bn_variance')
self.pool2d_avg = AdaptiveAvgPool2D(1)
stdv = 0.01
self.out = Linear(
out_channel,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, input):
conv1_x_1 = self.conv1_x_1_func(input)
convX_x_x = self.pool2d_max(conv1_x_1)
dpn_idx = 0
for gc in range(4):
convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
dpn_idx += 1
for i_ly in range(2, self.k_sec[gc] + 1):
convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
dpn_idx += 1
conv5_x_x = paddle.concat(convX_x_x, axis=1)
conv5_x_x = self.conv5_x_x_bn(conv5_x_x)
y = self.pool2d_avg(conv5_x_x)
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
y = self.out(y)
return y
def get_net_args(self, layers):
if layers == 68:
k_r = 128
G = 32
k_sec = [3, 4, 12, 3]
inc_sec = [16, 32, 32, 64]
bw = [64, 128, 256, 512]
r = [64, 64, 64, 64]
init_num_filter = 10
init_filter_size = 3
init_padding = 1
elif layers == 92:
k_r = 96
G = 32
k_sec = [3, 4, 20, 3]
inc_sec = [16, 32, 24, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 64
init_filter_size = 7
init_padding = 3
elif layers == 98:
k_r = 160
G = 40
k_sec = [3, 6, 20, 3]
inc_sec = [16, 32, 32, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 96
init_filter_size = 7
init_padding = 3
elif layers == 107:
k_r = 200
G = 50
k_sec = [4, 8, 20, 3]
inc_sec = [20, 64, 64, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 128
init_filter_size = 7
init_padding = 3
elif layers == 131:
k_r = 160
G = 40
k_sec = [4, 8, 28, 3]
inc_sec = [16, 32, 32, 128]
bw = [256, 512, 1024, 2048]
r = [256, 256, 256, 256]
init_num_filter = 128
init_filter_size = 7
init_padding = 3
else:
raise NotImplementedError
net_arg = {
'k_r': k_r,
'G': G,
'k_sec': k_sec,
'inc_sec': inc_sec,
'bw': bw,
'r': r
}
net_arg['init_num_filter'] = init_num_filter
net_arg['init_filter_size'] = init_filter_size
net_arg['init_padding'] = init_padding
return net_arg
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DPN68(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN68"])
return model
def DPN92(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=92, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN92"])
return model
def DPN98(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=98, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN98"])
return model
def DPN107(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=107, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN107"])
return model
def DPN131(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=131, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN131"])
return model
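
# Minimal usage sketch (illustrative only): DPN68 maps a [N, 3, 224, 224]
# batch to [N, class_num] logits; pretrained=True would download the weights
# listed in MODEL_URLS instead.
if __name__ == "__main__":
    import paddle

    model = DPN68(pretrained=False)
    y = model(paddle.randn([2, 3, 224, 224]))
    print(y.shape)  # [2, 1000]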

@ -0,0 +1,976 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
import math
import collections
import re
import copy
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"EfficientNetB0_small":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
"EfficientNetB0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
"EfficientNetB1":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
"EfficientNetB2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
"EfficientNetB3":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
"EfficientNetB4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
"EfficientNetB5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
"EfficientNetB6":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
"EfficientNetB7":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
GlobalParams = collections.namedtuple('GlobalParams', [
'batch_norm_momentum',
'batch_norm_epsilon',
'dropout_rate',
'num_classes',
'width_coefficient',
'depth_coefficient',
'depth_divisor',
'min_depth',
'drop_connect_rate',
])
BlockArgs = collections.namedtuple('BlockArgs', [
'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
'expand_ratio', 'id_skip', 'stride', 'se_ratio'
])
GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields)
def efficientnet_params(model_name):
""" Map EfficientNet model name to parameter coefficients. """
params_dict = {
# Coefficients: width,depth,resolution,dropout
'efficientnet-b0': (1.0, 1.0, 224, 0.2),
'efficientnet-b1': (1.0, 1.1, 240, 0.2),
'efficientnet-b2': (1.1, 1.2, 260, 0.3),
'efficientnet-b3': (1.2, 1.4, 300, 0.3),
'efficientnet-b4': (1.4, 1.8, 380, 0.4),
'efficientnet-b5': (1.6, 2.2, 456, 0.4),
'efficientnet-b6': (1.8, 2.6, 528, 0.5),
'efficientnet-b7': (2.0, 3.1, 600, 0.5),
}
return params_dict[model_name]
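# Example reading of the table above (illustrative): 'efficientnet-b2' maps to
# width x1.1, depth x1.2, a 260x260 input and dropout 0.3; the width factor
# scales channel counts via round_filters and the depth factor scales repeat
# counts via round_repeats below.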
def efficientnet(width_coefficient=None,
depth_coefficient=None,
dropout_rate=0.2,
drop_connect_rate=0.2):
""" Get block arguments according to parameter and coefficients. """
blocks_args = [
'r1_k3_s11_e1_i32_o16_se0.25',
'r2_k3_s22_e6_i16_o24_se0.25',
'r2_k5_s22_e6_i24_o40_se0.25',
'r3_k3_s22_e6_i40_o80_se0.25',
'r3_k5_s11_e6_i80_o112_se0.25',
'r4_k5_s22_e6_i112_o192_se0.25',
'r1_k3_s11_e6_i192_o320_se0.25',
]
blocks_args = BlockDecoder.decode(blocks_args)
global_params = GlobalParams(
batch_norm_momentum=0.99,
batch_norm_epsilon=1e-3,
dropout_rate=dropout_rate,
drop_connect_rate=drop_connect_rate,
num_classes=1000,
width_coefficient=width_coefficient,
depth_coefficient=depth_coefficient,
depth_divisor=8,
min_depth=None)
return blocks_args, global_params
def get_model_params(model_name, override_params):
""" Get the block args and global params for a given model """
if model_name.startswith('efficientnet'):
w, d, _, p = efficientnet_params(model_name)
blocks_args, global_params = efficientnet(
width_coefficient=w, depth_coefficient=d, dropout_rate=p)
else:
raise NotImplementedError('model name is not pre-defined: %s' %
model_name)
if override_params:
global_params = global_params._replace(**override_params)
return blocks_args, global_params
def round_filters(filters, global_params):
""" Calculate and round number of filters based on depth multiplier. """
multiplier = global_params.width_coefficient
if not multiplier:
return filters
divisor = global_params.depth_divisor
min_depth = global_params.min_depth
filters *= multiplier
min_depth = min_depth or divisor
new_filters = max(min_depth,
int(filters + divisor / 2) // divisor * divisor)
if new_filters < 0.9 * filters: # prevent rounding by more than 10%
new_filters += divisor
return int(new_filters)
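# Worked example (illustrative): with width_coefficient=1.1 and
# depth_divisor=8, filters=32 -> 32 * 1.1 = 35.2 -> int(35.2 + 4) // 8 * 8 = 32;
# since 32 >= 0.9 * 35.2 the divisor is not added back, so B2 keeps 32 filters.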
def round_repeats(repeats, global_params):
""" Round number of filters based on depth multiplier. """
multiplier = global_params.depth_coefficient
if not multiplier:
return repeats
return int(math.ceil(multiplier * repeats))
class BlockDecoder(object):
"""
Block Decoder, straight from the official TensorFlow repository.
"""
@staticmethod
def _decode_block_string(block_string):
""" Gets a block through a string notation of arguments. """
assert isinstance(block_string, str)
ops = block_string.split('_')
options = {}
for op in ops:
splits = re.split(r'(\d.*)', op)
if len(splits) >= 2:
key, value = splits[:2]
options[key] = value
# Check stride
cond_1 = ('s' in options and len(options['s']) == 1)
cond_2 = ((len(options['s']) == 2) and
(options['s'][0] == options['s'][1]))
assert (cond_1 or cond_2)
return BlockArgs(
kernel_size=int(options['k']),
num_repeat=int(options['r']),
input_filters=int(options['i']),
output_filters=int(options['o']),
expand_ratio=int(options['e']),
id_skip=('noskip' not in block_string),
se_ratio=float(options['se']) if 'se' in options else None,
stride=[int(options['s'][0])])
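    # Example (illustrative): 'r2_k5_s22_e6_i24_o40_se0.25' decodes to
    # BlockArgs(kernel_size=5, num_repeat=2, input_filters=24,
    #           output_filters=40, expand_ratio=6, id_skip=True,
    #           stride=[2], se_ratio=0.25).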
@staticmethod
def _encode_block_string(block):
"""Encodes a block to a string."""
args = [
'r%d' % block.num_repeat, 'k%d' % block.kernel_size, 's%d%d' %
            (block.stride[0], block.stride[0]), 'e%s' % block.expand_ratio,
'i%d' % block.input_filters, 'o%d' % block.output_filters
]
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
args.append('se%s' % block.se_ratio)
if block.id_skip is False:
args.append('noskip')
return '_'.join(args)
@staticmethod
def decode(string_list):
"""
Decode a list of string notations to specify blocks in the network.
string_list: list of strings, each string is a notation of block
return
list of BlockArgs namedtuples of block args
"""
assert isinstance(string_list, list)
blocks_args = []
for block_string in string_list:
blocks_args.append(BlockDecoder._decode_block_string(block_string))
return blocks_args
@staticmethod
def encode(blocks_args):
"""
Encodes a list of BlockArgs to a list of strings.
:param blocks_args: a list of BlockArgs namedtuples of block args
:return: a list of strings, each string is a notation of block
"""
block_strings = []
for block in blocks_args:
block_strings.append(BlockDecoder._encode_block_string(block))
return block_strings
def initial_type(name, use_bias=False):
param_attr = ParamAttr(name=name + "_weights")
if use_bias:
bias_attr = ParamAttr(name=name + "_offset")
else:
bias_attr = False
return param_attr, bias_attr
def init_batch_norm_layer(name="batch_norm"):
param_attr = ParamAttr(name=name + "_scale")
bias_attr = ParamAttr(name=name + "_offset")
return param_attr, bias_attr
def init_fc_layer(name="fc"):
param_attr = ParamAttr(name=name + "_weights")
bias_attr = ParamAttr(name=name + "_offset")
return param_attr, bias_attr
def cal_padding(img_size, stride, filter_size, dilation=1):
"""Calculate padding size."""
if img_size % stride == 0:
out_size = max(filter_size - stride, 0)
else:
out_size = max(filter_size - (img_size % stride), 0)
return out_size // 2, out_size - out_size // 2
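# Example (illustrative): a 224x224 input, stride 2 and a 3x3 filter give
# out_size = max(3 - 2, 0) = 1, i.e. the asymmetric "SAME" padding (0, 1);
# Conv2ds below over-pads symmetrically and crops to emulate this asymmetry.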
inp_shape = {
"b0_small": [224, 112, 112, 56, 28, 14, 14, 7],
"b0": [224, 112, 112, 56, 28, 14, 14, 7],
"b1": [240, 120, 120, 60, 30, 15, 15, 8],
"b2": [260, 130, 130, 65, 33, 17, 17, 9],
"b3": [300, 150, 150, 75, 38, 19, 19, 10],
"b4": [380, 190, 190, 95, 48, 24, 24, 12],
"b5": [456, 228, 228, 114, 57, 29, 29, 15],
"b6": [528, 264, 264, 132, 66, 33, 33, 17],
"b7": [600, 300, 300, 150, 75, 38, 38, 19]
}
def _drop_connect(inputs, prob, is_test):
if is_test:
output = inputs
else:
keep_prob = 1.0 - prob
inputs_shape = paddle.shape(inputs)
random_tensor = keep_prob + paddle.rand(
shape=[inputs_shape[0], 1, 1, 1])
binary_tensor = paddle.floor(random_tensor)
output = paddle.multiply(inputs, binary_tensor) / keep_prob
return output
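# Note: _drop_connect implements per-sample stochastic depth. During training
# each example's branch is zeroed with probability `prob`, and survivors are
# rescaled by 1 / keep_prob so the expected activation matches test time.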
class Conv2ds(nn.Layer):
def __init__(self,
input_channels,
output_channels,
filter_size,
stride=1,
padding=0,
groups=None,
name="conv2d",
act=None,
use_bias=False,
padding_type=None,
model_name=None,
cur_stage=None):
super(Conv2ds, self).__init__()
assert act in [None, "swish", "sigmoid"]
self.act = act
param_attr, bias_attr = initial_type(name=name, use_bias=use_bias)
def get_padding(filter_size, stride=1, dilation=1):
padding = ((stride - 1) + dilation * (filter_size - 1)) // 2
return padding
        inps = 1 if model_name is None and cur_stage is None else inp_shape[
            model_name][cur_stage]
self.need_crop = False
if padding_type == "SAME":
top_padding, bottom_padding = cal_padding(inps, stride,
filter_size)
left_padding, right_padding = cal_padding(inps, stride,
filter_size)
height_padding = bottom_padding
width_padding = right_padding
if top_padding != bottom_padding or left_padding != right_padding:
height_padding = top_padding + stride
width_padding = left_padding + stride
self.need_crop = True
padding = [height_padding, width_padding]
elif padding_type == "VALID":
height_padding = 0
width_padding = 0
padding = [height_padding, width_padding]
elif padding_type == "DYNAMIC":
padding = get_padding(filter_size, stride)
else:
padding = padding_type
groups = 1 if groups is None else groups
self._conv = Conv2D(
input_channels,
output_channels,
filter_size,
groups=groups,
stride=stride,
# act=act,
padding=padding,
weight_attr=param_attr,
bias_attr=bias_attr)
def forward(self, inputs):
x = self._conv(inputs)
if self.act == "swish":
x = F.swish(x)
elif self.act == "sigmoid":
x = F.sigmoid(x)
if self.need_crop:
x = x[:, :, 1:, 1:]
return x
class ConvBNLayer(nn.Layer):
def __init__(self,
input_channels,
filter_size,
output_channels,
stride=1,
num_groups=1,
padding_type="SAME",
conv_act=None,
bn_act="swish",
use_bn=True,
use_bias=False,
name=None,
conv_name=None,
bn_name=None,
model_name=None,
cur_stage=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2ds(
input_channels=input_channels,
output_channels=output_channels,
filter_size=filter_size,
stride=stride,
groups=num_groups,
act=conv_act,
padding_type=padding_type,
name=conv_name,
use_bias=use_bias,
model_name=model_name,
cur_stage=cur_stage)
self.use_bn = use_bn
if use_bn is True:
bn_name = name + bn_name
param_attr, bias_attr = init_batch_norm_layer(bn_name)
self._bn = BatchNorm(
num_channels=output_channels,
act=bn_act,
momentum=0.99,
epsilon=0.001,
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance",
param_attr=param_attr,
bias_attr=bias_attr)
def forward(self, inputs):
if self.use_bn:
x = self._conv(inputs)
x = self._bn(x)
return x
else:
return self._conv(inputs)
class ExpandConvNorm(nn.Layer):
def __init__(self,
input_channels,
block_args,
padding_type,
name=None,
model_name=None,
cur_stage=None):
super(ExpandConvNorm, self).__init__()
self.oup = block_args.input_filters * block_args.expand_ratio
self.expand_ratio = block_args.expand_ratio
if self.expand_ratio != 1:
self._conv = ConvBNLayer(
input_channels,
1,
self.oup,
bn_act=None,
padding_type=padding_type,
name=name,
conv_name=name + "_expand_conv",
bn_name="_bn0",
model_name=model_name,
cur_stage=cur_stage)
def forward(self, inputs):
if self.expand_ratio != 1:
return self._conv(inputs)
else:
return inputs
class DepthwiseConvNorm(nn.Layer):
def __init__(self,
input_channels,
block_args,
padding_type,
name=None,
model_name=None,
cur_stage=None):
super(DepthwiseConvNorm, self).__init__()
self.k = block_args.kernel_size
self.s = block_args.stride
if isinstance(self.s, list) or isinstance(self.s, tuple):
self.s = self.s[0]
oup = block_args.input_filters * block_args.expand_ratio
self._conv = ConvBNLayer(
input_channels,
self.k,
oup,
self.s,
num_groups=input_channels,
bn_act=None,
padding_type=padding_type,
name=name,
conv_name=name + "_depthwise_conv",
bn_name="_bn1",
model_name=model_name,
cur_stage=cur_stage)
def forward(self, inputs):
return self._conv(inputs)
class ProjectConvNorm(nn.Layer):
def __init__(self,
input_channels,
block_args,
padding_type,
name=None,
model_name=None,
cur_stage=None):
super(ProjectConvNorm, self).__init__()
final_oup = block_args.output_filters
self._conv = ConvBNLayer(
input_channels,
1,
final_oup,
bn_act=None,
padding_type=padding_type,
name=name,
conv_name=name + "_project_conv",
bn_name="_bn2",
model_name=model_name,
cur_stage=cur_stage)
def forward(self, inputs):
return self._conv(inputs)
class SEBlock(nn.Layer):
def __init__(self,
input_channels,
num_squeezed_channels,
oup,
padding_type,
name=None,
model_name=None,
cur_stage=None):
super(SEBlock, self).__init__()
self._pool = AdaptiveAvgPool2D(1)
self._conv1 = Conv2ds(
input_channels,
num_squeezed_channels,
1,
use_bias=True,
padding_type=padding_type,
act="swish",
name=name + "_se_reduce")
self._conv2 = Conv2ds(
num_squeezed_channels,
oup,
1,
act="sigmoid",
use_bias=True,
padding_type=padding_type,
name=name + "_se_expand")
def forward(self, inputs):
x = self._pool(inputs)
x = self._conv1(x)
x = self._conv2(x)
out = paddle.multiply(inputs, x)
return out
class MbConvBlock(nn.Layer):
def __init__(self,
input_channels,
block_args,
padding_type,
use_se,
name=None,
drop_connect_rate=None,
model_name=None,
cur_stage=None):
super(MbConvBlock, self).__init__()
oup = block_args.input_filters * block_args.expand_ratio
self.block_args = block_args
self.has_se = use_se and (block_args.se_ratio is not None) and (
0 < block_args.se_ratio <= 1)
self.id_skip = block_args.id_skip
self.expand_ratio = block_args.expand_ratio
self.drop_connect_rate = drop_connect_rate
if self.expand_ratio != 1:
self._ecn = ExpandConvNorm(
input_channels,
block_args,
padding_type=padding_type,
name=name,
model_name=model_name,
cur_stage=cur_stage)
self._dcn = DepthwiseConvNorm(
input_channels * block_args.expand_ratio,
block_args,
padding_type=padding_type,
name=name,
model_name=model_name,
cur_stage=cur_stage)
if self.has_se:
num_squeezed_channels = max(
1, int(block_args.input_filters * block_args.se_ratio))
self._se = SEBlock(
input_channels * block_args.expand_ratio,
num_squeezed_channels,
oup,
padding_type=padding_type,
name=name,
model_name=model_name,
cur_stage=cur_stage)
self._pcn = ProjectConvNorm(
input_channels * block_args.expand_ratio,
block_args,
padding_type=padding_type,
name=name,
model_name=model_name,
cur_stage=cur_stage)
def forward(self, inputs):
x = inputs
if self.expand_ratio != 1:
x = self._ecn(x)
x = F.swish(x)
x = self._dcn(x)
x = F.swish(x)
if self.has_se:
x = self._se(x)
x = self._pcn(x)
if self.id_skip and \
self.block_args.stride == 1 and \
self.block_args.input_filters == self.block_args.output_filters:
if self.drop_connect_rate:
x = _drop_connect(x, self.drop_connect_rate, not self.training)
x = paddle.add(x, inputs)
return x
class ConvStemNorm(nn.Layer):
def __init__(self,
input_channels,
padding_type,
_global_params,
name=None,
model_name=None,
cur_stage=None):
super(ConvStemNorm, self).__init__()
output_channels = round_filters(32, _global_params)
self._conv = ConvBNLayer(
input_channels,
filter_size=3,
output_channels=output_channels,
stride=2,
bn_act=None,
padding_type=padding_type,
name="",
conv_name="_conv_stem",
bn_name="_bn0",
model_name=model_name,
cur_stage=cur_stage)
def forward(self, inputs):
return self._conv(inputs)
class ExtractFeatures(nn.Layer):
def __init__(self,
input_channels,
_block_args,
_global_params,
padding_type,
use_se,
model_name=None):
super(ExtractFeatures, self).__init__()
self._global_params = _global_params
self._conv_stem = ConvStemNorm(
input_channels,
padding_type=padding_type,
_global_params=_global_params,
model_name=model_name,
cur_stage=0)
self.block_args_copy = copy.deepcopy(_block_args)
idx = 0
block_size = 0
for block_arg in self.block_args_copy:
block_arg = block_arg._replace(
input_filters=round_filters(block_arg.input_filters,
_global_params),
output_filters=round_filters(block_arg.output_filters,
_global_params),
num_repeat=round_repeats(block_arg.num_repeat, _global_params))
block_size += 1
for _ in range(block_arg.num_repeat - 1):
block_size += 1
self.conv_seq = []
cur_stage = 1
for block_args in _block_args:
block_args = block_args._replace(
input_filters=round_filters(block_args.input_filters,
_global_params),
output_filters=round_filters(block_args.output_filters,
_global_params),
num_repeat=round_repeats(block_args.num_repeat,
_global_params))
drop_connect_rate = self._global_params.drop_connect_rate
if drop_connect_rate:
drop_connect_rate *= float(idx) / block_size
_mc_block = self.add_sublayer(
"_blocks." + str(idx) + ".",
MbConvBlock(
block_args.input_filters,
block_args=block_args,
padding_type=padding_type,
use_se=use_se,
name="_blocks." + str(idx) + ".",
drop_connect_rate=drop_connect_rate,
model_name=model_name,
cur_stage=cur_stage))
self.conv_seq.append(_mc_block)
idx += 1
if block_args.num_repeat > 1:
block_args = block_args._replace(
input_filters=block_args.output_filters, stride=1)
for _ in range(block_args.num_repeat - 1):
drop_connect_rate = self._global_params.drop_connect_rate
if drop_connect_rate:
drop_connect_rate *= float(idx) / block_size
_mc_block = self.add_sublayer(
"block." + str(idx) + ".",
MbConvBlock(
block_args.input_filters,
block_args,
padding_type=padding_type,
use_se=use_se,
name="_blocks." + str(idx) + ".",
drop_connect_rate=drop_connect_rate,
model_name=model_name,
cur_stage=cur_stage))
self.conv_seq.append(_mc_block)
idx += 1
cur_stage += 1
def forward(self, inputs):
x = self._conv_stem(inputs)
x = F.swish(x)
for _mc_block in self.conv_seq:
x = _mc_block(x)
return x
class EfficientNet(nn.Layer):
def __init__(self,
name="b0",
padding_type="SAME",
override_params=None,
use_se=True,
class_num=1000):
super(EfficientNet, self).__init__()
model_name = 'efficientnet-' + name
self.name = name
self._block_args, self._global_params = get_model_params(
model_name, override_params)
self.padding_type = padding_type
self.use_se = use_se
self._ef = ExtractFeatures(
3,
self._block_args,
self._global_params,
self.padding_type,
self.use_se,
model_name=self.name)
output_channels = round_filters(1280, self._global_params)
if name == "b0_small" or name == "b0" or name == "b1":
oup = 320
elif name == "b2":
oup = 352
elif name == "b3":
oup = 384
elif name == "b4":
oup = 448
elif name == "b5":
oup = 512
elif name == "b6":
oup = 576
elif name == "b7":
oup = 640
self._conv = ConvBNLayer(
oup,
1,
output_channels,
bn_act="swish",
padding_type=self.padding_type,
name="",
conv_name="_conv_head",
bn_name="_bn1",
model_name=self.name,
cur_stage=7)
self._pool = AdaptiveAvgPool2D(1)
if self._global_params.dropout_rate:
self._drop = Dropout(
p=self._global_params.dropout_rate, mode="upscale_in_train")
param_attr, bias_attr = init_fc_layer("_fc")
self._fc = Linear(
output_channels,
class_num,
weight_attr=param_attr,
bias_attr=bias_attr)
def forward(self, inputs):
x = self._ef(inputs)
x = self._conv(x)
x = self._pool(x)
if self._global_params.dropout_rate:
x = self._drop(x)
x = paddle.squeeze(x, axis=[2, 3])
x = self._fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def EfficientNetB0_small(padding_type='DYNAMIC',
override_params=None,
use_se=False,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"])
return model
def EfficientNetB0(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"])
return model
def EfficientNetB1(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b1',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"])
return model
def EfficientNetB2(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b2',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"])
return model
def EfficientNetB3(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b3',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"])
return model
def EfficientNetB4(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b4',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"])
return model
def EfficientNetB5(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b5',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"])
return model
def EfficientNetB6(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b6',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"])
return model
def EfficientNetB7(padding_type='SAME',
override_params=None,
use_se=True,
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b7',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
return model
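
# Minimal usage sketch (illustrative only): each variant expects the input
# resolution listed in inp_shape, e.g. 224 for B0, 260 for B2 and 600 for B7,
# because the "SAME" padding is precomputed per stage from those sizes.
if __name__ == "__main__":
    import paddle

    model = EfficientNetB0(pretrained=False)
    y = model(paddle.randn([1, 3, 224, 224]))
    print(y.shape)  # [1, 1000]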

@ -0,0 +1,363 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"GhostNet_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
"GhostNet_x1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
"GhostNet_x1_3":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
initializer=KaimingNormal(), name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(
name=bn_name + "_scale", regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset", regularizer=L2Decay(0.0)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class SEBlock(nn.Layer):
def __init__(self, num_channels, reduction_ratio=4, name=None):
super(SEBlock, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
stdv = 1.0 / math.sqrt(num_channels * 1.0)
med_ch = num_channels // reduction_ratio
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_channels,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
bias_attr=ParamAttr(name=name + "_2_offset"))
def forward(self, inputs):
pool = self.pool2d_gap(inputs)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = paddle.clip(x=excitation, min=0, max=1)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = paddle.multiply(inputs, excitation)
return out
class GhostModule(nn.Layer):
def __init__(self,
in_channels,
output_channels,
kernel_size=1,
ratio=2,
dw_size=3,
stride=1,
relu=True,
name=None):
super(GhostModule, self).__init__()
init_channels = int(math.ceil(output_channels / ratio))
new_channels = int(init_channels * (ratio - 1))
self.primary_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=init_channels,
kernel_size=kernel_size,
stride=stride,
groups=1,
act="relu" if relu else None,
name=name + "_primary_conv")
self.cheap_operation = ConvBNLayer(
in_channels=init_channels,
out_channels=new_channels,
kernel_size=dw_size,
stride=1,
groups=init_channels,
act="relu" if relu else None,
name=name + "_cheap_operation")
def forward(self, inputs):
x = self.primary_conv(inputs)
y = self.cheap_operation(x)
out = paddle.concat([x, y], axis=1)
return out
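# Example (illustrative): with output_channels=64 and ratio=2, GhostModule
# spends a dense 1x1 conv on init_channels=32 and a cheap depthwise conv on
# new_channels=32; concatenating both restores 64 channels at roughly half
# the cost of a dense 64-channel conv.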
class GhostBottleneck(nn.Layer):
def __init__(self,
in_channels,
hidden_dim,
output_channels,
kernel_size,
stride,
use_se,
name=None):
super(GhostBottleneck, self).__init__()
self._stride = stride
self._use_se = use_se
self._num_channels = in_channels
self._output_channels = output_channels
self.ghost_module_1 = GhostModule(
in_channels=in_channels,
output_channels=hidden_dim,
kernel_size=1,
stride=1,
relu=True,
name=name + "_ghost_module_1")
if stride == 2:
self.depthwise_conv = ConvBNLayer(
in_channels=hidden_dim,
out_channels=hidden_dim,
kernel_size=kernel_size,
stride=stride,
groups=hidden_dim,
act=None,
name=name +
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
if use_se:
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
self.ghost_module_2 = GhostModule(
in_channels=hidden_dim,
output_channels=output_channels,
kernel_size=1,
relu=False,
name=name + "_ghost_module_2")
if stride != 1 or in_channels != output_channels:
self.shortcut_depthwise = ConvBNLayer(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
groups=in_channels,
act=None,
name=name +
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
)
self.shortcut_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=1,
stride=1,
groups=1,
act=None,
name=name + "_shortcut_conv")
def forward(self, inputs):
x = self.ghost_module_1(inputs)
if self._stride == 2:
x = self.depthwise_conv(x)
if self._use_se:
x = self.se_block(x)
x = self.ghost_module_2(x)
if self._stride == 1 and self._num_channels == self._output_channels:
shortcut = inputs
else:
shortcut = self.shortcut_depthwise(inputs)
shortcut = self.shortcut_conv(shortcut)
return paddle.add(x=x, y=shortcut)
class GhostNet(nn.Layer):
def __init__(self, scale, class_num=1000):
super(GhostNet, self).__init__()
self.cfgs = [
# k, t, c, SE, s
[3, 16, 16, 0, 1],
[3, 48, 24, 0, 2],
[3, 72, 24, 0, 1],
[5, 72, 40, 1, 2],
[5, 120, 40, 1, 1],
[3, 240, 80, 0, 2],
[3, 200, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 184, 80, 0, 1],
[3, 480, 112, 1, 1],
[3, 672, 112, 1, 1],
[5, 672, 160, 1, 2],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1],
[5, 960, 160, 0, 1],
[5, 960, 160, 1, 1]
]
self.scale = scale
output_channels = int(self._make_divisible(16 * self.scale, 4))
self.conv1 = ConvBNLayer(
in_channels=3,
out_channels=output_channels,
kernel_size=3,
stride=2,
groups=1,
act="relu",
name="conv1")
# build inverted residual blocks
idx = 0
self.ghost_bottleneck_list = []
for k, exp_size, c, use_se, s in self.cfgs:
in_channels = output_channels
output_channels = int(self._make_divisible(c * self.scale, 4))
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
ghost_bottleneck = self.add_sublayer(
name="_ghostbottleneck_" + str(idx),
sublayer=GhostBottleneck(
in_channels=in_channels,
hidden_dim=hidden_dim,
output_channels=output_channels,
kernel_size=k,
stride=s,
use_se=use_se,
name="_ghostbottleneck_" + str(idx)))
self.ghost_bottleneck_list.append(ghost_bottleneck)
idx += 1
# build last several layers
in_channels = output_channels
output_channels = int(self._make_divisible(exp_size * self.scale, 4))
self.conv_last = ConvBNLayer(
in_channels=in_channels,
out_channels=output_channels,
kernel_size=1,
stride=1,
groups=1,
act="relu",
name="conv_last")
self.pool2d_gap = AdaptiveAvgPool2D(1)
in_channels = output_channels
self._fc0_output_channels = 1280
self.fc_0 = ConvBNLayer(
in_channels=in_channels,
out_channels=self._fc0_output_channels,
kernel_size=1,
stride=1,
act="relu",
name="fc_0")
self.dropout = nn.Dropout(p=0.2)
stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0)
self.fc_1 = Linear(
self._fc0_output_channels,
class_num,
weight_attr=ParamAttr(
name="fc_1_weights", initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name="fc_1_offset"))
def forward(self, inputs):
x = self.conv1(inputs)
for ghost_bottleneck in self.ghost_bottleneck_list:
x = ghost_bottleneck(x)
x = self.conv_last(x)
x = self.pool2d_gap(x)
x = self.fc_0(x)
x = self.dropout(x)
x = paddle.reshape(x, shape=[-1, self._fc0_output_channels])
x = self.fc_1(x)
return x
def _make_divisible(self, v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
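# Example (illustrative): _make_divisible(16 * 0.5, 4) computes
# max(4, int(8 + 2) // 4 * 4) = 8, so GhostNet_x0_5 starts from an 8-channel
# stem; the 10% guard only adds `divisor` back when rounding loses too much.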
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=0.5, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
return model
def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
return model
def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.3, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
return model
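
# Minimal usage sketch (illustrative only): `scale` multiplies every channel
# width in cfgs, so GhostNet_x0_5 costs roughly a quarter of the multiply-adds
# of GhostNet_x1_0.
if __name__ == "__main__":
    import paddle

    model = GhostNet_x1_0(pretrained=False)
    y = model(paddle.randn([1, 3, 224, 224]))
    print(y.shape)  # [1, 1000]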

@ -0,0 +1,229 @@
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"GoogLeNet":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def xavier(channels, filter_size, name):
stdv = (3.0 / (filter_size**2 * channels))**0.5
param_attr = ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_weights")
return param_attr
class ConvLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
def forward(self, inputs):
y = self._conv(inputs)
return y
class Inception(nn.Layer):
def __init__(self,
input_channels,
output_channels,
filter1,
filter3R,
filter3,
filter5R,
filter5,
proj,
name=None):
super(Inception, self).__init__()
self._conv1 = ConvLayer(
input_channels, filter1, 1, name="inception_" + name + "_1x1")
self._conv3r = ConvLayer(
input_channels,
filter3R,
1,
name="inception_" + name + "_3x3_reduce")
self._conv3 = ConvLayer(
filter3R, filter3, 3, name="inception_" + name + "_3x3")
self._conv5r = ConvLayer(
input_channels,
filter5R,
1,
name="inception_" + name + "_5x5_reduce")
self._conv5 = ConvLayer(
filter5R, filter5, 5, name="inception_" + name + "_5x5")
self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1)
self._convprj = ConvLayer(
input_channels, proj, 1, name="inception_" + name + "_3x3_proj")
def forward(self, inputs):
conv1 = self._conv1(inputs)
conv3r = self._conv3r(inputs)
conv3 = self._conv3(conv3r)
conv5r = self._conv5r(inputs)
conv5 = self._conv5(conv5r)
pool = self._pool(inputs)
convprj = self._convprj(pool)
cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1)
cat = F.relu(cat)
return cat
class GoogLeNetDY(nn.Layer):
def __init__(self, class_num=1000):
super(GoogLeNetDY, self).__init__()
self._conv = ConvLayer(3, 64, 7, 2, name="conv1")
self._pool = MaxPool2D(kernel_size=3, stride=2)
self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1")
self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3")
self._ince3a = Inception(
192, 192, 64, 96, 128, 16, 32, 32, name="ince3a")
self._ince3b = Inception(
256, 256, 128, 128, 192, 32, 96, 64, name="ince3b")
self._ince4a = Inception(
480, 480, 192, 96, 208, 16, 48, 64, name="ince4a")
self._ince4b = Inception(
512, 512, 160, 112, 224, 24, 64, 64, name="ince4b")
self._ince4c = Inception(
512, 512, 128, 128, 256, 24, 64, 64, name="ince4c")
self._ince4d = Inception(
512, 512, 112, 144, 288, 32, 64, 64, name="ince4d")
self._ince4e = Inception(
528, 528, 256, 160, 320, 32, 128, 128, name="ince4e")
self._ince5a = Inception(
832, 832, 256, 160, 320, 32, 128, 128, name="ince5a")
self._ince5b = Inception(
832, 832, 384, 192, 384, 48, 128, 128, name="ince5b")
self._pool_5 = AdaptiveAvgPool2D(1)
self._drop = Dropout(p=0.4, mode="downscale_in_infer")
self._fc_out = Linear(
1024,
class_num,
weight_attr=xavier(1024, 1, "out"),
bias_attr=ParamAttr(name="out_offset"))
self._pool_o1 = AvgPool2D(kernel_size=5, stride=3)
self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1")
self._fc_o1 = Linear(
1152,
1024,
weight_attr=xavier(2048, 1, "fc_o1"),
bias_attr=ParamAttr(name="fc_o1_offset"))
self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
self._out1 = Linear(
1024,
class_num,
weight_attr=xavier(1024, 1, "out1"),
bias_attr=ParamAttr(name="out1_offset"))
self._pool_o2 = AvgPool2D(kernel_size=5, stride=3)
self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2")
self._fc_o2 = Linear(
1152,
1024,
weight_attr=xavier(2048, 1, "fc_o2"),
bias_attr=ParamAttr(name="fc_o2_offset"))
self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
self._out2 = Linear(
1024,
class_num,
weight_attr=xavier(1024, 1, "out2"),
bias_attr=ParamAttr(name="out2_offset"))
def forward(self, inputs):
x = self._conv(inputs)
x = self._pool(x)
x = self._conv_1(x)
x = self._conv_2(x)
x = self._pool(x)
x = self._ince3a(x)
x = self._ince3b(x)
x = self._pool(x)
ince4a = self._ince4a(x)
x = self._ince4b(ince4a)
x = self._ince4c(x)
ince4d = self._ince4d(x)
x = self._ince4e(ince4d)
x = self._pool(x)
x = self._ince5a(x)
ince5b = self._ince5b(x)
x = self._pool_5(ince5b)
x = self._drop(x)
x = paddle.squeeze(x, axis=[2, 3])
out = self._fc_out(x)
x = self._pool_o1(ince4a)
x = self._conv_o1(x)
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
x = self._fc_o1(x)
x = F.relu(x)
x = self._drop_o1(x)
out1 = self._out1(x)
x = self._pool_o2(ince4d)
x = self._conv_o2(x)
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
x = self._fc_o2(x)
x = self._drop_o2(x)
out2 = self._out2(x)
return [out, out1, out2]
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
model = GoogLeNetDY(**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
return model
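
# Minimal usage sketch (illustrative only): GoogLeNet returns three logits
# tensors, the main head plus two auxiliary heads fed from ince4a and ince4d;
# training losses are typically a weighted sum over all three.
if __name__ == "__main__":
    import paddle

    model = GoogLeNet(pretrained=False)
    out, out1, out2 = model(paddle.randn([1, 3, 224, 224]))
    print(out.shape, out1.shape, out2.shape)  # each [1, 1000]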

@ -0,0 +1,693 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/Meituan-AutoML/Twins
from functools import partial
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.regularizer import L2Decay
from .vision_transformer import trunc_normal_, normal_, zeros_, ones_, to_2tuple, DropPath, Identity, Mlp
from .vision_transformer import Block as ViTBlock
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"pcpvt_small":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
"pcpvt_base":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
"pcpvt_large":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
"alt_gvt_small":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
"alt_gvt_base":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
"alt_gvt_large":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class GroupAttention(nn.Layer):
"""LSA: self attention within a group.
"""
def __init__(self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.,
proj_drop=0.,
ws=1):
super().__init__()
if ws == 1:
raise Exception("ws {ws} should not be 1")
if dim % num_heads != 0:
            raise Exception(
                f"dim {dim} should be divided by num_heads {num_heads}.")
self.dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.ws = ws
def forward(self, x, H, W):
B, N, C = x.shape
h_group, w_group = H // self.ws, W // self.ws
total_groups = h_group * w_group
x = x.reshape([B, h_group, self.ws, w_group, self.ws, C]).transpose(
[0, 1, 3, 2, 4, 5])
qkv = self.qkv(x).reshape([
B, total_groups, self.ws**2, 3, self.num_heads, C // self.num_heads
]).transpose([3, 0, 1, 4, 2, 5])
q, k, v = qkv[0], qkv[1], qkv[2]
attn = paddle.matmul(q, k.transpose([0, 1, 2, 4, 3])) * self.scale
attn = nn.Softmax(axis=-1)(attn)
attn = self.attn_drop(attn)
attn = paddle.matmul(attn, v).transpose([0, 1, 3, 2, 4]).reshape(
[B, h_group, w_group, self.ws, self.ws, C])
x = attn.transpose([0, 1, 3, 2, 4, 5]).reshape([B, N, C])
x = self.proj(x)
x = self.proj_drop(x)
return x
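# Shape walk-through for GroupAttention (illustrative): with H = W = 56 and
# ws = 7 the sequence splits into 8 x 8 = 64 windows of 7 x 7 = 49 tokens and
# attention runs within each window, so cost scales as N * ws^2 rather than
# N^2 for the N = 3136 tokens.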
class Attention(nn.Layer):
"""GSA: using a key to summarize the information for a group to be efficient.
"""
def __init__(self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.,
proj_drop=0.,
sr_ratio=1):
super().__init__()
assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
self.dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.sr_ratio = sr_ratio
if sr_ratio > 1:
self.sr = nn.Conv2D(
dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
self.norm = nn.LayerNorm(dim)
def forward(self, x, H, W):
B, N, C = x.shape
q = self.q(x).reshape(
[B, N, self.num_heads, C // self.num_heads]).transpose(
[0, 2, 1, 3])
if self.sr_ratio > 1:
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
tmp_n = H * W // self.sr_ratio**2
x_ = self.sr(x_).reshape([B, C, tmp_n]).transpose([0, 2, 1])
x_ = self.norm(x_)
kv = self.kv(x_).reshape(
[B, tmp_n, 2, self.num_heads, C // self.num_heads]).transpose(
[2, 0, 3, 1, 4])
else:
kv = self.kv(x).reshape(
[B, N, 2, self.num_heads, C // self.num_heads]).transpose(
[2, 0, 3, 1, 4])
k, v = kv[0], kv[1]
attn = paddle.matmul(q, k.transpose([0, 1, 3, 2])) * self.scale
attn = nn.Softmax(axis=-1)(attn)
attn = self.attn_drop(attn)
x = paddle.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, C])
x = self.proj(x)
x = self.proj_drop(x)
return x
class Block(nn.Layer):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
sr_ratio=1):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop,
sr_ratio=sr_ratio)
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop)
def forward(self, x, H, W):
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
class SBlock(ViTBlock):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
sr_ratio=1):
super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
attn_drop, drop_path, act_layer, norm_layer)
def forward(self, x, H, W):
return super().forward(x)
class GroupBlock(ViTBlock):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
sr_ratio=1,
ws=1):
super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
attn_drop, drop_path, act_layer, norm_layer)
del self.attn
if ws == 1:
self.attn = Attention(dim, num_heads, qkv_bias, qk_scale,
attn_drop, drop, sr_ratio)
else:
self.attn = GroupAttention(dim, num_heads, qkv_bias, qk_scale,
attn_drop, drop, ws)
def forward(self, x, H, W):
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
class PatchEmbed(nn.Layer):
""" Image to Patch Embedding.
"""
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
super().__init__()
if img_size % patch_size != 0:
raise Exception(
f"img_size {img_size} should be divided by patch_size {patch_size}."
)
img_size = to_2tuple(img_size)
patch_size = to_2tuple(patch_size)
self.img_size = img_size
self.patch_size = patch_size
self.H, self.W = img_size[0] // patch_size[0], img_size[
1] // patch_size[1]
self.num_patches = self.H * self.W
self.proj = nn.Conv2D(
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
self.norm = nn.LayerNorm(embed_dim)
def forward(self, x):
B, C, H, W = x.shape
x = self.proj(x).flatten(2).transpose([0, 2, 1])
x = self.norm(x)
H, W = H // self.patch_size[0], W // self.patch_size[1]
return x, (H, W)
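# Example (illustrative): with the patch_size=4 used by CPVTV2 below, a
# [B, 3, 224, 224] input yields x of shape [B, 56 * 56, embed_dim] plus
# (H, W) = (56, 56); later stages re-embed with patch_size=2, halving H and W.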
# borrowed from PVT: https://github.com/whai362/PVT.git
class PyramidVisionTransformer(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
block_cls=Block):
super().__init__()
self.class_num = class_num
self.depths = depths
# patch_embed
self.patch_embeds = nn.LayerList()
self.pos_embeds = nn.ParameterList()
self.pos_drops = nn.LayerList()
self.blocks = nn.LayerList()
for i in range(len(depths)):
if i == 0:
self.patch_embeds.append(
PatchEmbed(img_size, patch_size, in_chans, embed_dims[i]))
else:
self.patch_embeds.append(
PatchEmbed(img_size // patch_size // 2**(i - 1), 2,
embed_dims[i - 1], embed_dims[i]))
patch_num = self.patch_embeds[i].num_patches + 1 if i == len(
embed_dims) - 1 else self.patch_embeds[i].num_patches
self.pos_embeds.append(
self.create_parameter(
shape=[1, patch_num, embed_dims[i]],
default_initializer=zeros_))
self.pos_drops.append(nn.Dropout(p=drop_rate))
dpr = [
x.numpy()[0]
for x in paddle.linspace(0, drop_path_rate, sum(depths))
] # stochastic depth decay rule
cur = 0
for k in range(len(depths)):
_block = nn.LayerList([
block_cls(
dim=embed_dims[k],
num_heads=num_heads[k],
mlp_ratio=mlp_ratios[k],
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[cur + i],
norm_layer=norm_layer,
sr_ratio=sr_ratios[k]) for i in range(depths[k])
])
self.blocks.append(_block)
cur += depths[k]
self.norm = norm_layer(embed_dims[-1])
# cls_token
self.cls_token = self.create_parameter(
shape=[1, 1, embed_dims[-1]],
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
# classification head
self.head = nn.Linear(embed_dims[-1],
class_num) if class_num > 0 else Identity()
# init weights
for pos_emb in self.pos_embeds:
trunc_normal_(pos_emb)
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
def forward_features(self, x):
B = x.shape[0]
for i in range(len(self.depths)):
x, (H, W) = self.patch_embeds[i](x)
if i == len(self.depths) - 1:
cls_tokens = self.cls_token.expand([B, -1, -1])
                x = paddle.concat([cls_tokens, x], axis=1)
x = x + self.pos_embeds[i]
x = self.pos_drops[i](x)
for blk in self.blocks[i]:
x = blk(x, H, W)
if i < len(self.depths) - 1:
                x = x.reshape([B, H, W, -1]).transpose([0, 3, 1, 2])
x = self.norm(x)
return x[:, 0]
def forward(self, x):
x = self.forward_features(x)
x = self.head(x)
return x
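# Stage-by-stage summary of the pyramid above: each stage re-embeds the
# feature map into patches, adds a learned position embedding, and runs its
# transformer blocks; between stages the token sequence is folded back into a
# [B, C, H, W] map so the next PatchEmbed can downsample it by 2. Only the
# last stage prepends the cls token, whose final state feeds the head.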
# PEG from https://arxiv.org/abs/2102.10882
class PosCNN(nn.Layer):
def __init__(self, in_chans, embed_dim=768, s=1):
super().__init__()
self.proj = nn.Sequential(
nn.Conv2D(
in_chans,
embed_dim,
3,
s,
1,
bias_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)),
groups=embed_dim,
weight_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)), ))
self.s = s
def forward(self, x, H, W):
B, N, C = x.shape
feat_token = x
cnn_feat = feat_token.transpose([0, 2, 1]).reshape([B, C, H, W])
if self.s == 1:
x = self.proj(cnn_feat) + cnn_feat
else:
x = self.proj(cnn_feat)
x = x.flatten(2).transpose([0, 2, 1])
return x
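# PEG in a nutshell: the tokens are folded back to a [B, C, H, W] map and
# passed through a 3x3 depthwise convolution, so each token's positional
# encoding depends on its spatial neighbourhood rather than on a fixed-size
# learned table; with s == 1 the input is added back as a residual.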
class CPVTV2(PyramidVisionTransformer):
"""
    CPVT-style variant that adopts two useful results from CPVT: PEG and GAP.
    The cls token is therefore no longer required.
    PEG encodes the absolute position on the fly, which helps considerably
    when the input resolution changes between training and inference
    (e.g. for segmentation or detection).
"""
def __init__(self,
img_size=224,
patch_size=4,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
block_cls=Block):
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
attn_drop_rate, drop_path_rate, norm_layer, depths,
sr_ratios, block_cls)
del self.pos_embeds
del self.cls_token
self.pos_block = nn.LayerList(
[PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims])
self.apply(self._init_weights)
def _init_weights(self, m):
import math
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
elif isinstance(m, nn.Conv2D):
fan_out = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
fan_out //= m._groups
normal_(0, math.sqrt(2.0 / fan_out))(m.weight)
if m.bias is not None:
zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2D):
            ones_(m.weight)
            zeros_(m.bias)
def forward_features(self, x):
B = x.shape[0]
for i in range(len(self.depths)):
x, (H, W) = self.patch_embeds[i](x)
x = self.pos_drops[i](x)
for j, blk in enumerate(self.blocks[i]):
x = blk(x, H, W)
if j == 0:
x = self.pos_block[i](x, H, W) # PEG here
if i < len(self.depths) - 1:
x = x.reshape([B, H, W, x.shape[-1]]).transpose([0, 3, 1, 2])
x = self.norm(x)
return x.mean(axis=1) # GAP here
class PCPVT(CPVTV2):
def __init__(self,
img_size=224,
patch_size=4,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256],
num_heads=[1, 2, 4],
mlp_ratios=[4, 4, 4],
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[4, 4, 4],
sr_ratios=[4, 2, 1],
block_cls=SBlock):
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
attn_drop_rate, drop_path_rate, norm_layer, depths,
sr_ratios, block_cls)
class ALTGVT(PCPVT):
"""
alias Twins-SVT
"""
def __init__(self,
img_size=224,
patch_size=4,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256],
num_heads=[1, 2, 4],
mlp_ratios=[4, 4, 4],
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[4, 4, 4],
sr_ratios=[4, 2, 1],
block_cls=GroupBlock,
wss=[7, 7, 7]):
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
attn_drop_rate, drop_path_rate, norm_layer, depths,
sr_ratios, block_cls)
del self.blocks
self.wss = wss
# transformer encoder
dpr = [
x.numpy()[0]
for x in paddle.linspace(0, drop_path_rate, sum(depths))
] # stochastic depth decay rule
cur = 0
self.blocks = nn.LayerList()
for k in range(len(depths)):
_block = nn.LayerList([
block_cls(
dim=embed_dims[k],
num_heads=num_heads[k],
mlp_ratio=mlp_ratios[k],
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[cur + i],
norm_layer=norm_layer,
sr_ratio=sr_ratios[k],
ws=1 if i % 2 == 1 else wss[k]) for i in range(depths[k])
])
self.blocks.append(_block)
cur += depths[k]
self.apply(self._init_weights)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def pcpvt_small(pretrained=False, use_ssld=False, **kwargs):
model = CPVTV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
return model
def pcpvt_base(pretrained=False, use_ssld=False, **kwargs):
model = CPVTV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 4, 18, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
return model
def pcpvt_large(pretrained=False, use_ssld=False, **kwargs):
model = CPVTV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 8, 27, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
return model
def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs):
model = ALTGVT(
patch_size=4,
embed_dims=[64, 128, 256, 512],
num_heads=[2, 4, 8, 16],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[2, 2, 10, 4],
wss=[7, 7, 7, 7],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
return model
def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs):
model = ALTGVT(
patch_size=4,
embed_dims=[96, 192, 384, 768],
num_heads=[3, 6, 12, 24],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[2, 2, 18, 2],
wss=[7, 7, 7, 7],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
return model
def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs):
model = ALTGVT(
patch_size=4,
embed_dims=[128, 256, 512, 1024],
num_heads=[4, 8, 16, 32],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[2, 2, 18, 2],
wss=[7, 7, 7, 7],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
return model
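# Minimal smoke-test sketch (not part of the library API): builds the
# smallest PCPVT variant and checks the output shape. Assumes paddle is
# installed and that a single 224x224 forward pass fits in memory.
if __name__ == "__main__":
    model = pcpvt_small(pretrained=False)
    model.eval()
    out = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 1000]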

@ -0,0 +1,293 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/PingoLH/Pytorch-HarDNet
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
'HarDNet39_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
'HarDNet68_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams',
'HarDNet68':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams',
'HarDNet85':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
}
__all__ = list(MODEL_URLS.keys())
def ConvLayer(in_channels,
out_channels,
kernel_size=3,
stride=1,
bias_attr=False):
layer = nn.Sequential(
('conv', nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=kernel_size // 2,
groups=1,
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
('relu', nn.ReLU6()))
return layer
def DWConvLayer(in_channels,
out_channels,
kernel_size=3,
stride=1,
bias_attr=False):
layer = nn.Sequential(
('dwconv', nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=1,
groups=out_channels,
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
return layer
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
layer = nn.Sequential(
('layer1', ConvLayer(
in_channels, out_channels, kernel_size=kernel_size)),
('layer2', DWConvLayer(
out_channels, out_channels, stride=stride)))
return layer
class HarDBlock(nn.Layer):
def __init__(self,
in_channels,
growth_rate,
grmul,
n_layers,
keepBase=False,
residual_out=False,
dwconv=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0 # if upsample else in_channels
for i in range(n_layers):
outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
grmul)
self.links.append(link)
if dwconv:
layers_.append(CombConvLayer(inch, outch))
else:
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
# print("Blk out =",self.out_channels)
self.layers = nn.LayerList(layers_)
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2**i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
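    # Worked example of the harmonic linking rule above (assumed values
    # growth_rate=16, grmul=1.7): layer 4 links back to layers 3, 2 and 0
    # (k = 4 - 2**i for i = 0, 1, 2), and its width grows to
    # 16 * 1.7**2 = 46.24, which rounds down to the even number 46.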
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = paddle.concat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
out_.append(layers_[i])
out = paddle.concat(out_, 1)
return out
class HarDNet(nn.Layer):
def __init__(self,
depth_wise=False,
arch=85,
class_num=1000,
with_pool=True):
super().__init__()
first_ch = [32, 64]
second_kernel = 3
max_pool = True
grmul = 1.7
drop_rate = 0.1
# HarDNet68
ch_list = [128, 256, 320, 640, 1024]
gr = [14, 16, 20, 40, 160]
n_layers = [8, 16, 16, 16, 4]
downSamp = [1, 0, 1, 1, 0]
if arch == 85:
# HarDNet85
first_ch = [48, 96]
ch_list = [192, 256, 320, 480, 720, 1280]
gr = [24, 24, 28, 36, 48, 256]
n_layers = [8, 16, 16, 16, 16, 4]
downSamp = [1, 0, 1, 0, 1, 0]
drop_rate = 0.2
elif arch == 39:
# HarDNet39
first_ch = [24, 48]
ch_list = [96, 320, 640, 1024]
grmul = 1.6
gr = [16, 20, 64, 160]
n_layers = [4, 16, 8, 4]
downSamp = [1, 1, 1, 0]
if depth_wise:
second_kernel = 1
max_pool = False
drop_rate = 0.05
blks = len(n_layers)
self.base = nn.LayerList([])
# First Layer: Standard Conv3x3, Stride=2
self.base.append(
ConvLayer(
in_channels=3,
out_channels=first_ch[0],
kernel_size=3,
stride=2,
bias_attr=False))
# Second Layer
self.base.append(
ConvLayer(
first_ch[0], first_ch[1], kernel_size=second_kernel))
# Maxpooling or DWConv3x3 downsampling
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
else:
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
# Build all HarDNet blocks
ch = first_ch[1]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
ch = blk.out_channels
self.base.append(blk)
if i == blks - 1 and arch == 85:
self.base.append(nn.Dropout(0.1))
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
ch = ch_list[i]
if downSamp[i] == 1:
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
else:
self.base.append(DWConvLayer(ch, ch, stride=2))
ch = ch_list[blks - 1]
layers = []
if with_pool:
layers.append(nn.AdaptiveAvgPool2D((1, 1)))
if class_num > 0:
layers.append(nn.Flatten())
layers.append(nn.Dropout(drop_rate))
layers.append(nn.Linear(ch, class_num))
self.base.append(nn.Sequential(*layers))
def forward(self, x):
for layer in self.base:
x = layer(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HarDNet39_ds(pretrained=False, **kwargs):
model = HarDNet(arch=39, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
return model
def HarDNet68_ds(pretrained=False, **kwargs):
model = HarDNet(arch=68, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
return model
def HarDNet68(pretrained=False, **kwargs):
model = HarDNet(arch=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
return model
def HarDNet85(pretrained=False, **kwargs):
model = HarDNet(arch=85, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
return model
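# Minimal smoke-test sketch (not part of the library API): builds HarDNet68
# and checks the logits shape on a random batch. Assumes paddle is installed.
if __name__ == "__main__":
    model = HarDNet68(pretrained=False)
    model.eval()
    logits = model(paddle.rand([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 1000]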

@ -0,0 +1,477 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"InceptionV4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
padding=0,
groups=1,
act='relu',
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class InceptionStem(nn.Layer):
def __init__(self):
super(InceptionStem, self).__init__()
self._conv_1 = ConvBNLayer(
3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2")
self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1")
self._conv_3 = ConvBNLayer(
32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1")
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
self._conv2 = ConvBNLayer(
64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2")
self._conv1_1 = ConvBNLayer(
160, 64, 1, act="relu", name="inception_stem2_3x3_reduce")
self._conv1_2 = ConvBNLayer(
64, 96, 3, act="relu", name="inception_stem2_3x3")
self._conv2_1 = ConvBNLayer(
160, 64, 1, act="relu", name="inception_stem2_1x7_reduce")
self._conv2_2 = ConvBNLayer(
64,
64, (7, 1),
padding=(3, 0),
act="relu",
name="inception_stem2_1x7")
self._conv2_3 = ConvBNLayer(
64,
64, (1, 7),
padding=(0, 3),
act="relu",
name="inception_stem2_7x1")
self._conv2_4 = ConvBNLayer(
64, 96, 3, act="relu", name="inception_stem2_3x3_2")
self._conv3 = ConvBNLayer(
192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2")
def forward(self, inputs):
conv = self._conv_1(inputs)
conv = self._conv_2(conv)
conv = self._conv_3(conv)
pool1 = self._pool(conv)
conv2 = self._conv2(conv)
concat = paddle.concat([pool1, conv2], axis=1)
conv1 = self._conv1_1(concat)
conv1 = self._conv1_2(conv1)
conv2 = self._conv2_1(concat)
conv2 = self._conv2_2(conv2)
conv2 = self._conv2_3(conv2)
conv2 = self._conv2_4(conv2)
concat = paddle.concat([conv1, conv2], axis=1)
conv1 = self._conv3(concat)
pool1 = self._pool(concat)
concat = paddle.concat([conv1, pool1], axis=1)
return concat
class InceptionA(nn.Layer):
def __init__(self, name):
super(InceptionA, self).__init__()
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
self._conv1 = ConvBNLayer(
384, 96, 1, act="relu", name="inception_a" + name + "_1x1")
self._conv2 = ConvBNLayer(
384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2")
self._conv3_1 = ConvBNLayer(
384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce")
self._conv3_2 = ConvBNLayer(
64,
96,
3,
padding=1,
act="relu",
name="inception_a" + name + "_3x3")
self._conv4_1 = ConvBNLayer(
384,
64,
1,
act="relu",
name="inception_a" + name + "_3x3_2_reduce")
self._conv4_2 = ConvBNLayer(
64,
96,
3,
padding=1,
act="relu",
name="inception_a" + name + "_3x3_2")
self._conv4_3 = ConvBNLayer(
96,
96,
3,
padding=1,
act="relu",
name="inception_a" + name + "_3x3_3")
def forward(self, inputs):
pool1 = self._pool(inputs)
conv1 = self._conv1(pool1)
conv2 = self._conv2(inputs)
conv3 = self._conv3_1(inputs)
conv3 = self._conv3_2(conv3)
conv4 = self._conv4_1(inputs)
conv4 = self._conv4_2(conv4)
conv4 = self._conv4_3(conv4)
concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
return concat
class ReductionA(nn.Layer):
def __init__(self):
super(ReductionA, self).__init__()
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
self._conv2 = ConvBNLayer(
384, 384, 3, stride=2, act="relu", name="reduction_a_3x3")
self._conv3_1 = ConvBNLayer(
384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce")
self._conv3_2 = ConvBNLayer(
192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2")
self._conv3_3 = ConvBNLayer(
224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3")
def forward(self, inputs):
pool1 = self._pool(inputs)
conv2 = self._conv2(inputs)
conv3 = self._conv3_1(inputs)
conv3 = self._conv3_2(conv3)
conv3 = self._conv3_3(conv3)
concat = paddle.concat([pool1, conv2, conv3], axis=1)
return concat
class InceptionB(nn.Layer):
def __init__(self, name=None):
super(InceptionB, self).__init__()
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
self._conv1 = ConvBNLayer(
1024, 128, 1, act="relu", name="inception_b" + name + "_1x1")
self._conv2 = ConvBNLayer(
1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2")
self._conv3_1 = ConvBNLayer(
1024,
192,
1,
act="relu",
name="inception_b" + name + "_1x7_reduce")
self._conv3_2 = ConvBNLayer(
192,
224, (1, 7),
padding=(0, 3),
act="relu",
name="inception_b" + name + "_1x7")
self._conv3_3 = ConvBNLayer(
224,
256, (7, 1),
padding=(3, 0),
act="relu",
name="inception_b" + name + "_7x1")
self._conv4_1 = ConvBNLayer(
1024,
192,
1,
act="relu",
name="inception_b" + name + "_7x1_2_reduce")
self._conv4_2 = ConvBNLayer(
192,
192, (1, 7),
padding=(0, 3),
act="relu",
name="inception_b" + name + "_1x7_2")
self._conv4_3 = ConvBNLayer(
192,
224, (7, 1),
padding=(3, 0),
act="relu",
name="inception_b" + name + "_7x1_2")
self._conv4_4 = ConvBNLayer(
224,
224, (1, 7),
padding=(0, 3),
act="relu",
name="inception_b" + name + "_1x7_3")
self._conv4_5 = ConvBNLayer(
224,
256, (7, 1),
padding=(3, 0),
act="relu",
name="inception_b" + name + "_7x1_3")
def forward(self, inputs):
pool1 = self._pool(inputs)
conv1 = self._conv1(pool1)
conv2 = self._conv2(inputs)
conv3 = self._conv3_1(inputs)
conv3 = self._conv3_2(conv3)
conv3 = self._conv3_3(conv3)
conv4 = self._conv4_1(inputs)
conv4 = self._conv4_2(conv4)
conv4 = self._conv4_3(conv4)
conv4 = self._conv4_4(conv4)
conv4 = self._conv4_5(conv4)
concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
return concat
class ReductionB(nn.Layer):
def __init__(self):
super(ReductionB, self).__init__()
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
self._conv2_1 = ConvBNLayer(
1024, 192, 1, act="relu", name="reduction_b_3x3_reduce")
self._conv2_2 = ConvBNLayer(
192, 192, 3, stride=2, act="relu", name="reduction_b_3x3")
self._conv3_1 = ConvBNLayer(
1024, 256, 1, act="relu", name="reduction_b_1x7_reduce")
self._conv3_2 = ConvBNLayer(
256,
256, (1, 7),
padding=(0, 3),
act="relu",
name="reduction_b_1x7")
self._conv3_3 = ConvBNLayer(
256,
320, (7, 1),
padding=(3, 0),
act="relu",
name="reduction_b_7x1")
self._conv3_4 = ConvBNLayer(
320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2")
def forward(self, inputs):
pool1 = self._pool(inputs)
conv2 = self._conv2_1(inputs)
conv2 = self._conv2_2(conv2)
conv3 = self._conv3_1(inputs)
conv3 = self._conv3_2(conv3)
conv3 = self._conv3_3(conv3)
conv3 = self._conv3_4(conv3)
concat = paddle.concat([pool1, conv2, conv3], axis=1)
return concat
class InceptionC(nn.Layer):
def __init__(self, name=None):
super(InceptionC, self).__init__()
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
self._conv1 = ConvBNLayer(
1536, 256, 1, act="relu", name="inception_c" + name + "_1x1")
self._conv2 = ConvBNLayer(
1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2")
self._conv3_0 = ConvBNLayer(
1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3")
self._conv3_1 = ConvBNLayer(
384,
256, (1, 3),
padding=(0, 1),
act="relu",
name="inception_c" + name + "_1x3")
self._conv3_2 = ConvBNLayer(
384,
256, (3, 1),
padding=(1, 0),
act="relu",
name="inception_c" + name + "_3x1")
self._conv4_0 = ConvBNLayer(
1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4")
self._conv4_00 = ConvBNLayer(
384,
448, (1, 3),
padding=(0, 1),
act="relu",
name="inception_c" + name + "_1x3_2")
self._conv4_000 = ConvBNLayer(
448,
512, (3, 1),
padding=(1, 0),
act="relu",
name="inception_c" + name + "_3x1_2")
self._conv4_1 = ConvBNLayer(
512,
256, (1, 3),
padding=(0, 1),
act="relu",
name="inception_c" + name + "_1x3_3")
self._conv4_2 = ConvBNLayer(
512,
256, (3, 1),
padding=(1, 0),
act="relu",
name="inception_c" + name + "_3x1_3")
def forward(self, inputs):
pool1 = self._pool(inputs)
conv1 = self._conv1(pool1)
conv2 = self._conv2(inputs)
conv3 = self._conv3_0(inputs)
conv3_1 = self._conv3_1(conv3)
conv3_2 = self._conv3_2(conv3)
conv4 = self._conv4_0(inputs)
conv4 = self._conv4_00(conv4)
conv4 = self._conv4_000(conv4)
conv4_1 = self._conv4_1(conv4)
conv4_2 = self._conv4_2(conv4)
concat = paddle.concat(
[conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1)
return concat
class InceptionV4DY(nn.Layer):
def __init__(self, class_num=1000):
super(InceptionV4DY, self).__init__()
self._inception_stem = InceptionStem()
self._inceptionA_1 = InceptionA(name="1")
self._inceptionA_2 = InceptionA(name="2")
self._inceptionA_3 = InceptionA(name="3")
self._inceptionA_4 = InceptionA(name="4")
self._reductionA = ReductionA()
self._inceptionB_1 = InceptionB(name="1")
self._inceptionB_2 = InceptionB(name="2")
self._inceptionB_3 = InceptionB(name="3")
self._inceptionB_4 = InceptionB(name="4")
self._inceptionB_5 = InceptionB(name="5")
self._inceptionB_6 = InceptionB(name="6")
self._inceptionB_7 = InceptionB(name="7")
self._reductionB = ReductionB()
self._inceptionC_1 = InceptionC(name="1")
self._inceptionC_2 = InceptionC(name="2")
self._inceptionC_3 = InceptionC(name="3")
self.avg_pool = AdaptiveAvgPool2D(1)
self._drop = Dropout(p=0.2, mode="downscale_in_infer")
stdv = 1.0 / math.sqrt(1536 * 1.0)
self.out = Linear(
1536,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="final_fc_weights"),
bias_attr=ParamAttr(name="final_fc_offset"))
def forward(self, inputs):
x = self._inception_stem(inputs)
x = self._inceptionA_1(x)
x = self._inceptionA_2(x)
x = self._inceptionA_3(x)
x = self._inceptionA_4(x)
x = self._reductionA(x)
x = self._inceptionB_1(x)
x = self._inceptionB_2(x)
x = self._inceptionB_3(x)
x = self._inceptionB_4(x)
x = self._inceptionB_5(x)
x = self._inceptionB_6(x)
x = self._inceptionB_7(x)
x = self._reductionB(x)
x = self._inceptionC_1(x)
x = self._inceptionC_2(x)
x = self._inceptionC_3(x)
x = self.avg_pool(x)
x = paddle.squeeze(x, axis=[2, 3])
x = self._drop(x)
x = self.out(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
model = InceptionV4DY(**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
return model

@ -0,0 +1,589 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/facebookresearch/LeViT
import itertools
import math
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant
from paddle.regularizer import L2Decay
from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"LeViT_128S":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
"LeViT_128":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
"LeViT_192":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
"LeViT_256":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
"LeViT_384":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def cal_attention_biases(attention_biases, attention_bias_idxs):
gather_list = []
attention_bias_t = paddle.transpose(attention_biases, (1, 0))
nums = attention_bias_idxs.shape[0]
for idx in range(nums):
gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx])
gather_list.append(gather)
shape0, shape1 = attention_bias_idxs.shape
gather = paddle.concat(gather_list)
return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1))
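# cal_attention_biases expands the compact (num_heads, num_offsets) bias table
# into a full (num_heads, N_query, N_key) tensor: for each query row it
# gathers the bias entries indexed by the relative-offset ids, so every token
# pair sharing the same offset shares one learned scalar per head.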
class Conv2d_BN(nn.Sequential):
def __init__(self,
a,
b,
ks=1,
stride=1,
pad=0,
dilation=1,
groups=1,
bn_weight_init=1,
resolution=-10000):
super().__init__()
self.add_sublayer(
'c',
nn.Conv2D(
a, b, ks, stride, pad, dilation, groups, bias_attr=False))
bn = nn.BatchNorm2D(b)
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
class Linear_BN(nn.Sequential):
def __init__(self, a, b, bn_weight_init=1):
super().__init__()
self.add_sublayer('c', nn.Linear(a, b, bias_attr=False))
bn = nn.BatchNorm1D(b)
if bn_weight_init == 0:
zeros_(bn.weight)
else:
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
def forward(self, x):
l, bn = self._sub_layers.values()
x = l(x)
return paddle.reshape(bn(x.flatten(0, 1)), x.shape)
class BN_Linear(nn.Sequential):
def __init__(self, a, b, bias=True, std=0.02):
super().__init__()
self.add_sublayer('bn', nn.BatchNorm1D(a))
l = nn.Linear(a, b, bias_attr=bias)
trunc_normal_(l.weight)
if bias:
zeros_(l.bias)
self.add_sublayer('l', l)
def b16(n, activation, resolution=224):
return nn.Sequential(
Conv2d_BN(
3, n // 8, 3, 2, 1, resolution=resolution),
activation(),
Conv2d_BN(
n // 8, n // 4, 3, 2, 1, resolution=resolution // 2),
activation(),
Conv2d_BN(
n // 4, n // 2, 3, 2, 1, resolution=resolution // 4),
activation(),
Conv2d_BN(
n // 2, n, 3, 2, 1, resolution=resolution // 8))
class Residual(nn.Layer):
def __init__(self, m, drop):
super().__init__()
self.m = m
self.drop = drop
    def forward(self, x):
        if self.training and self.drop > 0:
            # Per-sample stochastic depth: drop the residual branch with
            # probability `drop` and rescale the kept samples so the
            # expected output is unchanged.
            mask = (paddle.rand(shape=[x.shape[0], 1, 1]) >=
                    self.drop).astype("float32")
            mask = mask / (1 - self.drop)
            return paddle.add(x, self.m(x) * mask)
        else:
            return paddle.add(x, self.m(x))
class Attention(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads=8,
attn_ratio=4,
activation=None,
resolution=14):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * num_heads
self.attn_ratio = attn_ratio
self.h = self.dh + nh_kd * 2
self.qkv = Linear_BN(dim, self.h)
self.proj = nn.Sequential(
activation(), Linear_BN(
self.dh, dim, bn_weight_init=0))
points = list(itertools.product(range(resolution), range(resolution)))
N = len(points)
attention_offsets = {}
idxs = []
for p1 in points:
for p2 in points:
offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs, [N, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
    def forward(self, x):
        # Force the training branch below: Paddle's eval() does not invoke
        # this PyTorch-style train(mode) override, so the cached `self.ab`
        # may never be built; recomputing the biases keeps inference correct.
        self.training = True
B, N, C = x.shape
qkv = self.qkv(x)
qkv = paddle.reshape(qkv,
[B, N, self.num_heads, self.h // self.num_heads])
q, k, v = paddle.split(
qkv, [self.key_dim, self.key_dim, self.d], axis=3)
q = paddle.transpose(q, perm=[0, 2, 1, 3])
k = paddle.transpose(k, perm=[0, 2, 1, 3])
v = paddle.transpose(v, perm=[0, 2, 1, 3])
k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases)
attn = F.softmax(attn)
x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3])
x = paddle.reshape(x, [B, N, self.dh])
x = self.proj(x)
return x
class Subsample(nn.Layer):
def __init__(self, stride, resolution):
super().__init__()
self.stride = stride
self.resolution = resolution
def forward(self, x):
B, N, C = x.shape
x = paddle.reshape(x, [B, self.resolution, self.resolution, C])
end1, end2 = x.shape[1], x.shape[2]
x = x[:, 0:end1:self.stride, 0:end2:self.stride]
x = paddle.reshape(x, [B, -1, C])
return x
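# Example (illustrative values): with resolution=14 and stride=2 the 196
# input tokens are laid out as a 14x14 grid and every second row and column
# is kept, leaving a 7x7 grid of 49 query tokens for the subsampled attention.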
class AttentionSubsample(nn.Layer):
def __init__(self,
in_dim,
out_dim,
key_dim,
num_heads=8,
attn_ratio=2,
activation=None,
stride=2,
resolution=14,
resolution_=7):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * self.num_heads
self.attn_ratio = attn_ratio
self.resolution_ = resolution_
self.resolution_2 = resolution_**2
self.training = True
h = self.dh + nh_kd
self.kv = Linear_BN(in_dim, h)
self.q = nn.Sequential(
Subsample(stride, resolution), Linear_BN(in_dim, nh_kd))
self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim))
self.stride = stride
self.resolution = resolution
points = list(itertools.product(range(resolution), range(resolution)))
points_ = list(
itertools.product(range(resolution_), range(resolution_)))
N = len(points)
N_ = len(points_)
attention_offsets = {}
idxs = []
i = 0
j = 0
for p1 in points_:
i += 1
for p2 in points:
j += 1
size = 1
offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2),
abs(p1[1] * stride - p2[1] + (size - 1) / 2))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs_, [N_, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
    def forward(self, x):
        # Same workaround as in Attention.forward: always recompute the
        # biases, since Paddle's eval() never calls the train(mode) override
        # that would populate `self.ab`.
        self.training = True
B, N, C = x.shape
kv = self.kv(x)
kv = paddle.reshape(kv, [B, N, self.num_heads, -1])
k, v = paddle.split(kv, [self.key_dim, self.d], axis=3)
k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC
v = paddle.transpose(v, perm=[0, 2, 1, 3])
q = paddle.reshape(
self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim])
q = paddle.transpose(q, perm=[0, 2, 1, 3])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = (paddle.matmul(
q, paddle.transpose(
k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases
attn = F.softmax(attn)
x = paddle.reshape(
paddle.transpose(
paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
x = self.proj(x)
return x
class LeViT(nn.Layer):
""" Vision Transformer with support for patch or hybrid CNN input stage
"""
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
class_num=1000,
embed_dim=[192],
key_dim=[64],
depth=[12],
num_heads=[3],
attn_ratio=[2],
mlp_ratio=[2],
hybrid_backbone=None,
down_ops=[],
attention_activation=nn.Hardswish,
mlp_activation=nn.Hardswish,
distillation=True,
drop_path=0):
super().__init__()
self.class_num = class_num
self.num_features = embed_dim[-1]
self.embed_dim = embed_dim
self.distillation = distillation
self.patch_embed = hybrid_backbone
self.blocks = []
        # Copy before appending so the shared mutable default `down_ops=[]`
        # is not mutated across LeViT instantiations.
        down_ops = list(down_ops) + [['']]
resolution = img_size // patch_size
for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate(
zip(embed_dim, key_dim, depth, num_heads, attn_ratio,
mlp_ratio, down_ops)):
for _ in range(dpth):
self.blocks.append(
Residual(
Attention(
ed,
kd,
nh,
attn_ratio=ar,
activation=attention_activation,
resolution=resolution, ),
drop_path))
if mr > 0:
h = int(ed * mr)
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(ed, h),
mlp_activation(),
Linear_BN(
h, ed, bn_weight_init=0), ),
drop_path))
if do[0] == 'Subsample':
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
resolution_ = (resolution - 1) // do[5] + 1
self.blocks.append(
AttentionSubsample(
*embed_dim[i:i + 2],
key_dim=do[1],
num_heads=do[2],
attn_ratio=do[3],
activation=attention_activation,
stride=do[5],
resolution=resolution,
resolution_=resolution_))
resolution = resolution_
if do[4] > 0: # mlp_ratio
h = int(embed_dim[i + 1] * do[4])
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(embed_dim[i + 1], h),
mlp_activation(),
Linear_BN(
h, embed_dim[i + 1], bn_weight_init=0), ),
drop_path))
self.blocks = nn.Sequential(*self.blocks)
# Classifier head
self.head = BN_Linear(embed_dim[-1],
class_num) if class_num > 0 else Identity()
if distillation:
self.head_dist = BN_Linear(
embed_dim[-1], class_num) if class_num > 0 else Identity()
def forward(self, x):
x = self.patch_embed(x)
x = x.flatten(2)
x = paddle.transpose(x, perm=[0, 2, 1])
x = self.blocks(x)
x = x.mean(1)
x = paddle.reshape(x, [-1, self.embed_dim[-1]])
if self.distillation:
x = self.head(x), self.head_dist(x)
if not self.training:
x = (x[0] + x[1]) / 2
else:
x = self.head(x)
return x
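# Note on the distillation head above: during training the model returns a
# (head, head_dist) pair so two losses can be attached; at inference the two
# predictions are simply averaged into a single logits tensor.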
def model_factory(C, D, X, N, drop_path, class_num, distillation):
embed_dim = [int(x) for x in C.split('_')]
num_heads = [int(x) for x in N.split('_')]
depth = [int(x) for x in X.split('_')]
act = nn.Hardswish
model = LeViT(
patch_size=16,
embed_dim=embed_dim,
num_heads=num_heads,
key_dim=[D] * 3,
depth=depth,
attn_ratio=[2, 2, 2],
mlp_ratio=[2, 2, 2],
down_ops=[
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
['Subsample', D, embed_dim[0] // D, 4, 2, 2],
['Subsample', D, embed_dim[1] // D, 4, 2, 2],
],
attention_activation=act,
mlp_activation=act,
hybrid_backbone=b16(embed_dim[0], activation=act),
class_num=class_num,
drop_path=drop_path,
distillation=distillation)
return model
specification = {
'LeViT_128S': {
'C': '128_256_384',
'D': 16,
'N': '4_6_8',
'X': '2_3_4',
'drop_path': 0
},
'LeViT_128': {
'C': '128_256_384',
'D': 16,
'N': '4_8_12',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_192': {
'C': '192_288_384',
'D': 32,
'N': '3_5_6',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_256': {
'C': '256_384_512',
'D': 32,
'N': '4_6_8',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_384': {
'C': '384_512_768',
'D': 32,
'N': '6_9_12',
'X': '4_4_4',
'drop_path': 0.1
},
}
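# How the specification strings are decoded by model_factory: 'C' holds the
# three stage widths joined by '_', 'N' the head counts, 'X' the depths, and
# 'D' is the shared key dimension; e.g. LeViT_128S uses stages of width
# 128/256/384 with 4/6/8 heads and 2/3/4 blocks.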
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def LeViT_128S(pretrained=False,
use_ssld=False,
class_num=1000,
distillation=False,
**kwargs):
model = model_factory(
**specification['LeViT_128S'],
class_num=class_num,
distillation=distillation)
_load_pretrained(
pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
return model
def LeViT_128(pretrained=False,
use_ssld=False,
class_num=1000,
distillation=False,
**kwargs):
model = model_factory(
**specification['LeViT_128'],
class_num=class_num,
distillation=distillation)
_load_pretrained(
pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
return model
def LeViT_192(pretrained=False,
use_ssld=False,
class_num=1000,
distillation=False,
**kwargs):
model = model_factory(
**specification['LeViT_192'],
class_num=class_num,
distillation=distillation)
_load_pretrained(
pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
return model
def LeViT_256(pretrained=False,
use_ssld=False,
class_num=1000,
distillation=False,
**kwargs):
model = model_factory(
**specification['LeViT_256'],
class_num=class_num,
distillation=distillation)
_load_pretrained(
pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
return model
def LeViT_384(pretrained=False,
use_ssld=False,
class_num=1000,
distillation=False,
**kwargs):
model = model_factory(
**specification['LeViT_384'],
class_num=class_num,
distillation=distillation)
_load_pretrained(
pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
return model

@ -0,0 +1,815 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MixNet for ImageNet-1K, implemented in Paddle.
Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
import os
from inspect import isfunction
from functools import reduce
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"MixNet_S":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
"MixNet_M":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
"MixNet_L":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class Identity(nn.Layer):
"""
Identity block.
"""
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def round_channels(channels, divisor=8):
"""
Round weighted channel number (make divisible operation).
Parameters:
----------
channels : int or float
Original number of channels.
divisor : int, default 8
Alignment value.
Returns:
-------
int
Weighted number of channels.
"""
rounded_channels = max(
int(channels + divisor / 2.0) // divisor * divisor, divisor)
if float(rounded_channels) < 0.9 * channels:
rounded_channels += divisor
return rounded_channels
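# Worked examples: round_channels(17) -> 16 (nearest multiple of 8, and
# 16 >= 0.9 * 17 so no correction), while round_channels(22) -> 24; a result
# that would shrink below 90% of the original is bumped up by one divisor.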
def get_activation_layer(activation):
"""
Create activation layer from string/function.
Parameters:
----------
activation : function, or str, or nn.Module
Activation function or name of activation function.
Returns:
-------
nn.Module
Activation layer.
"""
assert activation is not None
if isfunction(activation):
return activation()
elif isinstance(activation, str):
if activation == "relu":
return nn.ReLU()
elif activation == "relu6":
return nn.ReLU6()
elif activation == "swish":
return nn.Swish()
elif activation == "hswish":
return nn.Hardswish()
elif activation == "sigmoid":
return nn.Sigmoid()
elif activation == "hsigmoid":
return nn.Hardsigmoid()
elif activation == "identity":
return Identity()
else:
raise NotImplementedError()
else:
assert isinstance(activation, nn.Layer)
return activation
class ConvBlock(nn.Layer):
"""
Standard convolution block with Batch normalization and activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU()
Activation function or name of activation function.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=nn.ReLU()):
super(ConvBlock, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.use_pad = (isinstance(padding, (list, tuple)) and
(len(padding) == 4))
if self.use_pad:
self.pad = padding
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias_attr=bias,
weight_attr=None)
if self.use_bn:
self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
x = self.conv(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
class SEBlock(nn.Layer):
def __init__(self,
channels,
reduction=16,
mid_channels=None,
round_mid=False,
use_conv=True,
mid_activation=nn.ReLU(),
out_activation=nn.Sigmoid()):
super(SEBlock, self).__init__()
self.use_conv = use_conv
if mid_channels is None:
mid_channels = channels // reduction if not round_mid else round_channels(
float(channels) / reduction)
self.pool = nn.AdaptiveAvgPool2D(output_size=1)
if use_conv:
self.conv1 = nn.Conv2D(
in_channels=channels,
out_channels=mid_channels,
kernel_size=1,
stride=1,
groups=1,
bias_attr=True,
weight_attr=None)
else:
self.fc1 = nn.Linear(
in_features=channels, out_features=mid_channels)
self.activ = get_activation_layer(mid_activation)
if use_conv:
self.conv2 = nn.Conv2D(
in_channels=mid_channels,
out_channels=channels,
kernel_size=1,
stride=1,
groups=1,
bias_attr=True,
weight_attr=None)
else:
self.fc2 = nn.Linear(
in_features=mid_channels, out_features=channels)
self.sigmoid = get_activation_layer(out_activation)
def forward(self, x):
w = self.pool(x)
if not self.use_conv:
w = w.reshape(shape=[w.shape[0], -1])
w = self.conv1(w) if self.use_conv else self.fc1(w)
w = self.activ(w)
w = self.conv2(w) if self.use_conv else self.fc2(w)
w = self.sigmoid(w)
if not self.use_conv:
w = w.unsqueeze(2).unsqueeze(3)
x = x * w
return x
class MixConv(nn.Layer):
"""
Mixed convolution layer from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
axis : int, default 1
The axis on which to concatenate the outputs.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
axis=1):
super(MixConv, self).__init__()
kernel_size = kernel_size if isinstance(kernel_size,
list) else [kernel_size]
padding = padding if isinstance(padding, list) else [padding]
kernel_count = len(kernel_size)
self.splitted_in_channels = self.split_channels(in_channels,
kernel_count)
splitted_out_channels = self.split_channels(out_channels, kernel_count)
for i, kernel_size_i in enumerate(kernel_size):
in_channels_i = self.splitted_in_channels[i]
out_channels_i = splitted_out_channels[i]
padding_i = padding[i]
_ = self.add_sublayer(
name=str(i),
sublayer=nn.Conv2D(
in_channels=in_channels_i,
out_channels=out_channels_i,
kernel_size=kernel_size_i,
stride=stride,
padding=padding_i,
dilation=dilation,
groups=(out_channels_i
if out_channels == groups else groups),
bias_attr=bias,
weight_attr=None))
self.axis = axis
def forward(self, x):
        xx = paddle.split(x, self.splitted_in_channels, axis=self.axis)
out = [
conv_i(x_i) for x_i, conv_i in zip(xx, self._sub_layers.values())
]
x = paddle.concat(tuple(out), axis=self.axis)
return x
@staticmethod
def split_channels(channels, kernel_count):
splitted_channels = [channels // kernel_count] * kernel_count
splitted_channels[0] += channels - sum(splitted_channels)
return splitted_channels
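    # Example: split_channels(26, 3) -> [10, 8, 8]; the remainder of the
    # even split is folded into the first group so the widths always sum to
    # the requested channel count.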
class MixConvBlock(nn.Layer):
"""
Mixed convolution block with Batch normalization and activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU()
Activation function or name of activation function.
activate : bool, default True
Whether activate the convolution block.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=nn.ReLU()):
super(MixConvBlock, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.conv = MixConv(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias)
if self.use_bn:
self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
x = self.conv(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
def mixconv1x1_block(in_channels,
out_channels,
kernel_count,
stride=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=nn.ReLU()):
"""
1x1 version of the mixed convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_count : int
Kernel count.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str, or None, default nn.ReLU()
Activation function or name of activation function.
"""
return MixConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=([1] * kernel_count),
stride=stride,
padding=([0] * kernel_count),
groups=groups,
bias=bias,
use_bn=use_bn,
bn_eps=bn_eps,
activation=activation)
class MixUnit(nn.Layer):
"""
MixNet unit.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
        Number of output channels.
    exp_channels : int
        Number of middle (expanded) channels.
stride : int or tuple/list of 2 int
Strides of the second convolution layer.
exp_kernel_count : int
Expansion convolution kernel count for each unit.
conv1_kernel_count : int
Conv1 kernel count for each unit.
conv2_kernel_count : int
Conv2 kernel count for each unit.
exp_factor : int
Expansion factor for each unit.
se_factor : int
SE reduction factor for each unit.
activation : str
Activation function or name of activation function.
"""
def __init__(self, in_channels, out_channels, stride, exp_kernel_count,
conv1_kernel_count, conv2_kernel_count, exp_factor, se_factor,
activation):
super(MixUnit, self).__init__()
assert exp_factor >= 1
assert se_factor >= 0
self.residual = (in_channels == out_channels) and (stride == 1)
self.use_se = se_factor > 0
mid_channels = exp_factor * in_channels
self.use_exp_conv = exp_factor > 1
if self.use_exp_conv:
if exp_kernel_count == 1:
self.exp_conv = ConvBlock(
in_channels=in_channels,
out_channels=mid_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=activation)
else:
self.exp_conv = mixconv1x1_block(
in_channels=in_channels,
out_channels=mid_channels,
kernel_count=exp_kernel_count,
activation=activation)
if conv1_kernel_count == 1:
self.conv1 = ConvBlock(
in_channels=mid_channels,
out_channels=mid_channels,
kernel_size=3,
stride=stride,
padding=1,
dilation=1,
groups=mid_channels,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=activation)
else:
self.conv1 = MixConvBlock(
in_channels=mid_channels,
out_channels=mid_channels,
kernel_size=[3 + 2 * i for i in range(conv1_kernel_count)],
stride=stride,
padding=[1 + i for i in range(conv1_kernel_count)],
groups=mid_channels,
activation=activation)
if self.use_se:
self.se = SEBlock(
channels=mid_channels,
reduction=(exp_factor * se_factor),
round_mid=False,
mid_activation=activation)
if conv2_kernel_count == 1:
self.conv2 = ConvBlock(
in_channels=mid_channels,
out_channels=out_channels,
activation=None,
kernel_size=1,
stride=1,
padding=0,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5)
else:
self.conv2 = mixconv1x1_block(
in_channels=mid_channels,
out_channels=out_channels,
kernel_count=conv2_kernel_count,
activation=None)
def forward(self, x):
if self.residual:
identity = x
if self.use_exp_conv:
x = self.exp_conv(x)
x = self.conv1(x)
if self.use_se:
x = self.se(x)
x = self.conv2(x)
if self.residual:
x = x + identity
return x
class MixInitBlock(nn.Layer):
"""
MixNet specific initial block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
"""
def __init__(self, in_channels, out_channels):
super(MixInitBlock, self).__init__()
self.conv1 = ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
stride=2,
kernel_size=3,
padding=1)
self.conv2 = MixUnit(
in_channels=out_channels,
out_channels=out_channels,
stride=1,
exp_kernel_count=1,
conv1_kernel_count=1,
conv2_kernel_count=1,
exp_factor=1,
se_factor=0,
activation="relu")
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
return x
class MixNet(nn.Layer):
"""
MixNet model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
Parameters:
----------
channels : list of list of int
Number of output channels for each unit.
init_block_channels : int
Number of output channels for the initial unit.
final_block_channels : int
Number of output channels for the final block of the feature extractor.
exp_kernel_counts : list of list of int
Expansion convolution kernel count for each unit.
conv1_kernel_counts : list of list of int
Conv1 kernel count for each unit.
conv2_kernel_counts : list of list of int
Conv2 kernel count for each unit.
exp_factors : list of list of int
Expansion factor for each unit.
se_factors : list of list of int
SE reduction factor for each unit.
in_channels : int, default 3
Number of input channels.
in_size : tuple of two ints, default (224, 224)
Spatial size of the expected input image.
class_num : int, default 1000
Number of classification classes.
"""
def __init__(self,
channels,
init_block_channels,
final_block_channels,
exp_kernel_counts,
conv1_kernel_counts,
conv2_kernel_counts,
exp_factors,
se_factors,
in_channels=3,
in_size=(224, 224),
class_num=1000):
super(MixNet, self).__init__()
self.in_size = in_size
self.class_num = class_num
self.features = nn.Sequential()
self.features.add_sublayer(
"init_block",
MixInitBlock(
in_channels=in_channels, out_channels=init_block_channels))
in_channels = init_block_channels
for i, channels_per_stage in enumerate(channels):
stage = nn.Sequential()
for j, out_channels in enumerate(channels_per_stage):
stride = 2 if ((j == 0) and (i != 3)) or (
(j == len(channels_per_stage) // 2) and (i == 3)) else 1
exp_kernel_count = exp_kernel_counts[i][j]
conv1_kernel_count = conv1_kernel_counts[i][j]
conv2_kernel_count = conv2_kernel_counts[i][j]
exp_factor = exp_factors[i][j]
se_factor = se_factors[i][j]
activation = "relu" if i == 0 else "swish"
stage.add_sublayer(
"unit{}".format(j + 1),
MixUnit(
in_channels=in_channels,
out_channels=out_channels,
stride=stride,
exp_kernel_count=exp_kernel_count,
conv1_kernel_count=conv1_kernel_count,
conv2_kernel_count=conv2_kernel_count,
exp_factor=exp_factor,
se_factor=se_factor,
activation=activation))
in_channels = out_channels
self.features.add_sublayer("stage{}".format(i + 1), stage)
self.features.add_sublayer(
"final_block",
ConvBlock(
in_channels=in_channels,
out_channels=final_block_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=nn.ReLU()))
in_channels = final_block_channels
self.features.add_sublayer(
"final_pool", nn.AvgPool2D(
kernel_size=7, stride=1))
self.output = nn.Linear(
in_features=in_channels, out_features=class_num)
def forward(self, x):
x = self.features(x)
        x = x.flatten(start_axis=1)
x = self.output(x)
return x
def get_mixnet(version, width_scale, model_name=None, **kwargs):
"""
Create MixNet model with specific parameters.
Parameters:
----------
version : str
        Version of MixNet ('s' or 'm').
width_scale : float
Scale factor for width of layers.
model_name : str or None, default None
Model name.
"""
if version == "s":
init_block_channels = 16
channels = [[24, 24], [40, 40, 40, 40], [80, 80, 80],
[120, 120, 120, 200, 200, 200]]
exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 1, 1],
[2, 2, 2, 1, 1, 1]]
conv1_kernel_counts = [[1, 1], [3, 2, 2, 2], [3, 2, 2],
[3, 4, 4, 5, 4, 4]]
conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [2, 2, 2],
[2, 2, 2, 1, 2, 2]]
exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6], [6, 3, 3, 6, 6, 6]]
se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4], [2, 2, 2, 2, 2, 2]]
elif version == "m":
init_block_channels = 24
channels = [[32, 32], [40, 40, 40, 40], [80, 80, 80, 80],
[120, 120, 120, 120, 200, 200, 200, 200]]
exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
[1, 2, 2, 2, 1, 1, 1, 1]]
conv1_kernel_counts = [[3, 1], [4, 2, 2, 2], [3, 4, 4, 4],
[1, 4, 4, 4, 4, 4, 4, 4]]
conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
[1, 2, 2, 2, 1, 2, 2, 2]]
exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6, 6],
[6, 3, 3, 3, 6, 6, 6, 6]]
se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4, 4],
[2, 2, 2, 2, 2, 2, 2, 2]]
else:
raise ValueError("Unsupported MixNet version {}".format(version))
final_block_channels = 1536
if width_scale != 1.0:
channels = [[round_channels(cij * width_scale) for cij in ci]
for ci in channels]
init_block_channels = round_channels(init_block_channels * width_scale)
net = MixNet(
channels=channels,
init_block_channels=init_block_channels,
final_block_channels=final_block_channels,
exp_kernel_counts=exp_kernel_counts,
conv1_kernel_counts=conv1_kernel_counts,
conv2_kernel_counts=conv2_kernel_counts,
exp_factors=exp_factors,
se_factors=se_factors,
**kwargs)
return net
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
"""
MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
model = get_mixnet(
version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
return model
def MixNet_M(pretrained=False, use_ssld=False, **kwargs):
"""
MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
model = get_mixnet(
version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
return model
def MixNet_L(pretrained=False, use_ssld=False, **kwargs):
"""
    MixNet-L model (MixNet-M with width_scale=1.3) from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
model = get_mixnet(
version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
return model
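# --- Illustrative smoke test: a minimal sketch (assumes a working paddle
# install); shapes follow the default 224x224 input documented above. ---
if __name__ == "__main__":
    import paddle
    model = MixNet_S(pretrained=False)
    out = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expect [1, 1000]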

@ -0,0 +1,287 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"MobileNetV2_x0_25":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
"MobileNetV2_x0_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
"MobileNetV2_x0_75":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
"MobileNetV2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
"MobileNetV2_x1_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
"MobileNetV2_x2_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
name=None,
use_cudnn=True):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs, if_act=True):
y = self._conv(inputs)
y = self._batch_norm(y)
if if_act:
y = F.relu6(y)
return y
class InvertedResidualUnit(nn.Layer):
def __init__(self, num_channels, num_in_filter, num_filters, stride,
filter_size, padding, expansion_factor, name):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_expand")
self._bottleneck_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
use_cudnn=False,
name=name + "_dwise")
self._linear_conv = ConvBNLayer(
num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_linear")
def forward(self, inputs, ifshortcut):
y = self._expand_conv(inputs, if_act=True)
y = self._bottleneck_conv(y, if_act=True)
y = self._linear_conv(y, if_act=False)
if ifshortcut:
y = paddle.add(inputs, y)
return y
class InvresiBlocks(nn.Layer):
def __init__(self, in_c, t, c, n, s, name):
super(InvresiBlocks, self).__init__()
self._first_block = InvertedResidualUnit(
num_channels=in_c,
num_in_filter=in_c,
num_filters=c,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_1")
self._block_list = []
for i in range(1, n):
block = self.add_sublayer(
name + "_" + str(i + 1),
sublayer=InvertedResidualUnit(
num_channels=c,
num_in_filter=c,
num_filters=c,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_" + str(i + 1)))
self._block_list.append(block)
def forward(self, inputs):
y = self._first_block(inputs, ifshortcut=False)
for block in self._block_list:
y = block(y, ifshortcut=True)
return y
class MobileNet(nn.Layer):
def __init__(self, class_num=1000, scale=1.0, prefix_name=""):
super(MobileNet, self).__init__()
self.scale = scale
self.class_num = class_num
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
self.conv1 = ConvBNLayer(
num_channels=3,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
name=prefix_name + "conv1_1")
self.block_list = []
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
block = self.add_sublayer(
prefix_name + "conv" + str(i),
sublayer=InvresiBlocks(
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name=prefix_name + "conv" + str(i)))
self.block_list.append(block)
in_c = int(c * scale)
self.out_c = int(1280 * scale) if scale > 1.0 else 1280
self.conv9 = ConvBNLayer(
num_channels=in_c,
num_filters=self.out_c,
filter_size=1,
stride=1,
padding=0,
name=prefix_name + "conv9")
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.out = Linear(
self.out_c,
class_num,
weight_attr=ParamAttr(name=prefix_name + "fc10_weights"),
bias_attr=ParamAttr(name=prefix_name + "fc10_offset"))
def forward(self, inputs):
y = self.conv1(inputs, if_act=True)
for block in self.block_list:
y = block(y)
y = self.conv9(y, if_act=True)
y = self.pool2d_avg(y)
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.25, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.5, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.75, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
return model
def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.5, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=2.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
return model
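# --- Illustrative smoke test: a minimal sketch (assumes a working paddle
# install) exercising two width multipliers; both map to 1000 classes. ---
if __name__ == "__main__":
    import paddle
    for build in (MobileNetV2_x0_25, MobileNetV2):
        net = build(pretrained=False)
        out = net(paddle.rand([1, 3, 224, 224]))
        print(build.__name__, out.shape)  # expect [1, 1000] for both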

@ -0,0 +1,492 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was heavily based on https://github.com/whai362/PVT
from functools import partial
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant
from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity, drop_path
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"PVT_V2_B0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B0_pretrained.pdparams",
"PVT_V2_B1":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B1_pretrained.pdparams",
"PVT_V2_B2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_pretrained.pdparams",
"PVT_V2_B2_Linear":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_Linear_pretrained.pdparams",
"PVT_V2_B3":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B3_pretrained.pdparams",
"PVT_V2_B4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams",
"PVT_V2_B5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
@paddle.jit.not_to_static
def swapdim(x, dim1, dim2):
a = list(range(len(x.shape)))
a[dim1], a[dim2] = a[dim2], a[dim1]
return x.transpose(a)
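# Example: for x of shape [B, N, C], swapdim(x, 1, 2) returns shape
# [B, C, N]; it is a transpose restricted to exchanging two axes, kept
# out of static-graph conversion by the decorator above.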
class Mlp(nn.Layer):
def __init__(self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.GELU,
drop=0.,
linear=False):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.dwconv = DWConv(hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
self.linear = linear
if self.linear:
self.relu = nn.ReLU()
def forward(self, x, H, W):
x = self.fc1(x)
if self.linear:
x = self.relu(x)
x = self.dwconv(x, H, W)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class Attention(nn.Layer):
def __init__(self,
dim,
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop=0.,
proj_drop=0.,
sr_ratio=1,
linear=False):
super().__init__()
assert dim % num_heads == 0
self.dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.linear = linear
self.sr_ratio = sr_ratio
if not linear:
if sr_ratio > 1:
self.sr = nn.Conv2D(
dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
self.norm = nn.LayerNorm(dim)
else:
self.pool = nn.AdaptiveAvgPool2D(7)
self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1)
self.norm = nn.LayerNorm(dim)
self.act = nn.GELU()
def forward(self, x, H, W):
B, N, C = x.shape
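        # Spatial-reduction attention: queries keep the full token length N,
        # while keys/values come from a spatially reduced copy of the input
        # (strided conv, or pooling in the linear variant).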
q = self.q(x).reshape(
[B, N, self.num_heads, C // self.num_heads]).transpose(
[0, 2, 1, 3])
if not self.linear:
if self.sr_ratio > 1:
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
x_ = self.sr(x_)
h_, w_ = x_.shape[-2:]
x_ = x_.reshape([B, C, h_ * w_]).transpose([0, 2, 1])
x_ = self.norm(x_)
kv = self.kv(x_)
kv = kv.reshape([
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
C // self.num_heads
]).transpose([2, 0, 3, 1, 4])
else:
kv = self.kv(x)
kv = kv.reshape([
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
C // self.num_heads
]).transpose([2, 0, 3, 1, 4])
else:
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
x_ = self.sr(self.pool(x_))
x_ = x_.reshape([B, C, x_.shape[2] * x_.shape[3]]).transpose(
[0, 2, 1])
x_ = self.norm(x_)
x_ = self.act(x_)
kv = self.kv(x_)
kv = kv.reshape([
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
C // self.num_heads
]).transpose([2, 0, 3, 1, 4])
k, v = kv[0], kv[1]
        attn = (q @ swapdim(k, -2, -1)) * self.scale
attn = F.softmax(attn, axis=-1)
attn = self.attn_drop(attn)
        x = swapdim(attn @ v, 1, 2).reshape([B, N, C])
x = self.proj(x)
x = self.proj_drop(x)
return x
class Block(nn.Layer):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop=0.,
attn_drop=0.,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm,
sr_ratio=1,
linear=False):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
attn_drop=attn_drop,
proj_drop=drop,
sr_ratio=sr_ratio,
linear=linear)
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop,
linear=linear)
def forward(self, x, H, W):
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
return x
class OverlapPatchEmbed(nn.Layer):
""" Image to Patch Embedding
"""
def __init__(self,
img_size=224,
patch_size=7,
stride=4,
in_chans=3,
embed_dim=768):
super().__init__()
img_size = to_2tuple(img_size)
patch_size = to_2tuple(patch_size)
self.img_size = img_size
self.patch_size = patch_size
        self.H = img_size[0] // patch_size[0]
        self.W = img_size[1] // patch_size[1]
self.num_patches = self.H * self.W
self.proj = nn.Conv2D(
in_chans,
embed_dim,
kernel_size=patch_size,
stride=stride,
padding=(patch_size[0] // 2, patch_size[1] // 2))
self.norm = nn.LayerNorm(embed_dim)
def forward(self, x):
x = self.proj(x)
_, _, H, W = x.shape
x = x.flatten(2)
x = swapdim(x, 1, 2)
x = self.norm(x)
return x, H, W
class PyramidVisionTransformerV2(nn.Layer):
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256, 512],
num_heads=[1, 2, 4, 8],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=False,
qk_scale=None,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
num_stages=4,
linear=False):
super().__init__()
self.class_num = class_num
self.depths = depths
self.num_stages = num_stages
        # stochastic depth decay rule
        dpr = paddle.linspace(0, drop_path_rate, sum(depths)).tolist()
cur = 0
for i in range(num_stages):
patch_embed = OverlapPatchEmbed(
img_size=img_size if i == 0 else img_size // (2**(i + 1)),
patch_size=7 if i == 0 else 3,
stride=4 if i == 0 else 2,
in_chans=in_chans if i == 0 else embed_dims[i - 1],
embed_dim=embed_dims[i])
block = nn.LayerList([
Block(
dim=embed_dims[i],
num_heads=num_heads[i],
mlp_ratio=mlp_ratios[i],
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[cur + j],
norm_layer=norm_layer,
sr_ratio=sr_ratios[i],
linear=linear) for j in range(depths[i])
])
norm = norm_layer(embed_dims[i])
cur += depths[i]
setattr(self, f"patch_embed{i + 1}", patch_embed)
setattr(self, f"block{i + 1}", block)
setattr(self, f"norm{i + 1}", norm)
# classification head
self.head = nn.Linear(embed_dims[3],
class_num) if class_num > 0 else Identity()
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
def forward_features(self, x):
B = x.shape[0]
for i in range(self.num_stages):
patch_embed = getattr(self, f"patch_embed{i + 1}")
block = getattr(self, f"block{i + 1}")
norm = getattr(self, f"norm{i + 1}")
x, H, W = patch_embed(x)
for blk in block:
x = blk(x, H, W)
x = norm(x)
if i != self.num_stages - 1:
x = x.reshape([B, H, W, x.shape[2]]).transpose([0, 3, 1, 2])
return x.mean(axis=1)
def forward(self, x):
x = self.forward_features(x)
x = self.head(x)
return x
class DWConv(nn.Layer):
def __init__(self, dim=768):
super().__init__()
self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim)
def forward(self, x, H, W):
B, N, C = x.shape
x = swapdim(x, 1, 2)
x = x.reshape([B, C, H, W])
x = self.dwconv(x)
x = x.flatten(2)
x = swapdim(x, 1, 2)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def PVT_V2_B0(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[32, 64, 160, 256],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[2, 2, 2, 2],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B0"], use_ssld=use_ssld)
return model
def PVT_V2_B1(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[2, 2, 2, 2],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B1"], use_ssld=use_ssld)
return model
def PVT_V2_B2(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B2"], use_ssld=use_ssld)
return model
def PVT_V2_B3(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 4, 18, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B3"], use_ssld=use_ssld)
return model
def PVT_V2_B4(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 8, 27, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B4"], use_ssld=use_ssld)
return model
def PVT_V2_B5(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[4, 4, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 6, 40, 3],
sr_ratios=[8, 4, 2, 1],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B5"], use_ssld=use_ssld)
return model
def PVT_V2_B2_Linear(pretrained=False, use_ssld=False, **kwargs):
model = PyramidVisionTransformerV2(
patch_size=4,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
mlp_ratios=[8, 8, 4, 4],
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
linear=True,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["PVT_V2_B2_Linear"], use_ssld=use_ssld)
return model
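# --- Illustrative smoke test: a minimal sketch (assumes a working paddle
# install). The patch embedding is convolutional, so non-224 inputs work. ---
if __name__ == "__main__":
    import paddle
    model = PVT_V2_B0(pretrained=False)
    out = model(paddle.rand([1, 3, 256, 192]))
    print(out.shape)  # expect [1, 1000]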

@ -0,0 +1,203 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/d-li14/involution
import paddle
import paddle.nn as nn
from paddle.vision.models import resnet
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"RedNet26":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
"RedNet38":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet38_pretrained.pdparams",
"RedNet50":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet50_pretrained.pdparams",
"RedNet101":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet101_pretrained.pdparams",
"RedNet152":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
class Involution(nn.Layer):
def __init__(self, channels, kernel_size, stride):
super(Involution, self).__init__()
self.kernel_size = kernel_size
self.stride = stride
self.channels = channels
reduction_ratio = 4
self.group_channels = 16
self.groups = self.channels // self.group_channels
self.conv1 = nn.Sequential(
('conv', nn.Conv2D(
in_channels=channels,
out_channels=channels // reduction_ratio,
kernel_size=1,
bias_attr=False)),
('bn', nn.BatchNorm2D(channels // reduction_ratio)),
('activate', nn.ReLU()))
self.conv2 = nn.Sequential(('conv', nn.Conv2D(
in_channels=channels // reduction_ratio,
out_channels=kernel_size**2 * self.groups,
kernel_size=1,
stride=1)))
if stride > 1:
self.avgpool = nn.AvgPool2D(stride, stride)
def forward(self, x):
weight = self.conv2(
self.conv1(x if self.stride == 1 else self.avgpool(x)))
b, c, h, w = weight.shape
weight = weight.reshape(
(b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
out = nn.functional.unfold(x, self.kernel_size, self.stride,
(self.kernel_size - 1) // 2, 1)
out = out.reshape(
(b, self.groups, self.group_channels, self.kernel_size**2, h, w))
out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
return out
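# Shape example: with channels=64 (so groups = 64 // 16 = 4), kernel_size=7
# and stride=1, an input of [1, 64, 56, 56] produces per-pixel kernels of
# shape [1, 4, 1, 49, 56, 56] and an output of [1, 64, 56, 56].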
class BottleneckBlock(resnet.BottleneckBlock):
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None):
super(BottleneckBlock, self).__init__(inplanes, planes, stride,
downsample, groups, base_width,
dilation, norm_layer)
width = int(planes * (base_width / 64.)) * groups
self.conv2 = Involution(width, 7, stride)
class RedNet(resnet.ResNet):
def __init__(self, block, depth, class_num=1000, with_pool=True):
super(RedNet, self).__init__(
block=block, depth=50, num_classes=class_num, with_pool=with_pool)
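        # The parent constructor builds a ResNet-50 scaffold; its stem
        # (conv1/bn1/relu) is discarded below and every stage is rebuilt
        # with the depth configuration actually requested.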
layer_cfg = {
26: [1, 2, 4, 1],
38: [2, 3, 5, 2],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3]
}
layers = layer_cfg[depth]
self.conv1 = None
self.bn1 = None
self.relu = None
self.inplanes = 64
self.class_num = class_num
self.stem = nn.Sequential(
nn.Sequential(
('conv', nn.Conv2D(
in_channels=3,
out_channels=self.inplanes // 2,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False)),
('bn', nn.BatchNorm2D(self.inplanes // 2)),
('activate', nn.ReLU())),
Involution(self.inplanes // 2, 3, 1),
nn.BatchNorm2D(self.inplanes // 2),
nn.ReLU(),
nn.Sequential(
('conv', nn.Conv2D(
in_channels=self.inplanes // 2,
out_channels=self.inplanes,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)),
('activate', nn.ReLU())))
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
def forward(self, x):
x = self.stem(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
if self.with_pool:
x = self.avgpool(x)
if self.class_num > 0:
x = paddle.flatten(x, 1)
x = self.fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def RedNet26(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 26, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
return model
def RedNet38(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 38, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
return model
def RedNet50(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 50, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
return model
def RedNet101(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 101, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
return model
def RedNet152(pretrained=False, **kwargs):
model = RedNet(BottleneckBlock, 152, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
return model

@ -0,0 +1,431 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/facebookresearch/pycls
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"RegNetX_200MF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
"RegNetX_4GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
"RegNetX_32GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
"RegNetY_200MF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams",
"RegNetY_4GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
"RegNetY_32GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def quantize_float(f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
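# Example: quantize_float(30, 8) -> 32 and quantize_float(59.76, 8) -> 56.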
def adjust_ws_gs_comp(ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
def get_stages_from_blocks(ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts = [
w != wp or r != rp
for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
def generate_regnet(w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
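# Example (RegNetX_200MF parameters): generate_regnet(36.44, 24, 2.49, 13)
# should yield per-block widths [24] + [56] + [152] * 4 + [368] * 7, which
# get_stages_from_blocks above collapses to ws = [24, 56, 152, 368] and
# ds = [1, 1, 4, 7].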
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
padding=0,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"),
bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"))
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + ".output.1.w_0"),
bias_attr=ParamAttr(bn_name + ".output.1.b_0"),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
bm,
gw,
se_on,
se_r,
shortcut=True,
name=None):
super(BottleneckBlock, self).__init__()
# Compute the bottleneck width
w_b = int(round(num_filters * bm))
# Compute the number of groups
num_gs = w_b // gw
self.se_on = se_on
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=w_b,
filter_size=1,
padding=0,
act="relu",
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=w_b,
num_filters=w_b,
filter_size=3,
stride=stride,
padding=1,
groups=num_gs,
act="relu",
name=name + "_branch2b")
if se_on:
w_se = int(round(num_channels * se_r))
self.se_block = SELayer(
num_channels=w_b,
num_filters=w_b,
reduction_ratio=w_se,
name=name + "_branch2se")
self.conv2 = ConvBNLayer(
num_channels=w_b,
num_filters=num_filters,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=stride,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.se_on:
conv1 = self.se_block(conv1)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + "_sqz_offset"))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + "_exc_offset"))
def forward(self, input):
pool = self.pool2d_gap(input)
pool = paddle.reshape(pool, shape=[-1, self._num_channels])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = F.sigmoid(excitation)
excitation = paddle.reshape(
excitation, shape=[-1, self._num_channels, 1, 1])
out = input * excitation
return out
class RegNet(nn.Layer):
def __init__(self,
w_a,
w_0,
w_m,
d,
group_w,
bot_mul,
q=8,
se_on=False,
class_num=1000):
super(RegNet, self).__init__()
# Generate RegNet ws per block
b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q)
# Convert to per stage format
ws, ds = get_stages_from_blocks(b_ws, b_ws)
# Generate group widths and bot muls
gws = [group_w for _ in range(num_s)]
bms = [bot_mul for _ in range(num_s)]
# Adjust the compatibility of ws and gws
ws, gws = adjust_ws_gs_comp(ws, bms, gws)
# Use the same stride for each stage
ss = [2 for _ in range(num_s)]
# Use SE for RegNetY
se_r = 0.25
# Construct the model
# Group params by stage
stage_params = list(zip(ds, ws, ss, bms, gws))
# Construct the stem
stem_type = "simple_stem_in"
stem_w = 32
block_type = "res_bottleneck_block"
self.conv = ConvBNLayer(
num_channels=3,
num_filters=stem_w,
filter_size=3,
stride=2,
padding=1,
act="relu",
name="stem_conv")
self.block_list = []
for block, (d, w_out, stride, bm, gw) in enumerate(stage_params):
shortcut = False
for i in range(d):
num_channels = stem_w if block == i == 0 else in_channels
# Stride apply to the first block of the stage
b_stride = stride if i == 0 else 1
conv_name = "s" + str(block + 1) + "_b" + str(i +
1) # chr(97 + i)
bottleneck_block = self.add_sublayer(
conv_name,
BottleneckBlock(
num_channels=num_channels,
num_filters=w_out,
stride=b_stride,
bm=bm,
gw=gw,
se_on=se_on,
se_r=se_r,
shortcut=shortcut,
name=conv_name))
in_channels = w_out
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = w_out
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
bias_attr=ParamAttr(name="fc_0.b_0"))
def forward(self, inputs):
y = self.conv(inputs)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=36.44,
w_0=24,
w_m=2.49,
d=13,
group_w=8,
bot_mul=1.0,
q=8,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
return model
def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=38.65,
w_0=96,
w_m=2.43,
d=23,
group_w=40,
bot_mul=1.0,
q=8,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
return model
def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=69.86,
w_0=320,
w_m=2.0,
d=23,
group_w=168,
bot_mul=1.0,
q=8,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
return model
def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=36.44,
w_0=24,
w_m=2.49,
d=13,
group_w=8,
bot_mul=1.0,
q=8,
se_on=True,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
return model
def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=31.41,
w_0=96,
w_m=2.24,
d=22,
group_w=64,
bot_mul=1.0,
q=8,
se_on=True,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
return model
def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=115.89,
w_0=232,
w_m=2.53,
d=20,
group_w=232,
bot_mul=1.0,
q=8,
se_on=True,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
return model

@ -0,0 +1,382 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/DingXiaoH/RepVGG
import paddle.nn as nn
import paddle
import numpy as np
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"RepVGG_A0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
"RepVGG_A1":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
"RepVGG_A2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
"RepVGG_B0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
"RepVGG_B1":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
"RepVGG_B2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
"RepVGG_B1g2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
"RepVGG_B1g4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
"RepVGG_B2g4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
"RepVGG_B3g4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
g2_map = {l: 2 for l in optional_groupwise_layers}
g4_map = {l: 4 for l in optional_groupwise_layers}
class ConvBN(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1):
super(ConvBN, self).__init__()
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
bias_attr=False)
self.bn = nn.BatchNorm2D(num_features=out_channels)
def forward(self, x):
y = self.conv(x)
y = self.bn(y)
return y
class RepVGGBlock(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
padding_mode='zeros'):
super(RepVGGBlock, self).__init__()
self.is_repped = False
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.padding_mode = padding_mode
assert kernel_size == 3
assert padding == 1
padding_11 = padding - kernel_size // 2
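        # Three parallel branches (3x3 conv+BN, 1x1 conv+BN and, when shapes
        # allow, a BN-only identity) are fused by rep() into a single 3x3
        # conv for inference.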
self.nonlinearity = nn.ReLU()
self.rbr_identity = nn.BatchNorm2D(
num_features=in_channels
) if out_channels == in_channels and stride == 1 else None
self.rbr_dense = ConvBN(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups)
self.rbr_1x1 = ConvBN(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
padding=padding_11,
groups=groups)
def forward(self, inputs):
if not self.training and not self.is_repped:
self.rep()
self.is_repped = True
if self.training and self.is_repped:
self.is_repped = False
if not self.training:
return self.nonlinearity(self.rbr_reparam(inputs))
if self.rbr_identity is None:
id_out = 0
else:
id_out = self.rbr_identity(inputs)
return self.nonlinearity(
self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
def rep(self):
if not hasattr(self, 'rbr_reparam'):
self.rbr_reparam = nn.Conv2D(
in_channels=self.in_channels,
out_channels=self.out_channels,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
padding_mode=self.padding_mode)
kernel, bias = self.get_equivalent_kernel_bias()
self.rbr_reparam.weight.set_value(kernel)
self.rbr_reparam.bias.set_value(bias)
def get_equivalent_kernel_bias(self):
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(
kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
if kernel1x1 is None:
return 0
else:
return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
if branch is None:
return 0, 0
if isinstance(branch, ConvBN):
kernel = branch.conv.weight
running_mean = branch.bn._mean
running_var = branch.bn._variance
gamma = branch.bn.weight
beta = branch.bn.bias
eps = branch.bn._epsilon
else:
assert isinstance(branch, nn.BatchNorm2D)
if not hasattr(self, 'id_tensor'):
input_dim = self.in_channels // self.groups
kernel_value = np.zeros(
(self.in_channels, input_dim, 3, 3), dtype=np.float32)
for i in range(self.in_channels):
kernel_value[i, i % input_dim, 1, 1] = 1
self.id_tensor = paddle.to_tensor(kernel_value)
kernel = self.id_tensor
running_mean = branch._mean
running_var = branch._variance
gamma = branch.weight
beta = branch.bias
eps = branch._epsilon
std = (running_var + eps).sqrt()
t = (gamma / std).reshape((-1, 1, 1, 1))
return kernel * t, beta - running_mean * gamma / std
class RepVGG(nn.Layer):
def __init__(self,
num_blocks,
width_multiplier=None,
override_groups_map=None,
class_num=1000):
super(RepVGG, self).__init__()
assert len(width_multiplier) == 4
self.override_groups_map = override_groups_map or dict()
assert 0 not in self.override_groups_map
self.in_planes = min(64, int(64 * width_multiplier[0]))
self.stage0 = RepVGGBlock(
in_channels=3,
out_channels=self.in_planes,
kernel_size=3,
stride=2,
padding=1)
self.cur_layer_idx = 1
self.stage1 = self._make_stage(
int(64 * width_multiplier[0]), num_blocks[0], stride=2)
self.stage2 = self._make_stage(
int(128 * width_multiplier[1]), num_blocks[1], stride=2)
self.stage3 = self._make_stage(
int(256 * width_multiplier[2]), num_blocks[2], stride=2)
self.stage4 = self._make_stage(
int(512 * width_multiplier[3]), num_blocks[3], stride=2)
self.gap = nn.AdaptiveAvgPool2D(output_size=1)
self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)
def _make_stage(self, planes, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
blocks = []
for stride in strides:
cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
blocks.append(
RepVGGBlock(
in_channels=self.in_planes,
out_channels=planes,
kernel_size=3,
stride=stride,
padding=1,
groups=cur_groups))
self.in_planes = planes
self.cur_layer_idx += 1
return nn.Sequential(*blocks)
def forward(self, x):
out = self.stage0(x)
out = self.stage1(out)
out = self.stage2(out)
out = self.stage3(out)
out = self.stage4(out)
out = self.gap(out)
out = paddle.flatten(out, start_axis=1)
out = self.linear(out)
return out
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[2, 4, 14, 1],
width_multiplier=[0.75, 0.75, 0.75, 2.5],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
return model
def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[2, 4, 14, 1],
width_multiplier=[1, 1, 1, 2.5],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
return model
def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[2, 4, 14, 1],
width_multiplier=[1.5, 1.5, 1.5, 2.75],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
return model
def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[1, 1, 1, 2.5],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
return model
def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
return model
def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4],
override_groups_map=g2_map,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
return model
def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[2, 2, 2, 4],
override_groups_map=g4_map,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
return model
def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[2.5, 2.5, 2.5, 5],
override_groups_map=None,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
return model
def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[2.5, 2.5, 2.5, 5],
override_groups_map=g4_map,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
return model
def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
width_multiplier=[3, 3, 3, 5],
override_groups_map=g4_map,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
return model
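# --- Illustrative equivalence check: a minimal sketch (assumes a working
# paddle install) showing that the fused kernel reproduces the three-branch
# output in eval mode, up to floating-point error. ---
if __name__ == "__main__":
    import paddle
    blk = RepVGGBlock(in_channels=8, out_channels=8, kernel_size=3, padding=1)
    blk.eval()  # eval-mode forward triggers rep() and uses rbr_reparam
    x = paddle.rand([1, 8, 16, 16])
    y_fused = blk(x)
    # rep() keeps the original branches, so the multi-branch result can be
    # recomputed by hand for comparison.
    y_branches = blk.nonlinearity(
        blk.rbr_dense(x) + blk.rbr_1x1(x) + blk.rbr_identity(x))
    print(float((y_fused - y_branches).abs().max()))  # expect ~1e-6 or less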

@ -0,0 +1,264 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"Res2Net50_26w_4s":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
"Res2Net50_14w_8s":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(
self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels1,
num_channels2,
num_filters,
stride,
scales,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.stride = stride
self.scales = scales
self.conv0 = ConvBNLayer(
num_channels=num_channels1,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.conv1_list = []
for s in range(scales - 1):
conv1 = self.add_sublayer(
name + '_branch2b_' + str(s + 1),
ConvBNLayer(
num_channels=num_filters // scales,
num_filters=num_filters // scales,
filter_size=3,
stride=stride,
act='relu',
name=name + '_branch2b_' + str(s + 1)))
self.conv1_list.append(conv1)
self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_channels2,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels1,
num_filters=num_channels2,
filter_size=1,
stride=stride,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
xs = paddle.split(y, self.scales, 1)
ys = []
for s, conv1 in enumerate(self.conv1_list):
if s == 0 or self.stride == 2:
ys.append(conv1(xs[s]))
else:
ys.append(conv1(paddle.add(xs[s], ys[-1])))
if self.stride == 1:
ys.append(xs[-1])
else:
ys.append(self.pool2d_avg(xs[-1]))
conv1 = paddle.concat(ys, axis=1)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class Res2Net(nn.Layer):
def __init__(self, layers=50, scales=4, width=26, class_num=1000):
super(Res2Net, self).__init__()
self.layers = layers
self.scales = scales
self.width = width
basic_width = self.width * self.scales
supported_layers = [50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512, 1024]
num_channels2 = [256, 512, 1024, 2048]
num_filters = [basic_width * t for t in [1, 2, 4, 8]]
self.conv1 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name="conv1")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels1=num_channels[block]
if i == 0 else num_channels2[block],
num_channels2=num_channels2[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
scales=scales,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, inputs):
y = self.conv1(inputs)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "pretrained type is not supported. Please use `str` or `bool` type."
        )
def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs):
model = Res2Net(layers=50, scales=4, width=26, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
return model
def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs):
model = Res2Net(layers=50, scales=8, width=14, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
return model
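# --- Illustrative smoke test: a minimal sketch (assumes a working paddle
# install); the exported variants trade width (26 vs 14) for scales (4 vs 8). ---
if __name__ == "__main__":
    import paddle
    model = Res2Net50_26w_4s(pretrained=False)
    out = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expect [1, 1000]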

@ -0,0 +1,305 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"Res2Net50_vd_26w_4s":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
"Res2Net101_vd_26w_4s":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
"Res2Net200_vd_26w_4s":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(
self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
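        # "vd" trick (added commentary): when is_vd_mode is set, a 2x2
        # average pool runs before the 1x1 shortcut conv, so downsampling
        # averages activations instead of discarding three of every four
        # as a stride-2 1x1 conv would.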
self._pool2d_avg = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels1,
num_channels2,
num_filters,
stride,
scales,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.stride = stride
self.scales = scales
self.conv0 = ConvBNLayer(
num_channels=num_channels1,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.conv1_list = []
for s in range(scales - 1):
conv1 = self.add_sublayer(
name + '_branch2b_' + str(s + 1),
ConvBNLayer(
num_channels=num_filters // scales,
num_filters=num_filters // scales,
filter_size=3,
stride=stride,
act='relu',
name=name + '_branch2b_' + str(s + 1)))
self.conv1_list.append(conv1)
self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_channels2,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels1,
num_filters=num_channels2,
filter_size=1,
stride=1,
                is_vd_mode=not if_first,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
xs = paddle.split(y, self.scales, 1)
ys = []
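        # Hierarchical residual (added commentary): group s consumes its
        # split plus the previous group's output, except for the first
        # group or when this block downsamples (stride 2) and the feature
        # maps no longer align spatially.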
for s, conv1 in enumerate(self.conv1_list):
if s == 0 or self.stride == 2:
ys.append(conv1(xs[s]))
else:
ys.append(conv1(xs[s] + ys[-1]))
if self.stride == 1:
ys.append(xs[-1])
else:
ys.append(self.pool2d_avg(xs[-1]))
conv1 = paddle.concat(ys, axis=1)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class Res2Net_vd(nn.Layer):
def __init__(self, layers=50, scales=4, width=26, class_num=1000):
super(Res2Net_vd, self).__init__()
self.layers = layers
self.scales = scales
self.width = width
basic_width = self.width * self.scales
supported_layers = [50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {}, but got layers={}".format(
                supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512, 1024]
num_channels2 = [256, 512, 1024, 2048]
num_filters = [basic_width * t for t in [1, 2, 4, 8]]
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152, 200] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels1=num_channels[block]
if i == 0 else num_channels2[block],
num_channels2=num_channels2[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
scales=scales,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["Res2Net50_vd_26w_4s"],
use_ssld=use_ssld)
return model
def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["Res2Net101_vd_26w_4s"],
use_ssld=use_ssld)
return model
def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["Res2Net200_vd_26w_4s"],
use_ssld=use_ssld)
return model

@ -0,0 +1,740 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/zhanghang1989/ResNeSt
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import math
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import KaimingNormal
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.regularizer import L2Decay
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNeSt50_fast_1s1x64d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
"ResNeSt50":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
"ResNeSt101":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
dilation=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
bn_decay = 0.0
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
dilation=dilation,
groups=groups,
weight_attr=ParamAttr(name=name + "_weight"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(
name=name + "_scale", regularizer=L2Decay(bn_decay)),
bias_attr=ParamAttr(
name + "_offset", regularizer=L2Decay(bn_decay)),
moving_mean_name=name + "_mean",
moving_variance_name=name + "_variance")
def forward(self, x):
x = self._conv(x)
x = self._batch_norm(x)
return x
class rSoftmax(nn.Layer):
def __init__(self, radix, cardinality):
super(rSoftmax, self).__init__()
self.radix = radix
self.cardinality = cardinality
def forward(self, x):
cardinality = self.cardinality
radix = self.radix
batch, r, h, w = x.shape
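        # Added commentary: reshape to [N, cardinality, radix, c], swap so
        # the softmax runs over the radix axis (competing branches within
        # each cardinal group), then flatten back to an attention vector.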
if self.radix > 1:
x = paddle.reshape(
x=x,
shape=[
batch, cardinality, radix,
int(r * h * w / cardinality / radix)
])
x = paddle.transpose(x=x, perm=[0, 2, 1, 3])
x = nn.functional.softmax(x, axis=1)
x = paddle.reshape(x=x, shape=[batch, r * h * w, 1, 1])
else:
x = nn.functional.sigmoid(x)
return x
class SplatConv(nn.Layer):
def __init__(self,
in_channels,
channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
radix=2,
reduction_factor=4,
rectify_avg=False,
name=None):
super(SplatConv, self).__init__()
self.radix = radix
self.conv1 = ConvBNLayer(
num_channels=in_channels,
num_filters=channels * radix,
filter_size=kernel_size,
stride=stride,
groups=groups * radix,
act="relu",
name=name + "_1_weights")
self.avg_pool2d = AdaptiveAvgPool2D(1)
inter_channels = int(max(in_channels * radix // reduction_factor, 32))
# to calc gap
self.conv2 = ConvBNLayer(
num_channels=channels,
num_filters=inter_channels,
filter_size=1,
stride=1,
groups=groups,
act="relu",
name=name + "_2_weights")
# to calc atten
self.conv3 = Conv2D(
in_channels=inter_channels,
out_channels=channels * radix,
kernel_size=1,
stride=1,
padding=0,
groups=groups,
weight_attr=ParamAttr(
name=name + "_weights", initializer=KaimingNormal()),
bias_attr=False)
self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)
def forward(self, x):
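        # Split-attention (added commentary): conv1 emits radix*channels
        # maps; the radix branches are summed into a global descriptor,
        # squeezed through conv2/conv3, normalized by rSoftmax, and used
        # to reweight each branch before the final sum.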
x = self.conv1(x)
if self.radix > 1:
splited = paddle.split(x, num_or_sections=self.radix, axis=1)
gap = paddle.add_n(splited)
else:
gap = x
gap = self.avg_pool2d(gap)
gap = self.conv2(gap)
atten = self.conv3(gap)
atten = self.rsoftmax(atten)
if self.radix > 1:
attens = paddle.split(atten, num_or_sections=self.radix, axis=1)
y = paddle.add_n([
paddle.multiply(split, att)
for (att, split) in zip(attens, splited)
])
else:
y = paddle.multiply(x, atten)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
inplanes,
planes,
stride=1,
radix=1,
cardinality=1,
bottleneck_width=64,
avd=False,
avd_first=False,
dilation=1,
is_first=False,
rectify_avg=False,
last_gamma=False,
avg_down=False,
name=None):
super(BottleneckBlock, self).__init__()
self.inplanes = inplanes
self.planes = planes
self.stride = stride
self.radix = radix
self.cardinality = cardinality
self.avd = avd
self.avd_first = avd_first
self.dilation = dilation
self.is_first = is_first
self.rectify_avg = rectify_avg
self.last_gamma = last_gamma
self.avg_down = avg_down
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
self.conv1 = ConvBNLayer(
num_channels=self.inplanes,
num_filters=group_width,
filter_size=1,
stride=1,
groups=1,
act="relu",
name=name + "_conv1")
if avd and avd_first and (stride > 1 or is_first):
self.avg_pool2d_1 = AvgPool2D(
kernel_size=3, stride=stride, padding=1)
if radix >= 1:
self.conv2 = SplatConv(
in_channels=group_width,
channels=group_width,
kernel_size=3,
stride=1,
padding=dilation,
dilation=dilation,
groups=cardinality,
bias=False,
radix=radix,
rectify_avg=rectify_avg,
name=name + "_splat")
else:
self.conv2 = ConvBNLayer(
num_channels=group_width,
num_filters=group_width,
filter_size=3,
stride=1,
dilation=dilation,
groups=cardinality,
act="relu",
name=name + "_conv2")
        if avd and not avd_first and (stride > 1 or is_first):
self.avg_pool2d_2 = AvgPool2D(
kernel_size=3, stride=stride, padding=1)
self.conv3 = ConvBNLayer(
num_channels=group_width,
num_filters=planes * 4,
filter_size=1,
stride=1,
groups=1,
act=None,
name=name + "_conv3")
if stride != 1 or self.inplanes != self.planes * 4:
if avg_down:
if dilation == 1:
self.avg_pool2d_3 = AvgPool2D(
kernel_size=stride, stride=stride, padding=0)
else:
self.avg_pool2d_3 = AvgPool2D(
kernel_size=1, stride=1, padding=0, ceil_mode=True)
self.conv4 = Conv2D(
in_channels=self.inplanes,
out_channels=planes * 4,
kernel_size=1,
stride=1,
padding=0,
groups=1,
weight_attr=ParamAttr(
name=name + "_weights", initializer=KaimingNormal()),
bias_attr=False)
else:
self.conv4 = Conv2D(
in_channels=self.inplanes,
out_channels=planes * 4,
kernel_size=1,
stride=stride,
padding=0,
groups=1,
weight_attr=ParamAttr(
name=name + "_shortcut_weights",
initializer=KaimingNormal()),
bias_attr=False)
bn_decay = 0.0
self._batch_norm = BatchNorm(
planes * 4,
act=None,
param_attr=ParamAttr(
name=name + "_shortcut_scale",
regularizer=L2Decay(bn_decay)),
bias_attr=ParamAttr(
name + "_shortcut_offset", regularizer=L2Decay(bn_decay)),
moving_mean_name=name + "_shortcut_mean",
moving_variance_name=name + "_shortcut_variance")
def forward(self, x):
short = x
x = self.conv1(x)
if self.avd and self.avd_first and (self.stride > 1 or self.is_first):
x = self.avg_pool2d_1(x)
x = self.conv2(x)
        if self.avd and not self.avd_first and (self.stride > 1 or
                                                self.is_first):
x = self.avg_pool2d_2(x)
x = self.conv3(x)
if self.stride != 1 or self.inplanes != self.planes * 4:
if self.avg_down:
short = self.avg_pool2d_3(short)
short = self.conv4(short)
short = self._batch_norm(short)
y = paddle.add(x=short, y=x)
y = F.relu(y)
return y
class ResNeStLayer(nn.Layer):
def __init__(self,
inplanes,
planes,
blocks,
radix,
cardinality,
bottleneck_width,
avg_down,
avd,
avd_first,
rectify_avg,
last_gamma,
stride=1,
dilation=1,
is_first=True,
name=None):
super(ResNeStLayer, self).__init__()
self.inplanes = inplanes
self.planes = planes
self.blocks = blocks
self.radix = radix
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.avg_down = avg_down
self.avd = avd
self.avd_first = avd_first
self.rectify_avg = rectify_avg
self.last_gamma = last_gamma
self.is_first = is_first
if dilation == 1 or dilation == 2:
bottleneck_func = self.add_sublayer(
name + "_bottleneck_0",
BottleneckBlock(
inplanes=self.inplanes,
planes=planes,
stride=stride,
radix=radix,
cardinality=cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
dilation=1,
is_first=is_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
name=name + "_bottleneck_0"))
elif dilation == 4:
bottleneck_func = self.add_sublayer(
name + "_bottleneck_0",
BottleneckBlock(
inplanes=self.inplanes,
planes=planes,
stride=stride,
radix=radix,
cardinality=cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
dilation=2,
is_first=is_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
name=name + "_bottleneck_0"))
else:
raise RuntimeError("=>unknown dilation size")
self.inplanes = planes * 4
self.bottleneck_block_list = [bottleneck_func]
for i in range(1, blocks):
curr_name = name + "_bottleneck_" + str(i)
bottleneck_func = self.add_sublayer(
curr_name,
BottleneckBlock(
inplanes=self.inplanes,
planes=planes,
radix=radix,
cardinality=cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
dilation=dilation,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
name=curr_name))
self.bottleneck_block_list.append(bottleneck_func)
def forward(self, x):
for bottleneck_block in self.bottleneck_block_list:
x = bottleneck_block(x)
return x
class ResNeSt(nn.Layer):
def __init__(self,
layers,
radix=1,
groups=1,
bottleneck_width=64,
dilated=False,
dilation=1,
deep_stem=False,
stem_width=64,
avg_down=False,
rectify_avg=False,
avd=False,
avd_first=False,
final_drop=0.0,
last_gamma=False,
class_num=1000):
super(ResNeSt, self).__init__()
self.cardinality = groups
self.bottleneck_width = bottleneck_width
# ResNet-D params
self.inplanes = stem_width * 2 if deep_stem else 64
self.avg_down = avg_down
self.last_gamma = last_gamma
# ResNeSt params
self.radix = radix
self.avd = avd
self.avd_first = avd_first
self.deep_stem = deep_stem
self.stem_width = stem_width
self.layers = layers
self.final_drop = final_drop
self.dilated = dilated
self.dilation = dilation
self.rectify_avg = rectify_avg
if self.deep_stem:
self.stem = nn.Sequential(
("conv1", ConvBNLayer(
num_channels=3,
num_filters=stem_width,
filter_size=3,
stride=2,
act="relu",
name="conv1")), ("conv2", ConvBNLayer(
num_channels=stem_width,
num_filters=stem_width,
filter_size=3,
stride=1,
act="relu",
name="conv2")), ("conv3", ConvBNLayer(
num_channels=stem_width,
num_filters=stem_width * 2,
filter_size=3,
stride=1,
act="relu",
name="conv3")))
else:
self.stem = ConvBNLayer(
num_channels=3,
num_filters=stem_width,
filter_size=7,
stride=2,
act="relu",
name="conv1")
self.max_pool2d = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.layer1 = ResNeStLayer(
inplanes=self.stem_width * 2
if self.deep_stem else self.stem_width,
planes=64,
blocks=self.layers[0],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=1,
dilation=1,
is_first=False,
name="layer1")
self.layer2 = ResNeStLayer(
inplanes=256,
planes=128,
blocks=self.layers[1],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=2,
name="layer2")
if self.dilated or self.dilation == 4:
self.layer3 = ResNeStLayer(
inplanes=512,
planes=256,
blocks=self.layers[2],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=1,
dilation=2,
name="layer3")
self.layer4 = ResNeStLayer(
inplanes=1024,
planes=512,
blocks=self.layers[3],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=1,
dilation=4,
name="layer4")
elif self.dilation == 2:
self.layer3 = ResNeStLayer(
inplanes=512,
planes=256,
blocks=self.layers[2],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=2,
dilation=1,
name="layer3")
self.layer4 = ResNeStLayer(
inplanes=1024,
planes=512,
blocks=self.layers[3],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=1,
dilation=2,
name="layer4")
else:
self.layer3 = ResNeStLayer(
inplanes=512,
planes=256,
blocks=self.layers[2],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=2,
name="layer3")
self.layer4 = ResNeStLayer(
inplanes=1024,
planes=512,
blocks=self.layers[3],
radix=radix,
cardinality=self.cardinality,
bottleneck_width=bottleneck_width,
avg_down=self.avg_down,
avd=avd,
avd_first=avd_first,
rectify_avg=rectify_avg,
last_gamma=last_gamma,
stride=2,
name="layer4")
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.out_channels = 2048
stdv = 1.0 / math.sqrt(self.out_channels * 1.0)
self.out = Linear(
self.out_channels,
class_num,
weight_attr=ParamAttr(
initializer=nn.initializer.Uniform(-stdv, stdv),
name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, x):
x = self.stem(x)
x = self.max_pool2d(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.pool2d_avg(x)
x = paddle.reshape(x, shape=[-1, self.out_channels])
x = self.out(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeSt(
layers=[3, 4, 6, 3],
radix=1,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=32,
avg_down=True,
avd=True,
avd_first=True,
final_drop=0.0,
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeSt50_fast_1s1x64d"],
use_ssld=use_ssld)
return model
def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
model = ResNeSt(
layers=[3, 4, 6, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=32,
avg_down=True,
avd=True,
avd_first=False,
final_drop=0.0,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
return model
def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
model = ResNeSt(
layers=[3, 4, 23, 3],
radix=2,
groups=1,
bottleneck_width=64,
deep_stem=True,
stem_width=64,
avg_down=True,
avd=True,
avd_first=False,
final_drop=0.0,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
return model
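
# --- Hedged usage sketch (added commentary, not part of the original
# file): radix=2 enables the split-attention path (SplatConv); the
# "fast" variant uses radix=1 with avd_first=True. Assumes a working
# paddle install.
if __name__ == "__main__":
    model = ResNeSt50(pretrained=False)
    model.eval()
    x = paddle.rand([2, 3, 224, 224])
    print(model(x).shape)  # expected: [2, 1000]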

@ -0,0 +1,309 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNet50_vc":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride,
name=name + "_branch1")
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=stride,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv1)
y = F.relu(y)
return y
class ResNet_vc(nn.Layer):
def __init__(self, layers=50, class_num=1000):
super(ResNet_vc, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {}, but got layers={}".format(
                supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
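        # "vc" stem (added commentary): a deeper stem of three 3x3 convs
        # (strides 2, 1, 1) replaces the single 7x7 stride-2 conv,
        # matching its receptive field while adding nonlinearity.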
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name))
self.block_list.append(bottleneck_block)
shortcut = True
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name))
self.block_list.append(basic_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
bias_attr=ParamAttr(name="fc_0.b_0"))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs):
model = ResNet_vc(layers=50, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
return model

@ -0,0 +1,298 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNeXt50_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
"ResNeXt50_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
"ResNeXt101_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
"ResNeXt101_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
"ResNeXt152_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
"ResNeXt152_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None,
data_format="NCHW"):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
data_format=data_format)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
data_layout=data_format)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
shortcut=True,
name=None,
data_format="NCHW"):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a",
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
groups=cardinality,
stride=stride,
act='relu',
name=name + "_branch2b",
data_format=data_format)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
filter_size=1,
act=None,
name=name + "_branch2c",
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2
if cardinality == 32 else num_filters,
filter_size=1,
stride=stride,
name=name + "_branch1",
data_format=data_format)
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class ResNeXt(nn.Layer):
def __init__(self,
layers=50,
class_num=1000,
cardinality=32,
input_image_channel=3,
data_format="NCHW"):
super(ResNeXt, self).__init__()
self.layers = layers
self.data_format = data_format
self.input_image_channel = input_image_channel
self.cardinality = cardinality
supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {}, but got layers={}".format(
                supported_layers, layers)
supported_cardinality = [32, 64]
        assert cardinality in supported_cardinality, \
            "supported cardinality values are {}, but got cardinality={}" \
            .format(supported_cardinality, cardinality)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [128, 256, 512,
1024] if cardinality == 32 else [256, 512, 1024, 2048]
self.conv = ConvBNLayer(
num_channels=self.input_image_channel,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name="res_conv1",
data_format=self.data_format)
self.pool2d_max = MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
self.block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block] if i == 0 else
num_filters[block] * int(64 // self.cardinality),
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=self.cardinality,
shortcut=shortcut,
name=conv_name,
data_format=self.data_format))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, inputs):
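        # Added commentary: fp16_guard scopes AMP casting when the model
        # runs under static-graph mixed precision; with NHWC the input is
        # first transposed from the loader's NCHW layout.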
with paddle.static.amp.fp16_guard():
if self.data_format == "NHWC":
inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
inputs.stop_gradient = True
y = self.conv(inputs)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=32, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
return model
def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=64, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
return model
def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=101, cardinality=32, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
return model
def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=101, cardinality=64, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
return model
def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=32, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
return model
def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=64, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
return model
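
# --- Hedged usage sketch (added commentary, not part of the original
# file): cardinality sets the group count of the 3x3 convs; pass
# data_format="NHWC" to run channels-last. Assumes a working paddle
# install.
if __name__ == "__main__":
    model = ResNeXt50_32x4d(pretrained=False)
    model.eval()
    x = paddle.rand([1, 3, 224, 224])
    print(model(x).shape)  # expected: [1, 1000]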

@ -0,0 +1,490 @@
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNeXt101_32x8d_wsl":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
"ResNeXt101_32x16d_wsl":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16_wsl_pretrained.pdparams",
"ResNeXt101_32x32d_wsl":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
"ResNeXt101_32x48d_wsl":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
input_channels,
output_channels,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
if "downsample" in name:
conv_name = name + ".0"
else:
conv_name = name
self._conv = Conv2D(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=conv_name + ".weight"),
bias_attr=False)
if "downsample" in name:
bn_name = name[:9] + "downsample.1"
else:
if "conv1" == name:
bn_name = "bn" + name[-1]
else:
bn_name = (name[:10] if name[7:9].isdigit() else name[:9]
) + "bn" + name[-1]
self._bn = BatchNorm(
num_channels=output_channels,
act=act,
param_attr=ParamAttr(name=bn_name + ".weight"),
bias_attr=ParamAttr(name=bn_name + ".bias"),
moving_mean_name=bn_name + ".running_mean",
moving_variance_name=bn_name + ".running_var")
def forward(self, inputs):
x = self._conv(inputs)
x = self._bn(x)
return x
class ShortCut(nn.Layer):
def __init__(self, input_channels, output_channels, stride, name=None):
super(ShortCut, self).__init__()
self.input_channels = input_channels
self.output_channels = output_channels
self.stride = stride
if input_channels != output_channels or stride != 1:
self._conv = ConvBNLayer(
input_channels,
output_channels,
filter_size=1,
stride=stride,
name=name)
def forward(self, inputs):
if self.input_channels != self.output_channels or self.stride != 1:
return self._conv(inputs)
return inputs
class BottleneckBlock(nn.Layer):
def __init__(self, input_channels, output_channels, stride, cardinality,
width, name):
super(BottleneckBlock, self).__init__()
self._conv0 = ConvBNLayer(
input_channels,
output_channels,
filter_size=1,
act="relu",
name=name + ".conv1")
self._conv1 = ConvBNLayer(
output_channels,
output_channels,
filter_size=3,
act="relu",
stride=stride,
groups=cardinality,
name=name + ".conv2")
self._conv2 = ConvBNLayer(
output_channels,
output_channels // (width // 8),
filter_size=1,
act=None,
name=name + ".conv3")
self._short = ShortCut(
input_channels,
output_channels // (width // 8),
stride=stride,
name=name + ".downsample")
def forward(self, inputs):
x = self._conv0(inputs)
x = self._conv1(x)
x = self._conv2(x)
y = self._short(inputs)
y = paddle.add(x, y)
y = F.relu(y)
return y
class ResNeXt101WSL(nn.Layer):
def __init__(self, layers=101, cardinality=32, width=48, class_num=1000):
super(ResNeXt101WSL, self).__init__()
self.class_num = class_num
self.layers = layers
self.cardinality = cardinality
self.width = width
self.scale = width // 8
self.depth = [3, 4, 23, 3]
self.base_width = cardinality * width
num_filters = [self.base_width * i
for i in [1, 2, 4, 8]] # [256, 512, 1024, 2048]
self._conv_stem = ConvBNLayer(
3, 64, 7, stride=2, act="relu", name="conv1")
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
self._conv1_0 = BottleneckBlock(
64,
num_filters[0],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer1.0")
self._conv1_1 = BottleneckBlock(
num_filters[0] // (width // 8),
num_filters[0],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer1.1")
self._conv1_2 = BottleneckBlock(
num_filters[0] // (width // 8),
num_filters[0],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer1.2")
self._conv2_0 = BottleneckBlock(
num_filters[0] // (width // 8),
num_filters[1],
stride=2,
cardinality=self.cardinality,
width=self.width,
name="layer2.0")
self._conv2_1 = BottleneckBlock(
num_filters[1] // (width // 8),
num_filters[1],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer2.1")
self._conv2_2 = BottleneckBlock(
num_filters[1] // (width // 8),
num_filters[1],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer2.2")
self._conv2_3 = BottleneckBlock(
num_filters[1] // (width // 8),
num_filters[1],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer2.3")
self._conv3_0 = BottleneckBlock(
num_filters[1] // (width // 8),
num_filters[2],
stride=2,
cardinality=self.cardinality,
width=self.width,
name="layer3.0")
self._conv3_1 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.1")
self._conv3_2 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.2")
self._conv3_3 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.3")
self._conv3_4 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.4")
self._conv3_5 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.5")
self._conv3_6 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.6")
self._conv3_7 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.7")
self._conv3_8 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.8")
self._conv3_9 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.9")
self._conv3_10 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.10")
self._conv3_11 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.11")
self._conv3_12 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.12")
self._conv3_13 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.13")
self._conv3_14 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.14")
self._conv3_15 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.15")
self._conv3_16 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.16")
self._conv3_17 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.17")
self._conv3_18 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.18")
self._conv3_19 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.19")
self._conv3_20 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.20")
self._conv3_21 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.21")
self._conv3_22 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[2],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer3.22")
self._conv4_0 = BottleneckBlock(
num_filters[2] // (width // 8),
num_filters[3],
stride=2,
cardinality=self.cardinality,
width=self.width,
name="layer4.0")
self._conv4_1 = BottleneckBlock(
num_filters[3] // (width // 8),
num_filters[3],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer4.1")
self._conv4_2 = BottleneckBlock(
num_filters[3] // (width // 8),
num_filters[3],
stride=1,
cardinality=self.cardinality,
width=self.width,
name="layer4.2")
self._avg_pool = AdaptiveAvgPool2D(1)
self._out = Linear(
num_filters[3] // (width // 8),
class_num,
weight_attr=ParamAttr(name="fc.weight"),
bias_attr=ParamAttr(name="fc.bias"))
def forward(self, inputs):
x = self._conv_stem(inputs)
x = self._pool(x)
x = self._conv1_0(x)
x = self._conv1_1(x)
x = self._conv1_2(x)
x = self._conv2_0(x)
x = self._conv2_1(x)
x = self._conv2_2(x)
x = self._conv2_3(x)
x = self._conv3_0(x)
x = self._conv3_1(x)
x = self._conv3_2(x)
x = self._conv3_3(x)
x = self._conv3_4(x)
x = self._conv3_5(x)
x = self._conv3_6(x)
x = self._conv3_7(x)
x = self._conv3_8(x)
x = self._conv3_9(x)
x = self._conv3_10(x)
x = self._conv3_11(x)
x = self._conv3_12(x)
x = self._conv3_13(x)
x = self._conv3_14(x)
x = self._conv3_15(x)
x = self._conv3_16(x)
x = self._conv3_17(x)
x = self._conv3_18(x)
x = self._conv3_19(x)
x = self._conv3_20(x)
x = self._conv3_21(x)
x = self._conv3_22(x)
x = self._conv4_0(x)
x = self._conv4_1(x)
x = self._conv4_2(x)
x = self._avg_pool(x)
x = paddle.squeeze(x, axis=[2, 3])
x = self._out(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt101WSL(cardinality=32, width=8, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_32x8d_wsl"],
use_ssld=use_ssld)
return model
def ResNeXt101_32x16d_wsl(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt101WSL(cardinality=32, width=16, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_32x16d_wsl"],
use_ssld=use_ssld)
return model
def ResNeXt101_32x32d_wsl(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt101WSL(cardinality=32, width=32, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_32x32d_wsl"],
use_ssld=use_ssld)
return model
def ResNeXt101_32x48d_wsl(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt101WSL(cardinality=32, width=48, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_32x48d_wsl"],
use_ssld=use_ssld)
return model

@ -0,0 +1,317 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ResNeXt50_vd_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
"ResNeXt50_vd_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
"ResNeXt101_vd_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
"ResNeXt101_vd_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
"ResNeXt152_vd_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
"ResNeXt152_vd_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(
self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
groups=cardinality,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2
if cardinality == 32 else num_filters,
filter_size=1,
stride=1,
                is_vd_mode=not if_first,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class ResNeXt(nn.Layer):
def __init__(self, layers=50, class_num=1000, cardinality=32):
super(ResNeXt, self).__init__()
self.layers = layers
self.cardinality = cardinality
supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {}, but got layers={}".format(
                supported_layers, layers)
supported_cardinality = [32, 64]
        assert cardinality in supported_cardinality, \
            "supported cardinality values are {}, but got cardinality={}" \
            .format(supported_cardinality, cardinality)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [128, 256, 512,
1024] if cardinality == 32 else [256, 512, 1024, 2048]
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block] if i == 0 else
num_filters[block] * int(64 // self.cardinality),
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=self.cardinality,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`: expected bool or str path."
        )
def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=32, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
return model
def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=64, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
return model
def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=101, cardinality=32, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_vd_32x4d"],
use_ssld=use_ssld)
return model
def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=101, cardinality=64, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt101_vd_64x4d"],
use_ssld=use_ssld)
return model
def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=32, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt152_vd_32x4d"],
use_ssld=use_ssld)
return model
def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=64, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["ResNeXt152_vd_64x4d"],
use_ssld=use_ssld)
return model

@ -0,0 +1,281 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
from math import ceil
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"ReXNet_1_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
"ReXNet_1_3":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
"ReXNet_1_5":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams",
"ReXNet_2_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
"ReXNet_3_0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
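# Note (added for clarity): conv_bn_act and conv_bn_swish do not return a
# layer; they append Conv2D -> BatchNorm2D (-> activation) sublayers to the
# caller-supplied list `out` in place.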
def conv_bn_act(out,
in_channels,
channels,
kernel=1,
stride=1,
pad=0,
num_group=1,
active=True,
relu6=False):
out.append(
nn.Conv2D(
in_channels,
channels,
kernel,
stride,
pad,
groups=num_group,
bias_attr=False))
out.append(nn.BatchNorm2D(channels))
if active:
out.append(nn.ReLU6() if relu6 else nn.ReLU())
def conv_bn_swish(out,
in_channels,
channels,
kernel=1,
stride=1,
pad=0,
num_group=1):
out.append(
nn.Conv2D(
in_channels,
channels,
kernel,
stride,
pad,
groups=num_group,
bias_attr=False))
out.append(nn.BatchNorm2D(channels))
out.append(nn.Swish())
class SE(nn.Layer):
def __init__(self, in_channels, channels, se_ratio=12):
super(SE, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.fc = nn.Sequential(
nn.Conv2D(
in_channels, channels // se_ratio, kernel_size=1, padding=0),
nn.BatchNorm2D(channels // se_ratio),
nn.ReLU(),
nn.Conv2D(
channels // se_ratio, channels, kernel_size=1, padding=0),
nn.Sigmoid())
def forward(self, x):
y = self.avg_pool(x)
y = self.fc(y)
return x * y
class LinearBottleneck(nn.Layer):
def __init__(self,
in_channels,
channels,
t,
stride,
use_se=True,
se_ratio=12,
**kwargs):
super(LinearBottleneck, self).__init__(**kwargs)
self.use_shortcut = stride == 1 and in_channels <= channels
self.in_channels = in_channels
self.out_channels = channels
out = []
if t != 1:
dw_channels = in_channels * t
conv_bn_swish(out, in_channels=in_channels, channels=dw_channels)
else:
dw_channels = in_channels
conv_bn_act(
out,
in_channels=dw_channels,
channels=dw_channels,
kernel=3,
stride=stride,
pad=1,
num_group=dw_channels,
active=False)
if use_se:
out.append(SE(dw_channels, dw_channels, se_ratio))
out.append(nn.ReLU6())
conv_bn_act(
out,
in_channels=dw_channels,
channels=channels,
active=False,
relu6=True)
self.out = nn.Sequential(*out)
def forward(self, x):
out = self.out(x)
if self.use_shortcut:
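            # ReXNet's partial residual: add the input onto the first
            # in_channels channels only; the expanded channels are left as-is.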
out[:, 0:self.in_channels] += x
return out
class ReXNetV1(nn.Layer):
def __init__(self,
input_ch=16,
final_ch=180,
width_mult=1.0,
depth_mult=1.0,
class_num=1000,
use_se=True,
se_ratio=12,
dropout_ratio=0.2,
bn_momentum=0.9):
super(ReXNetV1, self).__init__()
layers = [1, 2, 2, 3, 3, 5]
strides = [1, 2, 2, 2, 1, 2]
use_ses = [False, False, True, True, True, True]
layers = [ceil(element * depth_mult) for element in layers]
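        # Flatten per-stage strides into a per-layer list: only the first
        # layer of each stage downsamples; the rest use stride 1.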
strides = sum([[element] + [1] * (layers[idx] - 1)
for idx, element in enumerate(strides)], [])
if use_se:
use_ses = sum([[element] * layers[idx]
for idx, element in enumerate(use_ses)], [])
else:
use_ses = [False] * sum(layers[:])
ts = [1] * layers[0] + [6] * sum(layers[1:])
self.depth = sum(layers[:]) * 3
stem_channel = 32 / width_mult if width_mult < 1.0 else 32
inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
features = []
in_channels_group = []
channels_group = []
        # The channel schedule below is a simple scheme that makes every layer
        # an expansion layer: the target width grows by a fixed increment per block.
for i in range(self.depth // 3):
if i == 0:
in_channels_group.append(int(round(stem_channel * width_mult)))
channels_group.append(int(round(inplanes * width_mult)))
else:
in_channels_group.append(int(round(inplanes * width_mult)))
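                # Core ReXNet design: the target width grows linearly by the
                # same increment at every block instead of doubling per stage.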
inplanes += final_ch / (self.depth // 3 * 1.0)
channels_group.append(int(round(inplanes * width_mult)))
conv_bn_swish(
features,
3,
int(round(stem_channel * width_mult)),
kernel=3,
stride=2,
pad=1)
for block_idx, (in_c, c, t, s, se) in enumerate(
zip(in_channels_group, channels_group, ts, strides, use_ses)):
features.append(
LinearBottleneck(
in_channels=in_c,
channels=c,
t=t,
stride=s,
use_se=se,
se_ratio=se_ratio))
pen_channels = int(1280 * width_mult)
conv_bn_swish(features, c, pen_channels)
features.append(nn.AdaptiveAvgPool2D(1))
self.features = nn.Sequential(*features)
self.output = nn.Sequential(
nn.Dropout(dropout_ratio),
nn.Conv2D(
pen_channels, class_num, 1, bias_attr=True))
def forward(self, x):
x = self.features(x)
x = self.output(x).squeeze(axis=-1).squeeze(axis=-1)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`. Please pass a `bool` or a `str` path."
        )
def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs):
model = ReXNetV1(width_mult=1.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
return model
def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs):
model = ReXNetV1(width_mult=1.3, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
return model
def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs):
model = ReXNetV1(width_mult=1.5, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
return model
def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs):
model = ReXNetV1(width_mult=2.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
return model
def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs):
model = ReXNetV1(width_mult=3.0, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
return model
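
# Illustrative usage sketch (added for clarity, not part of the upstream
# file). `pretrained` may be True (download from MODEL_URLS), False, or a
# local `.pdparams` path.
if __name__ == "__main__":
    model = ReXNet_1_0(pretrained=False)
    x = paddle.rand([2, 3, 224, 224])
    y = model(x)
    print(y.shape)  # expected: [2, 1000]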

@ -0,0 +1,390 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"SE_ResNet18_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
"SE_ResNet34_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
"SE_ResNet50_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(
self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
            name=None):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
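        # ResNet-vd trick: when is_vd_mode is set, downsample with a 2x2
        # average pool before the convolution instead of using a strided conv.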
self._pool2d_avg = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
reduction_ratio=16,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
self.scale = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=reduction_ratio,
name='fc_' + name)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=1,
                is_vd_mode=not if_first,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
scale = self.scale(conv2)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=scale)
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
reduction_ratio=16,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b")
self.scale = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=reduction_ratio,
name='fc_' + name)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=1,
                is_vd_mode=not if_first,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
scale = self.scale(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=scale)
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + '_exc_offset'))
def forward(self, input):
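        # Squeeze-and-Excitation: global average pool, FC reduce, ReLU,
        # FC restore, sigmoid, then reweight the input channel-wise.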
pool = self.pool2d_gap(input)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = F.sigmoid(excitation)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = input * excitation
return out
class SE_ResNet_vd(nn.Layer):
def __init__(self, layers=50, class_num=1000):
super(SE_ResNet_vd, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
self.block_list.append(bottleneck_block)
shortcut = True
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
self.block_list.append(basic_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc6_weights"),
bias_attr=ParamAttr(name="fc6_offset"))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`. Please pass a `bool` or a `str` path."
        )
def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
model = SE_ResNet_vd(layers=18, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
return model
def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
model = SE_ResNet_vd(layers=34, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
return model
def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
model = SE_ResNet_vd(layers=50, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
return model
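
# Illustrative usage sketch (added for clarity, not part of the upstream
# file): instantiate the lightest variant and report its parameter count.
if __name__ == "__main__":
    model = SE_ResNet18_vd(pretrained=False)
    n_params = sum(int(np.prod(p.shape)) for p in model.parameters())
    print(f"SE_ResNet18_vd parameters: {n_params / 1e6:.1f}M")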

@ -0,0 +1,364 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"SE_ResNeXt50_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
"SE_ResNeXt101_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
"SE_ResNeXt152_64x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None,
data_format='NCHW'):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
data_format=data_format)
bn_name = name + '_bn'
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
data_layout=data_format)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True,
if_first=False,
name=None,
data_format="NCHW"):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name='conv' + name + '_x1',
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
groups=cardinality,
stride=stride,
act='relu',
name='conv' + name + '_x2',
data_format=data_format)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
filter_size=1,
act=None,
name='conv' + name + '_x3',
data_format=data_format)
self.scale = SELayer(
num_channels=num_filters * 2 if cardinality == 32 else num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
reduction_ratio=reduction_ratio,
name='fc' + name,
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2
if cardinality == 32 else num_filters,
filter_size=1,
stride=stride,
name='conv' + name + '_prj',
data_format=data_format)
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
scale = self.scale(conv2)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=scale)
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
reduction_ratio,
name=None,
data_format="NCHW"):
super(SELayer, self).__init__()
self.data_format = data_format
self.pool2d_gap = AdaptiveAvgPool2D(1, data_format=self.data_format)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
self.relu = nn.ReLU()
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + '_exc_offset'))
self.sigmoid = nn.Sigmoid()
def forward(self, input):
pool = self.pool2d_gap(input)
if self.data_format == "NHWC":
pool = paddle.squeeze(pool, axis=[1, 2])
else:
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = self.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = self.sigmoid(excitation)
if self.data_format == "NHWC":
excitation = paddle.unsqueeze(excitation, axis=[1, 2])
else:
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = input * excitation
return out
class ResNeXt(nn.Layer):
def __init__(self,
layers=50,
class_num=1000,
cardinality=32,
input_image_channel=3,
data_format="NCHW"):
super(ResNeXt, self).__init__()
self.layers = layers
self.cardinality = cardinality
self.reduction_ratio = 16
self.data_format = data_format
self.input_image_channel = input_image_channel
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
supported_cardinality = [32, 64]
assert cardinality in supported_cardinality, \
"supported cardinality is {} but input cardinality is {}" \
.format(supported_cardinality, cardinality)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [128, 256, 512,
1024] if cardinality == 32 else [256, 512, 1024, 2048]
if layers < 152:
self.conv = ConvBNLayer(
num_channels=self.input_image_channel,
num_filters=64,
filter_size=7,
stride=2,
act='relu',
name="conv1",
data_format=self.data_format)
else:
self.conv1_1 = ConvBNLayer(
num_channels=self.input_image_channel,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="conv1",
data_format=self.data_format)
self.conv1_2 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv2",
data_format=self.data_format)
self.conv1_3 = ConvBNLayer(
num_channels=64,
num_filters=128,
filter_size=3,
stride=1,
act='relu',
name="conv3",
data_format=self.data_format)
self.pool2d_max = MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
self.block_list = []
n = 1 if layers == 50 or layers == 101 else 3
for block in range(len(depth)):
n += 1
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block] if i == 0 else
num_filters[block] * int(64 // self.cardinality),
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=self.cardinality,
reduction_ratio=self.reduction_ratio,
shortcut=shortcut,
if_first=block == 0,
name=str(n) + '_' + str(i + 1),
data_format=self.data_format))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc6_weights"),
bias_attr=ParamAttr(name="fc6_offset"))
def forward(self, inputs):
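        # fp16_guard marks this region so that, under pure-fp16 (AMP O2)
        # static-graph training with use_fp16_guard=True, it runs in float16.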
with paddle.static.amp.fp16_guard():
if self.data_format == "NHWC":
inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
inputs.stop_gradient = True
if self.layers < 152:
y = self.conv(inputs)
else:
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for i, block in enumerate(self.block_list):
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`. Please pass a `bool` or a `str` path."
        )
def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=32, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
return model
def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=101, cardinality=32, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SE_ResNeXt101_32x4d"],
use_ssld=use_ssld)
return model
def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=64, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SE_ResNeXt152_64x4d"],
use_ssld=use_ssld)
return model
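
# Illustrative usage sketch (added for clarity, not part of the upstream
# file). Note that with data_format="NHWC" the forward pass still expects
# NCHW input and transposes it internally (see ResNeXt.forward above).
if __name__ == "__main__":
    model = SE_ResNeXt50_32x4d(pretrained=False)
    x = paddle.rand([1, 3, 224, 224])
    print(model(x).shape)  # expected: [1, 1000]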

@ -0,0 +1,309 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"SE_ResNeXt50_vd_32x4d":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams",
"SENet154_vd":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = name + '_bn'
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name='conv' + name + '_x1')
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
groups=cardinality,
stride=stride,
act='relu',
name='conv' + name + '_x2')
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
filter_size=1,
act=None,
name='conv' + name + '_x3')
self.scale = SELayer(
num_channels=num_filters * 2 if cardinality == 32 else num_filters,
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
reduction_ratio=reduction_ratio,
name='fc' + name)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 2
if cardinality == 32 else num_filters,
filter_size=1,
stride=1,
                is_vd_mode=not if_first,
name='conv' + name + '_prj')
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
scale = self.scale(conv2)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=scale)
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
self.relu = nn.ReLU()
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + '_exc_offset'))
self.sigmoid = nn.Sigmoid()
def forward(self, input):
pool = self.pool2d_gap(input)
pool = paddle.squeeze(pool, axis=[2, 3])
squeeze = self.squeeze(pool)
squeeze = self.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = self.sigmoid(excitation)
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
out = paddle.multiply(input, excitation)
return out
class ResNeXt(nn.Layer):
def __init__(self, layers=50, class_num=1000, cardinality=32):
super(ResNeXt, self).__init__()
self.layers = layers
self.cardinality = cardinality
self.reduction_ratio = 16
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
supported_cardinality = [32, 64]
assert cardinality in supported_cardinality, \
"supported cardinality is {} but input cardinality is {}" \
.format(supported_cardinality, cardinality)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [128, 256, 512, 1024]
num_filters = [128, 256, 512,
1024] if cardinality == 32 else [256, 512, 1024, 2048]
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=64,
num_filters=128,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.block_list = []
n = 1 if layers == 50 or layers == 101 else 3
for block in range(len(depth)):
n += 1
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block] if i == 0 else
num_filters[block] * int(64 // self.cardinality),
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=self.cardinality,
reduction_ratio=self.reduction_ratio,
shortcut=shortcut,
if_first=block == 0,
name=str(n) + '_' + str(i + 1)))
self.block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = AdaptiveAvgPool2D(1)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_num,
weight_attr=ParamAttr(
initializer=Uniform(-stdv, stdv), name="fc6_weights"),
bias_attr=ParamAttr(name="fc6_offset"))
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
        raise RuntimeError(
            "Unsupported type for `pretrained`. Please pass a `bool` or a `str` path."
        )
def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=50, cardinality=32, **kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SE_ResNeXt50_vd_32x4d"],
use_ssld=use_ssld)
return model
def SENet154_vd(pretrained=False, use_ssld=False, **kwargs):
model = ResNeXt(layers=152, cardinality=64, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld)
return model
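
# Illustrative usage sketch (added for clarity, not part of the upstream
# file): dummy inference with a softmax over the logits.
if __name__ == "__main__":
    model = SE_ResNeXt50_vd_32x4d(pretrained=False)
    model.eval()
    x = paddle.rand([1, 3, 224, 224])
    probs = F.softmax(model(x), axis=-1)
    print(paddle.argmax(probs, axis=-1).item())  # top-1 class index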
