diff --git a/src/PaddleClas/ppcls/__init__.py b/src/PaddleClas/ppcls/__init__.py new file mode 100644 index 0000000..d6cdb6f --- /dev/null +++ b/src/PaddleClas/ppcls/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import optimizer + +from .arch import * +from .optimizer import * +from .data import * +from .utils import * diff --git a/src/PaddleClas/ppcls/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..d088393 Binary files /dev/null and b/src/PaddleClas/ppcls/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/__init__.py b/src/PaddleClas/ppcls/arch/__init__.py new file mode 100644 index 0000000..2d5e29d --- /dev/null +++ b/src/PaddleClas/ppcls/arch/__init__.py @@ -0,0 +1,134 @@ +#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import copy +import importlib + +import paddle.nn as nn +from paddle.jit import to_static +from paddle.static import InputSpec + +from . 
import backbone, gears
+from .backbone import *
+from .gears import build_gear
+from .utils import *
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils import logger
+from ppcls.utils.save_load import load_dygraph_pretrain
+from ppcls.arch.slim import prune_model, quantize_model
+
+__all__ = ["build_model", "RecModel", "DistillationModel"]
+
+
+def build_model(config):
+    arch_config = copy.deepcopy(config["Arch"])
+    model_type = arch_config.pop("name")
+    mod = importlib.import_module(__name__)
+    arch = getattr(mod, model_type)(**arch_config)
+    if isinstance(arch, TheseusLayer):
+        prune_model(config, arch)
+        quantize_model(config, arch)
+    return arch
+
+
+def apply_to_static(config, model):
+    support_to_static = config['Global'].get('to_static', False)
+
+    if support_to_static:
+        specs = None
+        if 'image_shape' in config['Global']:
+            specs = [InputSpec([None] + config['Global']['image_shape'])]
+        model = to_static(model, input_spec=specs)
+        logger.info("Successfully applied @to_static with specs: {}".format(
+            specs))
+    return model
+
+
+class RecModel(TheseusLayer):
+    def __init__(self, **config):
+        super().__init__()
+        backbone_config = config["Backbone"]
+        backbone_name = backbone_config.pop("name")
+        self.backbone = eval(backbone_name)(**backbone_config)
+        if "BackboneStopLayer" in config:
+            backbone_stop_layer = config["BackboneStopLayer"]["name"]
+            self.backbone.stop_after(backbone_stop_layer)
+
+        if "Neck" in config:
+            self.neck = build_gear(config["Neck"])
+        else:
+            self.neck = None
+
+        if "Head" in config:
+            self.head = build_gear(config["Head"])
+        else:
+            self.head = None
+
+    def forward(self, x, label=None):
+        out = dict()
+        x = self.backbone(x)
+        out["backbone"] = x
+        if self.neck is not None:
+            x = self.neck(x)
+        out["neck"] = x
+        out["features"] = x
+        if self.head is not None:
+            y = self.head(x, label)
+            out["logits"] = y
+        return out
+
+
+class DistillationModel(nn.Layer):
+    def __init__(self,
+                 models=None,
+                 pretrained_list=None,
+                 freeze_params_list=None,
+                 **kwargs):
+        super().__init__()
+        assert isinstance(models, list)
+        self.model_list = []
+        self.model_name_list = []
+        if pretrained_list is not None:
+            assert len(pretrained_list) == len(models)
+
+        if freeze_params_list is None:
+            freeze_params_list = [False] * len(models)
+        assert len(freeze_params_list) == len(models)
+        for idx, model_config in enumerate(models):
+            assert len(model_config) == 1
+            key = list(model_config.keys())[0]
+            model_config = model_config[key]
+            model_name = model_config.pop("name")
+            model = eval(model_name)(**model_config)
+
+            if freeze_params_list[idx]:
+                for param in model.parameters():
+                    param.trainable = False
+            self.model_list.append(self.add_sublayer(key, model))
+            self.model_name_list.append(key)
+
+        if pretrained_list is not None:
+            for idx, pretrained in enumerate(pretrained_list):
+                if pretrained is not None:
+                    # load weights into the sub-model itself, not its name string
+                    load_dygraph_pretrain(
+                        self.model_list[idx], path=pretrained)
+
+    def forward(self, x, label=None):
+        result_dict = dict()
+        for idx, model_name in enumerate(self.model_name_list):
+            if label is None:
+                result_dict[model_name] = self.model_list[idx](x)
+            else:
+                result_dict[model_name] = self.model_list[idx](x, label)
+        return result_dict
diff --git a/src/PaddleClas/ppcls/arch/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..8585526
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/__pycache__/__init__.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/__pycache__/utils.cpython-39.pyc b/src/PaddleClas/ppcls/arch/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000..e1b3292
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/__pycache__/utils.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/__init__.py b/src/PaddleClas/ppcls/arch/backbone/__init__.py
new file mode 100644
index 0000000..1bd23a9
--- /dev/null
+++ b/src/PaddleClas/ppcls/arch/backbone/__init__.py
@@ -0,0 +1,83 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import inspect
+
+from ppcls.arch.backbone.legendary_models.mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
+from ppcls.arch.backbone.legendary_models.mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
+from ppcls.arch.backbone.legendary_models.resnet import ResNet18, ResNet18_vd, ResNet34, ResNet34_vd, ResNet50, ResNet50_vd, ResNet101, ResNet101_vd, ResNet152, ResNet152_vd, ResNet200_vd
+from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
+from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
+from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
+from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x0_25, PPLCNet_x0_35, PPLCNet_x0_5, PPLCNet_x0_75, PPLCNet_x1_0, PPLCNet_x1_5, PPLCNet_x2_0, PPLCNet_x2_5
+from ppcls.arch.backbone.legendary_models.esnet import ESNet_x0_25, ESNet_x0_5, ESNet_x0_75, ESNet_x1_0
+
+from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
+from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
+from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
+from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
+from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
+from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
+from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SENet154_vd
+from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
+from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
+from ppcls.arch.backbone.model_zoo.densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264
+from
ppcls.arch.backbone.model_zoo.efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, EfficientNetB0_small +from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeSt50, ResNeSt101 +from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet +from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0 +from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish +from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3 +from ppcls.arch.backbone.model_zoo.alexnet import AlexNet +from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4 +from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71 +from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab +from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl +from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1 +from ppcls.arch.backbone.model_zoo.darknet import DarkNet53 +from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF +from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384 +from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384 +from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384 +from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L +from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0 +from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large +from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384 +from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169 +from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152 +from ppcls.arch.backbone.model_zoo.tnt import TNT_small +from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds +from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53 +from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5 +from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3g4 +from 
ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1 +from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid +from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh + + +# help whl get all the models' api (class type) and components' api (func type) +def get_apis(): + current_func = sys._getframe().f_code.co_name + current_module = sys.modules[__name__] + api = [] + for _, obj in inspect.getmembers(current_module, + inspect.isclass) + inspect.getmembers( + current_module, inspect.isfunction): + api.append(obj.__name__) + api.remove(current_func) + return api + + +__all__ = get_apis() diff --git a/src/PaddleClas/ppcls/arch/backbone/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..ed10327 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/base/__init__.py b/src/PaddleClas/ppcls/arch/backbone/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..c69b585 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/theseus_layer.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/theseus_layer.cpython-39.pyc new file mode 100644 index 0000000..fd599a5 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/base/__pycache__/theseus_layer.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/base/theseus_layer.py b/src/PaddleClas/ppcls/arch/backbone/base/theseus_layer.py new file mode 100644 index 0000000..908d944 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/base/theseus_layer.py @@ -0,0 +1,301 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
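+
+# Usage sketch (illustrative, not part of this module's API): TheseusLayer
+# lets a backbone expose intermediate outputs and swap sub-layers by name
+# pattern. Assuming `net` is any model built on TheseusLayer:
+#
+#     net.update_res(["blocks[0]", "blocks[2]"])  # hypothetical patterns
+#     out = net(x)  # dict: {"output": ..., "blocks[0]": ..., "blocks[2]": ...}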
+
+from typing import Tuple, List, Dict, Union, Callable, Any
+
+from paddle import nn
+from ppcls.utils import logger
+
+
+class Identity(nn.Layer):
+    def __init__(self):
+        super(Identity, self).__init__()
+
+    def forward(self, inputs):
+        return inputs
+
+
+class TheseusLayer(nn.Layer):
+    def __init__(self, *args, **kwargs):
+        super(TheseusLayer, self).__init__()
+        self.res_dict = {}
+        self.res_name = self.full_name()
+        self.pruner = None
+        self.quanter = None
+
+    def _return_dict_hook(self, layer, input, output):
+        res_dict = {"output": output}
+        # 'list' is needed to avoid the error raised by popping self.res_dict
+        for res_key in list(self.res_dict):
+            # clear the res_dict because the forward process may change according to input
+            res_dict[res_key] = self.res_dict.pop(res_key)
+        return res_dict
+
+    def init_res(self,
+                 stages_pattern,
+                 return_patterns=None,
+                 return_stages=None):
+        if return_patterns and return_stages:
+            msg = "'return_patterns' will be ignored when 'return_stages' is set."
+            logger.warning(msg)
+            return_stages = None
+
+        if return_stages is True:
+            return_patterns = stages_pattern
+        # return_stages is int or bool
+        if type(return_stages) is int:
+            return_stages = [return_stages]
+        if isinstance(return_stages, list):
+            if max(return_stages) >= len(stages_pattern) or min(
+                    return_stages) < 0:
+                msg = f"'return_stages' contains illegal value(s), which have been ignored. The stages' pattern list is {stages_pattern}."
+                logger.warning(msg)
+                return_stages = [
+                    val for val in return_stages
+                    if val >= 0 and val < len(stages_pattern)
+                ]
+            return_patterns = [stages_pattern[i] for i in return_stages]
+
+        if return_patterns:
+            self.update_res(return_patterns)
+
+    def replace_sub(self, *args, **kwargs) -> None:
+        msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead."
+        logger.error(DeprecationWarning(msg))
+        raise DeprecationWarning(msg)
+
+    def upgrade_sublayer(self,
+                         layer_name_pattern: Union[str, List[str]],
+                         handle_func: Callable[[nn.Layer, str], nn.Layer]
+                         ) -> List[str]:
+        """Use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'.
+
+        Args:
+            layer_name_pattern (Union[str, List[str]]): The name of the layer(s) to be modified by 'handle_func'.
+            handle_func (Callable[[nn.Layer, str], nn.Layer]): The function used to modify the target layer. It is called with the matched layer (nn.Layer) and the pattern (str) it matched, and must return the processed layer.
+
+        Returns:
+            List[str]: The patterns in 'layer_name_pattern' that were matched and handled successfully.
+
+        Examples:
+
+            from paddle import nn
+            import paddleclas
+
+            def rep_func(layer: nn.Layer, pattern: str):
+                new_layer = nn.Conv2D(
+                    in_channels=layer._in_channels,
+                    out_channels=layer._out_channels,
+                    kernel_size=5,
+                    padding=2
+                )
+                return new_layer
+
+            net = paddleclas.MobileNetV1()
+            res = net.upgrade_sublayer(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func)
+            print(res)
+            # ['blocks[11].depthwise_conv.conv', 'blocks[12].depthwise_conv.conv']
+        """
+
+        if not isinstance(layer_name_pattern, list):
+            layer_name_pattern = [layer_name_pattern]
+
+        hit_layer_pattern_list = []
+        for pattern in layer_name_pattern:
+            # parse pattern to find target layer and its parent
+            layer_list = parse_pattern_str(pattern=pattern, parent_layer=self)
+            if not layer_list:
+                continue
+            sub_layer_parent = layer_list[-2]["layer"] if len(
+                layer_list) > 1 else self
+
+            sub_layer = layer_list[-1]["layer"]
+            sub_layer_name = layer_list[-1]["name"]
+            sub_layer_index = layer_list[-1]["index"]
+
+            new_sub_layer = handle_func(sub_layer, pattern)
+
+            if sub_layer_index:
+                getattr(sub_layer_parent,
+                        sub_layer_name)[sub_layer_index] = new_sub_layer
+            else:
+                setattr(sub_layer_parent, sub_layer_name, new_sub_layer)
+
+            hit_layer_pattern_list.append(pattern)
+        return hit_layer_pattern_list
+
+    def stop_after(self, stop_layer_name: str) -> bool:
+        """Stop forward and backward after 'stop_layer_name'.
+
+        Args:
+            stop_layer_name (str): The name of the layer after which forward and backward computation stops.
+
+        Returns:
+            bool: 'True' if successful, 'False' otherwise.
+        """
+
+        layer_list = parse_pattern_str(stop_layer_name, self)
+        if not layer_list:
+            return False
+
+        parent_layer = self
+        for layer_dict in layer_list:
+            name, index = layer_dict["name"], layer_dict["index"]
+            if not set_identity(parent_layer, name, index):
+                msg = f"Failed to set the layers after stop_layer_name('{stop_layer_name}') to Identity. The error layer's name is '{name}'."
+                logger.warning(msg)
+                return False
+            parent_layer = layer_dict["layer"]
+
+        return True
+
+    def update_res(
+            self,
+            return_patterns: Union[str, List[str]]) -> List[str]:
+        """Update the result(s) to be returned.
+
+        Args:
+            return_patterns (Union[str, List[str]]): The pattern(s) of the layer(s) whose output should be returned.
+
+        Returns:
+            List[str]: The patterns that were matched and set successfully.
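+
+        Examples (a minimal sketch; the layer patterns are hypothetical):
+
+            net = paddleclas.MobileNetV1()
+            net.update_res(return_patterns=["blocks[0]", "blocks[2]"])
+            # net(x) now returns a dict holding the final output under
+            # "output" and each matched pattern's intermediate output.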
+ """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook( + save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer( + return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook( + self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, + layer_name: str, + layer_index: str=None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[ + layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][ + sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[ + None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." + logger.warning(msg) + return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + logger.warning(msg) + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len( + target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). 
+                logger.warning(msg)
+                return None
+
+            target_layer = target_layer[target_layer_index]
+
+        layer_list.append({
+            "layer": target_layer,
+            "name": target_layer_name,
+            "index": target_layer_index
+        })
+
+        pattern_list = pattern_list[1:]
+        parent_layer = target_layer
+    return layer_list
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__init__.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__init__.py
new file mode 100644
index 0000000..1f837da
--- /dev/null
+++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__init__.py
@@ -0,0 +1,6 @@
+from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd
+from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W64_C
+from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
+from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
+from .inception_v3 import InceptionV3
+from .vgg import VGG11, VGG13, VGG16, VGG19
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..118b131
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/__init__.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/esnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/esnet.cpython-39.pyc
new file mode 100644
index 0000000..135d838
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/esnet.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/hrnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/hrnet.cpython-39.pyc
new file mode 100644
index 0000000..d1ee9ed
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/hrnet.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/inception_v3.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/inception_v3.cpython-39.pyc
new file mode 100644
index 0000000..a330df1
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/inception_v3.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v1.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v1.cpython-39.pyc
new file mode 100644
index 0000000..b5ec613
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v1.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v3.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v3.cpython-39.pyc
new file mode 100644
index 0000000..54ad9f4
Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/mobilenet_v3.cpython-39.pyc differ
diff --git
a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/pp_lcnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/pp_lcnet.cpython-39.pyc new file mode 100644 index 0000000..8b2be9d Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/pp_lcnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/resnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/resnet.cpython-39.pyc new file mode 100644 index 0000000..3e2cd63 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/resnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/vgg.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/vgg.cpython-39.pyc new file mode 100644 index 0000000..0739199 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/legendary_models/__pycache__/vgg.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/esnet.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/esnet.py new file mode 100644 index 0000000..e05e0ce --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/esnet.py @@ -0,0 +1,369 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
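+
+# Quick usage sketch (illustrative): the ESNet_* factories at the bottom of
+# this file build the network, e.g.:
+#
+#     import paddle
+#     model = ESNet_x1_0(pretrained=False)
+#     logits = model(paddle.rand([1, 3, 224, 224]))  # [1, 1000] with default class_num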
+ +from __future__ import absolute_import, division, print_function +import math +import paddle +from paddle import ParamAttr, reshape, transpose, concat, split +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ESNet_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_25_pretrained.pdparams", + "ESNet_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_5_pretrained.pdparams", + "ESNet_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_75_pretrained.pdparams", + "ESNet_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x1_0_pretrained.pdparams", +} + +MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]} + +__all__ = list(MODEL_URLS.keys()) + + +def channel_shuffle(x, groups): + batch_size, num_channels, height, width = x.shape[0:4] + channels_per_group = num_channels // groups + x = reshape( + x=x, shape=[batch_size, groups, channels_per_group, height, width]) + x = transpose(x=x, perm=[0, 2, 1, 3, 4]) + x = reshape(x=x, shape=[batch_size, num_channels, height, width]) + return x + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + groups=1, + if_act=True): + super().__init__() + self.conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm( + out_channels, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.if_act = if_act + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + x = self.hardswish(x) + return x + + +class SEModule(TheseusLayer): + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class ESBlock1(TheseusLayer): + def __init__(self, in_channels, out_channels): + super().__init__() + self.pw_1_1 = ConvBNLayer( + in_channels=in_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1) + self.dw_1 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=1, + groups=out_channels // 2, + if_act=False) + self.se = 
SEModule(out_channels) + + self.pw_1_2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1) + + def forward(self, x): + x1, x2 = split( + x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1) + x2 = self.pw_1_1(x2) + x3 = self.dw_1(x2) + x3 = concat([x2, x3], axis=1) + x3 = self.se(x3) + x3 = self.pw_1_2(x3) + x = concat([x1, x3], axis=1) + return channel_shuffle(x, 2) + + +class ESBlock2(TheseusLayer): + def __init__(self, in_channels, out_channels): + super().__init__() + + # branch1 + self.dw_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=2, + groups=in_channels, + if_act=False) + self.pw_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1) + # branch2 + self.pw_2_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1) + self.dw_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=2, + groups=out_channels // 2, + if_act=False) + self.se = SEModule(out_channels // 2) + self.pw_2_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=1) + self.concat_dw = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + groups=out_channels) + self.concat_pw = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x): + x1 = self.dw_1(x) + x1 = self.pw_1(x1) + x2 = self.pw_2_1(x) + x2 = self.dw_2(x2) + x2 = self.se(x2) + x2 = self.pw_2_2(x2) + x = concat([x1, x2], axis=1) + x = self.concat_dw(x) + x = self.concat_pw(x) + return x + + +class ESNet(TheseusLayer): + def __init__(self, + stages_pattern, + class_num=1000, + scale=1.0, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_num = class_num + self.class_expand = class_expand + stage_repeats = [3, 7, 3] + stage_out_channels = [ + -1, 24, make_divisible(116 * scale), make_divisible(232 * scale), + make_divisible(464 * scale), 1024 + ] + + self.conv1 = ConvBNLayer( + in_channels=3, + out_channels=stage_out_channels[1], + kernel_size=3, + stride=2) + self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + block_list = [] + for stage_id, num_repeat in enumerate(stage_repeats): + for i in range(num_repeat): + if i == 0: + block = ESBlock2( + in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2]) + else: + block = ESBlock1( + in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2]) + block_list.append(block) + self.blocks = nn.Sequential(*block_list) + + self.conv2 = ConvBNLayer( + in_channels=stage_out_channels[-2], + out_channels=stage_out_channels[-1], + kernel_size=1) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D( + in_channels=stage_out_channels[-1], + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + self.fc = Linear(self.class_expand, self.class_num) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + x = self.max_pool(x) + x = self.blocks(x) + x = self.conv2(x) + 
x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ESNet_x0_25(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x0_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x0_25` model depends on args. + """ + model = ESNet( + scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_25"], use_ssld) + return model + + +def ESNet_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x0_5` model depends on args. + """ + model = ESNet( + scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_5"], use_ssld) + return model + + +def ESNet_x0_75(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x0_75 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x0_75` model depends on args. + """ + model = ESNet( + scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_75"], use_ssld) + return model + + +def ESNet_x1_0(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x1_0 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x1_0` model depends on args. + """ + model = ESNet( + scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ESNet_x1_0"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/hrnet.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/hrnet.py new file mode 100644 index 0000000..c3f7759 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/hrnet.py @@ -0,0 +1,794 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +from paddle import nn +from paddle import ParamAttr +from paddle.nn.functional import upsample +from paddle.nn.initializer import Uniform + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer, Identity +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "HRNet_W18_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W18_C_pretrained.pdparams", + "HRNet_W30_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W30_C_pretrained.pdparams", + "HRNet_W32_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W32_C_pretrained.pdparams", + "HRNet_W40_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W40_C_pretrained.pdparams", + "HRNet_W44_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W44_C_pretrained.pdparams", + "HRNet_W48_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W48_C_pretrained.pdparams", + "HRNet_W64_C": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W64_C_pretrained.pdparams" +} + +MODEL_STAGES_PATTERN = {"HRNet": ["st4"]} + +__all__ = list(MODEL_URLS.keys()) + + +def _create_act(act): + if act == "hardswish": + return nn.Hardswish() + elif act == "relu": + return nn.ReLU() + elif act is None: + return Identity() + else: + raise RuntimeError( + "The activation function is not supported: {}".format(act)) + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act="relu"): + super().__init__() + + self.conv = nn.Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + self.bn = nn.BatchNorm(num_filters, act=None) + self.act = _create_act(act) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.act(x) + return x + + +class BottleneckBlock(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False): + super().__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu") + self.conv3 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act=None) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16) + self.relu = nn.ReLU() + + def forward(self, x, 
res_dict=None): + residual = x + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + if self.downsample: + residual = self.conv_down(residual) + if self.has_se: + x = self.se(x) + x = paddle.add(x=residual, y=x) + x = self.relu(x) + return x + + +class BasicBlock(nn.Layer): + def __init__(self, num_channels, num_filters, has_se=False): + super().__init__() + + self.has_se = has_se + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=1, + act="relu") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16) + self.relu = nn.ReLU() + + def forward(self, x): + residual = x + x = self.conv1(x) + x = self.conv2(x) + + if self.has_se: + x = self.se(x) + + x = paddle.add(x=residual, y=x) + x = self.relu(x) + return x + + +class SELayer(TheseusLayer): + def __init__(self, num_channels, num_filters, reduction_ratio): + super().__init__() + + self.avg_pool = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.fc_squeeze = nn.Linear( + num_channels, + med_ch, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) + self.relu = nn.ReLU() + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.fc_excitation = nn.Linear( + med_ch, + num_filters, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) + self.sigmoid = nn.Sigmoid() + + def forward(self, x, res_dict=None): + residual = x + x = self.avg_pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self.fc_squeeze(x) + x = self.relu(x) + x = self.fc_excitation(x) + x = self.sigmoid(x) + x = paddle.unsqueeze(x, axis=[2, 3]) + x = residual * x + return x + + +class Stage(TheseusLayer): + def __init__(self, num_modules, num_filters, has_se=False): + super().__init__() + + self._num_modules = num_modules + + self.stage_func_list = nn.LayerList() + for i in range(num_modules): + self.stage_func_list.append( + HighResolutionModule( + num_filters=num_filters, has_se=has_se)) + + def forward(self, x, res_dict=None): + x = x + for idx in range(self._num_modules): + x = self.stage_func_list[idx](x) + return x + + +class HighResolutionModule(TheseusLayer): + def __init__(self, num_filters, has_se=False): + super().__init__() + + self.basic_block_list = nn.LayerList() + + for i in range(len(num_filters)): + self.basic_block_list.append( + nn.Sequential(* [ + BasicBlock( + num_channels=num_filters[i], + num_filters=num_filters[i], + has_se=has_se) for j in range(4) + ])) + + self.fuse_func = FuseLayers( + in_channels=num_filters, out_channels=num_filters) + + def forward(self, x, res_dict=None): + out = [] + for idx, xi in enumerate(x): + basic_block_list = self.basic_block_list[idx] + for basic_block_func in basic_block_list: + xi = basic_block_func(xi) + out.append(xi) + out = self.fuse_func(out) + return out + + +class FuseLayers(TheseusLayer): + def __init__(self, in_channels, out_channels): + super().__init__() + + self._actual_ch = len(in_channels) + self._in_channels = in_channels + + self.residual_func_list = nn.LayerList() + self.relu = nn.ReLU() + for i in range(len(in_channels)): + for j in range(len(in_channels)): + if j > i: + self.residual_func_list.append( + ConvBNLayer( + num_channels=in_channels[j], + num_filters=out_channels[i], + filter_size=1, + stride=1, + act=None)) + elif j < i: + 
pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + self.residual_func_list.append( + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[i], + filter_size=3, + stride=2, + act=None)) + pre_num_filters = out_channels[i] + else: + self.residual_func_list.append( + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[j], + filter_size=3, + stride=2, + act="relu")) + pre_num_filters = out_channels[j] + + def forward(self, x, res_dict=None): + out = [] + residual_func_idx = 0 + for i in range(len(self._in_channels)): + residual = x[i] + for j in range(len(self._in_channels)): + if j > i: + xj = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + xj = upsample(xj, scale_factor=2**(j - i), mode="nearest") + residual = paddle.add(x=residual, y=xj) + elif j < i: + xj = x[j] + for k in range(i - j): + xj = self.residual_func_list[residual_func_idx](xj) + residual_func_idx += 1 + + residual = paddle.add(x=residual, y=xj) + + residual = self.relu(residual) + out.append(residual) + + return out + + +class LastClsOut(TheseusLayer): + def __init__(self, + num_channel_list, + has_se, + num_filters_list=[32, 64, 128, 256]): + super().__init__() + + self.func_list = nn.LayerList() + for idx in range(len(num_channel_list)): + self.func_list.append( + BottleneckBlock( + num_channels=num_channel_list[idx], + num_filters=num_filters_list[idx], + has_se=has_se, + downsample=True)) + + def forward(self, x, res_dict=None): + out = [] + for idx, xi in enumerate(x): + xi = self.func_list[idx](xi) + out.append(xi) + return out + + +class HRNet(TheseusLayer): + """ + HRNet + Args: + width: int=18. Base channel number of HRNet. + has_se: bool=False. If 'True', add se module to HRNet. + class_num: int=1000. Output num of last fc layer. + Returns: + model: nn.Layer. Specific HRNet model depends on args. 
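+        Example (a minimal sketch; roughly what HRNet_W18_C(pretrained=False) below does):
+            model = HRNet(stages_pattern=MODEL_STAGES_PATTERN["HRNet"], width=18)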
+ """ + + def __init__(self, + stages_pattern, + width=18, + has_se=False, + class_num=1000, + return_patterns=None, + return_stages=None): + super().__init__() + + self.width = width + self.has_se = has_se + self._class_num = class_num + + channels_2 = [self.width, self.width * 2] + channels_3 = [self.width, self.width * 2, self.width * 4] + channels_4 = [ + self.width, self.width * 2, self.width * 4, self.width * 8 + ] + + self.conv_layer1_1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act="relu") + + self.conv_layer1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=2, + act="relu") + + self.layer1 = nn.Sequential(* [ + BottleneckBlock( + num_channels=64 if i == 0 else 256, + num_filters=64, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False) for i in range(4) + ]) + + self.conv_tr1_1 = ConvBNLayer( + num_channels=256, num_filters=width, filter_size=3) + self.conv_tr1_2 = ConvBNLayer( + num_channels=256, num_filters=width * 2, filter_size=3, stride=2) + + self.st2 = Stage( + num_modules=1, num_filters=channels_2, has_se=self.has_se) + + self.conv_tr2 = ConvBNLayer( + num_channels=width * 2, + num_filters=width * 4, + filter_size=3, + stride=2) + self.st3 = Stage( + num_modules=4, num_filters=channels_3, has_se=self.has_se) + + self.conv_tr3 = ConvBNLayer( + num_channels=width * 4, + num_filters=width * 8, + filter_size=3, + stride=2) + + self.st4 = Stage( + num_modules=3, num_filters=channels_4, has_se=self.has_se) + + # classification + num_filters_list = [32, 64, 128, 256] + self.last_cls = LastClsOut( + num_channel_list=channels_4, + has_se=self.has_se, + num_filters_list=num_filters_list) + + last_num_filters = [256, 512, 1024] + self.cls_head_conv_list = nn.LayerList() + for idx in range(3): + self.cls_head_conv_list.append( + ConvBNLayer( + num_channels=num_filters_list[idx] * 4, + num_filters=last_num_filters[idx], + filter_size=3, + stride=2)) + + self.conv_last = ConvBNLayer( + num_channels=1024, num_filters=2048, filter_size=1, stride=1) + + self.avg_pool = nn.AdaptiveAvgPool2D(1) + + stdv = 1.0 / math.sqrt(2048 * 1.0) + + self.fc = nn.Linear( + 2048, + class_num, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.conv_layer1_1(x) + x = self.conv_layer1_2(x) + + x = self.layer1(x) + + tr1_1 = self.conv_tr1_1(x) + tr1_2 = self.conv_tr1_2(x) + x = self.st2([tr1_1, tr1_2]) + + tr2 = self.conv_tr2(x[-1]) + x.append(tr2) + x = self.st3(x) + + tr3 = self.conv_tr3(x[-1]) + x.append(tr3) + x = self.st4(x) + + x = self.last_cls(x) + + y = x[0] + for idx in range(3): + y = paddle.add(x[idx + 1], self.cls_head_conv_list[idx](y)) + + y = self.conv_last(y) + y = self.avg_pool(y) + y = paddle.reshape(y, shape=[-1, y.shape[1]]) + y = self.fc(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W18_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. 
+ If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W18_C` model depends on args. + """ + model = HRNet( + width=18, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld) + return model + + +def HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W30_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W30_C` model depends on args. + """ + model = HRNet( + width=30, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld) + return model + + +def HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W32_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W32_C` model depends on args. + """ + model = HRNet( + width=32, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld) + return model + + +def HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W40_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W40_C` model depends on args. + """ + model = HRNet( + width=40, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld) + return model + + +def HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W44_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W44_C` model depends on args. + """ + model = HRNet( + width=44, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld) + return model + + +def HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W48_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W48_C` model depends on args. + """ + model = HRNet( + width=48, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld) + return model + + +def HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W60_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. 
+ use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W60_C` model depends on args. + """ + model = HRNet( + width=60, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W60_C"], use_ssld) + return model + + +def HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs): + """ + HRNet_W64_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `HRNet_W64_C` model depends on args. + """ + model = HRNet( + width=64, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld) + return model + + +def SE_HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W18_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W18_C` model depends on args. + """ + model = HRNet( + width=18, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W18_C"], use_ssld) + return model + + +def SE_HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W30_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W30_C` model depends on args. + """ + model = HRNet( + width=30, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W30_C"], use_ssld) + return model + + +def SE_HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W32_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W32_C` model depends on args. + """ + model = HRNet( + width=32, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W32_C"], use_ssld) + return model + + +def SE_HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W40_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W40_C` model depends on args. + """ + model = HRNet( + width=40, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W40_C"], use_ssld) + return model + + +def SE_HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W44_C + Args: + pretrained: bool=False or str. 
If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W44_C` model depends on args. + """ + model = HRNet( + width=44, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W44_C"], use_ssld) + return model + + +def SE_HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W48_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W48_C` model depends on args. + """ + model = HRNet( + width=48, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W48_C"], use_ssld) + return model + + +def SE_HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W60_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W60_C` model depends on args. + """ + model = HRNet( + width=60, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W60_C"], use_ssld) + return model + + +def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs): + """ + SE_HRNet_W64_C + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `SE_HRNet_W64_C` model depends on args. + """ + model = HRNet( + width=64, + stages_pattern=MODEL_STAGES_PATTERN["HRNet"], + has_se=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/inception_v3.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/inception_v3.py new file mode 100644 index 0000000..5575f8c --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/inception_v3.py @@ -0,0 +1,557 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
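
The HRNet_WXX_C and SE_HRNet_WXX_C factories above are identical except for the `width` value (plus `has_se=True` for the SE variants); the architecture itself lives in the shared `HRNet` class and weight loading in the common `_load_pretrained` helper. A minimal usage sketch, assuming paddlepaddle is installed and the PaddleClas source root is on PYTHONPATH; the module path and the `class_num` keyword are assumptions inferred from the sibling backbone files in this diff, not verified API:

    import paddle
    # assumed module path; the hrnet.py diff header is outside this excerpt
    from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C

    model = HRNet_W18_C(pretrained=False, class_num=1000)  # random init, 1000-way head
    x = paddle.rand([1, 3, 224, 224])                      # dummy NCHW batch
    logits = model(x)                                      # plain forward pass
    print(logits.shape)                                    # expected: [1, 1000]
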
+ +from __future__ import absolute_import, division, print_function +import math +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "InceptionV3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/InceptionV3_pretrained.pdparams" +} + +MODEL_STAGES_PATTERN = { + "InceptionV3": [ + "inception_block_list[2]", "inception_block_list[3]", + "inception_block_list[7]", "inception_block_list[8]", + "inception_block_list[10]" + ] +} + +__all__ = MODEL_URLS.keys() +''' +InceptionV3 config: dict. + key: inception blocks of InceptionV3. + values: conv num in different blocks. +''' +NET_CONFIG = { + "inception_a": [[192, 256, 288], [32, 64, 64]], + "inception_b": [288], + "inception_c": [[768, 768, 768, 768], [128, 160, 160, 192]], + "inception_d": [768], + "inception_e": [1280, 2048] +} + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + padding=0, + groups=1, + act="relu"): + super().__init__() + self.act = act + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + bias_attr=False) + self.bn = BatchNorm(num_filters) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.act: + x = self.relu(x) + return x + + +class InceptionStem(TheseusLayer): + def __init__(self): + super().__init__() + self.conv_1a_3x3 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act="relu") + self.conv_2a_3x3 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act="relu") + self.conv_2b_3x3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + padding=1, + act="relu") + + self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self.conv_3b_1x1 = ConvBNLayer( + num_channels=64, num_filters=80, filter_size=1, act="relu") + self.conv_4a_3x3 = ConvBNLayer( + num_channels=80, num_filters=192, filter_size=3, act="relu") + + def forward(self, x): + x = self.conv_1a_3x3(x) + x = self.conv_2a_3x3(x) + x = self.conv_2b_3x3(x) + x = self.max_pool(x) + x = self.conv_3b_1x1(x) + x = self.conv_4a_3x3(x) + x = self.max_pool(x) + return x + + +class InceptionA(TheseusLayer): + def __init__(self, num_channels, pool_features): + super().__init__() + self.branch1x1 = ConvBNLayer( + num_channels=num_channels, + num_filters=64, + filter_size=1, + act="relu") + self.branch5x5_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=48, + filter_size=1, + act="relu") + self.branch5x5_2 = ConvBNLayer( + num_channels=48, + num_filters=64, + filter_size=5, + padding=2, + act="relu") + + self.branch3x3dbl_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=64, + filter_size=1, + act="relu") + self.branch3x3dbl_2 = ConvBNLayer( + num_channels=64, + num_filters=96, + filter_size=3, + padding=1, + act="relu") + self.branch3x3dbl_3 = ConvBNLayer( + num_channels=96, + num_filters=96, + filter_size=3, + padding=1, + act="relu") + self.branch_pool = AvgPool2D( + kernel_size=3, stride=1, padding=1, exclusive=False) + self.branch_pool_conv = ConvBNLayer( + 
num_channels=num_channels, + num_filters=pool_features, + filter_size=1, + act="relu") + + def forward(self, x): + branch1x1 = self.branch1x1(x) + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = self.branch_pool(x) + branch_pool = self.branch_pool_conv(branch_pool) + x = paddle.concat( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1) + return x + + +class InceptionB(TheseusLayer): + def __init__(self, num_channels): + super().__init__() + self.branch3x3 = ConvBNLayer( + num_channels=num_channels, + num_filters=384, + filter_size=3, + stride=2, + act="relu") + self.branch3x3dbl_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=64, + filter_size=1, + act="relu") + self.branch3x3dbl_2 = ConvBNLayer( + num_channels=64, + num_filters=96, + filter_size=3, + padding=1, + act="relu") + self.branch3x3dbl_3 = ConvBNLayer( + num_channels=96, + num_filters=96, + filter_size=3, + stride=2, + act="relu") + self.branch_pool = MaxPool2D(kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = self.branch_pool(x) + + x = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1) + + return x + + +class InceptionC(TheseusLayer): + def __init__(self, num_channels, channels_7x7): + super().__init__() + self.branch1x1 = ConvBNLayer( + num_channels=num_channels, + num_filters=192, + filter_size=1, + act="relu") + + self.branch7x7_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=channels_7x7, + filter_size=1, + stride=1, + act="relu") + self.branch7x7_2 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=channels_7x7, + filter_size=(1, 7), + stride=1, + padding=(0, 3), + act="relu") + self.branch7x7_3 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=192, + filter_size=(7, 1), + stride=1, + padding=(3, 0), + act="relu") + + self.branch7x7dbl_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=channels_7x7, + filter_size=1, + act="relu") + self.branch7x7dbl_2 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=channels_7x7, + filter_size=(7, 1), + padding=(3, 0), + act="relu") + self.branch7x7dbl_3 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=channels_7x7, + filter_size=(1, 7), + padding=(0, 3), + act="relu") + self.branch7x7dbl_4 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=channels_7x7, + filter_size=(7, 1), + padding=(3, 0), + act="relu") + self.branch7x7dbl_5 = ConvBNLayer( + num_channels=channels_7x7, + num_filters=192, + filter_size=(1, 7), + padding=(0, 3), + act="relu") + + self.branch_pool = AvgPool2D( + kernel_size=3, stride=1, padding=1, exclusive=False) + self.branch_pool_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=192, + filter_size=1, + act="relu") + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = self.branch_pool(x) + branch_pool = 
self.branch_pool_conv(branch_pool) + + x = paddle.concat( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1) + + return x + + +class InceptionD(TheseusLayer): + def __init__(self, num_channels): + super().__init__() + self.branch3x3_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=192, + filter_size=1, + act="relu") + self.branch3x3_2 = ConvBNLayer( + num_channels=192, + num_filters=320, + filter_size=3, + stride=2, + act="relu") + self.branch7x7x3_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=192, + filter_size=1, + act="relu") + self.branch7x7x3_2 = ConvBNLayer( + num_channels=192, + num_filters=192, + filter_size=(1, 7), + padding=(0, 3), + act="relu") + self.branch7x7x3_3 = ConvBNLayer( + num_channels=192, + num_filters=192, + filter_size=(7, 1), + padding=(3, 0), + act="relu") + self.branch7x7x3_4 = ConvBNLayer( + num_channels=192, + num_filters=192, + filter_size=3, + stride=2, + act="relu") + self.branch_pool = MaxPool2D(kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = self.branch_pool(x) + + x = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1) + return x + + +class InceptionE(TheseusLayer): + def __init__(self, num_channels): + super().__init__() + self.branch1x1 = ConvBNLayer( + num_channels=num_channels, + num_filters=320, + filter_size=1, + act="relu") + self.branch3x3_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=384, + filter_size=1, + act="relu") + self.branch3x3_2a = ConvBNLayer( + num_channels=384, + num_filters=384, + filter_size=(1, 3), + padding=(0, 1), + act="relu") + self.branch3x3_2b = ConvBNLayer( + num_channels=384, + num_filters=384, + filter_size=(3, 1), + padding=(1, 0), + act="relu") + + self.branch3x3dbl_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=448, + filter_size=1, + act="relu") + self.branch3x3dbl_2 = ConvBNLayer( + num_channels=448, + num_filters=384, + filter_size=3, + padding=1, + act="relu") + self.branch3x3dbl_3a = ConvBNLayer( + num_channels=384, + num_filters=384, + filter_size=(1, 3), + padding=(0, 1), + act="relu") + self.branch3x3dbl_3b = ConvBNLayer( + num_channels=384, + num_filters=384, + filter_size=(3, 1), + padding=(1, 0), + act="relu") + self.branch_pool = AvgPool2D( + kernel_size=3, stride=1, padding=1, exclusive=False) + self.branch_pool_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=192, + filter_size=1, + act="relu") + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = paddle.concat(branch3x3, axis=1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = paddle.concat(branch3x3dbl, axis=1) + + branch_pool = self.branch_pool(x) + branch_pool = self.branch_pool_conv(branch_pool) + + x = paddle.concat( + [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1) + return x + + +class Inception_V3(TheseusLayer): + """ + Inception_V3 + Args: + config: dict. config of Inception_V3. + class_num: int=1000. The number of classes. + pretrained: (True or False) or path of pretrained_model. 
Whether to load the pretrained model. + Returns: + model: nn.Layer. Specific Inception_V3 model depends on args. + """ + + def __init__(self, + config, + stages_pattern, + class_num=1000, + return_patterns=None, + return_stages=None): + super().__init__() + + self.inception_a_list = config["inception_a"] + self.inception_c_list = config["inception_c"] + self.inception_b_list = config["inception_b"] + self.inception_d_list = config["inception_d"] + self.inception_e_list = config["inception_e"] + + self.inception_stem = InceptionStem() + + self.inception_block_list = nn.LayerList() + for i in range(len(self.inception_a_list[0])): + inception_a = InceptionA(self.inception_a_list[0][i], + self.inception_a_list[1][i]) + self.inception_block_list.append(inception_a) + + for i in range(len(self.inception_b_list)): + inception_b = InceptionB(self.inception_b_list[i]) + self.inception_block_list.append(inception_b) + + for i in range(len(self.inception_c_list[0])): + inception_c = InceptionC(self.inception_c_list[0][i], + self.inception_c_list[1][i]) + self.inception_block_list.append(inception_c) + + for i in range(len(self.inception_d_list)): + inception_d = InceptionD(self.inception_d_list[i]) + self.inception_block_list.append(inception_d) + + for i in range(len(self.inception_e_list)): + inception_e = InceptionE(self.inception_e_list[i]) + self.inception_block_list.append(inception_e) + + self.avg_pool = AdaptiveAvgPool2D(1) + self.dropout = Dropout(p=0.2, mode="downscale_in_infer") + stdv = 1.0 / math.sqrt(2048 * 1.0) + self.fc = Linear( + 2048, + class_num, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr()) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.inception_stem(x) + for inception_block in self.inception_block_list: + x = inception_block(x) + x = self.avg_pool(x) + x = paddle.reshape(x, shape=[-1, 2048]) + x = self.dropout(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def InceptionV3(pretrained=False, use_ssld=False, **kwargs): + """ + InceptionV3 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `InceptionV3` model depends on args. + """ + model = Inception_V3( + NET_CONFIG, + stages_pattern=MODEL_STAGES_PATTERN["InceptionV3"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v1.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v1.py new file mode 100644 index 0000000..9767d69 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v1.py @@ -0,0 +1,257 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear, ReLU, Flatten +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn.initializer import KaimingNormal + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "MobileNetV1_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_25_pretrained.pdparams", + "MobileNetV1_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_5_pretrained.pdparams", + "MobileNetV1_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_75_pretrained.pdparams", + "MobileNetV1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_pretrained.pdparams" +} + +MODEL_STAGES_PATTERN = { + "MobileNetV1": ["blocks[0]", "blocks[2]", "blocks[4]", "blocks[10]"] +} + +__all__ = MODEL_URLS.keys() + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + num_groups=1): + super().__init__() + + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + self.bn = BatchNorm(num_filters) + self.relu = ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + def __init__(self, num_channels, num_filters1, num_filters2, num_groups, + stride, scale): + super().__init__() + + self.depthwise_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=int(num_filters1 * scale), + filter_size=3, + stride=stride, + padding=1, + num_groups=int(num_groups * scale)) + + self.pointwise_conv = ConvBNLayer( + num_channels=int(num_filters1 * scale), + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x + + +class MobileNet(TheseusLayer): + """ + MobileNet + Args: + scale: float=1.0. The coefficient that controls the size of network parameters. + class_num: int=1000. The number of classes. + Returns: + model: nn.Layer. Specific MobileNet model depends on args. 
+ """ + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + + self.conv = ConvBNLayer( + num_channels=3, + filter_size=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + #num_channels, num_filters1, num_filters2, num_groups, stride + self.cfg = [[int(32 * scale), 32, 64, 32, 1], + [int(64 * scale), 64, 128, 64, 2], + [int(128 * scale), 128, 128, 128, 1], + [int(128 * scale), 128, 256, 128, 2], + [int(256 * scale), 256, 256, 256, 1], + [int(256 * scale), 256, 512, 256, 2], + [int(512 * scale), 512, 512, 512, 1], + [int(512 * scale), 512, 512, 512, 1], + [int(512 * scale), 512, 512, 512, 1], + [int(512 * scale), 512, 512, 512, 1], + [int(512 * scale), 512, 512, 512, 1], + [int(512 * scale), 512, 1024, 512, 2], + [int(1024 * scale), 1024, 1024, 1024, 1]] + + self.blocks = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=params[0], + num_filters1=params[1], + num_filters2=params[2], + num_groups=params[3], + stride=params[4], + scale=scale) for params in self.cfg + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + self.flatten = Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear( + int(1024 * scale), + class_num, + weight_attr=ParamAttr(initializer=KaimingNormal())) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.conv(x) + x = self.blocks(x) + x = self.avg_pool(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV1_x0_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV1_x0_25` model depends on args. + """ + model = MobileNet( + scale=0.25, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"], + use_ssld) + return model + + +def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV1_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV1_x0_5` model depends on args. + """ + model = MobileNet( + scale=0.5, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"], + use_ssld) + return model + + +def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV1_x0_75 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. 
Specific `MobileNetV1_x0_75` model depends on args. + """ + model = MobileNet( + scale=0.75, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"], + use_ssld) + return model + + +def MobileNetV1(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV1 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV1` model depends on args. + """ + model = MobileNet( + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v3.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v3.py new file mode 100644 index 0000000..836c54c --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v3.py @@ -0,0 +1,586 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear +from paddle.regularizer import L2Decay +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "MobileNetV3_small_x0_35": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_35_pretrained.pdparams", + "MobileNetV3_small_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_5_pretrained.pdparams", + "MobileNetV3_small_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_75_pretrained.pdparams", + "MobileNetV3_small_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_0_pretrained.pdparams", + "MobileNetV3_small_x1_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_25_pretrained.pdparams", + "MobileNetV3_large_x0_35": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_35_pretrained.pdparams", + "MobileNetV3_large_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_5_pretrained.pdparams", + "MobileNetV3_large_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_75_pretrained.pdparams", + "MobileNetV3_large_x1_0": + 
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_0_pretrained.pdparams", + "MobileNetV3_large_x1_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_25_pretrained.pdparams", +} + +MODEL_STAGES_PATTERN = { + "MobileNetV3_small": + ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"], + "MobileNetV3_large": + ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"] +} + +__all__ = MODEL_URLS.keys() + +# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively. +# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s. +# k: kernel_size +# exp: middle channel number in depthwise block +# c: output channel number in depthwise block +# se: whether to use SE block +# act: which activation to use +# s: stride in depthwise block +NET_CONFIG = { + "large": [ + # k, exp, c, se, act, s + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], + [3, 240, 80, False, "hardswish", 2], + [3, 200, 80, False, "hardswish", 1], + [3, 184, 80, False, "hardswish", 1], + [3, 184, 80, False, "hardswish", 1], + [3, 480, 112, True, "hardswish", 1], + [3, 672, 112, True, "hardswish", 1], + [5, 672, 160, True, "hardswish", 2], + [5, 960, 160, True, "hardswish", 1], + [5, 960, 160, True, "hardswish", 1], + ], + "small": [ + # k, exp, c, se, act, s + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hardswish", 2], + [5, 240, 40, True, "hardswish", 1], + [5, 240, 40, True, "hardswish", 1], + [5, 120, 48, True, "hardswish", 1], + [5, 144, 48, True, "hardswish", 1], + [5, 288, 96, True, "hardswish", 2], + [5, 576, 96, True, "hardswish", 1], + [5, 576, 96, True, "hardswish", 1], + ] +} +# first conv output channel number in MobileNetV3 +STEM_CONV_NUMBER = 16 +# last second conv output channel for "small" +LAST_SECOND_CONV_SMALL = 576 +# last second conv output channel for "large" +LAST_SECOND_CONV_LARGE = 960 +# last conv output channel number for "large" and "small" +LAST_CONV = 1280 + + +def _make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _create_act(act): + if act == "hardswish": + return nn.Hardswish() + elif act == "relu": + return nn.ReLU() + elif act is None: + return None + else: + raise RuntimeError( + "The activation function is not supported: {}".format(act)) + + +class MobileNetV3(TheseusLayer): + """ + MobileNetV3 + Args: + config: list. MobileNetV3 depthwise blocks config. + scale: float=1.0. The coefficient that controls the size of network parameters. + class_num: int=1000. The number of classes. + inplanes: int=16. The output channel number of first convolution layer. + class_squeeze: int=960. The output channel number of penultimate convolution layer. + class_expand: int=1280. The output channel number of last convolution layer. + dropout_prob: float=0.2. Probability of setting units to zero. + Returns: + model: nn.Layer. Specific MobileNetV3 model depends on args. 
+ """ + + def __init__(self, + config, + stages_pattern, + scale=1.0, + class_num=1000, + inplanes=STEM_CONV_NUMBER, + class_squeeze=LAST_SECOND_CONV_LARGE, + class_expand=LAST_CONV, + dropout_prob=0.2, + return_patterns=None, + return_stages=None): + super().__init__() + + self.cfg = config + self.scale = scale + self.inplanes = inplanes + self.class_squeeze = class_squeeze + self.class_expand = class_expand + self.class_num = class_num + + self.conv = ConvBNLayer( + in_c=3, + out_c=_make_divisible(self.inplanes * self.scale), + filter_size=3, + stride=2, + padding=1, + num_groups=1, + if_act=True, + act="hardswish") + + self.blocks = nn.Sequential(* [ + ResidualUnit( + in_c=_make_divisible(self.inplanes * self.scale if i == 0 else + self.cfg[i - 1][2] * self.scale), + mid_c=_make_divisible(self.scale * exp), + out_c=_make_divisible(self.scale * c), + filter_size=k, + stride=s, + use_se=se, + act=act) for i, (k, exp, c, se, act, s) in enumerate(self.cfg) + ]) + + self.last_second_conv = ConvBNLayer( + in_c=_make_divisible(self.cfg[-1][2] * self.scale), + out_c=_make_divisible(self.scale * self.class_squeeze), + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act="hardswish") + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D( + in_channels=_make_divisible(self.scale * self.class_squeeze), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + if dropout_prob is not None: + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + else: + self.dropout = None + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.conv(x) + x = self.blocks(x) + x = self.last_second_conv(x) + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + if self.dropout is not None: + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + + return x + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + in_c, + out_c, + filter_size, + stride, + padding, + num_groups=1, + if_act=True, + act=None): + super().__init__() + + self.conv = Conv2D( + in_channels=in_c, + out_channels=out_c, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + bias_attr=False) + self.bn = BatchNorm( + num_channels=out_c, + act=None, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.if_act = if_act + self.act = _create_act(act) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + x = self.act(x) + return x + + +class ResidualUnit(TheseusLayer): + def __init__(self, + in_c, + mid_c, + out_c, + filter_size, + stride, + use_se, + act=None): + super().__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, + out_c=mid_c, + filter_size=1, + stride=1, + padding=0, + if_act=True, + act=act) + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, + filter_size=filter_size, + stride=stride, + padding=int((filter_size - 1) // 2), + num_groups=mid_c, + if_act=True, + act=act) + if self.if_se: + self.mid_se = SEModule(mid_c) + self.linear_conv = ConvBNLayer( + in_c=mid_c, + out_c=out_c, + filter_size=1, + stride=1, + padding=0, + if_act=False, + act=None) + + def forward(self, x): + identity = x + x = 
self.expand_conv(x) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = paddle.add(identity, x) + return x + + +# nn.Hardsigmoid does not expose the "slope" and "offset" arguments accepted by nn.functional.hardsigmoid, hence this wrapper. +class Hardsigmoid(TheseusLayer): + def __init__(self, slope=0.2, offset=0.5): + super().__init__() + self.slope = slope + self.offset = offset + + def forward(self, x): + return nn.functional.hardsigmoid( + x, slope=self.slope, offset=self.offset) + + +class SEModule(TheseusLayer): + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0) + self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5) + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + return paddle.multiply(x=identity, y=x) + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_small_x0_35 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_small_x0_35` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.35, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + class_squeeze=LAST_SECOND_CONV_SMALL, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"], + use_ssld) + return model + + +def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_small_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_small_x0_5` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.5, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + class_squeeze=LAST_SECOND_CONV_SMALL, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"], + use_ssld) + return model + + +def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_small_x0_75 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_small_x0_75` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=0.75, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + class_squeeze=LAST_SECOND_CONV_SMALL, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"], + use_ssld) + return model + + +def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_small_x1_0 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_small_x1_0` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + class_squeeze=LAST_SECOND_CONV_SMALL, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"], + use_ssld) + return model + + +def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_small_x1_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_small_x1_25` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["small"], + scale=1.25, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"], + class_squeeze=LAST_SECOND_CONV_SMALL, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"], + use_ssld) + return model + + +def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_large_x0_35 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_large_x0_35` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.35, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + class_squeeze=LAST_SECOND_CONV_LARGE, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"], + use_ssld) + return model + + +def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_large_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_large_x0_5` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.5, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + class_squeeze=LAST_SECOND_CONV_LARGE, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"], + use_ssld) + return model + + +def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_large_x0_75 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
+ Returns: + model: nn.Layer. Specific `MobileNetV3_large_x0_75` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=0.75, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + class_squeeze=LAST_SECOND_CONV_LARGE, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"], + use_ssld) + return model + + +def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_large_x1_0 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_large_x1_0` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=1.0, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + class_squeeze=LAST_SECOND_CONV_LARGE, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"], + use_ssld) + return model + + +def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs): + """ + MobileNetV3_large_x1_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `MobileNetV3_large_x1_25` model depends on args. + """ + model = MobileNetV3( + config=NET_CONFIG["large"], + scale=1.25, + stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"], + class_squeeze=LAST_SECOND_CONV_LARGE, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"], + use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/pp_lcnet.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/pp_lcnet.py new file mode 100644 index 0000000..4017462 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/pp_lcnet.py @@ -0,0 +1,419 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
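
Both mobilenet_v3.py above and pp_lcnet.py below derive per-layer widths by snapping `channels * scale` to a multiple of 8, with a guard that never lets rounding remove more than 10% of the target width (`_make_divisible` / `make_divisible`). A self-contained sketch of that rounding rule, copied from the definition above, showing what the MobileNetV3 stem width (STEM_CONV_NUMBER = 16) becomes at each published scale:

    def _make_divisible(v, divisor=8, min_value=None):
        if min_value is None:
            min_value = divisor
        # round to the nearest multiple of `divisor`, but never below `min_value`
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # if rounding dropped the width by more than 10%, step back up one multiple
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    for scale in (0.35, 0.5, 0.75, 1.0, 1.25):
        print(scale, _make_divisible(16 * scale))
    # stem widths: 8, 8, 16, 16, 24
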
+ +from __future__ import absolute_import, division, print_function + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear +from paddle.regularizer import L2Decay +from paddle.nn.initializer import KaimingNormal +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "PPLCNet_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams", + "PPLCNet_x0_35": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams", + "PPLCNet_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams", + "PPLCNet_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams", + "PPLCNet_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams", + "PPLCNet_x1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams", + "PPLCNet_x2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams", + "PPLCNet_x2_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams" +} + +MODEL_STAGES_PATTERN = { + "PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"] +} + +__all__ = list(MODEL_URLS.keys()) + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + num_groups=1): + super().__init__() + + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm( + num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + stride, + dw_size=3, + use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer( + num_channels=num_channels, + 
num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer( + num_channels=num_channels, + filter_size=1, + num_filters=num_filters, + stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer( + num_channels=3, + filter_size=3, + num_filters=make_divisible(16 * scale), + stride=2) + + self.blocks2 = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(* [ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D( + in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = 
self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def PPLCNet_x0_25(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x0_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x0_25` model depends on args. + """ + model = PPLCNet( + scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_25"], use_ssld) + return model + + +def PPLCNet_x0_35(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x0_35 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x0_35` model depends on args. + """ + model = PPLCNet( + scale=0.35, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_35"], use_ssld) + return model + + +def PPLCNet_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x0_5` model depends on args. + """ + model = PPLCNet( + scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_5"], use_ssld) + return model + + +def PPLCNet_x0_75(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x0_75 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x0_75` model depends on args. + """ + model = PPLCNet( + scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_75"], use_ssld) + return model + + +def PPLCNet_x1_0(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x1_0 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x1_0` model depends on args. + """ + model = PPLCNet( + scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_0"], use_ssld) + return model + + +def PPLCNet_x1_5(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x1_5 + Args: + pretrained: bool=False or str. 
If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x1_5` model depends on args. + """ + model = PPLCNet( + scale=1.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_5"], use_ssld) + return model + + +def PPLCNet_x2_0(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x2_0 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x2_0` model depends on args. + """ + model = PPLCNet( + scale=2.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_0"], use_ssld) + return model + + +def PPLCNet_x2_5(pretrained=False, use_ssld=False, **kwargs): + """ + PPLCNet_x2_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `PPLCNet_x2_5` model depends on args. + """ + model = PPLCNet( + scale=2.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_5"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/resnet.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/resnet.py new file mode 100644 index 0000000..74c5c5f --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/resnet.py @@ -0,0 +1,591 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
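
Every backbone file in this diff repeats the same `_load_pretrained` dispatch, so the `pretrained` argument is effectively tri-state: `False` keeps random initialization, `True` downloads the checkpoint registered in MODEL_URLS (with `use_ssld` selecting the SSLD-distilled variant where available), and a string is treated as a local checkpoint path; any other type raises RuntimeError. A usage sketch of the three forms, assuming the PaddleClas source root is on PYTHONPATH; the local path below is illustrative only:

    from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x1_0

    m1 = PPLCNet_x1_0()                                # pretrained=False: random init
    m2 = PPLCNet_x1_0(pretrained=True)                 # fetch MODEL_URLS["PPLCNet_x1_0"]
    m3 = PPLCNet_x1_0(pretrained=True, use_ssld=True)  # prefer the SSLD-distilled weights
    m4 = PPLCNet_x1_0(pretrained="./PPLCNet_x1_0_pretrained.pdparams")  # local file (hypothetical path)
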
+ +from __future__ import absolute_import, division, print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNet18": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_pretrained.pdparams", + "ResNet18_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_vd_pretrained.pdparams", + "ResNet34": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_pretrained.pdparams", + "ResNet34_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_pretrained.pdparams", + "ResNet50": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_pretrained.pdparams", + "ResNet50_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_pretrained.pdparams", + "ResNet101": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_pretrained.pdparams", + "ResNet101_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_pretrained.pdparams", + "ResNet152": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_pretrained.pdparams", + "ResNet152_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_vd_pretrained.pdparams", + "ResNet200_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet200_vd_pretrained.pdparams", +} + +MODEL_STAGES_PATTERN = { + "ResNet18": ["blocks[1]", "blocks[3]", "blocks[5]", "blocks[7]"], + "ResNet34": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"], + "ResNet50": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"], + "ResNet101": ["blocks[2]", "blocks[6]", "blocks[29]", "blocks[32]"], + "ResNet152": ["blocks[2]", "blocks[10]", "blocks[46]", "blocks[49]"], + "ResNet200": ["blocks[2]", "blocks[14]", "blocks[62]", "blocks[65]"] +} + +__all__ = MODEL_URLS.keys() +''' +ResNet config: dict. + key: depth of ResNet. + values: config's dict of specific model. + keys: + block_type: Two different blocks in ResNet, BasicBlock and BottleneckBlock are optional. + block_depth: The number of blocks in different stages in ResNet. + num_channels: The number of channels to enter the next stage. 
+''' +NET_CONFIG = { + "18": { + "block_type": "BasicBlock", + "block_depth": [2, 2, 2, 2], + "num_channels": [64, 64, 128, 256] + }, + "34": { + "block_type": "BasicBlock", + "block_depth": [3, 4, 6, 3], + "num_channels": [64, 64, 128, 256] + }, + "50": { + "block_type": "BottleneckBlock", + "block_depth": [3, 4, 6, 3], + "num_channels": [64, 256, 512, 1024] + }, + "101": { + "block_type": "BottleneckBlock", + "block_depth": [3, 4, 23, 3], + "num_channels": [64, 256, 512, 1024] + }, + "152": { + "block_type": "BottleneckBlock", + "block_depth": [3, 8, 36, 3], + "num_channels": [64, 256, 512, 1024] + }, + "200": { + "block_type": "BottleneckBlock", + "block_depth": [3, 12, 48, 3], + "num_channels": [64, 256, 512, 1024] + }, +} + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + self.is_vd_mode = is_vd_mode + self.act = act + self.avg_pool = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=False, + data_format=data_format) + self.bn = BatchNorm( + num_filters, + param_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=ParamAttr(learning_rate=lr_mult), + data_layout=data_format) + self.relu = nn.ReLU() + + def forward(self, x): + if self.is_vd_mode: + x = self.avg_pool(x) + x = self.conv(x) + x = self.bn(x) + if self.act: + x = self.relu(x) + return x + + +class BottleneckBlock(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + lr_mult=lr_mult, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride if if_first else 1, + is_vd_mode=False if if_first else True, + lr_mult=lr_mult, + data_format=data_format) + self.relu = nn.ReLU() + self.shortcut = shortcut + + def forward(self, x): + identity = x + x = self.conv0(x) + x = self.conv1(x) + x = self.conv2(x) + + if self.shortcut: + short = identity + else: + short = self.short(identity) + x = paddle.add(x=x, y=short) + x = self.relu(x) + return x + + +class BasicBlock(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + lr_mult=lr_mult, + data_format=data_format) + if not shortcut: + self.short = ConvBNLayer( + 
num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride if if_first else 1, + is_vd_mode=False if if_first else True, + lr_mult=lr_mult, + data_format=data_format) + self.shortcut = shortcut + self.relu = nn.ReLU() + + def forward(self, x): + identity = x + x = self.conv0(x) + x = self.conv1(x) + if self.shortcut: + short = identity + else: + short = self.short(identity) + x = paddle.add(x=x, y=short) + x = self.relu(x) + return x + + +class ResNet(TheseusLayer): + """ + ResNet + Args: + config: dict. config of ResNet. + version: str="vb". Different version of ResNet, version vd can perform better. + class_num: int=1000. The number of classes. + lr_mult_list: list. Control the learning rate of different stages. + Returns: + model: nn.Layer. Specific ResNet model depends on args. + """ + + def __init__(self, + config, + stages_pattern, + version="vb", + class_num=1000, + lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], + data_format="NCHW", + input_image_channel=3, + return_patterns=None, + return_stages=None): + super().__init__() + + self.cfg = config + self.lr_mult_list = lr_mult_list + self.is_vd_mode = version == "vd" + self.class_num = class_num + self.num_filters = [64, 128, 256, 512] + self.block_depth = self.cfg["block_depth"] + self.block_type = self.cfg["block_type"] + self.num_channels = self.cfg["num_channels"] + self.channels_mult = 1 if self.num_channels[-1] == 256 else 4 + + assert isinstance(self.lr_mult_list, ( + list, tuple + )), "lr_mult_list should be in (list, tuple) but got {}".format( + type(self.lr_mult_list)) + assert len(self.lr_mult_list + ) == 5, "lr_mult_list length should be 5 but got {}".format( + len(self.lr_mult_list)) + + self.stem_cfg = { + #num_channels, num_filters, filter_size, stride + "vb": [[input_image_channel, 64, 7, 2]], + "vd": + [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] + } + + self.stem = nn.Sequential(* [ + ConvBNLayer( + num_channels=in_c, + num_filters=out_c, + filter_size=k, + stride=s, + act="relu", + lr_mult=self.lr_mult_list[0], + data_format=data_format) + for in_c, out_c, k, s in self.stem_cfg[version] + ]) + + self.max_pool = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + block_list = [] + for block_idx in range(len(self.block_depth)): + shortcut = False + for i in range(self.block_depth[block_idx]): + block_list.append(globals()[self.block_type]( + num_channels=self.num_channels[block_idx] if i == 0 else + self.num_filters[block_idx] * self.channels_mult, + num_filters=self.num_filters[block_idx], + stride=2 if i == 0 and block_idx != 0 else 1, + shortcut=shortcut, + if_first=block_idx == i == 0 if version == "vd" else True, + lr_mult=self.lr_mult_list[block_idx + 1], + data_format=data_format)) + shortcut = True + self.blocks = nn.Sequential(*block_list) + + self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format) + self.flatten = nn.Flatten() + self.avg_pool_channels = self.num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0) + self.fc = Linear( + self.avg_pool_channels, + self.class_num, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) + + self.data_format = data_format + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, x): + with paddle.static.amp.fp16_guard(): + if self.data_format == "NHWC": + x = paddle.transpose(x, [0, 2, 3, 1]) + x.stop_gradient = True + x = self.stem(x) + x = self.max_pool(x) + x = self.blocks(x) + x = self.avg_pool(x) 
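+            # pooled features are [N, C, 1, 1]; flatten to [N, C] for the fc head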
+ x = self.flatten(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ResNet18(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet18 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet18` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["18"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet18"], + version="vb", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet18"], use_ssld) + return model + + +def ResNet18_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet18_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet18_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["18"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet18"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet18_vd"], use_ssld) + return model + + +def ResNet34(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet34 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet34` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["34"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet34"], + version="vb", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet34"], use_ssld) + return model + + +def ResNet34_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet34_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet34_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["34"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet34"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet34_vd"], use_ssld) + return model + + +def ResNet50(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet50 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet50` model depends on args. 
+ """ + model = ResNet( + config=NET_CONFIG["50"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet50"], + version="vb", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld) + return model + + +def ResNet50_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet50_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet50_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["50"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet50"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vd"], use_ssld) + return model + + +def ResNet101(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet101 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet101` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["101"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet101"], + version="vb", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet101"], use_ssld) + return model + + +def ResNet101_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet101_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet101_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["101"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet101"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet101_vd"], use_ssld) + return model + + +def ResNet152(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet152 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet152` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["152"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet152"], + version="vb", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet152"], use_ssld) + return model + + +def ResNet152_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet152_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet152_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["152"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet152"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet152_vd"], use_ssld) + return model + + +def ResNet200_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet200_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. 
+ use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet200_vd` model depends on args. + """ + model = ResNet( + config=NET_CONFIG["200"], + stages_pattern=MODEL_STAGES_PATTERN["ResNet200"], + version="vd", + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet200_vd"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/legendary_models/vgg.py b/src/PaddleClas/ppcls/arch/backbone/legendary_models/vgg.py new file mode 100644 index 0000000..74d5cfa --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/legendary_models/vgg.py @@ -0,0 +1,259 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import MaxPool2D + +from ppcls.arch.backbone.base.theseus_layer import TheseusLayer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "VGG11": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG11_pretrained.pdparams", + "VGG13": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG13_pretrained.pdparams", + "VGG16": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams", + "VGG19": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams", +} + +MODEL_STAGES_PATTERN = { + "VGG": [ + "conv_block_1", "conv_block_2", "conv_block_3", "conv_block_4", + "conv_block_5" + ] +} + +__all__ = MODEL_URLS.keys() + +# VGG config +# key: VGG network depth +# value: conv num in different blocks +NET_CONFIG = { + 11: [1, 1, 2, 2, 2], + 13: [2, 2, 2, 2, 2], + 16: [2, 2, 3, 3, 3], + 19: [2, 2, 4, 4, 4] +} + + +class ConvBlock(TheseusLayer): + def __init__(self, input_channels, output_channels, groups): + super().__init__() + + self.groups = groups + self.conv1 = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False) + if groups == 2 or groups == 3 or groups == 4: + self.conv2 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False) + if groups == 3 or groups == 4: + self.conv3 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False) + if groups == 4: + self.conv4 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False) + + self.max_pool = MaxPool2D(kernel_size=2, stride=2, padding=0) + self.relu = nn.ReLU() + + def forward(self, inputs): + x = self.conv1(inputs) + x = self.relu(x) + if self.groups == 2 or self.groups == 3 or self.groups == 4: + x = self.conv2(x) + x = 
self.relu(x) + if self.groups == 3 or self.groups == 4: + x = self.conv3(x) + x = self.relu(x) + if self.groups == 4: + x = self.conv4(x) + x = self.relu(x) + x = self.max_pool(x) + return x + + +class VGGNet(TheseusLayer): + """ + VGGNet + Args: + config: list. VGGNet config. + stop_grad_layers: int=0. The parameters in blocks which index larger than `stop_grad_layers`, will be set `param.trainable=False` + class_num: int=1000. The number of classes. + Returns: + model: nn.Layer. Specific VGG model depends on args. + """ + + def __init__(self, + config, + stages_pattern, + stop_grad_layers=0, + class_num=1000, + return_patterns=None, + return_stages=None): + super().__init__() + + self.stop_grad_layers = stop_grad_layers + + self.conv_block_1 = ConvBlock(3, 64, config[0]) + self.conv_block_2 = ConvBlock(64, 128, config[1]) + self.conv_block_3 = ConvBlock(128, 256, config[2]) + self.conv_block_4 = ConvBlock(256, 512, config[3]) + self.conv_block_5 = ConvBlock(512, 512, config[4]) + + self.relu = nn.ReLU() + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + for idx, block in enumerate([ + self.conv_block_1, self.conv_block_2, self.conv_block_3, + self.conv_block_4, self.conv_block_5 + ]): + if self.stop_grad_layers >= idx + 1: + for param in block.parameters(): + param.trainable = False + + self.drop = Dropout(p=0.5, mode="downscale_in_infer") + self.fc1 = Linear(7 * 7 * 512, 4096) + self.fc2 = Linear(4096, 4096) + self.fc3 = Linear(4096, class_num) + + super().init_res( + stages_pattern, + return_patterns=return_patterns, + return_stages=return_stages) + + def forward(self, inputs): + x = self.conv_block_1(inputs) + x = self.conv_block_2(x) + x = self.conv_block_3(x) + x = self.conv_block_4(x) + x = self.conv_block_5(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.relu(x) + x = self.drop(x) + x = self.fc2(x) + x = self.relu(x) + x = self.drop(x) + x = self.fc3(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def VGG11(pretrained=False, use_ssld=False, **kwargs): + """ + VGG11 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `VGG11` model depends on args. + """ + model = VGGNet( + config=NET_CONFIG[11], + stages_pattern=MODEL_STAGES_PATTERN["VGG"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["VGG11"], use_ssld) + return model + + +def VGG13(pretrained=False, use_ssld=False, **kwargs): + """ + VGG13 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `VGG13` model depends on args. 
+ """ + model = VGGNet( + config=NET_CONFIG[13], + stages_pattern=MODEL_STAGES_PATTERN["VGG"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["VGG13"], use_ssld) + return model + + +def VGG16(pretrained=False, use_ssld=False, **kwargs): + """ + VGG16 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `VGG16` model depends on args. + """ + model = VGGNet( + config=NET_CONFIG[16], + stages_pattern=MODEL_STAGES_PATTERN["VGG"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["VGG16"], use_ssld) + return model + + +def VGG19(pretrained=False, use_ssld=False, **kwargs): + """ + VGG19 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `VGG19` model depends on args. + """ + model = VGGNet( + config=NET_CONFIG[19], + stages_pattern=MODEL_STAGES_PATTERN["VGG"], + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["VGG19"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__init__.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..63bd229 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/alexnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/alexnet.cpython-39.pyc new file mode 100644 index 0000000..5b84c99 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/alexnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/cspnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/cspnet.cpython-39.pyc new file mode 100644 index 0000000..1d996fd Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/cspnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/darknet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/darknet.cpython-39.pyc new file mode 100644 index 0000000..8323836 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/darknet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/densenet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/densenet.cpython-39.pyc new file mode 100644 index 0000000..7d8af7d Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/densenet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/distilled_vision_transformer.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/distilled_vision_transformer.cpython-39.pyc new file mode 100644 index 0000000..64f04d6 Binary files /dev/null and 
b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/distilled_vision_transformer.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dla.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dla.cpython-39.pyc new file mode 100644 index 0000000..bb12bae Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dla.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dpn.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dpn.cpython-39.pyc new file mode 100644 index 0000000..73eb68d Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/dpn.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/efficientnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/efficientnet.cpython-39.pyc new file mode 100644 index 0000000..ef17886 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/efficientnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/ghostnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/ghostnet.cpython-39.pyc new file mode 100644 index 0000000..2f81872 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/ghostnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/googlenet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/googlenet.cpython-39.pyc new file mode 100644 index 0000000..0326e19 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/googlenet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/gvt.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/gvt.cpython-39.pyc new file mode 100644 index 0000000..54b1d13 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/gvt.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/hardnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/hardnet.cpython-39.pyc new file mode 100644 index 0000000..5919e76 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/hardnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/inception_v4.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/inception_v4.cpython-39.pyc new file mode 100644 index 0000000..8cb333a Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/inception_v4.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/levit.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/levit.cpython-39.pyc new file mode 100644 index 0000000..bd35a6b Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/levit.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mixnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mixnet.cpython-39.pyc new file mode 100644 index 0000000..0607c52 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mixnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mobilenet_v2.cpython-39.pyc 
b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mobilenet_v2.cpython-39.pyc new file mode 100644 index 0000000..b6b5e94 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/mobilenet_v2.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/pvt_v2.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/pvt_v2.cpython-39.pyc new file mode 100644 index 0000000..391ff9b Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/pvt_v2.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rednet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rednet.cpython-39.pyc new file mode 100644 index 0000000..019481a Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rednet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/regnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/regnet.cpython-39.pyc new file mode 100644 index 0000000..7a1a00a Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/regnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/repvgg.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/repvgg.cpython-39.pyc new file mode 100644 index 0000000..f518d93 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/repvgg.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net.cpython-39.pyc new file mode 100644 index 0000000..edb79ea Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net_vd.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net_vd.cpython-39.pyc new file mode 100644 index 0000000..d7f0f91 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/res2net_vd.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnest.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnest.cpython-39.pyc new file mode 100644 index 0000000..1466a3f Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnest.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnet_vc.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnet_vc.cpython-39.pyc new file mode 100644 index 0000000..144f063 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnet_vc.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext.cpython-39.pyc new file mode 100644 index 0000000..9d915d8 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext101_wsl.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext101_wsl.cpython-39.pyc new file mode 100644 index 0000000..85ad8fa Binary files /dev/null and 
b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext101_wsl.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext_vd.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext_vd.cpython-39.pyc new file mode 100644 index 0000000..6a76923 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/resnext_vd.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rexnet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rexnet.cpython-39.pyc new file mode 100644 index 0000000..7f387c4 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/rexnet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnet_vd.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnet_vd.cpython-39.pyc new file mode 100644 index 0000000..25a8437 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnet_vd.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext.cpython-39.pyc new file mode 100644 index 0000000..ba0e14c Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext_vd.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext_vd.cpython-39.pyc new file mode 100644 index 0000000..fb46ef4 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/se_resnext_vd.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/shufflenet_v2.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/shufflenet_v2.cpython-39.pyc new file mode 100644 index 0000000..f27b598 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/shufflenet_v2.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/squeezenet.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/squeezenet.cpython-39.pyc new file mode 100644 index 0000000..bdd7361 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/squeezenet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/swin_transformer.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/swin_transformer.cpython-39.pyc new file mode 100644 index 0000000..29ace62 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/swin_transformer.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/tnt.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/tnt.cpython-39.pyc new file mode 100644 index 0000000..c340279 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/tnt.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/vision_transformer.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/vision_transformer.cpython-39.pyc new file mode 100644 index 0000000..f0ac12a Binary files /dev/null and 
b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/vision_transformer.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception.cpython-39.pyc new file mode 100644 index 0000000..336dfb4 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception_deeplab.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception_deeplab.cpython-39.pyc new file mode 100644 index 0000000..f73991b Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/model_zoo/__pycache__/xception_deeplab.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/alexnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/alexnet.py new file mode 100644 index 0000000..b44901a --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/alexnet.py @@ -0,0 +1,168 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "AlexNet": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvPoolLayer(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter_size, + stride, + padding, + stdv, + groups=1, + act=None, + name=None): + super(ConvPoolLayer, self).__init__() + + self.relu = ReLU() if act == "relu" else None + + self._conv = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr( + name=name + "_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr( + name=name + "_offset", initializer=Uniform(-stdv, stdv))) + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + + def forward(self, inputs): + x = self._conv(inputs) + if self.relu is not None: + x = self.relu(x) + x = self._pool(x) + return x + + +class AlexNetDY(nn.Layer): + def __init__(self, class_num=1000): + super(AlexNetDY, self).__init__() + + stdv = 1.0 / math.sqrt(3 * 11 * 11) + self._conv1 = ConvPoolLayer( + 3, 64, 11, 4, 2, stdv, act="relu", name="conv1") + stdv = 1.0 / math.sqrt(64 * 5 * 5) + self._conv2 = ConvPoolLayer( + 64, 192, 5, 1, 2, stdv, act="relu", name="conv2") + stdv = 1.0 / math.sqrt(192 * 3 * 3) + self._conv3 = Conv2D( + 192, + 384, + 3, + stride=1, + padding=1, + weight_attr=ParamAttr( + name="conv3_weights", initializer=Uniform(-stdv, 
stdv)), + bias_attr=ParamAttr( + name="conv3_offset", initializer=Uniform(-stdv, stdv))) + stdv = 1.0 / math.sqrt(384 * 3 * 3) + self._conv4 = Conv2D( + 384, + 256, + 3, + stride=1, + padding=1, + weight_attr=ParamAttr( + name="conv4_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr( + name="conv4_offset", initializer=Uniform(-stdv, stdv))) + stdv = 1.0 / math.sqrt(256 * 3 * 3) + self._conv5 = ConvPoolLayer( + 256, 256, 3, 1, 1, stdv, act="relu", name="conv5") + stdv = 1.0 / math.sqrt(256 * 6 * 6) + + self._drop1 = Dropout(p=0.5, mode="downscale_in_infer") + self._fc6 = Linear( + in_features=256 * 6 * 6, + out_features=4096, + weight_attr=ParamAttr( + name="fc6_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr( + name="fc6_offset", initializer=Uniform(-stdv, stdv))) + + self._drop2 = Dropout(p=0.5, mode="downscale_in_infer") + self._fc7 = Linear( + in_features=4096, + out_features=4096, + weight_attr=ParamAttr( + name="fc7_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr( + name="fc7_offset", initializer=Uniform(-stdv, stdv))) + self._fc8 = Linear( + in_features=4096, + out_features=class_num, + weight_attr=ParamAttr( + name="fc8_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr( + name="fc8_offset", initializer=Uniform(-stdv, stdv))) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + x = self._conv3(x) + x = F.relu(x) + x = self._conv4(x) + x = F.relu(x) + x = self._conv5(x) + x = paddle.flatten(x, start_axis=1, stop_axis=-1) + x = self._drop1(x) + x = self._fc6(x) + x = F.relu(x) + x = self._drop2(x) + x = self._fc7(x) + x = F.relu(x) + x = self._fc8(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def AlexNet(pretrained=False, use_ssld=False, **kwargs): + model = AlexNetDY(**kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/cspnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/cspnet.py new file mode 100644 index 0000000..ab5021f --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/cspnet.py @@ -0,0 +1,376 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
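Every `stdv` in `AlexNetDY.__init__` above is the classic fan-in bound 1/sqrt(in_channels * k * k), recomputed just before the layer it initializes; the three `Linear` layers then all reuse the last value, 1/sqrt(256 * 6 * 6), even though `_fc7` and `_fc8` have fan-in 4096. That is simply what the code does. A quick arithmetic check of those bounds (plain Python, standard library only):

    import math

    # Uniform init bounds used by AlexNetDY: bound = 1 / sqrt(fan_in).
    conv_fan_in = {"conv1": 3 * 11 * 11, "conv2": 64 * 5 * 5, "conv3": 192 * 3 * 3,
                   "conv4": 384 * 3 * 3, "conv5": 256 * 3 * 3}
    for name, fan_in in conv_fan_in.items():
        print(name, round(1.0 / math.sqrt(fan_in), 4))            # conv1 -> 0.0525
    print("fc6/fc7/fc8", round(1.0 / math.sqrt(256 * 6 * 6), 4))  # 0.0104, from the 6x6 pooled map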
+ +# Code was heavily based on https://github.com/rwightman/pytorch-image-models + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "CSPDarkNet53": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams" +} + +MODEL_CFGS = { + "CSPDarkNet53": dict( + stem=dict( + out_chs=32, kernel_size=3, stride=1, pool=''), + stage=dict( + out_chs=(64, 128, 256, 512, 1024), + depth=(1, 2, 8, 8, 4), + stride=(2, ) * 5, + exp_ratio=(2., ) + (1., ) * 4, + bottle_ratio=(0.5, ) + (1.0, ) * 4, + block_ratio=(1., ) + (0.5, ) * 4, + down_growth=True, )) +} + +__all__ = ['CSPDarkNet53' + ] # model_registry will add each entrypoint fn to this + + +class ConvBnAct(nn.Layer): + def __init__(self, + input_channels, + output_channels, + kernel_size=1, + stride=1, + padding=None, + dilation=1, + groups=1, + act_layer=nn.LeakyReLU, + norm_layer=nn.BatchNorm2D): + super().__init__() + if padding is None: + padding = (kernel_size - 1) // 2 + self.conv = nn.Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + weight_attr=ParamAttr(), + bias_attr=False) + + self.bn = norm_layer(num_features=output_channels) + self.act = act_layer() + + def forward(self, inputs): + x = self.conv(inputs) + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +def create_stem(in_chans=3, + out_chs=32, + kernel_size=3, + stride=2, + pool='', + act_layer=None, + norm_layer=None): + stem = nn.Sequential() + if not isinstance(out_chs, (tuple, list)): + out_chs = [out_chs] + assert len(out_chs) + in_c = in_chans + for i, out_c in enumerate(out_chs): + conv_name = f'conv{i + 1}' + stem.add_sublayer( + conv_name, + ConvBnAct( + in_c, + out_c, + kernel_size, + stride=stride if i == 0 else 1, + act_layer=act_layer, + norm_layer=norm_layer)) + in_c = out_c + last_conv = conv_name + if pool: + stem.add_sublayer( + 'pool', nn.MaxPool2D( + kernel_size=3, stride=2, padding=1)) + return stem, dict( + num_chs=in_c, reduction=stride, module='.'.join(['stem', last_conv])) + + +class DarkBlock(nn.Layer): + def __init__(self, + in_chs, + out_chs, + dilation=1, + bottle_ratio=0.5, + groups=1, + act_layer=nn.ReLU, + norm_layer=nn.BatchNorm2D, + attn_layer=None, + drop_block=None): + super(DarkBlock, self).__init__() + mid_chs = int(round(out_chs * bottle_ratio)) + ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer) + self.conv1 = ConvBnAct(in_chs, mid_chs, kernel_size=1, **ckwargs) + self.conv2 = ConvBnAct( + mid_chs, + out_chs, + kernel_size=3, + dilation=dilation, + groups=groups, + **ckwargs) + + def forward(self, x): + shortcut = x + x = self.conv1(x) + x = self.conv2(x) + x = x + shortcut + return x + + +class CrossStage(nn.Layer): + def __init__(self, + in_chs, + out_chs, + stride, + dilation, + depth, + block_ratio=1., + bottle_ratio=1., + exp_ratio=1., + groups=1, + first_dilation=None, + down_growth=False, + cross_linear=False, + block_dpr=None, + block_fn=DarkBlock, + **block_kwargs): + super(CrossStage, self).__init__() + first_dilation = first_dilation or dilation + down_chs = out_chs if down_growth else in_chs + exp_chs = int(round(out_chs * exp_ratio)) + block_out_chs = int(round(out_chs * block_ratio)) + conv_kwargs = dict( + act_layer=block_kwargs.get('act_layer'), + 
norm_layer=block_kwargs.get('norm_layer')) + + if stride != 1 or first_dilation != dilation: + self.conv_down = ConvBnAct( + in_chs, + down_chs, + kernel_size=3, + stride=stride, + dilation=first_dilation, + groups=groups, + **conv_kwargs) + prev_chs = down_chs + else: + self.conv_down = None + prev_chs = in_chs + + self.conv_exp = ConvBnAct( + prev_chs, exp_chs, kernel_size=1, **conv_kwargs) + prev_chs = exp_chs // 2 # output of conv_exp is always split in two + + self.blocks = nn.Sequential() + for i in range(depth): + self.blocks.add_sublayer( + str(i), + block_fn(prev_chs, block_out_chs, dilation, bottle_ratio, + groups, **block_kwargs)) + prev_chs = block_out_chs + + # transition convs + self.conv_transition_b = ConvBnAct( + prev_chs, exp_chs // 2, kernel_size=1, **conv_kwargs) + self.conv_transition = ConvBnAct( + exp_chs, out_chs, kernel_size=1, **conv_kwargs) + + def forward(self, x): + if self.conv_down is not None: + x = self.conv_down(x) + x = self.conv_exp(x) + split = x.shape[1] // 2 + xs, xb = x[:, :split], x[:, split:] + xb = self.blocks(xb) + xb = self.conv_transition_b(xb) + out = self.conv_transition(paddle.concat([xs, xb], axis=1)) + return out + + +class DarkStage(nn.Layer): + def __init__(self, + in_chs, + out_chs, + stride, + dilation, + depth, + block_ratio=1., + bottle_ratio=1., + groups=1, + first_dilation=None, + block_fn=DarkBlock, + block_dpr=None, + **block_kwargs): + super().__init__() + first_dilation = first_dilation or dilation + + self.conv_down = ConvBnAct( + in_chs, + out_chs, + kernel_size=3, + stride=stride, + dilation=first_dilation, + groups=groups, + act_layer=block_kwargs.get('act_layer'), + norm_layer=block_kwargs.get('norm_layer')) + + prev_chs = out_chs + block_out_chs = int(round(out_chs * block_ratio)) + self.blocks = nn.Sequential() + for i in range(depth): + self.blocks.add_sublayer( + str(i), + block_fn(prev_chs, block_out_chs, dilation, bottle_ratio, + groups, **block_kwargs)) + prev_chs = block_out_chs + + def forward(self, x): + x = self.conv_down(x) + x = self.blocks(x) + return x + + +def _cfg_to_stage_args(cfg, curr_stride=2, output_stride=32): + # get per stage args for stage and containing blocks, calculate strides to meet target output_stride + num_stages = len(cfg['depth']) + if 'groups' not in cfg: + cfg['groups'] = (1, ) * num_stages + if 'down_growth' in cfg and not isinstance(cfg['down_growth'], + (list, tuple)): + cfg['down_growth'] = (cfg['down_growth'], ) * num_stages + stage_strides = [] + stage_dilations = [] + stage_first_dilations = [] + dilation = 1 + for cfg_stride in cfg['stride']: + stage_first_dilations.append(dilation) + if curr_stride >= output_stride: + dilation *= cfg_stride + stride = 1 + else: + stride = cfg_stride + curr_stride *= stride + stage_strides.append(stride) + stage_dilations.append(dilation) + cfg['stride'] = stage_strides + cfg['dilation'] = stage_dilations + cfg['first_dilation'] = stage_first_dilations + stage_args = [ + dict(zip(cfg.keys(), values)) for values in zip(*cfg.values()) + ] + return stage_args + + +class CSPNet(nn.Layer): + def __init__(self, + cfg, + in_chans=3, + class_num=1000, + output_stride=32, + global_pool='avg', + drop_rate=0., + act_layer=nn.LeakyReLU, + norm_layer=nn.BatchNorm2D, + zero_init_last_bn=True, + stage_fn=CrossStage, + block_fn=DarkBlock): + super().__init__() + self.class_num = class_num + self.drop_rate = drop_rate + assert output_stride in (8, 16, 32) + layer_args = dict(act_layer=act_layer, norm_layer=norm_layer) + + # Construct the stem + self.stem, 
stem_feat_info = create_stem(in_chans, **cfg['stem'], + **layer_args) + self.feature_info = [stem_feat_info] + prev_chs = stem_feat_info['num_chs'] + curr_stride = stem_feat_info[ + 'reduction'] # reduction does not include pool + if cfg['stem']['pool']: + curr_stride *= 2 + + # Construct the stages + per_stage_args = _cfg_to_stage_args( + cfg['stage'], curr_stride=curr_stride, output_stride=output_stride) + self.stages = nn.LayerList() + for i, sa in enumerate(per_stage_args): + self.stages.add_sublayer( + str(i), + stage_fn( + prev_chs, **sa, **layer_args, block_fn=block_fn)) + prev_chs = sa['out_chs'] + curr_stride *= sa['stride'] + self.feature_info += [ + dict( + num_chs=prev_chs, + reduction=curr_stride, + module=f'stages.{i}') + ] + + # Construct the head + self.num_features = prev_chs + + self.pool = nn.AdaptiveAvgPool2D(1) + self.flatten = nn.Flatten(1) + self.fc = nn.Linear( + prev_chs, + class_num, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) + + def forward(self, x): + x = self.stem(x) + for stage in self.stages: + x = stage(x) + x = self.pool(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def CSPDarkNet53(pretrained=False, use_ssld=False, **kwargs): + model = CSPNet(MODEL_CFGS["CSPDarkNet53"], block_fn=DarkBlock, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["CSPDarkNet53"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/darknet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/darknet.py new file mode 100644 index 0000000..75aafd8 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/darknet.py @@ -0,0 +1,197 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
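`CSPDarkNet53` above is a thin wrapper: `_cfg_to_stage_args` expands `MODEL_CFGS["CSPDarkNet53"]` into five `CrossStage` argument dicts (each with stride 2 under the default `output_stride=32`), and each stage splits its expanded features in half along the channel axis, runs only one half through the `DarkBlock` stack, and re-joins the halves in `conv_transition`. A hedged smoke-test sketch, assuming `paddle` is installed and the module path matches this diff; the input size is arbitrary:

    import paddle
    from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53

    net = CSPDarkNet53(pretrained=False)
    x = paddle.randn([1, 3, 256, 256])
    print(net(x).shape)            # expected: [1, 1000]
    for info in net.feature_info:  # stem entry plus one dict per CrossStage
        print(info["module"], info["num_chs"], info["reduction"])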
+ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "DarkNet53": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter_size, + stride, + padding, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=filter_size, + stride=stride, + padding=padding, + weight_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + self._bn = BatchNorm( + num_channels=output_channels, + act="relu", + param_attr=ParamAttr(name=bn_name + ".scale"), + bias_attr=ParamAttr(name=bn_name + ".offset"), + moving_mean_name=bn_name + ".mean", + moving_variance_name=bn_name + ".var") + + def forward(self, inputs): + x = self._conv(inputs) + x = self._bn(x) + return x + + +class BasicBlock(nn.Layer): + def __init__(self, input_channels, output_channels, name=None): + super(BasicBlock, self).__init__() + + self._conv1 = ConvBNLayer( + input_channels, output_channels, 1, 1, 0, name=name + ".0") + self._conv2 = ConvBNLayer( + output_channels, output_channels * 2, 3, 1, 1, name=name + ".1") + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + return paddle.add(x=inputs, y=x) + + +class DarkNet(nn.Layer): + def __init__(self, class_num=1000): + super(DarkNet, self).__init__() + + self.stages = [1, 2, 8, 8, 4] + self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input") + self._conv2 = ConvBNLayer( + 32, 64, 3, 2, 1, name="yolo_input.downsample") + + self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0") + self._downsample_0 = ConvBNLayer( + 64, 128, 3, 2, 1, name="stage.0.downsample") + + self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0") + self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1") + self._downsample_1 = ConvBNLayer( + 128, 256, 3, 2, 1, name="stage.1.downsample") + + self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0") + self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1") + self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2") + self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3") + self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4") + self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5") + self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6") + self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7") + self._downsample_2 = ConvBNLayer( + 256, 512, 3, 2, 1, name="stage.2.downsample") + + self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0") + self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1") + self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2") + self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3") + self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4") + self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5") + self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6") + self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7") + self._downsample_3 = ConvBNLayer( + 512, 1024, 3, 2, 1, 
name="stage.3.downsample") + + self._basic_block_41 = BasicBlock(1024, 512, name="stage.4.0") + self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1") + self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2") + self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3") + + self._pool = AdaptiveAvgPool2D(1) + + stdv = 1.0 / math.sqrt(1024.0) + self._out = Linear( + 1024, + class_num, + weight_attr=ParamAttr( + name="fc_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + + x = self._basic_block_01(x) + x = self._downsample_0(x) + + x = self._basic_block_11(x) + x = self._basic_block_12(x) + x = self._downsample_1(x) + + x = self._basic_block_21(x) + x = self._basic_block_22(x) + x = self._basic_block_23(x) + x = self._basic_block_24(x) + x = self._basic_block_25(x) + x = self._basic_block_26(x) + x = self._basic_block_27(x) + x = self._basic_block_28(x) + x = self._downsample_2(x) + + x = self._basic_block_31(x) + x = self._basic_block_32(x) + x = self._basic_block_33(x) + x = self._basic_block_34(x) + x = self._basic_block_35(x) + x = self._basic_block_36(x) + x = self._basic_block_37(x) + x = self._basic_block_38(x) + x = self._downsample_3(x) + + x = self._basic_block_41(x) + x = self._basic_block_42(x) + x = self._basic_block_43(x) + x = self._basic_block_44(x) + + x = self._pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._out(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def DarkNet53(pretrained=False, use_ssld=False, **kwargs): + model = DarkNet(**kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/densenet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/densenet.py new file mode 100644 index 0000000..7e6e202 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/densenet.py @@ -0,0 +1,344 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "DenseNet121": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams", + "DenseNet161": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams", + "DenseNet169": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams", + "DenseNet201": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams", + "DenseNet264": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class BNACConvLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + pad=0, + groups=1, + act="relu", + name=None): + super(BNACConvLayer, self).__init__() + + self._batch_norm = BatchNorm( + num_channels, + act=act, + param_attr=ParamAttr(name=name + '_bn_scale'), + bias_attr=ParamAttr(name + '_bn_offset'), + moving_mean_name=name + '_bn_mean', + moving_variance_name=name + '_bn_variance') + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + def forward(self, input): + y = self._batch_norm(input) + y = self._conv(y) + return y + + +class DenseLayer(nn.Layer): + def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None): + super(DenseLayer, self).__init__() + self.dropout = dropout + + self.bn_ac_func1 = BNACConvLayer( + num_channels=num_channels, + num_filters=bn_size * growth_rate, + filter_size=1, + pad=0, + stride=1, + name=name + "_x1") + + self.bn_ac_func2 = BNACConvLayer( + num_channels=bn_size * growth_rate, + num_filters=growth_rate, + filter_size=3, + pad=1, + stride=1, + name=name + "_x2") + + if dropout: + self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer") + + def forward(self, input): + conv = self.bn_ac_func1(input) + conv = self.bn_ac_func2(conv) + if self.dropout: + conv = self.dropout_func(conv) + conv = paddle.concat([input, conv], axis=1) + return conv + + +class DenseBlock(nn.Layer): + def __init__(self, + num_channels, + num_layers, + bn_size, + growth_rate, + dropout, + name=None): + super(DenseBlock, self).__init__() + self.dropout = dropout + + self.dense_layer_func = [] + + pre_channel = num_channels + for layer in range(num_layers): + self.dense_layer_func.append( + self.add_sublayer( + "{}_{}".format(name, layer + 1), + DenseLayer( + num_channels=pre_channel, + growth_rate=growth_rate, + bn_size=bn_size, + dropout=dropout, + name=name + '_' + str(layer + 1)))) + pre_channel = pre_channel + growth_rate + + def forward(self, input): + conv = input + for func in self.dense_layer_func: + conv = func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, num_channels, num_output_features, name=None): + super(TransitionLayer, self).__init__() + + self.conv_ac_func = BNACConvLayer( + num_channels=num_channels, + 
num_filters=num_output_features, + filter_size=1, + pad=0, + stride=1, + name=name) + + self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0) + + def forward(self, input): + y = self.conv_ac_func(input) + y = self.pool2d_avg(y) + return y + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + pad=0, + groups=1, + act="relu", + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=name + '_bn_scale'), + bias_attr=ParamAttr(name + '_bn_offset'), + moving_mean_name=name + '_bn_mean', + moving_variance_name=name + '_bn_variance') + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + return y + + +class DenseNet(nn.Layer): + def __init__(self, layers=60, bn_size=4, dropout=0, class_num=1000): + super(DenseNet, self).__init__() + + supported_layers = [121, 161, 169, 201, 264] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + densenet_spec = { + 121: (64, 32, [6, 12, 24, 16]), + 161: (96, 48, [6, 12, 36, 24]), + 169: (64, 32, [6, 12, 32, 32]), + 201: (64, 32, [6, 12, 48, 32]), + 264: (64, 32, [6, 12, 64, 48]) + } + num_init_features, growth_rate, block_config = densenet_spec[layers] + + self.conv1_func = ConvBNLayer( + num_channels=3, + num_filters=num_init_features, + filter_size=7, + stride=2, + pad=3, + act='relu', + name="conv1") + + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_config = block_config + + self.dense_block_func_list = [] + self.transition_func_list = [] + pre_num_channels = num_init_features + num_features = num_init_features + for i, num_layers in enumerate(block_config): + self.dense_block_func_list.append( + self.add_sublayer( + "db_conv_{}".format(i + 2), + DenseBlock( + num_channels=pre_num_channels, + num_layers=num_layers, + bn_size=bn_size, + growth_rate=growth_rate, + dropout=dropout, + name='conv' + str(i + 2)))) + + num_features = num_features + num_layers * growth_rate + pre_num_channels = num_features + + if i != len(block_config) - 1: + self.transition_func_list.append( + self.add_sublayer( + "tr_conv{}_blk".format(i + 2), + TransitionLayer( + num_channels=pre_num_channels, + num_output_features=num_features // 2, + name='conv' + str(i + 2) + "_blk"))) + pre_num_channels = num_features // 2 + num_features = num_features // 2 + + self.batch_norm = BatchNorm( + num_features, + act="relu", + param_attr=ParamAttr(name='conv5_blk_bn_scale'), + bias_attr=ParamAttr(name='conv5_blk_bn_offset'), + moving_mean_name='conv5_blk_bn_mean', + moving_variance_name='conv5_blk_bn_variance') + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + stdv = 1.0 / math.sqrt(num_features * 1.0) + + self.out = Linear( + num_features, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, input): + conv = self.conv1_func(input) + conv = self.pool2d_max(conv) + + for i, num_layers in enumerate(self.block_config): + conv = self.dense_block_func_list[i](conv) + if i != len(self.block_config) - 1: + conv = self.transition_func_list[i](conv) + + conv = self.batch_norm(conv) + y = 
self.pool2d_avg(conv) + y = paddle.flatten(y, start_axis=1, stop_axis=-1) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def DenseNet121(pretrained=False, use_ssld=False, **kwargs): + model = DenseNet(layers=121, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld) + return model + + +def DenseNet161(pretrained=False, use_ssld=False, **kwargs): + model = DenseNet(layers=161, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld) + return model + + +def DenseNet169(pretrained=False, use_ssld=False, **kwargs): + model = DenseNet(layers=169, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld) + return model + + +def DenseNet201(pretrained=False, use_ssld=False, **kwargs): + model = DenseNet(layers=201, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld) + return model + + +def DenseNet264(pretrained=False, use_ssld=False, **kwargs): + model = DenseNet(layers=264, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py new file mode 100644 index 0000000..676a289 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py @@ -0,0 +1,272 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
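DenseNet's channel arithmetic above is easy to lose track of: each DenseLayer concatenates growth_rate new channels onto its input, and each TransitionLayer then halves the running width. For DenseNet121 (64 stem channels, growth rate 32, block config [6, 12, 24, 16]) the final BatchNorm and fc head therefore see 1024 channels; a worked check:

num_features = 64                   # num_init_features for layers=121
growth_rate = 32
for i, num_layers in enumerate([6, 12, 24, 16]):
    num_features += num_layers * growth_rate   # DenseBlock: one concat per layer
    if i != 3:                                 # TransitionLayer after all but the last block
        num_features //= 2
print(num_features)                            # 1024, the input width of self.out

One note on the DistilledVisionTransformer defined below: after creating dist_token, the code calls self.add_parameter("cls_token", self.cls_token), which re-registers the token inherited from VisionTransformer. Nothing breaks, because create_parameter already tracks dist_token, but add_parameter("dist_token", self.dist_token) reads as the likely intent.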
+ +# Code was heavily based on https://github.com/facebookresearch/deit + +import paddle +import paddle.nn as nn +from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_ + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "DeiT_tiny_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams", + "DeiT_small_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams", + "DeiT_base_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams", + "DeiT_tiny_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams", + "DeiT_small_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams", + "DeiT_base_distilled_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams", + "DeiT_base_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams", + "DeiT_base_distilled_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class DistilledVisionTransformer(VisionTransformer): + def __init__(self, + img_size=224, + patch_size=16, + class_num=1000, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=False, + norm_layer='nn.LayerNorm', + epsilon=1e-5, + **kwargs): + super().__init__( + img_size=img_size, + patch_size=patch_size, + class_num=class_num, + embed_dim=embed_dim, + depth=depth, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + epsilon=epsilon, + **kwargs) + self.pos_embed = self.create_parameter( + shape=(1, self.patch_embed.num_patches + 2, self.embed_dim), + default_initializer=zeros_) + self.add_parameter("pos_embed", self.pos_embed) + + self.dist_token = self.create_parameter( + shape=(1, 1, self.embed_dim), default_initializer=zeros_) + self.add_parameter("cls_token", self.cls_token) + + self.head_dist = nn.Linear( + self.embed_dim, + self.class_num) if self.class_num > 0 else Identity() + + trunc_normal_(self.dist_token) + trunc_normal_(self.pos_embed) + self.head_dist.apply(self._init_weights) + + def forward_features(self, x): + B = paddle.shape(x)[0] + x = self.patch_embed(x) + + cls_tokens = self.cls_token.expand((B, -1, -1)) + dist_token = self.dist_token.expand((B, -1, -1)) + x = paddle.concat((cls_tokens, dist_token, x), axis=1) + + x = x + self.pos_embed + x = self.pos_drop(x) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + return x[:, 0], x[:, 1] + + def forward(self, x): + x, x_dist = self.forward_features(x) + x = self.head(x) + x_dist = self.head_dist(x_dist) + return (x + x_dist) / 2 + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=192, + depth=12, + num_heads=3, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_tiny_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=384, + depth=12, + num_heads=6, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_small_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, + **kwargs): + model = DistilledVisionTransformer( + patch_size=16, + embed_dim=192, + depth=12, + num_heads=3, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_tiny_distilled_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_small_distilled_patch16_224(pretrained=False, + use_ssld=False, + **kwargs): + model = DistilledVisionTransformer( + patch_size=16, + embed_dim=384, + depth=12, + num_heads=6, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_small_distilled_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, + **kwargs): + model = DistilledVisionTransformer( + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_distilled_patch16_224"], + use_ssld=use_ssld) + return model + + +def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + img_size=384, + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_patch16_384"], + use_ssld=use_ssld) + return model + + +def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, + **kwargs): + model = DistilledVisionTransformer( + img_size=384, + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["DeiT_base_distilled_patch16_384"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/dla.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/dla.py new file mode 100644 index 0000000..b1c00b2 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/dla.py @@ -0,0 +1,528 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/ucbdrive/dla + +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddle.nn.initializer import Normal, Constant + +from ppcls.arch.backbone.base.theseus_layer import Identity +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "DLA34": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams", + "DLA46_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams", + "DLA46x_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams", + "DLA60": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams", + "DLA60x": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams", + "DLA60x_c": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams", + "DLA102": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams", + "DLA102x": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams", + "DLA102x2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams", + "DLA169": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams" +} + +__all__ = MODEL_URLS.keys() + +zeros_ = Constant(value=0.) +ones_ = Constant(value=1.) 
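One inconsistency worth flagging in the dla.py module setup above: the sibling files in this diff export __all__ = list(MODEL_URLS.keys()), while dla.py assigns the raw dict_keys view. Star-imports accept any iterable of names, so behavior is unchanged, but the view is not indexable; a small illustration:

MODEL_URLS = {"DLA34": "...", "DLA60": "..."}

exported = MODEL_URLS.keys()            # dict_keys view, as dla.py leaves __all__
print("DLA34" in exported)              # True; membership tests and iteration work
# exported[0]                           # would raise TypeError: not subscriptable

exported = list(MODEL_URLS.keys())      # the convention used elsewhere in this diff
print(exported[0])                      # "DLA34"

The zeros_ and ones_ Constant initializers defined just above are the callables DLA later applies to BatchNorm2D weights and biases in its initialization loop.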
+ + +class DlaBasic(nn.Layer): + def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs): + super(DlaBasic, self).__init__() + self.conv1 = nn.Conv2D( + inplanes, + planes, + kernel_size=3, + stride=stride, + padding=dilation, + bias_attr=False, + dilation=dilation) + self.bn1 = nn.BatchNorm2D(planes) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2D( + planes, + planes, + kernel_size=3, + stride=1, + padding=dilation, + bias_attr=False, + dilation=dilation) + self.bn2 = nn.BatchNorm2D(planes) + self.stride = stride + + def forward(self, x, residual=None): + if residual is None: + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + out += residual + out = self.relu(out) + + return out + + +class DlaBottleneck(nn.Layer): + expansion = 2 + + def __init__(self, + inplanes, + outplanes, + stride=1, + dilation=1, + cardinality=1, + base_width=64): + super(DlaBottleneck, self).__init__() + self.stride = stride + mid_planes = int( + math.floor(outplanes * (base_width / 64)) * cardinality) + mid_planes = mid_planes // self.expansion + + self.conv1 = nn.Conv2D( + inplanes, mid_planes, kernel_size=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(mid_planes) + self.conv2 = nn.Conv2D( + mid_planes, + mid_planes, + kernel_size=3, + stride=stride, + padding=dilation, + bias_attr=False, + dilation=dilation, + groups=cardinality) + self.bn2 = nn.BatchNorm2D(mid_planes) + self.conv3 = nn.Conv2D( + mid_planes, outplanes, kernel_size=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(outplanes) + self.relu = nn.ReLU() + + def forward(self, x, residual=None): + if residual is None: + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out += residual + out = self.relu(out) + + return out + + +class DlaRoot(nn.Layer): + def __init__(self, in_channels, out_channels, kernel_size, residual): + super(DlaRoot, self).__init__() + self.conv = nn.Conv2D( + in_channels, + out_channels, + 1, + stride=1, + bias_attr=False, + padding=(kernel_size - 1) // 2) + self.bn = nn.BatchNorm2D(out_channels) + self.relu = nn.ReLU() + self.residual = residual + + def forward(self, *x): + children = x + x = self.conv(paddle.concat(x, 1)) + x = self.bn(x) + if self.residual: + x += children[0] + x = self.relu(x) + + return x + + +class DlaTree(nn.Layer): + def __init__(self, + levels, + block, + in_channels, + out_channels, + stride=1, + dilation=1, + cardinality=1, + base_width=64, + level_root=False, + root_dim=0, + root_kernel_size=1, + root_residual=False): + super(DlaTree, self).__init__() + if root_dim == 0: + root_dim = 2 * out_channels + if level_root: + root_dim += in_channels + + self.downsample = nn.MaxPool2D( + stride, stride=stride) if stride > 1 else Identity() + self.project = Identity() + cargs = dict( + dilation=dilation, cardinality=cardinality, base_width=base_width) + + if levels == 1: + self.tree1 = block(in_channels, out_channels, stride, **cargs) + self.tree2 = block(out_channels, out_channels, 1, **cargs) + if in_channels != out_channels: + self.project = nn.Sequential( + nn.Conv2D( + in_channels, + out_channels, + kernel_size=1, + stride=1, + bias_attr=False), + nn.BatchNorm2D(out_channels)) + else: + cargs.update( + dict( + root_kernel_size=root_kernel_size, + root_residual=root_residual)) + self.tree1 = DlaTree( + levels - 1, + block, + in_channels, + out_channels, + 
stride, + root_dim=0, + **cargs) + self.tree2 = DlaTree( + levels - 1, + block, + out_channels, + out_channels, + root_dim=root_dim + out_channels, + **cargs) + + if levels == 1: + self.root = DlaRoot(root_dim, out_channels, root_kernel_size, + root_residual) + + self.level_root = level_root + self.root_dim = root_dim + self.levels = levels + + def forward(self, x, residual=None, children=None): + children = [] if children is None else children + bottom = self.downsample(x) + residual = self.project(bottom) + + if self.level_root: + children.append(bottom) + x1 = self.tree1(x, residual) + + if self.levels == 1: + x2 = self.tree2(x1) + x = self.root(x2, x1, *children) + else: + children.append(x1) + x = self.tree2(x1, children=children) + return x + + +class DLA(nn.Layer): + def __init__(self, + levels, + channels, + in_chans=3, + cardinality=1, + base_width=64, + block=DlaBottleneck, + residual_root=False, + drop_rate=0.0, + class_num=1000, + with_pool=True): + super(DLA, self).__init__() + self.channels = channels + self.class_num = class_num + self.with_pool = with_pool + self.cardinality = cardinality + self.base_width = base_width + self.drop_rate = drop_rate + + self.base_layer = nn.Sequential( + nn.Conv2D( + in_chans, + channels[0], + kernel_size=7, + stride=1, + padding=3, + bias_attr=False), + nn.BatchNorm2D(channels[0]), + nn.ReLU()) + + self.level0 = self._make_conv_level(channels[0], channels[0], + levels[0]) + self.level1 = self._make_conv_level( + channels[0], channels[1], levels[1], stride=2) + + cargs = dict( + cardinality=cardinality, + base_width=base_width, + root_residual=residual_root) + + self.level2 = DlaTree( + levels[2], + block, + channels[1], + channels[2], + 2, + level_root=False, + **cargs) + self.level3 = DlaTree( + levels[3], + block, + channels[2], + channels[3], + 2, + level_root=True, + **cargs) + self.level4 = DlaTree( + levels[4], + block, + channels[3], + channels[4], + 2, + level_root=True, + **cargs) + self.level5 = DlaTree( + levels[5], + block, + channels[4], + channels[5], + 2, + level_root=True, + **cargs) + + self.feature_info = [ + # rare to have a meaningful stride 1 level + dict( + num_chs=channels[0], reduction=1, module='level0'), + dict( + num_chs=channels[1], reduction=2, module='level1'), + dict( + num_chs=channels[2], reduction=4, module='level2'), + dict( + num_chs=channels[3], reduction=8, module='level3'), + dict( + num_chs=channels[4], reduction=16, module='level4'), + dict( + num_chs=channels[5], reduction=32, module='level5'), + ] + + self.num_features = channels[-1] + + if with_pool: + self.global_pool = nn.AdaptiveAvgPool2D(1) + + if class_num > 0: + self.fc = nn.Conv2D(self.num_features, class_num, 1) + + for m in self.sublayers(): + if isinstance(m, nn.Conv2D): + n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels + normal_ = Normal(mean=0.0, std=math.sqrt(2. 
/ n)) + normal_(m.weight) + elif isinstance(m, nn.BatchNorm2D): + ones_(m.weight) + zeros_(m.bias) + + def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1): + modules = [] + for i in range(convs): + modules.extend([ + nn.Conv2D( + inplanes, + planes, + kernel_size=3, + stride=stride if i == 0 else 1, + padding=dilation, + bias_attr=False, + dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU() + ]) + inplanes = planes + return nn.Sequential(*modules) + + def forward_features(self, x): + x = self.base_layer(x) + + x = self.level0(x) + x = self.level1(x) + x = self.level2(x) + x = self.level3(x) + x = self.level4(x) + x = self.level5(x) + + return x + + def forward(self, x): + x = self.forward_features(x) + + if self.with_pool: + x = self.global_pool(x) + + if self.drop_rate > 0.: + x = F.dropout(x, p=self.drop_rate, training=self.training) + + if self.class_num > 0: + x = self.fc(x) + x = x.flatten(1) + + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def DLA34(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 128, 256, 512), + block=DlaBasic, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA34"]) + return model + + +def DLA46_c(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"]) + return model + + +def DLA46x_c(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 2, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"]) + return model + + +def DLA60(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA60"]) + return model + + +def DLA60x(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"]) + return model + + +def DLA60x_c(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 2, 3, 1), + channels=(16, 32, 64, 64, 128, 256), + block=DlaBottleneck, + cardinality=32, + base_width=4, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"]) + return model + + +def DLA102(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + residual_root=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA102"]) + return model + + +def DLA102x(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + cardinality=32, + base_width=4, + residual_root=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"]) + return model + + +def DLA102x2(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 1, 3, 4, 1), + channels=(16, 32, 128, 256, 512, 1024), + 
block=DlaBottleneck, + cardinality=64, + base_width=4, + residual_root=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"]) + return model + + +def DLA169(pretrained=False, **kwargs): + model = DLA(levels=(1, 1, 2, 3, 5, 1), + channels=(16, 32, 128, 256, 512, 1024), + block=DlaBottleneck, + residual_root=True, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DLA169"]) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/dpn.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/dpn.py new file mode 100644 index 0000000..55953ed --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/dpn.py @@ -0,0 +1,451 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import sys +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2D, BatchNorm, Linear +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "DPN68": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams", + "DPN92": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams", + "DPN98": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams", + "DPN107": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams", + "DPN131": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + pad=0, + groups=1, + act="relu", + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=name + '_bn_scale'), + bias_attr=ParamAttr(name + '_bn_offset'), + moving_mean_name=name + '_bn_mean', + moving_variance_name=name + '_bn_variance') + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + return y + + +class BNACConvLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + pad=0, + groups=1, + act="relu", + name=None): + super(BNACConvLayer, self).__init__() + self.num_channels = num_channels + + self._batch_norm = BatchNorm( + num_channels, + act=act, + param_attr=ParamAttr(name=name + '_bn_scale'), + bias_attr=ParamAttr(name + '_bn_offset'), + moving_mean_name=name + '_bn_mean', + moving_variance_name=name + 
'_bn_variance') + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + def forward(self, input): + y = self._batch_norm(input) + y = self._conv(y) + return y + + +class DualPathFactory(nn.Layer): + def __init__(self, + num_channels, + num_1x1_a, + num_3x3_b, + num_1x1_c, + inc, + G, + _type='normal', + name=None): + super(DualPathFactory, self).__init__() + + self.num_1x1_c = num_1x1_c + self.inc = inc + self.name = name + + kw = 3 + kh = 3 + pw = (kw - 1) // 2 + ph = (kh - 1) // 2 + + # type + if _type == 'proj': + key_stride = 1 + self.has_proj = True + elif _type == 'down': + key_stride = 2 + self.has_proj = True + elif _type == 'normal': + key_stride = 1 + self.has_proj = False + else: + print("not implemented now!!!") + sys.exit(1) + + data_in_ch = sum(num_channels) if isinstance(num_channels, + list) else num_channels + + if self.has_proj: + self.c1x1_w_func = BNACConvLayer( + num_channels=data_in_ch, + num_filters=num_1x1_c + 2 * inc, + filter_size=(1, 1), + pad=(0, 0), + stride=(key_stride, key_stride), + name=name + "_match") + + self.c1x1_a_func = BNACConvLayer( + num_channels=data_in_ch, + num_filters=num_1x1_a, + filter_size=(1, 1), + pad=(0, 0), + name=name + "_conv1") + + self.c3x3_b_func = BNACConvLayer( + num_channels=num_1x1_a, + num_filters=num_3x3_b, + filter_size=(kw, kh), + pad=(pw, ph), + stride=(key_stride, key_stride), + groups=G, + name=name + "_conv2") + + self.c1x1_c_func = BNACConvLayer( + num_channels=num_3x3_b, + num_filters=num_1x1_c + inc, + filter_size=(1, 1), + pad=(0, 0), + name=name + "_conv3") + + def forward(self, input): + # PROJ + if isinstance(input, list): + data_in = paddle.concat([input[0], input[1]], axis=1) + else: + data_in = input + + if self.has_proj: + c1x1_w = self.c1x1_w_func(data_in) + data_o1, data_o2 = paddle.split( + c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1) + else: + data_o1 = input[0] + data_o2 = input[1] + + c1x1_a = self.c1x1_a_func(data_in) + c3x3_b = self.c3x3_b_func(c1x1_a) + c1x1_c = self.c1x1_c_func(c3x3_b) + + c1x1_c1, c1x1_c2 = paddle.split( + c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1) + + # OUTPUTS + summ = paddle.add(x=data_o1, y=c1x1_c1) + dense = paddle.concat([data_o2, c1x1_c2], axis=1) + # tensor, channels + return [summ, dense] + + +class DPN(nn.Layer): + def __init__(self, layers=68, class_num=1000): + super(DPN, self).__init__() + + self._class_num = class_num + + args = self.get_net_args(layers) + bws = args['bw'] + inc_sec = args['inc_sec'] + rs = args['r'] + k_r = args['k_r'] + k_sec = args['k_sec'] + G = args['G'] + init_num_filter = args['init_num_filter'] + init_filter_size = args['init_filter_size'] + init_padding = args['init_padding'] + + self.k_sec = k_sec + + self.conv1_x_1_func = ConvBNLayer( + num_channels=3, + num_filters=init_num_filter, + filter_size=init_filter_size, + stride=2, + pad=init_padding, + act='relu', + name="conv1") + + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + num_channel_dpn = init_num_filter + + self.dpn_func_list = [] + #conv2 - conv5 + match_list, num = [], 0 + for gc in range(4): + bw = bws[gc] + inc = inc_sec[gc] + R = (k_r * bw) // rs[gc] + if gc == 0: + _type1 = 'proj' + _type2 = 'normal' + match = 1 + else: + _type1 = 'down' + _type2 = 'normal' + match = match + k_sec[gc - 1] + match_list.append(match) + self.dpn_func_list.append( + 
self.add_sublayer( + "dpn{}".format(match), + DualPathFactory( + num_channels=num_channel_dpn, + num_1x1_a=R, + num_3x3_b=R, + num_1x1_c=bw, + inc=inc, + G=G, + _type=_type1, + name="dpn" + str(match)))) + num_channel_dpn = [bw, 3 * inc] + + for i_ly in range(2, k_sec[gc] + 1): + num += 1 + if num in match_list: + num += 1 + self.dpn_func_list.append( + self.add_sublayer( + "dpn{}".format(num), + DualPathFactory( + num_channels=num_channel_dpn, + num_1x1_a=R, + num_3x3_b=R, + num_1x1_c=bw, + inc=inc, + G=G, + _type=_type2, + name="dpn" + str(num)))) + + num_channel_dpn = [ + num_channel_dpn[0], num_channel_dpn[1] + inc + ] + + out_channel = sum(num_channel_dpn) + + self.conv5_x_x_bn = BatchNorm( + num_channels=sum(num_channel_dpn), + act="relu", + param_attr=ParamAttr(name='final_concat_bn_scale'), + bias_attr=ParamAttr('final_concat_bn_offset'), + moving_mean_name='final_concat_bn_mean', + moving_variance_name='final_concat_bn_variance') + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + stdv = 0.01 + + self.out = Linear( + out_channel, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, input): + conv1_x_1 = self.conv1_x_1_func(input) + convX_x_x = self.pool2d_max(conv1_x_1) + + dpn_idx = 0 + for gc in range(4): + convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) + dpn_idx += 1 + for i_ly in range(2, self.k_sec[gc] + 1): + convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x) + dpn_idx += 1 + + conv5_x_x = paddle.concat(convX_x_x, axis=1) + conv5_x_x = self.conv5_x_x_bn(conv5_x_x) + + y = self.pool2d_avg(conv5_x_x) + y = paddle.flatten(y, start_axis=1, stop_axis=-1) + y = self.out(y) + return y + + def get_net_args(self, layers): + if layers == 68: + k_r = 128 + G = 32 + k_sec = [3, 4, 12, 3] + inc_sec = [16, 32, 32, 64] + bw = [64, 128, 256, 512] + r = [64, 64, 64, 64] + init_num_filter = 10 + init_filter_size = 3 + init_padding = 1 + elif layers == 92: + k_r = 96 + G = 32 + k_sec = [3, 4, 20, 3] + inc_sec = [16, 32, 24, 128] + bw = [256, 512, 1024, 2048] + r = [256, 256, 256, 256] + init_num_filter = 64 + init_filter_size = 7 + init_padding = 3 + elif layers == 98: + k_r = 160 + G = 40 + k_sec = [3, 6, 20, 3] + inc_sec = [16, 32, 32, 128] + bw = [256, 512, 1024, 2048] + r = [256, 256, 256, 256] + init_num_filter = 96 + init_filter_size = 7 + init_padding = 3 + elif layers == 107: + k_r = 200 + G = 50 + k_sec = [4, 8, 20, 3] + inc_sec = [20, 64, 64, 128] + bw = [256, 512, 1024, 2048] + r = [256, 256, 256, 256] + init_num_filter = 128 + init_filter_size = 7 + init_padding = 3 + elif layers == 131: + k_r = 160 + G = 40 + k_sec = [4, 8, 28, 3] + inc_sec = [16, 32, 32, 128] + bw = [256, 512, 1024, 2048] + r = [256, 256, 256, 256] + init_num_filter = 128 + init_filter_size = 7 + init_padding = 3 + else: + raise NotImplementedError + net_arg = { + 'k_r': k_r, + 'G': G, + 'k_sec': k_sec, + 'inc_sec': inc_sec, + 'bw': bw, + 'r': r + } + net_arg['init_num_filter'] = init_num_filter + net_arg['init_filter_size'] = init_filter_size + net_arg['init_padding'] = init_padding + + return net_arg + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def DPN68(pretrained=False, use_ssld=False, **kwargs): + model = DPN(layers=68, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DPN68"]) + return model + + +def DPN92(pretrained=False, use_ssld=False, **kwargs): + model = DPN(layers=92, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DPN92"]) + return model + + +def DPN98(pretrained=False, use_ssld=False, **kwargs): + model = DPN(layers=98, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DPN98"]) + return model + + +def DPN107(pretrained=False, use_ssld=False, **kwargs): + model = DPN(layers=107, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DPN107"]) + return model + + +def DPN131(pretrained=False, use_ssld=False, **kwargs): + model = DPN(layers=131, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["DPN131"]) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/efficientnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/efficientnet.py new file mode 100644 index 0000000..bd0cffa --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/efficientnet.py @@ -0,0 +1,976 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +import math +import collections +import re +import copy + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "EfficientNetB0_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams", + "EfficientNetB0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams", + "EfficientNetB1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams", + "EfficientNetB2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams", + "EfficientNetB3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams", + "EfficientNetB4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams", + "EfficientNetB5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams", + "EfficientNetB6": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams", + "EfficientNetB7": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', 
+ 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple('BlockArgs', [ + 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', + 'expand_ratio', 'id_skip', 'stride', 'se_ratio' +]) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + } + return params_dict[model_name] + + +def efficientnet(width_coefficient=None, + depth_coefficient=None, + dropout_rate=0.2, + drop_connect_rate=0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet( + width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % + model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters, global_params): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, + int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """ Round number of filters based on depth multiplier. """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string): + """ Gets a block through a string notation of arguments. 
""" + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and + (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, 'k%d' % block.kernel_size, 's%d%d' % + (block.strides[0], block.strides[1]), 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """ + Encodes a list of BlockArgs to a list of strings. + + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name, use_bias=False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name="batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name="fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size, stride, filter_size, dilation=1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = { + "b0_small": [224, 112, 112, 56, 28, 14, 14, 7], + "b0": [224, 112, 112, 56, 28, 14, 14, 7], + "b1": [240, 120, 120, 60, 30, 15, 15, 8], + "b2": [260, 130, 130, 65, 33, 17, 17, 9], + "b3": [300, 150, 150, 75, 38, 19, 19, 10], + "b4": [380, 190, 190, 95, 48, 24, 24, 12], + "b5": [456, 228, 228, 114, 57, 29, 29, 15], + "b6": [528, 264, 264, 132, 66, 33, 33, 17], + "b7": [600, 300, 300, 150, 75, 38, 38, 19] +} + + +def _drop_connect(inputs, prob, is_test): + if is_test: + output = inputs + else: + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand( + shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = 
paddle.multiply(inputs, binary_tensor) / keep_prob + return output + + +class Conv2ds(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter_size, + stride=1, + padding=0, + groups=None, + name="conv2d", + act=None, + use_bias=False, + padding_type=None, + model_name=None, + cur_stage=None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[ + model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, + filter_size) + left_padding, right_padding = cal_padding(inps, stride, + filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2D( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + # act=act, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + def __init__(self, + input_channels, + filter_size, + output_channels, + stride=1, + num_groups=1, + padding_type="SAME", + conv_act=None, + bn_act="swish", + use_bn=True, + use_bias=False, + name=None, + conv_name=None, + bn_name=None, + model_name=None, + cur_stage=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + def __init__(self, + input_channels, + block_args, + padding_type, + name=None, + model_name=None, + cur_stage=None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + 
cur_stage=cur_stage) + + def forward(self, inputs): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + def __init__(self, + input_channels, + block_args, + padding_type, + name=None, + model_name=None, + cur_stage=None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + def __init__(self, + input_channels, + block_args, + padding_type, + name=None, + model_name=None, + cur_stage=None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + def __init__(self, + input_channels, + num_squeezed_channels, + oup, + padding_type, + name=None, + model_name=None, + cur_stage=None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2D(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + out = paddle.multiply(inputs, x) + return out + + +class MbConvBlock(nn.Layer): + def __init__(self, + input_channels, + block_args, + padding_type, + use_se, + name=None, + drop_connect_rate=None, + model_name=None, + cur_stage=None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and ( + 0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max( + 1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + x = inputs + if self.expand_ratio != 
1: + x = self._ecn(x) + x = F.swish(x) + + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, not self.training) + x = paddle.add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + def __init__(self, + input_channels, + padding_type, + _global_params, + name=None, + model_name=None, + cur_stage=None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + def __init__(self, + input_channels, + _block_args, + _global_params, + padding_type, + use_se, + model_name=None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, + _global_params), + output_filters=round_filters(block_arg.output_filters, + _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, + _global_params), + output_filters=round_filters(block_args.output_filters, + _global_params), + num_repeat=round_repeats(block_args.num_repeat, + _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace( + input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
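`MbConvBlock.forward` above calls `_drop_connect`, which is defined earlier in this file; as a point of reference, here is a minimal sketch of the standard drop-connect (stochastic depth) computation it performs — the helper below is an illustrative stand-in, not this file's exact implementation:

import paddle

def drop_connect_sketch(x, prob, is_test):
    # Inference keeps the branch untouched: training already rescales
    # the surviving samples by 1 / keep_prob.
    if is_test:
        return x
    keep_prob = 1.0 - prob
    # One Bernoulli draw per sample: floor(keep_prob + U[0, 1)) is 1
    # with probability keep_prob, else 0.
    random_tensor = keep_prob + paddle.rand([x.shape[0], 1, 1, 1])
    binary_mask = paddle.floor(random_tensor)
    return x / keep_prob * binary_mask

Note also that `ExtractFeatures` scales `drop_connect_rate` by `idx / block_size`, so later blocks are dropped more aggressively than early ones.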
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +class EfficientNet(nn.Layer): + def __init__(self, + name="b0", + padding_type="SAME", + override_params=None, + use_se=True, + class_num=1000): + super(EfficientNet, self).__init__() + + model_name = 'efficientnet-' + name + self.name = name + self._block_args, self._global_params = get_model_params( + model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + if name == "b0_small" or name == "b0" or name == "b1": + oup = 320 + elif name == "b2": + oup = 352 + elif name == "b3": + oup = 384 + elif name == "b4": + oup = 448 + elif name == "b5": + oup = 512 + elif name == "b6": + oup = 576 + elif name == "b7": + oup = 640 + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2D(1) + + if self._global_params.dropout_rate: + self._drop = Dropout( + p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear( + output_channels, + class_num, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
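The hard-coded `oup` table above is the last block's output width per variant, i.e. 320 scaled by each model's width coefficient (320 x 1.1 = 352 for b2, 320 x 2.0 = 640 for b7). The scaling itself is done by `round_filters`, defined earlier in this file; a simplified sketch of the usual EfficientNet rounding rule (the real helper also honors `min_depth` and skips scaling when no multiplier is set):

def round_filters_sketch(filters, width_coefficient, depth_divisor=8):
    filters *= width_coefficient
    new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor
    new_filters = max(depth_divisor, new_filters)
    if new_filters < 0.9 * filters:  # never round down by more than 10%
        new_filters += depth_divisor
    return int(new_filters)

print(round_filters_sketch(1280, 1.0))  # 1280 for b0
print(round_filters_sketch(1280, 2.0))  # 2560 for b7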
+ ) + + +def EfficientNetB0_small(padding_type='DYNAMIC', + override_params=None, + use_se=False, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b0', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"], use_ssld=use_ssld) + return model + + +def EfficientNetB0(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b0', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"], use_ssld=use_ssld) + return model + + +def EfficientNetB1(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b1', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"], use_ssld=use_ssld) + return model + + +def EfficientNetB2(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b2', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"], use_ssld=use_ssld) + return model + + +def EfficientNetB3(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b3', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"], use_ssld=use_ssld) + return model + + +def EfficientNetB4(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b4', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"], use_ssld=use_ssld) + return model + + +def EfficientNetB5(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b5', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"], use_ssld=use_ssld) + return model + + +def EfficientNetB6(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b6', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"], use_ssld=use_ssld) + return model + + +def EfficientNetB7(padding_type='SAME', + override_params=None, + use_se=True, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet( + name='b7', + padding_type=padding_type, + override_params=override_params, + use_se=use_se, + **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/ghostnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/ghostnet.py new file mode 100644 index 0000000..4d338c1 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/ghostnet.py @@ -0,0 +1,363 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
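A usage sketch for the builders above (illustrative; downloading pretrained weights requires network access):

import paddle

model = EfficientNetB0(pretrained=False, class_num=1000)
x = paddle.randn([1, 3, 224, 224])
logits = model(x)
print(logits.shape)  # [1, 1000]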
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch + +import math +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear +from paddle.regularizer import L2Decay +from paddle.nn.initializer import Uniform, KaimingNormal + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "GhostNet_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams", + "GhostNet_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams", + "GhostNet_x1_3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + groups=1, + act="relu", + name=None): + super(ConvBNLayer, self).__init__() + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr( + initializer=KaimingNormal(), name=name + "_weights"), + bias_attr=False) + bn_name = name + "_bn" + + self._batch_norm = BatchNorm( + num_channels=out_channels, + act=act, + param_attr=ParamAttr( + name=bn_name + "_scale", regularizer=L2Decay(0.0)), + bias_attr=ParamAttr( + name=bn_name + "_offset", regularizer=L2Decay(0.0)), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class SEBlock(nn.Layer): + def __init__(self, num_channels, reduction_ratio=4, name=None): + super(SEBlock, self).__init__() + self.pool2d_gap = AdaptiveAvgPool2D(1) + self._num_channels = num_channels + stdv = 1.0 / math.sqrt(num_channels * 1.0) + med_ch = num_channels // reduction_ratio + self.squeeze = Linear( + num_channels, + med_ch, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_channels, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs): + pool = self.pool2d_gap(inputs) + pool = paddle.squeeze(pool, axis=[2, 3]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = paddle.clip(x=excitation, min=0, max=1) + excitation = paddle.unsqueeze(excitation, axis=[2, 3]) + out = paddle.multiply(inputs, excitation) + return out + + +class GhostModule(nn.Layer): + def __init__(self, + in_channels, + output_channels, + kernel_size=1, + ratio=2, + dw_size=3, + 
stride=1, + relu=True, + name=None): + super(GhostModule, self).__init__() + init_channels = int(math.ceil(output_channels / ratio)) + new_channels = int(init_channels * (ratio - 1)) + self.primary_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=init_channels, + kernel_size=kernel_size, + stride=stride, + groups=1, + act="relu" if relu else None, + name=name + "_primary_conv") + self.cheap_operation = ConvBNLayer( + in_channels=init_channels, + out_channels=new_channels, + kernel_size=dw_size, + stride=1, + groups=init_channels, + act="relu" if relu else None, + name=name + "_cheap_operation") + + def forward(self, inputs): + x = self.primary_conv(inputs) + y = self.cheap_operation(x) + out = paddle.concat([x, y], axis=1) + return out + + +class GhostBottleneck(nn.Layer): + def __init__(self, + in_channels, + hidden_dim, + output_channels, + kernel_size, + stride, + use_se, + name=None): + super(GhostBottleneck, self).__init__() + self._stride = stride + self._use_se = use_se + self._num_channels = in_channels + self._output_channels = output_channels + self.ghost_module_1 = GhostModule( + in_channels=in_channels, + output_channels=hidden_dim, + kernel_size=1, + stride=1, + relu=True, + name=name + "_ghost_module_1") + if stride == 2: + self.depthwise_conv = ConvBNLayer( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=kernel_size, + stride=stride, + groups=hidden_dim, + act=None, + name=name + + "_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. + ) + if use_se: + self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se") + self.ghost_module_2 = GhostModule( + in_channels=hidden_dim, + output_channels=output_channels, + kernel_size=1, + relu=False, + name=name + "_ghost_module_2") + if stride != 1 or in_channels != output_channels: + self.shortcut_depthwise = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + stride=stride, + groups=in_channels, + act=None, + name=name + + "_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later. 
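The channel bookkeeping behind `GhostModule` above: the primary 1x1 conv produces ceil(output_channels / ratio) features and the cheap depthwise conv expands them by a factor of (ratio - 1), so the concat yields init_channels * ratio features. A quick check (illustrative; the widths GhostNet passes in are pre-rounded by `_make_divisible`, so the concat lands exactly on the requested width):

import math

def ghost_channels(output_channels, ratio=2):
    init_channels = int(math.ceil(output_channels / ratio))
    new_channels = int(init_channels * (ratio - 1))
    return init_channels, new_channels

print(ghost_channels(16))  # (8, 8) -> concat gives exactly 16
print(ghost_channels(15))  # (8, 8) -> 16; an odd width would overshoot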
+ ) + self.shortcut_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=output_channels, + kernel_size=1, + stride=1, + groups=1, + act=None, + name=name + "_shortcut_conv") + + def forward(self, inputs): + x = self.ghost_module_1(inputs) + if self._stride == 2: + x = self.depthwise_conv(x) + if self._use_se: + x = self.se_block(x) + x = self.ghost_module_2(x) + if self._stride == 1 and self._num_channels == self._output_channels: + shortcut = inputs + else: + shortcut = self.shortcut_depthwise(inputs) + shortcut = self.shortcut_conv(shortcut) + return paddle.add(x=x, y=shortcut) + + +class GhostNet(nn.Layer): + def __init__(self, scale, class_num=1000): + super(GhostNet, self).__init__() + self.cfgs = [ + # k, t, c, SE, s + [3, 16, 16, 0, 1], + [3, 48, 24, 0, 2], + [3, 72, 24, 0, 1], + [5, 72, 40, 1, 2], + [5, 120, 40, 1, 1], + [3, 240, 80, 0, 2], + [3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 1, 1], + [3, 672, 112, 1, 1], + [5, 672, 160, 1, 2], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1] + ] + self.scale = scale + output_channels = int(self._make_divisible(16 * self.scale, 4)) + self.conv1 = ConvBNLayer( + in_channels=3, + out_channels=output_channels, + kernel_size=3, + stride=2, + groups=1, + act="relu", + name="conv1") + # build inverted residual blocks + idx = 0 + self.ghost_bottleneck_list = [] + for k, exp_size, c, use_se, s in self.cfgs: + in_channels = output_channels + output_channels = int(self._make_divisible(c * self.scale, 4)) + hidden_dim = int(self._make_divisible(exp_size * self.scale, 4)) + ghost_bottleneck = self.add_sublayer( + name="_ghostbottleneck_" + str(idx), + sublayer=GhostBottleneck( + in_channels=in_channels, + hidden_dim=hidden_dim, + output_channels=output_channels, + kernel_size=k, + stride=s, + use_se=use_se, + name="_ghostbottleneck_" + str(idx))) + self.ghost_bottleneck_list.append(ghost_bottleneck) + idx += 1 + # build last several layers + in_channels = output_channels + output_channels = int(self._make_divisible(exp_size * self.scale, 4)) + self.conv_last = ConvBNLayer( + in_channels=in_channels, + out_channels=output_channels, + kernel_size=1, + stride=1, + groups=1, + act="relu", + name="conv_last") + self.pool2d_gap = AdaptiveAvgPool2D(1) + in_channels = output_channels + self._fc0_output_channels = 1280 + self.fc_0 = ConvBNLayer( + in_channels=in_channels, + out_channels=self._fc0_output_channels, + kernel_size=1, + stride=1, + act="relu", + name="fc_0") + self.dropout = nn.Dropout(p=0.2) + stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0) + self.fc_1 = Linear( + self._fc0_output_channels, + class_num, + weight_attr=ParamAttr( + name="fc_1_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_1_offset")) + + def forward(self, inputs): + x = self.conv1(inputs) + for ghost_bottleneck in self.ghost_bottleneck_list: + x = ghost_bottleneck(x) + x = self.conv_last(x) + x = self.pool2d_gap(x) + x = self.fc_0(x) + x = self.dropout(x) + x = paddle.reshape(x, shape=[-1, self._fc0_output_channels]) + x = self.fc_1(x) + return x + + def _make_divisible(self, v, divisor, min_value=None): + """ + This function is taken from the original tf repo. 
+ It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs): + model = GhostNet(scale=0.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld) + return model + + +def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs): + model = GhostNet(scale=1.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld) + return model + + +def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs): + model = GhostNet(scale=1.3, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/googlenet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/googlenet.py new file mode 100644 index 0000000..2252842 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/googlenet.py @@ -0,0 +1,229 @@ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "GoogLeNet": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +def xavier(channels, filter_size, name): + stdv = (3.0 / (filter_size**2 * channels))**0.5 + param_attr = ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_weights") + return param_attr + + +class ConvLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + def forward(self, inputs): + y = self._conv(inputs) + return y + + +class Inception(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter1, + filter3R, + filter3, + filter5R, + filter5, + proj, + name=None): + super(Inception, self).__init__() + + self._conv1 = ConvLayer( + input_channels, filter1, 1, name="inception_" + name + "_1x1") + self._conv3r = ConvLayer( + input_channels, + filter3R, + 1, + name="inception_" + name + "_3x3_reduce") + self._conv3 = ConvLayer( + filter3R, filter3, 3, name="inception_" + name + "_3x3") + self._conv5r = ConvLayer( + input_channels, + filter5R, + 1, + name="inception_" + name + 
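Each `Inception` block defined here concatenates four branches (1x1, 3x3, 5x5, and the pooling projection), so its output width is filter1 + filter3 + filter5 + proj; the `output_channels` argument is carried in the signature but never used. A quick check against the stage widths wired up in `GoogLeNetDY` below:

def inception_width(filter1, filter3, filter5, proj):
    return filter1 + filter3 + filter5 + proj

assert inception_width(64, 128, 32, 32) == 256   # ince3a output
assert inception_width(128, 192, 96, 64) == 480  # ince3b output -> ince4a input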
"_5x5_reduce") + self._conv5 = ConvLayer( + filter5R, filter5, 5, name="inception_" + name + "_5x5") + self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1) + + self._convprj = ConvLayer( + input_channels, proj, 1, name="inception_" + name + "_3x3_proj") + + def forward(self, inputs): + conv1 = self._conv1(inputs) + + conv3r = self._conv3r(inputs) + conv3 = self._conv3(conv3r) + + conv5r = self._conv5r(inputs) + conv5 = self._conv5(conv5r) + + pool = self._pool(inputs) + convprj = self._convprj(pool) + + cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1) + cat = F.relu(cat) + return cat + + +class GoogLeNetDY(nn.Layer): + def __init__(self, class_num=1000): + super(GoogLeNetDY, self).__init__() + self._conv = ConvLayer(3, 64, 7, 2, name="conv1") + self._pool = MaxPool2D(kernel_size=3, stride=2) + self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1") + self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3") + + self._ince3a = Inception( + 192, 192, 64, 96, 128, 16, 32, 32, name="ince3a") + self._ince3b = Inception( + 256, 256, 128, 128, 192, 32, 96, 64, name="ince3b") + + self._ince4a = Inception( + 480, 480, 192, 96, 208, 16, 48, 64, name="ince4a") + self._ince4b = Inception( + 512, 512, 160, 112, 224, 24, 64, 64, name="ince4b") + self._ince4c = Inception( + 512, 512, 128, 128, 256, 24, 64, 64, name="ince4c") + self._ince4d = Inception( + 512, 512, 112, 144, 288, 32, 64, 64, name="ince4d") + self._ince4e = Inception( + 528, 528, 256, 160, 320, 32, 128, 128, name="ince4e") + + self._ince5a = Inception( + 832, 832, 256, 160, 320, 32, 128, 128, name="ince5a") + self._ince5b = Inception( + 832, 832, 384, 192, 384, 48, 128, 128, name="ince5b") + + self._pool_5 = AdaptiveAvgPool2D(1) + + self._drop = Dropout(p=0.4, mode="downscale_in_infer") + self._fc_out = Linear( + 1024, + class_num, + weight_attr=xavier(1024, 1, "out"), + bias_attr=ParamAttr(name="out_offset")) + self._pool_o1 = AvgPool2D(kernel_size=5, stride=3) + self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1") + self._fc_o1 = Linear( + 1152, + 1024, + weight_attr=xavier(2048, 1, "fc_o1"), + bias_attr=ParamAttr(name="fc_o1_offset")) + self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer") + self._out1 = Linear( + 1024, + class_num, + weight_attr=xavier(1024, 1, "out1"), + bias_attr=ParamAttr(name="out1_offset")) + self._pool_o2 = AvgPool2D(kernel_size=5, stride=3) + self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2") + self._fc_o2 = Linear( + 1152, + 1024, + weight_attr=xavier(2048, 1, "fc_o2"), + bias_attr=ParamAttr(name="fc_o2_offset")) + self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer") + self._out2 = Linear( + 1024, + class_num, + weight_attr=xavier(1024, 1, "out2"), + bias_attr=ParamAttr(name="out2_offset")) + + def forward(self, inputs): + x = self._conv(inputs) + x = self._pool(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._pool(x) + + x = self._ince3a(x) + x = self._ince3b(x) + x = self._pool(x) + + ince4a = self._ince4a(x) + x = self._ince4b(ince4a) + x = self._ince4c(x) + ince4d = self._ince4d(x) + x = self._ince4e(ince4d) + x = self._pool(x) + + x = self._ince5a(x) + ince5b = self._ince5b(x) + + x = self._pool_5(ince5b) + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + out = self._fc_out(x) + + x = self._pool_o1(ince4a) + x = self._conv_o1(x) + x = paddle.flatten(x, start_axis=1, stop_axis=-1) + x = self._fc_o1(x) + x = F.relu(x) + x = self._drop_o1(x) + out1 = self._out1(x) + + x = self._pool_o2(ince4d) + x = self._conv_o2(x) + x = paddle.flatten(x, start_axis=1, 
stop_axis=-1) + x = self._fc_o2(x) + x = self._drop_o2(x) + out2 = self._out2(x) + return [out, out1, out2] + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def GoogLeNet(pretrained=False, use_ssld=False, **kwargs): + model = GoogLeNetDY(**kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/gvt.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/gvt.py new file mode 100644 index 0000000..2af7ccf --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/gvt.py @@ -0,0 +1,693 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/Meituan-AutoML/Twins + +from functools import partial + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.regularizer import L2Decay + +from .vision_transformer import trunc_normal_, normal_, zeros_, ones_, to_2tuple, DropPath, Identity, Mlp +from .vision_transformer import Block as ViTBlock + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "pcpvt_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams", + "pcpvt_base": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams", + "pcpvt_large": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams", + "alt_gvt_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams", + "alt_gvt_base": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams", + "alt_gvt_large": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class GroupAttention(nn.Layer): + """LSA: self attention within a group. 
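`GoogLeNetDY.forward` above returns `[out, out1, out2]` — the main logits plus two auxiliary heads — so training code has to fold the auxiliaries into the loss. A hedged sketch: the 0.3 weighting follows the original GoogLeNet paper, while PaddleClas itself configures this through its loss settings:

import paddle.nn.functional as F

def googlenet_loss(outputs, label, aux_weight=0.3):
    out, out1, out2 = outputs
    # The auxiliary classifiers only regularize training;
    # inference uses `out` alone.
    loss = F.cross_entropy(out, label)
    loss = loss + aux_weight * F.cross_entropy(out1, label)
    loss = loss + aux_weight * F.cross_entropy(out2, label)
    return loss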
+ """ + + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0., + ws=1): + super().__init__() + if ws == 1: + raise Exception("ws {ws} should not be 1") + if dim % num_heads != 0: + raise Exception( + "dim {dim} should be divided by num_heads {num_heads}.") + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.ws = ws + + def forward(self, x, H, W): + B, N, C = x.shape + h_group, w_group = H // self.ws, W // self.ws + total_groups = h_group * w_group + x = x.reshape([B, h_group, self.ws, w_group, self.ws, C]).transpose( + [0, 1, 3, 2, 4, 5]) + qkv = self.qkv(x).reshape([ + B, total_groups, self.ws**2, 3, self.num_heads, C // self.num_heads + ]).transpose([3, 0, 1, 4, 2, 5]) + q, k, v = qkv[0], qkv[1], qkv[2] + attn = paddle.matmul(q, k.transpose([0, 1, 2, 4, 3])) * self.scale + + attn = nn.Softmax(axis=-1)(attn) + attn = self.attn_drop(attn) + attn = paddle.matmul(attn, v).transpose([0, 1, 3, 2, 4]).reshape( + [B, h_group, w_group, self.ws, self.ws, C]) + + x = attn.transpose([0, 1, 3, 2, 4, 5]).reshape([B, N, C]) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Attention(nn.Layer): + """GSA: using a key to summarize the information for a group to be efficient. + """ + + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0., + sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.q = nn.Linear(dim, dim, bias_attr=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2D( + dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape( + [B, N, self.num_heads, C // self.num_heads]).transpose( + [0, 2, 1, 3]) + + if self.sr_ratio > 1: + x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W]) + tmp_n = H * W // self.sr_ratio**2 + x_ = self.sr(x_).reshape([B, C, tmp_n]).transpose([0, 2, 1]) + x_ = self.norm(x_) + kv = self.kv(x_).reshape( + [B, tmp_n, 2, self.num_heads, C // self.num_heads]).transpose( + [2, 0, 3, 1, 4]) + else: + kv = self.kv(x).reshape( + [B, N, 2, self.num_heads, C // self.num_heads]).transpose( + [2, 0, 3, 1, 4]) + k, v = kv[0], kv[1] + + attn = paddle.matmul(q, k.transpose([0, 1, 3, 2])) * self.scale + attn = nn.Softmax(axis=-1)(attn) + attn = self.attn_drop(attn) + + x = paddle.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, C]) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Layer): + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + sr_ratio=sr_ratio) + self.drop_path = 
DropPath(drop_path) if drop_path > 0. else Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class SBlock(ViTBlock): + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1): + super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop, + attn_drop, drop_path, act_layer, norm_layer) + + def forward(self, x, H, W): + return super().forward(x) + + +class GroupBlock(ViTBlock): + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1, + ws=1): + super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop, + attn_drop, drop_path, act_layer, norm_layer) + del self.attn + if ws == 1: + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, + attn_drop, drop, sr_ratio) + else: + self.attn = GroupAttention(dim, num_heads, qkv_bias, qk_scale, + attn_drop, drop, ws) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Layer): + """ Image to Patch Embedding. + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + if img_size % patch_size != 0: + raise Exception( + f"img_size {img_size} should be divided by patch_size {patch_size}." 
+ ) + + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[ + 1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2D( + in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + self.norm = nn.LayerNorm(embed_dim) + + def forward(self, x): + B, C, H, W = x.shape + x = self.proj(x).flatten(2).transpose([0, 2, 1]) + x = self.norm(x) + H, W = H // self.patch_size[0], W // self.patch_size[1] + return x, (H, W) + + +# borrowed from PVT https://github.com/whai362/PVT.git +class PyramidVisionTransformer(nn.Layer): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + block_cls=Block): + super().__init__() + self.class_num = class_num + self.depths = depths + + # patch_embed + self.patch_embeds = nn.LayerList() + self.pos_embeds = nn.ParameterList() + self.pos_drops = nn.LayerList() + self.blocks = nn.LayerList() + + for i in range(len(depths)): + if i == 0: + self.patch_embeds.append( + PatchEmbed(img_size, patch_size, in_chans, embed_dims[i])) + else: + self.patch_embeds.append( + PatchEmbed(img_size // patch_size // 2**(i - 1), 2, + embed_dims[i - 1], embed_dims[i])) + patch_num = self.patch_embeds[i].num_patches + 1 if i == len( + embed_dims) - 1 else self.patch_embeds[i].num_patches + self.pos_embeds.append( + self.create_parameter( + shape=[1, patch_num, embed_dims[i]], + default_initializer=zeros_)) + self.pos_drops.append(nn.Dropout(p=drop_rate)) + + dpr = [ + x.numpy()[0] + for x in paddle.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + cur = 0 + for k in range(len(depths)): + _block = nn.LayerList([ + block_cls( + dim=embed_dims[k], + num_heads=num_heads[k], + mlp_ratio=mlp_ratios[k], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[k]) for i in range(depths[k]) + ]) + self.blocks.append(_block) + cur += depths[k] + + self.norm = norm_layer(embed_dims[-1]) + + # cls_token + self.cls_token = self.create_parameter( + shape=[1, 1, embed_dims[-1]], + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + # classification head + self.head = nn.Linear(embed_dims[-1], + class_num) if class_num > 0 else Identity() + + # init weights + for pos_emb in self.pos_embeds: + trunc_normal_(pos_emb) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + B = x.shape[0] + for i in range(len(self.depths)): + x, (H, W) = self.patch_embeds[i](x) + if i == len(self.depths) - 1: + cls_tokens = self.cls_token.expand([B, -1, -1]) + x = paddle.concat([cls_tokens, x], axis=1) + x = x + self.pos_embeds[i] + x = self.pos_drops[i](x) + for blk in self.blocks[i]: + x = blk(x, H, W) + if i < len(self.depths) - 1: + x = x.reshape([B, H, W, -1]).transpose( + [0, 3, 1, 2]) + x = self.norm(x) + return x[:, 0] + + def forward(self, x): + x = 
self.forward_features(x) + x = self.head(x) + return x + + +# PEG from https://arxiv.org/abs/2102.10882 +class PosCNN(nn.Layer): + def __init__(self, in_chans, embed_dim=768, s=1): + super().__init__() + self.proj = nn.Sequential( + nn.Conv2D( + in_chans, + embed_dim, + 3, + s, + 1, + bias_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)), + groups=embed_dim, + weight_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)), )) + self.s = s + + def forward(self, x, H, W): + B, N, C = x.shape + feat_token = x + cnn_feat = feat_token.transpose([0, 2, 1]).reshape([B, C, H, W]) + if self.s == 1: + x = self.proj(cnn_feat) + cnn_feat + else: + x = self.proj(cnn_feat) + x = x.flatten(2).transpose([0, 2, 1]) + return x + + +class CPVTV2(PyramidVisionTransformer): + """ + Uses two results from CPVT, PEG and GAP, so the cls token is no longer required. + PEG encodes the absolute position on the fly, which matters when the input resolution + changes between training and inference (e.g. for segmentation or detection). + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + class_num=1000, + embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + block_cls=Block): + super().__init__(img_size, patch_size, in_chans, class_num, embed_dims, + num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate, + attn_drop_rate, drop_path_rate, norm_layer, depths, + sr_ratios, block_cls) + del self.pos_embeds + del self.cls_token + self.pos_block = nn.LayerList( + [PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims]) + self.apply(self._init_weights) + + def _init_weights(self, m): + import math + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + elif isinstance(m, nn.Conv2D): + fan_out = m._kernel_size[0] * m._kernel_size[1] * m._out_channels + fan_out //= m._groups + normal_(0, math.sqrt(2.0 / fan_out))(m.weight) + if m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2D): + ones_(m.weight) + zeros_(m.bias) + + def forward_features(self, x): + B = x.shape[0] + + for i in range(len(self.depths)): + x, (H, W) = self.patch_embeds[i](x) + x = self.pos_drops[i](x) + + for j, blk in enumerate(self.blocks[i]): + x = blk(x, H, W) + if j == 0: + x = self.pos_block[i](x, H, W) # PEG here + + if i < len(self.depths) - 1: + x = x.reshape([B, H, W, x.shape[-1]]).transpose([0, 3, 1, 2]) + + x = self.norm(x) + return x.mean(axis=1) # GAP here + + +class PCPVT(CPVTV2): + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + class_num=1000, + embed_dims=[64, 128, 256], + num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + depths=[4, 4, 4], + sr_ratios=[4, 2, 1], + block_cls=SBlock): + super().__init__(img_size, patch_size, in_chans, class_num, embed_dims, + num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate, + attn_drop_rate, drop_path_rate, norm_layer, depths, + sr_ratios, block_cls) + + +class ALTGVT(PCPVT): + """ + alias Twins-SVT + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + class_num=1000, + embed_dims=[64, 128, 256], + num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 
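`PosCNN` above is the PEG module from the paper linked in the comment: a 3x3 depthwise conv over the re-gridded token map, added back residually when s == 1, so the positional signal is recomputed for whatever resolution arrives. A small shape check (assumes the definitions above are in scope):

import paddle

B, H, W, C = 2, 14, 14, 64
tokens = paddle.randn([B, H * W, C])
peg = PosCNN(in_chans=C, embed_dim=C)
out = peg(tokens, H, W)
print(out.shape)  # [2, 196, 64] -- token layout unchanged, now position-aware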
4], + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + depths=[4, 4, 4], + sr_ratios=[4, 2, 1], + block_cls=GroupBlock, + wss=[7, 7, 7]): + super().__init__(img_size, patch_size, in_chans, class_num, embed_dims, + num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate, + attn_drop_rate, drop_path_rate, norm_layer, depths, + sr_ratios, block_cls) + del self.blocks + self.wss = wss + # transformer encoder + dpr = [ + x.numpy()[0] + for x in paddle.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + self.blocks = nn.LayerList() + for k in range(len(depths)): + _block = nn.LayerList([ + block_cls( + dim=embed_dims[k], + num_heads=num_heads[k], + mlp_ratio=mlp_ratios[k], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + i], + norm_layer=norm_layer, + sr_ratio=sr_ratios[k], + ws=1 if i % 2 == 1 else wss[k]) for i in range(depths[k]) + ]) + self.blocks.append(_block) + cur += depths[k] + self.apply(self._init_weights) + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def pcpvt_small(pretrained=False, use_ssld=False, **kwargs): + model = CPVTV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld) + return model + + +def pcpvt_base(pretrained=False, use_ssld=False, **kwargs): + model = CPVTV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld) + return model + + +def pcpvt_large(pretrained=False, use_ssld=False, **kwargs): + model = CPVTV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld) + return model + + +def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs): + model = ALTGVT( + patch_size=4, + embed_dims=[64, 128, 256, 512], + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 10, 4], + wss=[7, 7, 7, 7], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld) + return model + + +def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs): + model = ALTGVT( + patch_size=4, + embed_dims=[96, 192, 384, 768], + num_heads=[3, 6, 12, 24], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 18, 2], + wss=[7, 7, 7, 7], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + 
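The `ws=1 if i % 2 == 1 else wss[k]` line above is the core of Twins-SVT: within every stage, even-indexed blocks run windowed local attention (LSA, ws = 7) and odd-indexed blocks run globally sub-sampled attention (GSA, since ws == 1 routes `GroupBlock` to `Attention`). The resulting schedule, for illustration:

depths = [2, 2, 10, 4]  # alt_gvt_small
for k, d in enumerate(depths):
    schedule = ["GSA" if i % 2 == 1 else "LSA(ws=7)" for i in range(d)]
    print(f"stage {k}: {schedule}")
# every stage alternates LSA, GSA, LSA, GSA, ...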
pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld) + return model + + +def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs): + model = ALTGVT( + patch_size=4, + embed_dims=[128, 256, 512, 1024], + num_heads=[4, 8, 16, 32], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 18, 2], + wss=[7, 7, 7, 7], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/hardnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/hardnet.py new file mode 100644 index 0000000..fffd3a4 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/hardnet.py @@ -0,0 +1,293 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/PingoLH/Pytorch-HarDNet + +import paddle +import paddle.nn as nn + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + 'HarDNet39_ds': + 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams', + 'HarDNet68_ds': + 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams', + 'HarDNet68': + 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams', + 'HarDNet85': + 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams' +} + +__all__ = MODEL_URLS.keys() + + +def ConvLayer(in_channels, + out_channels, + kernel_size=3, + stride=1, + bias_attr=False): + layer = nn.Sequential( + ('conv', nn.Conv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=1, + bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)), + ('relu', nn.ReLU6())) + return layer + + +def DWConvLayer(in_channels, + out_channels, + kernel_size=3, + stride=1, + bias_attr=False): + layer = nn.Sequential( + ('dwconv', nn.Conv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=1, + groups=out_channels, + bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels))) + return layer + + +def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1): + layer = nn.Sequential( + ('layer1', ConvLayer( + in_channels, out_channels, kernel_size=kernel_size)), + ('layer2', DWConvLayer( + out_channels, out_channels, stride=stride))) + return layer + + +class HarDBlock(nn.Layer): + def __init__(self, + in_channels, + growth_rate, + grmul, + n_layers, + keepBase=False, + residual_out=False, + dwconv=False): + super().__init__() + self.keepBase = keepBase + self.links = [] + layers_ = [] + self.out_channels = 0 # if upsample else in_channels + for i in range(n_layers): + outch, inch, link = self.get_link(i + 1, in_channels, growth_rate, + grmul) + self.links.append(link) + if dwconv: + 
layers_.append(CombConvLayer(inch, outch)) + else: + layers_.append(ConvLayer(inch, outch)) + + if (i % 2 == 0) or (i == n_layers - 1): + self.out_channels += outch + # print("Blk out =",self.out_channels) + self.layers = nn.LayerList(layers_) + + def get_link(self, layer, base_ch, growth_rate, grmul): + if layer == 0: + return base_ch, 0, [] + out_channels = growth_rate + + link = [] + for i in range(10): + dv = 2**i + if layer % dv == 0: + k = layer - dv + link.append(k) + if i > 0: + out_channels *= grmul + + out_channels = int(int(out_channels + 1) / 2) * 2 + in_channels = 0 + + for i in link: + ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul) + in_channels += ch + + return out_channels, in_channels, link + + def forward(self, x): + layers_ = [x] + + for layer in range(len(self.layers)): + link = self.links[layer] + tin = [] + for i in link: + tin.append(layers_[i]) + if len(tin) > 1: + x = paddle.concat(tin, 1) + else: + x = tin[0] + out = self.layers[layer](x) + layers_.append(out) + + t = len(layers_) + out_ = [] + for i in range(t): + if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1): + out_.append(layers_[i]) + out = paddle.concat(out_, 1) + + return out + + +class HarDNet(nn.Layer): + def __init__(self, + depth_wise=False, + arch=85, + class_num=1000, + with_pool=True): + super().__init__() + first_ch = [32, 64] + second_kernel = 3 + max_pool = True + grmul = 1.7 + drop_rate = 0.1 + + # HarDNet68 + ch_list = [128, 256, 320, 640, 1024] + gr = [14, 16, 20, 40, 160] + n_layers = [8, 16, 16, 16, 4] + downSamp = [1, 0, 1, 1, 0] + + if arch == 85: + # HarDNet85 + first_ch = [48, 96] + ch_list = [192, 256, 320, 480, 720, 1280] + gr = [24, 24, 28, 36, 48, 256] + n_layers = [8, 16, 16, 16, 16, 4] + downSamp = [1, 0, 1, 0, 1, 0] + drop_rate = 0.2 + + elif arch == 39: + # HarDNet39 + first_ch = [24, 48] + ch_list = [96, 320, 640, 1024] + grmul = 1.6 + gr = [16, 20, 64, 160] + n_layers = [4, 16, 8, 4] + downSamp = [1, 1, 1, 0] + + if depth_wise: + second_kernel = 1 + max_pool = False + drop_rate = 0.05 + + blks = len(n_layers) + self.base = nn.LayerList([]) + + # First Layer: Standard Conv3x3, Stride=2 + self.base.append( + ConvLayer( + in_channels=3, + out_channels=first_ch[0], + kernel_size=3, + stride=2, + bias_attr=False)) + + # Second Layer + self.base.append( + ConvLayer( + first_ch[0], first_ch[1], kernel_size=second_kernel)) + + # Maxpooling or DWConv3x3 downsampling + if max_pool: + self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1)) + else: + self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2)) + + # Build all HarDNet blocks + ch = first_ch[1] + for i in range(blks): + blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise) + ch = blk.out_channels + self.base.append(blk) + + if i == blks - 1 and arch == 85: + self.base.append(nn.Dropout(0.1)) + + self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1)) + ch = ch_list[i] + if downSamp[i] == 1: + if max_pool: + self.base.append(nn.MaxPool2D(kernel_size=2, stride=2)) + else: + self.base.append(DWConvLayer(ch, ch, stride=2)) + + ch = ch_list[blks - 1] + + layers = [] + + if with_pool: + layers.append(nn.AdaptiveAvgPool2D((1, 1))) + + if class_num > 0: + layers.append(nn.Flatten()) + layers.append(nn.Dropout(drop_rate)) + layers.append(nn.Linear(ch, class_num)) + + self.base.append(nn.Sequential(*layers)) + + def forward(self, x): + for layer in self.base: + x = layer(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is 
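`HarDBlock.get_link` above connects layer L to the earlier layers at power-of-two distances (L - 1, L - 2, L - 4, ...) and widens the output by `grmul` for every extra hop, rounding to an even channel count. A standalone trace of that rule for the first block of HarDNet68 (growth_rate 14, grmul 1.7):

def trace_links(n_layers, growth_rate=14, grmul=1.7):
    # Re-derives the HarDBlock link pattern for inspection.
    for layer in range(1, n_layers + 1):
        out, link = growth_rate, []
        for i in range(10):
            dv = 2**i
            if layer % dv == 0:
                link.append(layer - dv)
                if i > 0:
                    out *= grmul
        print(layer, link, int(int(out + 1) / 2) * 2)

trace_links(4)
# 1 [0] 14
# 2 [1, 0] 24
# 3 [2] 14
# 4 [3, 2, 0] 40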
False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def HarDNet39_ds(pretrained=False, **kwargs): + model = HarDNet(arch=39, depth_wise=True, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"]) + return model + + +def HarDNet68_ds(pretrained=False, **kwargs): + model = HarDNet(arch=68, depth_wise=True, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"]) + return model + + +def HarDNet68(pretrained=False, **kwargs): + model = HarDNet(arch=68, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"]) + return model + + +def HarDNet85(pretrained=False, **kwargs): + model = HarDNet(arch=85, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"]) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/inception_v4.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/inception_v4.py new file mode 100644 index 0000000..e0460d4 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/inception_v4.py @@ -0,0 +1,477 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "InceptionV4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + padding=0, + groups=1, + act='relu', + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + "_bn" + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class InceptionStem(nn.Layer): + def __init__(self): + super(InceptionStem, self).__init__() + self._conv_1 = ConvBNLayer( + 3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2") + self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1") + self._conv_3 = ConvBNLayer( + 32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1") + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self._conv2 = ConvBNLayer( + 64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2") + self._conv1_1 = ConvBNLayer( + 160, 64, 1, act="relu", name="inception_stem2_3x3_reduce") + self._conv1_2 = ConvBNLayer( + 64, 96, 3, act="relu", name="inception_stem2_3x3") + self._conv2_1 = ConvBNLayer( + 160, 64, 1, act="relu", name="inception_stem2_1x7_reduce") + self._conv2_2 = ConvBNLayer( + 64, + 64, (7, 1), + padding=(3, 0), + act="relu", + name="inception_stem2_1x7") + self._conv2_3 = ConvBNLayer( + 64, + 64, (1, 7), + padding=(0, 3), + act="relu", + name="inception_stem2_7x1") + self._conv2_4 = ConvBNLayer( + 64, 96, 3, act="relu", name="inception_stem2_3x3_2") + self._conv3 = ConvBNLayer( + 192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2") + + def forward(self, inputs): + conv = self._conv_1(inputs) + conv = self._conv_2(conv) + conv = self._conv_3(conv) + + pool1 = self._pool(conv) + conv2 = self._conv2(conv) + concat = paddle.concat([pool1, conv2], axis=1) + + conv1 = self._conv1_1(concat) + conv1 = self._conv1_2(conv1) + + conv2 = self._conv2_1(concat) + conv2 = self._conv2_2(conv2) + conv2 = self._conv2_3(conv2) + conv2 = self._conv2_4(conv2) + + concat = paddle.concat([conv1, conv2], axis=1) + + conv1 = self._conv3(concat) + pool1 = self._pool(concat) + + concat = paddle.concat([conv1, pool1], axis=1) + return concat + + +class InceptionA(nn.Layer): + def __init__(self, name): + super(InceptionA, self).__init__() + self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer( + 384, 96, 1, act="relu", name="inception_a" + name + "_1x1") + self._conv2 = ConvBNLayer( + 384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2") + self._conv3_1 = ConvBNLayer( + 384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce") + 
self._conv3_2 = ConvBNLayer( + 64, + 96, + 3, + padding=1, + act="relu", + name="inception_a" + name + "_3x3") + self._conv4_1 = ConvBNLayer( + 384, + 64, + 1, + act="relu", + name="inception_a" + name + "_3x3_2_reduce") + self._conv4_2 = ConvBNLayer( + 64, + 96, + 3, + padding=1, + act="relu", + name="inception_a" + name + "_3x3_2") + self._conv4_3 = ConvBNLayer( + 96, + 96, + 3, + padding=1, + act="relu", + name="inception_a" + name + "_3x3_3") + + def forward(self, inputs): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + + conv4 = self._conv4_1(inputs) + conv4 = self._conv4_2(conv4) + conv4 = self._conv4_3(conv4) + + concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) + return concat + + +class ReductionA(nn.Layer): + def __init__(self): + super(ReductionA, self).__init__() + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self._conv2 = ConvBNLayer( + 384, 384, 3, stride=2, act="relu", name="reduction_a_3x3") + self._conv3_1 = ConvBNLayer( + 384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce") + self._conv3_2 = ConvBNLayer( + 192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2") + self._conv3_3 = ConvBNLayer( + 224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3") + + def forward(self, inputs): + pool1 = self._pool(inputs) + conv2 = self._conv2(inputs) + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + concat = paddle.concat([pool1, conv2, conv3], axis=1) + return concat + + +class InceptionB(nn.Layer): + def __init__(self, name=None): + super(InceptionB, self).__init__() + self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer( + 1024, 128, 1, act="relu", name="inception_b" + name + "_1x1") + self._conv2 = ConvBNLayer( + 1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2") + self._conv3_1 = ConvBNLayer( + 1024, + 192, + 1, + act="relu", + name="inception_b" + name + "_1x7_reduce") + self._conv3_2 = ConvBNLayer( + 192, + 224, (1, 7), + padding=(0, 3), + act="relu", + name="inception_b" + name + "_1x7") + self._conv3_3 = ConvBNLayer( + 224, + 256, (7, 1), + padding=(3, 0), + act="relu", + name="inception_b" + name + "_7x1") + self._conv4_1 = ConvBNLayer( + 1024, + 192, + 1, + act="relu", + name="inception_b" + name + "_7x1_2_reduce") + self._conv4_2 = ConvBNLayer( + 192, + 192, (1, 7), + padding=(0, 3), + act="relu", + name="inception_b" + name + "_1x7_2") + self._conv4_3 = ConvBNLayer( + 192, + 224, (7, 1), + padding=(3, 0), + act="relu", + name="inception_b" + name + "_7x1_2") + self._conv4_4 = ConvBNLayer( + 224, + 224, (1, 7), + padding=(0, 3), + act="relu", + name="inception_b" + name + "_1x7_3") + self._conv4_5 = ConvBNLayer( + 224, + 256, (7, 1), + padding=(3, 0), + act="relu", + name="inception_b" + name + "_7x1_3") + + def forward(self, inputs): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + + conv4 = self._conv4_1(inputs) + conv4 = self._conv4_2(conv4) + conv4 = self._conv4_3(conv4) + conv4 = self._conv4_4(conv4) + conv4 = self._conv4_5(conv4) + + concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) + return concat + + +class ReductionB(nn.Layer): + def __init__(self): + super(ReductionB, self).__init__() + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self._conv2_1 = ConvBNLayer( + 
1024, 192, 1, act="relu", name="reduction_b_3x3_reduce") + self._conv2_2 = ConvBNLayer( + 192, 192, 3, stride=2, act="relu", name="reduction_b_3x3") + self._conv3_1 = ConvBNLayer( + 1024, 256, 1, act="relu", name="reduction_b_1x7_reduce") + self._conv3_2 = ConvBNLayer( + 256, + 256, (1, 7), + padding=(0, 3), + act="relu", + name="reduction_b_1x7") + self._conv3_3 = ConvBNLayer( + 256, + 320, (7, 1), + padding=(3, 0), + act="relu", + name="reduction_b_7x1") + self._conv3_4 = ConvBNLayer( + 320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2") + + def forward(self, inputs): + pool1 = self._pool(inputs) + + conv2 = self._conv2_1(inputs) + conv2 = self._conv2_2(conv2) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + conv3 = self._conv3_4(conv3) + + concat = paddle.concat([pool1, conv2, conv3], axis=1) + + return concat + + +class InceptionC(nn.Layer): + def __init__(self, name=None): + super(InceptionC, self).__init__() + self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer( + 1536, 256, 1, act="relu", name="inception_c" + name + "_1x1") + self._conv2 = ConvBNLayer( + 1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2") + self._conv3_0 = ConvBNLayer( + 1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3") + self._conv3_1 = ConvBNLayer( + 384, + 256, (1, 3), + padding=(0, 1), + act="relu", + name="inception_c" + name + "_1x3") + self._conv3_2 = ConvBNLayer( + 384, + 256, (3, 1), + padding=(1, 0), + act="relu", + name="inception_c" + name + "_3x1") + self._conv4_0 = ConvBNLayer( + 1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4") + self._conv4_00 = ConvBNLayer( + 384, + 448, (1, 3), + padding=(0, 1), + act="relu", + name="inception_c" + name + "_1x3_2") + self._conv4_000 = ConvBNLayer( + 448, + 512, (3, 1), + padding=(1, 0), + act="relu", + name="inception_c" + name + "_3x1_2") + self._conv4_1 = ConvBNLayer( + 512, + 256, (1, 3), + padding=(0, 1), + act="relu", + name="inception_c" + name + "_1x3_3") + self._conv4_2 = ConvBNLayer( + 512, + 256, (3, 1), + padding=(1, 0), + act="relu", + name="inception_c" + name + "_3x1_3") + + def forward(self, inputs): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_0(inputs) + conv3_1 = self._conv3_1(conv3) + conv3_2 = self._conv3_2(conv3) + + conv4 = self._conv4_0(inputs) + conv4 = self._conv4_00(conv4) + conv4 = self._conv4_000(conv4) + conv4_1 = self._conv4_1(conv4) + conv4_2 = self._conv4_2(conv4) + + concat = paddle.concat( + [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1) + + return concat + + +class InceptionV4DY(nn.Layer): + def __init__(self, class_num=1000): + super(InceptionV4DY, self).__init__() + self._inception_stem = InceptionStem() + + self._inceptionA_1 = InceptionA(name="1") + self._inceptionA_2 = InceptionA(name="2") + self._inceptionA_3 = InceptionA(name="3") + self._inceptionA_4 = InceptionA(name="4") + self._reductionA = ReductionA() + + self._inceptionB_1 = InceptionB(name="1") + self._inceptionB_2 = InceptionB(name="2") + self._inceptionB_3 = InceptionB(name="3") + self._inceptionB_4 = InceptionB(name="4") + self._inceptionB_5 = InceptionB(name="5") + self._inceptionB_6 = InceptionB(name="6") + self._inceptionB_7 = InceptionB(name="7") + self._reductionB = ReductionB() + + self._inceptionC_1 = InceptionC(name="1") + self._inceptionC_2 = InceptionC(name="2") + self._inceptionC_3 = InceptionC(name="3") + + self.avg_pool = 
AdaptiveAvgPool2D(1) + self._drop = Dropout(p=0.2, mode="downscale_in_infer") + stdv = 1.0 / math.sqrt(1536 * 1.0) + self.out = Linear( + 1536, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="final_fc_weights"), + bias_attr=ParamAttr(name="final_fc_offset")) + + def forward(self, inputs): + x = self._inception_stem(inputs) + + x = self._inceptionA_1(x) + x = self._inceptionA_2(x) + x = self._inceptionA_3(x) + x = self._inceptionA_4(x) + x = self._reductionA(x) + + x = self._inceptionB_1(x) + x = self._inceptionB_2(x) + x = self._inceptionB_3(x) + x = self._inceptionB_4(x) + x = self._inceptionB_5(x) + x = self._inceptionB_6(x) + x = self._inceptionB_7(x) + x = self._reductionB(x) + + x = self._inceptionC_1(x) + x = self._inceptionC_2(x) + x = self._inceptionC_3(x) + + x = self.avg_pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._drop(x) + x = self.out(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def InceptionV4(pretrained=False, use_ssld=False, **kwargs): + model = InceptionV4DY(**kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/levit.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/levit.py new file mode 100644 index 0000000..991f832 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/levit.py @@ -0,0 +1,589 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
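Every backbone file in this patch closes with the same loading convention seen in the InceptionV4 factory above: `_load_pretrained` accepts `False` (random initialization), `True` (download the weights registered in `MODEL_URLS`), or a string path to a local `.pdparams` file, and anything else raises a `RuntimeError`. A minimal usage sketch, assuming `src/PaddleClas` is on `PYTHONPATH`; the module path and the 299x299 input size are illustrative assumptions, not part of this diff:

import paddle
# assumed module path for the InceptionV4 file shown above
from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4

model = InceptionV4(pretrained=False, class_num=1000)  # pretrained=True would fetch MODEL_URLS["InceptionV4"]
model.eval()
x = paddle.randn([1, 3, 299, 299])  # Inception-v4 is conventionally evaluated at 299x299
logits = model(x)  # shape [1, 1000]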
+ +# Code was based on https://github.com/facebookresearch/LeViT + +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import TruncatedNormal, Constant +from paddle.regularizer import L2Decay + +from .vision_transformer import trunc_normal_, zeros_, ones_, Identity + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "LeViT_128S": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams", + "LeViT_128": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams", + "LeViT_192": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams", + "LeViT_256": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams", + "LeViT_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + def __init__(self, + a, + b, + ks=1, + stride=1, + pad=0, + dilation=1, + groups=1, + bn_weight_init=1, + resolution=-10000): + super().__init__() + self.add_sublayer( + 'c', + nn.Conv2D( + a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential( + Conv2d_BN( + 3, n // 8, 3, 2, 1, resolution=resolution), + activation(), + Conv2d_BN( + n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), + activation(), + Conv2d_BN( + n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), + activation(), + Conv2d_BN( + n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand( + shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, self.m(x) * y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + def __init__(self, + dim, + key_dim, + num_heads=8, + attn_ratio=4, + activation=None, + resolution=14): + super().__init__() + self.num_heads = 
num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential( + activation(), Linear_BN( + self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter( + shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', + paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, + self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, + [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split( + qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, + self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential( + Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list( + itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = 
{} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), + abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter( + shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', + paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, + self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape( + self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, + self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul( + q, paddle.transpose( + k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape( + paddle.transpose( + paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate( + zip(embed_dim, key_dim, depth, num_heads, attn_ratio, + mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, ), + drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN( + h, ed, bn_weight_init=0), ), + drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample( + *embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN( + h, 
embed_dim[i + 1], bn_weight_init=0), ), + drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], + class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear( + embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def LeViT_128S(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): + model = model_factory( + **specification['LeViT_128S'], + class_num=class_num, + distillation=distillation) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld) + return model + + +def LeViT_128(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): + model = model_factory( + **specification['LeViT_128'], + class_num=class_num, + distillation=distillation) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld) + return model + + +def LeViT_192(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): + model = model_factory( + **specification['LeViT_192'], + class_num=class_num, + distillation=distillation) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld) + return model + + +def LeViT_256(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): + model = model_factory( + **specification['LeViT_256'], + class_num=class_num, + distillation=distillation) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld) + return model + + +def LeViT_384(pretrained=False, + use_ssld=False, + class_num=1000, + distillation=False, + **kwargs): + model = model_factory( + **specification['LeViT_384'], + class_num=class_num, + distillation=distillation) + _load_pretrained( + pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/mixnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/mixnet.py new file mode 100644 index 0000000..c2a1adb --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/mixnet.py @@ -0,0 +1,815 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + MixNet for ImageNet-1K, implemented in Paddle. + Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. +""" + +import os +from inspect import isfunction +from functools import reduce +import paddle +import paddle.nn as nn + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "MixNet_S": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams", + "MixNet_M": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams", + "MixNet_L": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class Identity(nn.Layer): + """ + Identity block. + """ + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, x): + return x + + +def round_channels(channels, divisor=8): + """ + Round weighted channel number (make divisible operation). 
+ + Parameters: + ---------- + channels : int or float + Original number of channels. + divisor : int, default 8 + Alignment value. + + Returns: + ------- + int + Weighted number of channels. + """ + rounded_channels = max( + int(channels + divisor / 2.0) // divisor * divisor, divisor) + if float(rounded_channels) < 0.9 * channels: + rounded_channels += divisor + return rounded_channels + + +def get_activation_layer(activation): + """ + Create activation layer from string/function. + + Parameters: + ---------- + activation : function, or str, or nn.Layer + Activation function or name of activation function. + + Returns: + ------- + nn.Layer + Activation layer. + """ + assert activation is not None + if isfunction(activation): + return activation() + elif isinstance(activation, str): + if activation == "relu": + return nn.ReLU() + elif activation == "relu6": + return nn.ReLU6() + elif activation == "swish": + return nn.Swish() + elif activation == "hswish": + return nn.Hardswish() + elif activation == "sigmoid": + return nn.Sigmoid() + elif activation == "hsigmoid": + return nn.Hardsigmoid() + elif activation == "identity": + return Identity() + else: + raise NotImplementedError() + else: + assert isinstance(activation, nn.Layer) + return activation + + +class ConvBlock(nn.Layer): + """ + Standard convolution block with Batch normalization and activation. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_size : int or tuple/list of 2 int + Convolution window size. + stride : int or tuple/list of 2 int + Strides of the convolution. + padding : int, or tuple/list of 2 int, or tuple/list of 4 int + Padding value for convolution layer. + dilation : int or tuple/list of 2 int, default 1 + Dilation value for convolution layer. + groups : int, default 1 + Number of groups. + bias : bool, default False + Whether the layer uses a bias vector. + use_bn : bool, default True + Whether to use BatchNorm layer. + bn_eps : float, default 1e-5 + Small float added to variance in Batch norm. + activation : function or str or None, default nn.ReLU() + Activation function or name of activation function. 
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=1, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=nn.ReLU()): + super(ConvBlock, self).__init__() + self.activate = (activation is not None) + self.use_bn = use_bn + self.use_pad = (isinstance(padding, (list, tuple)) and + (len(padding) == 4)) + + if self.use_pad: + self.pad = padding + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias_attr=bias, + weight_attr=None) + if self.use_bn: + self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps) + if self.activate: + self.activ = get_activation_layer(activation) + + def forward(self, x): + x = self.conv(x) + if self.use_bn: + x = self.bn(x) + if self.activate: + x = self.activ(x) + return x + + +class SEBlock(nn.Layer): + def __init__(self, + channels, + reduction=16, + mid_channels=None, + round_mid=False, + use_conv=True, + mid_activation=nn.ReLU(), + out_activation=nn.Sigmoid()): + super(SEBlock, self).__init__() + self.use_conv = use_conv + if mid_channels is None: + mid_channels = channels // reduction if not round_mid else round_channels( + float(channels) / reduction) + + self.pool = nn.AdaptiveAvgPool2D(output_size=1) + if use_conv: + self.conv1 = nn.Conv2D( + in_channels=channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + groups=1, + bias_attr=True, + weight_attr=None) + + else: + self.fc1 = nn.Linear( + in_features=channels, out_features=mid_channels) + self.activ = get_activation_layer(mid_activation) + if use_conv: + self.conv2 = nn.Conv2D( + in_channels=mid_channels, + out_channels=channels, + kernel_size=1, + stride=1, + groups=1, + bias_attr=True, + weight_attr=None) + else: + self.fc2 = nn.Linear( + in_features=mid_channels, out_features=channels) + self.sigmoid = get_activation_layer(out_activation) + + def forward(self, x): + w = self.pool(x) + if not self.use_conv: + w = w.reshape(shape=[w.shape[0], -1]) + w = self.conv1(w) if self.use_conv else self.fc1(w) + w = self.activ(w) + w = self.conv2(w) if self.use_conv else self.fc2(w) + w = self.sigmoid(w) + if not self.use_conv: + w = w.unsqueeze(2).unsqueeze(3) + x = x * w + return x + + +class MixConv(nn.Layer): + """ + Mixed convolution layer from 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int + Convolution window size. + stride : int or tuple/list of 2 int + Strides of the convolution. + padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int + Padding value for convolution layer. + dilation : int or tuple/list of 2 int, default 1 + Dilation value for convolution layer. + groups : int, default 1 + Number of groups. + bias : bool, default False + Whether the layer uses a bias vector. + axis : int, default 1 + The axis on which to concatenate the outputs. 
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=1, + groups=1, + bias=False, + axis=1): + super(MixConv, self).__init__() + kernel_size = kernel_size if isinstance(kernel_size, + list) else [kernel_size] + padding = padding if isinstance(padding, list) else [padding] + kernel_count = len(kernel_size) + self.splitted_in_channels = self.split_channels(in_channels, + kernel_count) + splitted_out_channels = self.split_channels(out_channels, kernel_count) + for i, kernel_size_i in enumerate(kernel_size): + in_channels_i = self.splitted_in_channels[i] + out_channels_i = splitted_out_channels[i] + padding_i = padding[i] + _ = self.add_sublayer( + name=str(i), + sublayer=nn.Conv2D( + in_channels=in_channels_i, + out_channels=out_channels_i, + kernel_size=kernel_size_i, + stride=stride, + padding=padding_i, + dilation=dilation, + groups=(out_channels_i + if out_channels == groups else groups), + bias_attr=bias, + weight_attr=None)) + self.axis = axis + + def forward(self, x): + xx = paddle.split(x, self.splitted_in_channels, axis=self.axis) + xx = paddle.split(x, self.splitted_in_channels, axis=self.axis) + out = [ + conv_i(x_i) for x_i, conv_i in zip(xx, self._sub_layers.values()) + ] + x = paddle.concat(tuple(out), axis=self.axis) + return x + + @staticmethod + def split_channels(channels, kernel_count): + splitted_channels = [channels // kernel_count] * kernel_count + splitted_channels[0] += channels - sum(splitted_channels) + return splitted_channels + + +class MixConvBlock(nn.Layer): + """ + Mixed convolution block with Batch normalization and activation. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int + Convolution window size. + stride : int or tuple/list of 2 int + Strides of the convolution. + padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int + Padding value for convolution layer. + dilation : int or tuple/list of 2 int, default 1 + Dilation value for convolution layer. + groups : int, default 1 + Number of groups. + bias : bool, default False + Whether the layer uses a bias vector. + use_bn : bool, default True + Whether to use BatchNorm layer. + bn_eps : float, default 1e-5 + Small float added to variance in Batch norm. + activation : function or str or None, default nn.ReLU() + Activation function or name of activation function. + activate : bool, default True + Whether activate the convolution block. 
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=1, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=nn.ReLU()): + super(MixConvBlock, self).__init__() + self.activate = (activation is not None) + self.use_bn = use_bn + + self.conv = MixConv( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + if self.use_bn: + self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps) + if self.activate: + self.activ = get_activation_layer(activation) + + def forward(self, x): + x = self.conv(x) + if self.use_bn: + x = self.bn(x) + if self.activate: + x = self.activ(x) + return x + + +def mixconv1x1_block(in_channels, + out_channels, + kernel_count, + stride=1, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=nn.ReLU()): + """ + 1x1 version of the mixed convolution block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + kernel_count : int + Kernel count. + stride : int or tuple/list of 2 int, default 1 + Strides of the convolution. + groups : int, default 1 + Number of groups. + bias : bool, default False + Whether the layer uses a bias vector. + use_bn : bool, default True + Whether to use BatchNorm layer. + bn_eps : float, default 1e-5 + Small float added to variance in Batch norm. + activation : function or str, or None, default nn.ReLU() + Activation function or name of activation function. + """ + return MixConvBlock( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=([1] * kernel_count), + stride=stride, + padding=([0] * kernel_count), + groups=groups, + bias=bias, + use_bn=use_bn, + bn_eps=bn_eps, + activation=activation) + + +class MixUnit(nn.Layer): + """ + MixNet unit. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. exp_channels : int + Number of middle (expanded) channels. + stride : int or tuple/list of 2 int + Strides of the second convolution layer. + exp_kernel_count : int + Expansion convolution kernel count for each unit. + conv1_kernel_count : int + Conv1 kernel count for each unit. + conv2_kernel_count : int + Conv2 kernel count for each unit. + exp_factor : int + Expansion factor for each unit. + se_factor : int + SE reduction factor for each unit. + activation : str + Activation function or name of activation function. 
+ """ + + def __init__(self, in_channels, out_channels, stride, exp_kernel_count, + conv1_kernel_count, conv2_kernel_count, exp_factor, se_factor, + activation): + super(MixUnit, self).__init__() + assert exp_factor >= 1 + assert se_factor >= 0 + self.residual = (in_channels == out_channels) and (stride == 1) + self.use_se = se_factor > 0 + mid_channels = exp_factor * in_channels + self.use_exp_conv = exp_factor > 1 + + if self.use_exp_conv: + if exp_kernel_count == 1: + self.exp_conv = ConvBlock( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=activation) + else: + self.exp_conv = mixconv1x1_block( + in_channels=in_channels, + out_channels=mid_channels, + kernel_count=exp_kernel_count, + activation=activation) + if conv1_kernel_count == 1: + self.conv1 = ConvBlock( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=3, + stride=stride, + padding=1, + dilation=1, + groups=mid_channels, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=activation) + else: + self.conv1 = MixConvBlock( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=[3 + 2 * i for i in range(conv1_kernel_count)], + stride=stride, + padding=[1 + i for i in range(conv1_kernel_count)], + groups=mid_channels, + activation=activation) + if self.use_se: + self.se = SEBlock( + channels=mid_channels, + reduction=(exp_factor * se_factor), + round_mid=False, + mid_activation=activation) + if conv2_kernel_count == 1: + self.conv2 = ConvBlock( + in_channels=mid_channels, + out_channels=out_channels, + activation=None, + kernel_size=1, + stride=1, + padding=0, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5) + else: + self.conv2 = mixconv1x1_block( + in_channels=mid_channels, + out_channels=out_channels, + kernel_count=conv2_kernel_count, + activation=None) + + def forward(self, x): + if self.residual: + identity = x + if self.use_exp_conv: + x = self.exp_conv(x) + x = self.conv1(x) + if self.use_se: + x = self.se(x) + x = self.conv2(x) + if self.residual: + x = x + identity + return x + + +class MixInitBlock(nn.Layer): + """ + MixNet specific initial block. + + Parameters: + ---------- + in_channels : int + Number of input channels. + out_channels : int + Number of output channels. + """ + + def __init__(self, in_channels, out_channels): + super(MixInitBlock, self).__init__() + self.conv1 = ConvBlock( + in_channels=in_channels, + out_channels=out_channels, + stride=2, + kernel_size=3, + padding=1) + self.conv2 = MixUnit( + in_channels=out_channels, + out_channels=out_channels, + stride=1, + exp_kernel_count=1, + conv1_kernel_count=1, + conv2_kernel_count=1, + exp_factor=1, + se_factor=0, + activation="relu") + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + return x + + +class MixNet(nn.Layer): + """ + MixNet model from 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. + + Parameters: + ---------- + channels : list of list of int + Number of output channels for each unit. + init_block_channels : int + Number of output channels for the initial unit. + final_block_channels : int + Number of output channels for the final block of the feature extractor. + exp_kernel_counts : list of list of int + Expansion convolution kernel count for each unit. + conv1_kernel_counts : list of list of int + Conv1 kernel count for each unit. + conv2_kernel_counts : list of list of int + Conv2 kernel count for each unit. 
+ exp_factors : list of list of int + Expansion factor for each unit. + se_factors : list of list of int + SE reduction factor for each unit. + in_channels : int, default 3 + Number of input channels. + in_size : tuple of two ints, default (224, 224) + Spatial size of the expected input image. + class_num : int, default 1000 + Number of classification classes. + """ + + def __init__(self, + channels, + init_block_channels, + final_block_channels, + exp_kernel_counts, + conv1_kernel_counts, + conv2_kernel_counts, + exp_factors, + se_factors, + in_channels=3, + in_size=(224, 224), + class_num=1000): + super(MixNet, self).__init__() + self.in_size = in_size + self.class_num = class_num + + self.features = nn.Sequential() + self.features.add_sublayer( + "init_block", + MixInitBlock( + in_channels=in_channels, out_channels=init_block_channels)) + in_channels = init_block_channels + for i, channels_per_stage in enumerate(channels): + stage = nn.Sequential() + for j, out_channels in enumerate(channels_per_stage): + stride = 2 if ((j == 0) and (i != 3)) or ( + (j == len(channels_per_stage) // 2) and (i == 3)) else 1 + exp_kernel_count = exp_kernel_counts[i][j] + conv1_kernel_count = conv1_kernel_counts[i][j] + conv2_kernel_count = conv2_kernel_counts[i][j] + exp_factor = exp_factors[i][j] + se_factor = se_factors[i][j] + activation = "relu" if i == 0 else "swish" + stage.add_sublayer( + "unit{}".format(j + 1), + MixUnit( + in_channels=in_channels, + out_channels=out_channels, + stride=stride, + exp_kernel_count=exp_kernel_count, + conv1_kernel_count=conv1_kernel_count, + conv2_kernel_count=conv2_kernel_count, + exp_factor=exp_factor, + se_factor=se_factor, + activation=activation)) + in_channels = out_channels + self.features.add_sublayer("stage{}".format(i + 1), stage) + self.features.add_sublayer( + "final_block", + ConvBlock( + in_channels=in_channels, + out_channels=final_block_channels, + kernel_size=1, + stride=1, + padding=0, + groups=1, + bias=False, + use_bn=True, + bn_eps=1e-5, + activation=nn.ReLU())) + in_channels = final_block_channels + self.features.add_sublayer( + "final_pool", nn.AvgPool2D( + kernel_size=7, stride=1)) + + self.output = nn.Linear( + in_features=in_channels, out_features=class_num) + + def forward(self, x): + x = self.features(x) + reshape_dim = reduce(lambda x, y: x * y, x.shape[1:]) + x = x.reshape(shape=[x.shape[0], reshape_dim]) + x = self.output(x) + return x + + +def get_mixnet(version, width_scale, model_name=None, **kwargs): + """ + Create MixNet model with specific parameters. + + Parameters: + ---------- + version : str + Version of MixNet ('s' or 'm'). + width_scale : float + Scale factor for width of layers. + model_name : str or None, default None + Model name. 
+ """ + + if version == "s": + init_block_channels = 16 + channels = [[24, 24], [40, 40, 40, 40], [80, 80, 80], + [120, 120, 120, 200, 200, 200]] + exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 1, 1], + [2, 2, 2, 1, 1, 1]] + conv1_kernel_counts = [[1, 1], [3, 2, 2, 2], [3, 2, 2], + [3, 4, 4, 5, 4, 4]] + conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [2, 2, 2], + [2, 2, 2, 1, 2, 2]] + exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6], [6, 3, 3, 6, 6, 6]] + se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4], [2, 2, 2, 2, 2, 2]] + elif version == "m": + init_block_channels = 24 + channels = [[32, 32], [40, 40, 40, 40], [80, 80, 80, 80], + [120, 120, 120, 120, 200, 200, 200, 200]] + exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2], + [1, 2, 2, 2, 1, 1, 1, 1]] + conv1_kernel_counts = [[3, 1], [4, 2, 2, 2], [3, 4, 4, 4], + [1, 4, 4, 4, 4, 4, 4, 4]] + conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2], + [1, 2, 2, 2, 1, 2, 2, 2]] + exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6, 6], + [6, 3, 3, 3, 6, 6, 6, 6]] + se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4, 4], + [2, 2, 2, 2, 2, 2, 2, 2]] + else: + raise ValueError("Unsupported MixNet version {}".format(version)) + + final_block_channels = 1536 + + if width_scale != 1.0: + channels = [[round_channels(cij * width_scale) for cij in ci] + for ci in channels] + init_block_channels = round_channels(init_block_channels * width_scale) + + net = MixNet( + channels=channels, + init_block_channels=init_block_channels, + final_block_channels=final_block_channels, + exp_kernel_counts=exp_kernel_counts, + conv1_kernel_counts=conv1_kernel_counts, + conv2_kernel_counts=conv2_kernel_counts, + exp_factors=exp_factors, + se_factors=se_factors, + **kwargs) + + return net + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def MixNet_S(pretrained=False, use_ssld=False, **kwargs): + """ + MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. + """ + model = get_mixnet( + version="s", width_scale=1.0, model_name="MixNet_S", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld) + return model + + +def MixNet_M(pretrained=False, use_ssld=False, **kwargs): + """ + MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. + """ + model = get_mixnet( + version="m", width_scale=1.0, model_name="MixNet_M", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld) + return model + + +def MixNet_L(pretrained=False, use_ssld=False, **kwargs): + """ + MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,' + https://arxiv.org/abs/1907.09595. + """ + model = get_mixnet( + version="m", width_scale=1.3, model_name="MixNet_L", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/mobilenet_v2.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/mobilenet_v2.py new file mode 100644 index 0000000..b32c025 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/mobilenet_v2.py @@ -0,0 +1,287 @@ +# copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "MobileNetV2_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams", + "MobileNetV2_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams", + "MobileNetV2_x0_75": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams", + "MobileNetV2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams", + "MobileNetV2_x1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams", + "MobileNetV2_x2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + name=None, + use_cudnn=True): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs, if_act=True): + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = F.relu6(y) + return y + + +class InvertedResidualUnit(nn.Layer): + def __init__(self, num_channels, num_in_filter, num_filters, stride, + filter_size, padding, expansion_factor, name): + super(InvertedResidualUnit, self).__init__() + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_expand") + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + use_cudnn=False, + name=name + "_dwise") + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_linear") + + def forward(self, inputs, ifshortcut): + y 
= self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + if ifshortcut: + y = paddle.add(inputs, y) + return y + + +class InvresiBlocks(nn.Layer): + def __init__(self, in_c, t, c, n, s, name): + super(InvresiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_1") + + self._block_list = [] + for i in range(1, n): + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs): + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +class MobileNet(nn.Layer): + def __init__(self, class_num=1000, scale=1.0, prefix_name=""): + super(MobileNet, self).__init__() + self.scale = scale + self.class_num = class_num + + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + self.conv1 = ConvBNLayer( + num_channels=3, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1, + name=prefix_name + "conv1_1") + + self.block_list = [] + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + block = self.add_sublayer( + prefix_name + "conv" + str(i), + sublayer=InvresiBlocks( + in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s, + name=prefix_name + "conv" + str(i))) + self.block_list.append(block) + in_c = int(c * scale) + + self.out_c = int(1280 * scale) if scale > 1.0 else 1280 + self.conv9 = ConvBNLayer( + num_channels=in_c, + num_filters=self.out_c, + filter_size=1, + stride=1, + padding=0, + name=prefix_name + "conv9") + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.out = Linear( + self.out_c, + class_num, + weight_attr=ParamAttr(name=prefix_name + "fc10_weights"), + bias_attr=ParamAttr(name=prefix_name + "fc10_offset")) + + def forward(self, inputs): + y = self.conv1(inputs, if_act=True) + for block in self.block_list: + y = block(y) + y = self.conv9(y, if_act=True) + y = self.pool2d_avg(y) + y = paddle.flatten(y, start_axis=1, stop_axis=-1) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=0.25, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld) + return model + + +def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=0.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld) + return model + + +def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=0.75, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld) + return model + + +def MobileNetV2(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=1.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld) + return model + + +def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=1.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld) + return model + + +def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs): + model = MobileNet(scale=2.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/pvt_v2.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/pvt_v2.py new file mode 100644 index 0000000..e2fdfd4 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/pvt_v2.py @@ -0,0 +1,492 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
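The MobileNetV2 factories above differ only in the `scale` width multiplier handed to `MobileNet`: every block's channel count is multiplied by `scale`, while the 1280-channel head is widened only when `scale > 1.0` (`self.out_c = int(1280 * scale) if scale > 1.0 else 1280`). A small sketch of the effect, under the same PYTHONPATH assumption as the earlier example; the module path follows this diff's file layout:

import paddle
from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_5, MobileNetV2_x2_0

small = MobileNetV2_x0_5()  # bottleneck channels halved, head stays at 1280
wide = MobileNetV2_x2_0()   # channels doubled, head widened to int(1280 * 2.0) = 2560
x = paddle.randn([1, 3, 224, 224])
print(small(x).shape, wide(x).shape)  # both [1, 1000]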
+ +# Code was heavily based on https://github.com/whai362/PVT + +from functools import partial +import math +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import TruncatedNormal, Constant + +from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity, drop_path + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "PVT_V2_B0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B0_pretrained.pdparams", + "PVT_V2_B1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B1_pretrained.pdparams", + "PVT_V2_B2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_pretrained.pdparams", + "PVT_V2_B2_Linear": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_Linear_pretrained.pdparams", + "PVT_V2_B3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B3_pretrained.pdparams", + "PVT_V2_B4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams", + "PVT_V2_B5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +@paddle.jit.not_to_static +def swapdim(x, dim1, dim2): + a = list(range(len(x.shape))) + a[dim1], a[dim2] = a[dim2], a[dim1] + return x.transpose(a) + + +class Mlp(nn.Layer): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0., + linear=False): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.dwconv = DWConv(hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + self.linear = linear + if self.linear: + self.relu = nn.ReLU() + + def forward(self, x, H, W): + x = self.fc1(x) + if self.linear: + x = self.relu(x) + x = self.dwconv(x, H, W) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Layer): + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0., + sr_ratio=1, + linear=False): + super().__init__() + assert dim % num_heads == 0 + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.q = nn.Linear(dim, dim, bias_attr=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.linear = linear + self.sr_ratio = sr_ratio + if not linear: + if sr_ratio > 1: + self.sr = nn.Conv2D( + dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + else: + self.pool = nn.AdaptiveAvgPool2D(7) + self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1) + self.norm = nn.LayerNorm(dim) + self.act = nn.GELU() + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape( + [B, N, self.num_heads, C // self.num_heads]).transpose( + [0, 2, 1, 3]) + + if not self.linear: + if self.sr_ratio > 1: + x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W]) + x_ = self.sr(x_) + h_, w_ = x_.shape[-2:] + x_ = x_.reshape([B, C, h_ * w_]).transpose([0, 2, 1]) + x_ = self.norm(x_) + kv = self.kv(x_) + kv = kv.reshape([ + B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads, 
+ C // self.num_heads + ]).transpose([2, 0, 3, 1, 4]) + else: + kv = self.kv(x) + kv = kv.reshape([ + B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads, + C // self.num_heads + ]).transpose([2, 0, 3, 1, 4]) + else: + x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W]) + x_ = self.sr(self.pool(x_)) + x_ = x_.reshape([B, C, x_.shape[2] * x_.shape[3]]).transpose( + [0, 2, 1]) + x_ = self.norm(x_) + x_ = self.act(x_) + kv = self.kv(x_) + kv = kv.reshape([ + B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads, + C // self.num_heads + ]).transpose([2, 0, 3, 1, 4]) + k, v = kv[0], kv[1] + + attn = (q @swapdim(k, -2, -1)) * self.scale + attn = F.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = swapdim((attn @v), 1, 2).reshape([B, N, C]) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Layer): + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + sr_ratio=1, + linear=False): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + sr_ratio=sr_ratio, + linear=linear) + self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + linear=linear) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x), H, W)) + + return x + + +class OverlapPatchEmbed(nn.Layer): + """ Image to Patch Embedding + """ + + def __init__(self, + img_size=224, + patch_size=7, + stride=4, + in_chans=3, + embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[ + 1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2D( + in_chans, + embed_dim, + kernel_size=patch_size, + stride=stride, + padding=(patch_size[0] // 2, patch_size[1] // 2)) + self.norm = nn.LayerNorm(embed_dim) + + def forward(self, x): + x = self.proj(x) + _, _, H, W = x.shape + x = x.flatten(2) + x = swapdim(x, 1, 2) + x = self.norm(x) + + return x, H, W + + +class PyramidVisionTransformerV2(nn.Layer): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + num_stages=4, + linear=False): + super().__init__() + self.class_num = class_num + self.depths = depths + self.num_stages = num_stages + + dpr = [x for x in paddle.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + + for i in range(num_stages): + patch_embed = OverlapPatchEmbed( + img_size=img_size if i == 0 else img_size // (2**(i + 1)), + patch_size=7 if i == 0 else 3, + stride=4 if i == 0 else 2, + in_chans=in_chans if i == 0 else embed_dims[i - 1], + embed_dim=embed_dims[i]) + + block = nn.LayerList([ + Block( + dim=embed_dims[i], + num_heads=num_heads[i], + mlp_ratio=mlp_ratios[i], + qkv_bias=qkv_bias, + qk_scale=qk_scale, + 
drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[cur + j], + norm_layer=norm_layer, + sr_ratio=sr_ratios[i], + linear=linear) for j in range(depths[i]) + ]) + norm = norm_layer(embed_dims[i]) + cur += depths[i] + + setattr(self, f"patch_embed{i + 1}", patch_embed) + setattr(self, f"block{i + 1}", block) + setattr(self, f"norm{i + 1}", norm) + + # classification head + self.head = nn.Linear(embed_dims[3], + class_num) if class_num > 0 else Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + B = x.shape[0] + + for i in range(self.num_stages): + patch_embed = getattr(self, f"patch_embed{i + 1}") + block = getattr(self, f"block{i + 1}") + norm = getattr(self, f"norm{i + 1}") + x, H, W = patch_embed(x) + for blk in block: + x = blk(x, H, W) + x = norm(x) + if i != self.num_stages - 1: + x = x.reshape([B, H, W, x.shape[2]]).transpose([0, 3, 1, 2]) + + return x.mean(axis=1) + + def forward(self, x): + x = self.forward_features(x) + x = self.head(x) + + return x + + +class DWConv(nn.Layer): + def __init__(self, dim=768): + super().__init__() + self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim) + + def forward(self, x, H, W): + B, N, C = x.shape + x = swapdim(x, 1, 2) + x = x.reshape([B, C, H, W]) + x = self.dwconv(x) + x = x.flatten(2) + x = swapdim(x, 1, 2) + + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def PVT_V2_B0(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[32, 64, 160, 256], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B0"], use_ssld=use_ssld) + return model + + +def PVT_V2_B1(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B1"], use_ssld=use_ssld) + return model + + +def PVT_V2_B2(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B2"], use_ssld=use_ssld) + return model + + +def PVT_V2_B3(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B3"], use_ssld=use_ssld) + return model + + +def PVT_V2_B4(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B4"], use_ssld=use_ssld) + return model + + +def PVT_V2_B5(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 6, 40, 3], + sr_ratios=[8, 4, 2, 1], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B5"], use_ssld=use_ssld) + return model + + +def PVT_V2_B2_Linear(pretrained=False, use_ssld=False, **kwargs): + model = PyramidVisionTransformerV2( + patch_size=4, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + linear=True, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["PVT_V2_B2_Linear"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/rednet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/rednet.py new file mode 100644 index 0000000..be84da1 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/rednet.py @@ -0,0 +1,203 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Code was based on https://github.com/d-li14/involution
+
+import paddle
+import paddle.nn as nn
+
+from paddle.vision.models import resnet
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "RedNet26":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
+    "RedNet38":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet38_pretrained.pdparams",
+    "RedNet50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet50_pretrained.pdparams",
+    "RedNet101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet101_pretrained.pdparams",
+    "RedNet152":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+class Involution(nn.Layer):
+    def __init__(self, channels, kernel_size, stride):
+        super(Involution, self).__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.channels = channels
+        reduction_ratio = 4
+        self.group_channels = 16
+        self.groups = self.channels // self.group_channels
+        self.conv1 = nn.Sequential(
+            ('conv', nn.Conv2D(
+                in_channels=channels,
+                out_channels=channels // reduction_ratio,
+                kernel_size=1,
+                bias_attr=False)),
+            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
+            ('activate', nn.ReLU()))
+        self.conv2 = nn.Sequential(('conv', nn.Conv2D(
+            in_channels=channels // reduction_ratio,
+            out_channels=kernel_size**2 * self.groups,
+            kernel_size=1,
+            stride=1)))
+        if stride > 1:
+            self.avgpool = nn.AvgPool2D(stride, stride)
+
+    def forward(self, x):
+        # generate position-specific kernels from the (optionally pooled) input
+        weight = self.conv2(
+            self.conv1(x if self.stride == 1 else self.avgpool(x)))
+        b, c, h, w = weight.shape
+        weight = weight.reshape(
+            (b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
+
+        # unfold the input and apply the generated kernels group-wise
+        out = nn.functional.unfold(x, self.kernel_size, self.stride,
+                                   (self.kernel_size - 1) // 2, 1)
+        out = out.reshape(
+            (b, self.groups, self.group_channels, self.kernel_size**2, h, w))
+        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
+        return out
+
+
+class BottleneckBlock(resnet.BottleneckBlock):
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 downsample=None,
+                 groups=1,
+                 base_width=64,
+                 dilation=1,
+                 norm_layer=None):
+        super(BottleneckBlock, self).__init__(inplanes, planes, stride,
+                                              downsample, groups, base_width,
+                                              dilation, norm_layer)
+        width = int(planes * (base_width / 64.)) * groups
+        # replace the standard 3x3 conv with a 7x7 involution
+        self.conv2 = Involution(width, 7, stride)
+
+
+class RedNet(resnet.ResNet):
+    def __init__(self, block, depth, class_num=1000, with_pool=True):
+        # build a canonical ResNet-50 skeleton first; the requested depth is
+        # realized by re-creating the four stages below
+        super(RedNet, self).__init__(
+            block=block, depth=50, num_classes=class_num, with_pool=with_pool)
+        layer_cfg = {
+            26: [1, 2, 4, 1],
+            38: [2, 3, 5, 2],
+            50: [3, 4, 6, 3],
+            101: [3, 4, 23, 3],
+            152: [3, 8, 36, 3]
+        }
+        layers = layer_cfg[depth]
+
+        # drop the ResNet stem; RedNet uses an involution stem instead
+        self.conv1 = None
+        self.bn1 = None
+        self.relu = None
+        self.inplanes = 64
+        self.class_num = class_num
+        self.stem = nn.Sequential(
+            nn.Sequential(
+                ('conv', nn.Conv2D(
+                    in_channels=3,
+                    out_channels=self.inplanes // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1, +
bias_attr=False)), + ('bn', nn.BatchNorm2D(self.inplanes // 2)), + ('activate', nn.ReLU())), + Involution(self.inplanes // 2, 3, 1), + nn.BatchNorm2D(self.inplanes // 2), + nn.ReLU(), + nn.Sequential( + ('conv', nn.Conv2D( + in_channels=self.inplanes // 2, + out_channels=self.inplanes, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)), + ('activate', nn.ReLU()))) + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + def forward(self, x): + x = self.stem(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + if self.with_pool: + x = self.avgpool(x) + + if self.class_num > 0: + x = paddle.flatten(x, 1) + x = self.fc(x) + + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def RedNet26(pretrained=False, **kwargs): + model = RedNet(BottleneckBlock, 26, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["RedNet26"]) + return model + + +def RedNet38(pretrained=False, **kwargs): + model = RedNet(BottleneckBlock, 38, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["RedNet38"]) + return model + + +def RedNet50(pretrained=False, **kwargs): + model = RedNet(BottleneckBlock, 50, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["RedNet50"]) + return model + + +def RedNet101(pretrained=False, **kwargs): + model = RedNet(BottleneckBlock, 101, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["RedNet101"]) + return model + + +def RedNet152(pretrained=False, **kwargs): + model = RedNet(BottleneckBlock, 152, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["RedNet152"]) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/regnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/regnet.py new file mode 100644 index 0000000..dc381cb --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/regnet.py @@ -0,0 +1,431 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
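+
+# Editor's note (hedged sketch, not part of the upstream file): the RegNet
+# builders below are driven by generate_regnet(), which lays out a linear
+# width ramp u_j = w_0 + w_a * j for j in [0, d), snaps each u_j to
+# w_0 * w_m**k for a rounded integer k, and quantizes the result to a
+# multiple of q. With the RegNetX_200MF parameters used later
+# (w_a=36.44, w_0=24, w_m=2.49, d=13) this should yield per-block widths
+# [24, 56, 152, 152, 152, 152, 368, ...], which get_stages_from_blocks()
+# then collapses into four stages of widths [24, 56, 152, 368] and depths
+# [1, 1, 4, 7].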
+ +# Code was based on https://github.com/facebookresearch/pycls + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "RegNetX_200MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", + "RegNetX_4GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", + "RegNetX_32GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", + "RegNetY_200MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams", + "RegNetY_4GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", + "RegNetY_32GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +def quantize_float(f, q): + """Converts a float to closest non-zero int divisible by q.""" + return int(round(f / q) * q) + + +def adjust_ws_gs_comp(ws, bms, gs): + """Adjusts the compatibility of widths and groups.""" + ws_bot = [int(w * b) for w, b in zip(ws, bms)] + gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)] + ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)] + ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)] + return ws, gs + + +def get_stages_from_blocks(ws, rs): + """Gets ws/ds of network at each stage from per block values.""" + ts = [ + w != wp or r != rp + for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs) + ] + s_ws = [w for w, t in zip(ws, ts[:-1]) if t] + s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist() + return s_ws, s_ds + + +def generate_regnet(w_a, w_0, w_m, d, q=8): + """Generates per block ws from RegNet parameters.""" + assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 + ws_cont = np.arange(d) * w_a + w_0 + ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) + ws = w_0 * np.power(w_m, ks) + ws = np.round(np.divide(ws, q)) * q + num_stages, max_stage = len(np.unique(ws)), ks.max() + 1 + ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist() + return ws, num_stages, max_stage, ws_cont + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + padding=0, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"), + bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0")) + bn_name = name + "_bn" + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + ".output.1.w_0"), + bias_attr=ParamAttr(bn_name + ".output.1.b_0"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, 
+ stride, + bm, + gw, + se_on, + se_r, + shortcut=True, + name=None): + super(BottleneckBlock, self).__init__() + + # Compute the bottleneck width + w_b = int(round(num_filters * bm)) + # Compute the number of groups + num_gs = w_b // gw + self.se_on = se_on + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=w_b, + filter_size=1, + padding=0, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=w_b, + num_filters=w_b, + filter_size=3, + stride=stride, + padding=1, + groups=num_gs, + act="relu", + name=name + "_branch2b") + if se_on: + w_se = int(round(num_channels * se_r)) + self.se_block = SELayer( + num_channels=w_b, + num_filters=w_b, + reduction_ratio=w_se, + name=name + "_branch2se") + self.conv2 = ConvBNLayer( + num_channels=w_b, + num_filters=num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + if self.se_on: + conv1 = self.se_block(conv1) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + "_sqz_offset")) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + "_exc_offset")) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class RegNet(nn.Layer): + def __init__(self, + w_a, + w_0, + w_m, + d, + group_w, + bot_mul, + q=8, + se_on=False, + class_num=1000): + super(RegNet, self).__init__() + + # Generate RegNet ws per block + b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q) + # Convert to per stage format + ws, ds = get_stages_from_blocks(b_ws, b_ws) + # Generate group widths and bot muls + gws = [group_w for _ in range(num_s)] + bms = [bot_mul for _ in range(num_s)] + # Adjust the compatibility of ws and gws + ws, gws = adjust_ws_gs_comp(ws, bms, gws) + # Use the same stride for each stage + ss = [2 for _ in range(num_s)] + # Use SE for RegNetY + se_r = 0.25 + # Construct the model + # Group params by stage + stage_params = list(zip(ds, ws, ss, bms, gws)) + # Construct the stem + stem_type = "simple_stem_in" + stem_w = 32 + block_type = "res_bottleneck_block" + + self.conv = ConvBNLayer( + num_channels=3, + num_filters=stem_w, + filter_size=3, + stride=2, + padding=1, + act="relu", + name="stem_conv") + + self.block_list = [] + for block, (d, 
w_out, stride, bm, gw) in enumerate(stage_params):
+            shortcut = False
+            for i in range(d):
+                num_channels = stem_w if block == i == 0 else in_channels
+                # Stride applies only to the first block of each stage
+                b_stride = stride if i == 0 else 1
+                conv_name = "s" + str(block + 1) + "_b" + str(i + 1)  # chr(97 + i)
+                bottleneck_block = self.add_sublayer(
+                    conv_name,
+                    BottleneckBlock(
+                        num_channels=num_channels,
+                        num_filters=w_out,
+                        stride=b_stride,
+                        bm=bm,
+                        gw=gw,
+                        se_on=se_on,
+                        se_r=se_r,
+                        shortcut=shortcut,
+                        name=conv_name))
+                in_channels = w_out
+                self.block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = AdaptiveAvgPool2D(1)
+
+        self.pool2d_avg_channels = w_out
+
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_num,
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
+            bias_attr=ParamAttr(name="fc_0.b_0"))
+
+    def forward(self, inputs):
+        y = self.conv(inputs)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=38.65,
+        w_0=96,
+        w_m=2.43,
+        d=23,
+        group_w=40,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=69.86,
+        w_0=320,
+        w_m=2.0,
+        d=23,
+        group_w=168,
+        bot_mul=1.0,
+        q=8,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=36.44,
+        w_0=24,
+        w_m=2.49,
+        d=13,
+        group_w=8,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_200MF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=31.41,
+        w_0=96,
+        w_m=2.24,
+        d=22,
+        group_w=64,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_4GF"], use_ssld=use_ssld)
+    return model
+
+
+def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
+    model = RegNet(
+        w_a=115.89,
+        w_0=232,
+        w_m=2.53,
+        d=20,
+        group_w=232,
+        bot_mul=1.0,
+        q=8,
+        se_on=True,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["RegNetY_32GF"], use_ssld=use_ssld)
+    return model
diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/repvgg.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/repvgg.py new file mode 100644 index 0000000..1218be7 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/repvgg.py @@ -0,0 +1,382 @@
+# copyright (c) 2020
PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/DingXiaoH/RepVGG + +import paddle.nn as nn +import paddle +import numpy as np + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "RepVGG_A0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams", + "RepVGG_A1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams", + "RepVGG_A2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams", + "RepVGG_B0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams", + "RepVGG_B1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams", + "RepVGG_B2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams", + "RepVGG_B1g2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams", + "RepVGG_B1g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams", + "RepVGG_B2g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams", + "RepVGG_B3g4": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + +optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26] +g2_map = {l: 2 for l in optional_groupwise_layers} +g4_map = {l: 4 for l in optional_groupwise_layers} + + +class ConvBN(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + groups=1): + super(ConvBN, self).__init__() + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias_attr=False) + self.bn = nn.BatchNorm2D(num_features=out_channels) + + def forward(self, x): + y = self.conv(x) + y = self.bn(y) + return y + + +class RepVGGBlock(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros'): + super(RepVGGBlock, self).__init__() + self.is_repped = False + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.padding_mode = padding_mode + + assert kernel_size == 3 + assert padding == 1 + + padding_11 = padding - kernel_size // 2 + + self.nonlinearity = nn.ReLU() + + self.rbr_identity = nn.BatchNorm2D( + num_features=in_channels + ) if out_channels == in_channels and stride == 1 else None + self.rbr_dense = ConvBN( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups) + self.rbr_1x1 = ConvBN( + 
in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride, + padding=padding_11, + groups=groups) + + def forward(self, inputs): + if not self.training and not self.is_repped: + self.rep() + self.is_repped = True + if self.training and self.is_repped: + self.is_repped = False + + if not self.training: + return self.nonlinearity(self.rbr_reparam(inputs)) + + if self.rbr_identity is None: + id_out = 0 + else: + id_out = self.rbr_identity(inputs) + return self.nonlinearity( + self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + + def rep(self): + if not hasattr(self, 'rbr_reparam'): + self.rbr_reparam = nn.Conv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + padding_mode=self.padding_mode) + kernel, bias = self.get_equivalent_kernel_bias() + self.rbr_reparam.weight.set_value(kernel) + self.rbr_reparam.bias.set_value(bias) + + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) + kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) + return kernel3x3 + self._pad_1x1_to_3x3_tensor( + kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, ConvBN): + kernel = branch.conv.weight + running_mean = branch.bn._mean + running_var = branch.bn._variance + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn._epsilon + else: + assert isinstance(branch, nn.BatchNorm2D) + if not hasattr(self, 'id_tensor'): + input_dim = self.in_channels // self.groups + kernel_value = np.zeros( + (self.in_channels, input_dim, 3, 3), dtype=np.float32) + for i in range(self.in_channels): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = paddle.to_tensor(kernel_value) + kernel = self.id_tensor + running_mean = branch._mean + running_var = branch._variance + gamma = branch.weight + beta = branch.bias + eps = branch._epsilon + std = (running_var + eps).sqrt() + t = (gamma / std).reshape((-1, 1, 1, 1)) + return kernel * t, beta - running_mean * gamma / std + + +class RepVGG(nn.Layer): + def __init__(self, + num_blocks, + width_multiplier=None, + override_groups_map=None, + class_num=1000): + super(RepVGG, self).__init__() + + assert len(width_multiplier) == 4 + self.override_groups_map = override_groups_map or dict() + + assert 0 not in self.override_groups_map + + self.in_planes = min(64, int(64 * width_multiplier[0])) + + self.stage0 = RepVGGBlock( + in_channels=3, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multiplier[0]), num_blocks[0], stride=2) + self.stage2 = self._make_stage( + int(128 * width_multiplier[1]), num_blocks[1], stride=2) + self.stage3 = self._make_stage( + int(256 * width_multiplier[2]), num_blocks[2], stride=2) + self.stage4 = self._make_stage( + int(512 * width_multiplier[3]), num_blocks[3], stride=2) + self.gap = nn.AdaptiveAvgPool2D(output_size=1) + self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num) + + def _make_stage(self, planes, num_blocks, stride): + strides = [stride] + [1] * (num_blocks - 1) + blocks = [] + for stride in strides: + 
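# groups for this block come from the override map: cur_layer_idx counts
+            # RepVGG blocks globally across stages, and the g2/g4 variants
+            # pass maps keyed on the even indices 2, 4, ..., 26, so every
+            # second block uses a group-wise 3x3 conv
+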
cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) + blocks.append( + RepVGGBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=3, + stride=stride, + padding=1, + groups=cur_groups)) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) + + def forward(self, x): + out = self.stage0(x) + out = self.stage1(out) + out = self.stage2(out) + out = self.stage3(out) + out = self.stage4(out) + out = self.gap(out) + out = paddle.flatten(out, start_axis=1) + out = self.linear(out) + return out + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[2, 4, 14, 1], + width_multiplier=[0.75, 0.75, 0.75, 2.5], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld) + return model + + +def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[2, 4, 14, 1], + width_multiplier=[1, 1, 1, 2.5], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld) + return model + + +def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[2, 4, 14, 1], + width_multiplier=[1.5, 1.5, 1.5, 2.75], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld) + return model + + +def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[1, 1, 1, 2.5], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld) + return model + + +def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[2, 2, 2, 4], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld) + return model + + +def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[2, 2, 2, 4], + override_groups_map=g2_map, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld) + return model + + +def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[2, 2, 2, 4], + override_groups_map=g4_map, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld) + return model + + +def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[2.5, 2.5, 2.5, 5], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld) + return model + + +def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[2.5, 2.5, 2.5, 5], + override_groups_map=g4_map, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld) + return model + + 
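+
+# Editor's usage sketch (hedged, not part of the upstream file):
+# re-parameterization is driven by the training flag rather than an explicit
+# convert call. In eval mode each RepVGGBlock fuses its 3x3, 1x1 and identity
+# branches into the single rbr_reparam conv on its next forward pass:
+#
+#     import paddle
+#     model = RepVGG_A0(pretrained=False)
+#     model.eval()  # the next forward triggers block.rep()
+#     out = model(paddle.rand([1, 3, 224, 224]))  # single-branch inference
+#
+# Calling model.train() clears is_repped, so training uses the multi-branch
+# structure again.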
+def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[3, 3, 3, 5], + override_groups_map=g4_map, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net.py new file mode 100644 index 0000000..191cc84 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net.py @@ -0,0 +1,264 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "Res2Net50_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams", + "Res2Net50_14w_8s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels1, + num_channels2, + num_filters, + stride, + scales, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + self.stride = stride + self.scales = scales + self.conv0 = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1_list = [] + for s in range(scales - 1): + conv1 = self.add_sublayer( + name + '_branch2b_' + str(s + 1), + ConvBNLayer( + num_channels=num_filters // scales, + num_filters=num_filters // scales, + filter_size=3, + stride=stride, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + 
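# each 3x3 conv handles one of the `scales` channel splits; in
+            # forward(), every later split is fed x_s plus the previous
+            # split's output, forming Res2Net's hierarchical residual
+            # connections
+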
self.conv1_list.append(conv1) + self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1) + + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_channels2, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_channels2, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + xs = paddle.split(y, self.scales, 1) + ys = [] + for s, conv1 in enumerate(self.conv1_list): + if s == 0 or self.stride == 2: + ys.append(conv1(xs[s])) + else: + ys.append(conv1(paddle.add(xs[s], ys[-1]))) + if self.stride == 1: + ys.append(xs[-1]) + else: + ys.append(self.pool2d_avg(xs[-1])) + conv1 = paddle.concat(ys, axis=1) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class Res2Net(nn.Layer): + def __init__(self, layers=50, scales=4, width=26, class_num=1000): + super(Res2Net, self).__init__() + + self.layers = layers + self.scales = scales + self.width = width + basic_width = self.width * self.scales + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024] + num_channels2 = [256, 512, 1024, 2048] + num_filters = [basic_width * t for t in [1, 2, 4, 8]] + + self.conv1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1") + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels1=num_channels[block] + if i == 0 else num_channels2[block], + num_channels2=num_channels2[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + scales=scales, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv1(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise 
RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs): + model = Res2Net(layers=50, scales=4, width=26, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld) + return model + + +def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs): + model = Res2Net(layers=50, scales=8, width=14, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net_vd.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net_vd.py new file mode 100644 index 0000000..a375679 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/res2net_vd.py @@ -0,0 +1,305 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "Res2Net50_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams", + "Res2Net101_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams", + "Res2Net200_vd_26w_4s": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels1, + num_channels2, + 
num_filters, + stride, + scales, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + self.stride = stride + self.scales = scales + self.conv0 = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1_list = [] + for s in range(scales - 1): + conv1 = self.add_sublayer( + name + '_branch2b_' + str(s + 1), + ConvBNLayer( + num_channels=num_filters // scales, + num_filters=num_filters // scales, + filter_size=3, + stride=stride, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + self.conv1_list.append(conv1) + self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1) + + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_channels2, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels1, + num_filters=num_channels2, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + xs = paddle.split(y, self.scales, 1) + ys = [] + for s, conv1 in enumerate(self.conv1_list): + if s == 0 or self.stride == 2: + ys.append(conv1(xs[s])) + else: + ys.append(conv1(xs[s] + ys[-1])) + if self.stride == 1: + ys.append(xs[-1]) + else: + ys.append(self.pool2d_avg(xs[-1])) + conv1 = paddle.concat(ys, axis=1) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class Res2Net_vd(nn.Layer): + def __init__(self, layers=50, scales=4, width=26, class_num=1000): + super(Res2Net_vd, self).__init__() + + self.layers = layers + self.scales = scales + self.width = width + basic_width = self.width * self.scales + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024] + num_channels2 = [256, 512, 1024, 2048] + num_filters = [basic_width * t for t in [1, 2, 4, 8]] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels1=num_channels[block] + if i == 0 else num_channels2[block], + num_channels2=num_channels2[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + scales=scales, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) 
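+                # the projection shortcut is only needed for the first block
+                # of each stage; subsequent blocks use the identity shortcut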
+ shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): + model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net50_vd_26w_4s"], + use_ssld=use_ssld) + return model + + +def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): + model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net101_vd_26w_4s"], + use_ssld=use_ssld) + return model + + +def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs): + model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["Res2Net200_vd_26w_4s"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnest.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnest.py new file mode 100644 index 0000000..88eee8a --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnest.py @@ -0,0 +1,740 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
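+
+# Editor's note (hedged, not from the upstream file): the heart of ResNeSt is
+# the Split-Attention block defined below. SplatConv expands the input to
+# radix groups of `channels` feature maps, sums the splits, squeezes the
+# result through a 1x1 bottleneck, and emits per-split attention logits that
+# rSoftmax normalizes across the radix dimension. With radix=2 and
+# cardinality=1, an input of shape [N, C, H, W] becomes [N, 2C, H, W] after
+# conv1; the two C-channel splits are summed and pooled to [N, C, 1, 1], and
+# the resulting [N, 2C, 1, 1] attention tensor is softmax-normalized over the
+# two splits before re-weighting and re-summing them.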
+ +# Code was based on https://github.com/zhanghang1989/ResNeSt + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +import math +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn.initializer import KaimingNormal +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.regularizer import L2Decay + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNeSt50_fast_1s1x64d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams", + "ResNeSt50": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams", + "ResNeSt101": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + dilation=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + bn_decay = 0.0 + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + dilation=dilation, + groups=groups, + weight_attr=ParamAttr(name=name + "_weight"), + bias_attr=False) + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr( + name=name + "_scale", regularizer=L2Decay(bn_decay)), + bias_attr=ParamAttr( + name + "_offset", regularizer=L2Decay(bn_decay)), + moving_mean_name=name + "_mean", + moving_variance_name=name + "_variance") + + def forward(self, x): + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class rSoftmax(nn.Layer): + def __init__(self, radix, cardinality): + super(rSoftmax, self).__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + cardinality = self.cardinality + radix = self.radix + + batch, r, h, w = x.shape + if self.radix > 1: + x = paddle.reshape( + x=x, + shape=[ + batch, cardinality, radix, + int(r * h * w / cardinality / radix) + ]) + x = paddle.transpose(x=x, perm=[0, 2, 1, 3]) + x = nn.functional.softmax(x, axis=1) + x = paddle.reshape(x=x, shape=[batch, r * h * w, 1, 1]) + else: + x = nn.functional.sigmoid(x) + return x + + +class SplatConv(nn.Layer): + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + radix=2, + reduction_factor=4, + rectify_avg=False, + name=None): + super(SplatConv, self).__init__() + + self.radix = radix + + self.conv1 = ConvBNLayer( + num_channels=in_channels, + num_filters=channels * radix, + filter_size=kernel_size, + stride=stride, + groups=groups * radix, + act="relu", + name=name + "_1_weights") + + self.avg_pool2d = AdaptiveAvgPool2D(1) + + inter_channels = int(max(in_channels * radix // reduction_factor, 32)) + + # to calc gap + self.conv2 = ConvBNLayer( + num_channels=channels, + num_filters=inter_channels, + filter_size=1, + stride=1, + groups=groups, + act="relu", + name=name + "_2_weights") + + # to calc atten + self.conv3 = Conv2D( + in_channels=inter_channels, + out_channels=channels * radix, + kernel_size=1, + stride=1, + padding=0, + groups=groups, + weight_attr=ParamAttr( + name=name + "_weights", initializer=KaimingNormal()), + bias_attr=False) + + 
self.rsoftmax = rSoftmax(radix=radix, cardinality=groups) + + def forward(self, x): + x = self.conv1(x) + + if self.radix > 1: + splited = paddle.split(x, num_or_sections=self.radix, axis=1) + gap = paddle.add_n(splited) + else: + gap = x + + gap = self.avg_pool2d(gap) + gap = self.conv2(gap) + + atten = self.conv3(gap) + atten = self.rsoftmax(atten) + + if self.radix > 1: + attens = paddle.split(atten, num_or_sections=self.radix, axis=1) + y = paddle.add_n([ + paddle.multiply(split, att) + for (att, split) in zip(attens, splited) + ]) + else: + y = paddle.multiply(x, atten) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + inplanes, + planes, + stride=1, + radix=1, + cardinality=1, + bottleneck_width=64, + avd=False, + avd_first=False, + dilation=1, + is_first=False, + rectify_avg=False, + last_gamma=False, + avg_down=False, + name=None): + super(BottleneckBlock, self).__init__() + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.radix = radix + self.cardinality = cardinality + self.avd = avd + self.avd_first = avd_first + self.dilation = dilation + self.is_first = is_first + self.rectify_avg = rectify_avg + self.last_gamma = last_gamma + self.avg_down = avg_down + + group_width = int(planes * (bottleneck_width / 64.)) * cardinality + + self.conv1 = ConvBNLayer( + num_channels=self.inplanes, + num_filters=group_width, + filter_size=1, + stride=1, + groups=1, + act="relu", + name=name + "_conv1") + + if avd and avd_first and (stride > 1 or is_first): + self.avg_pool2d_1 = AvgPool2D( + kernel_size=3, stride=stride, padding=1) + + if radix >= 1: + self.conv2 = SplatConv( + in_channels=group_width, + channels=group_width, + kernel_size=3, + stride=1, + padding=dilation, + dilation=dilation, + groups=cardinality, + bias=False, + radix=radix, + rectify_avg=rectify_avg, + name=name + "_splat") + else: + self.conv2 = ConvBNLayer( + num_channels=group_width, + num_filters=group_width, + filter_size=3, + stride=1, + dilation=dilation, + groups=cardinality, + act="relu", + name=name + "_conv2") + + if avd and avd_first == False and (stride > 1 or is_first): + self.avg_pool2d_2 = AvgPool2D( + kernel_size=3, stride=stride, padding=1) + + self.conv3 = ConvBNLayer( + num_channels=group_width, + num_filters=planes * 4, + filter_size=1, + stride=1, + groups=1, + act=None, + name=name + "_conv3") + + if stride != 1 or self.inplanes != self.planes * 4: + if avg_down: + if dilation == 1: + self.avg_pool2d_3 = AvgPool2D( + kernel_size=stride, stride=stride, padding=0) + else: + self.avg_pool2d_3 = AvgPool2D( + kernel_size=1, stride=1, padding=0, ceil_mode=True) + + self.conv4 = Conv2D( + in_channels=self.inplanes, + out_channels=planes * 4, + kernel_size=1, + stride=1, + padding=0, + groups=1, + weight_attr=ParamAttr( + name=name + "_weights", initializer=KaimingNormal()), + bias_attr=False) + else: + self.conv4 = Conv2D( + in_channels=self.inplanes, + out_channels=planes * 4, + kernel_size=1, + stride=stride, + padding=0, + groups=1, + weight_attr=ParamAttr( + name=name + "_shortcut_weights", + initializer=KaimingNormal()), + bias_attr=False) + + bn_decay = 0.0 + self._batch_norm = BatchNorm( + planes * 4, + act=None, + param_attr=ParamAttr( + name=name + "_shortcut_scale", + regularizer=L2Decay(bn_decay)), + bias_attr=ParamAttr( + name + "_shortcut_offset", regularizer=L2Decay(bn_decay)), + moving_mean_name=name + "_shortcut_mean", + moving_variance_name=name + "_shortcut_variance") + + def forward(self, x): + short = x + + x = self.conv1(x) + if self.avd 
and self.avd_first and (self.stride > 1 or self.is_first): + x = self.avg_pool2d_1(x) + + x = self.conv2(x) + + if self.avd and self.avd_first == False and (self.stride > 1 or + self.is_first): + x = self.avg_pool2d_2(x) + + x = self.conv3(x) + + if self.stride != 1 or self.inplanes != self.planes * 4: + if self.avg_down: + short = self.avg_pool2d_3(short) + + short = self.conv4(short) + + short = self._batch_norm(short) + + y = paddle.add(x=short, y=x) + y = F.relu(y) + return y + + +class ResNeStLayer(nn.Layer): + def __init__(self, + inplanes, + planes, + blocks, + radix, + cardinality, + bottleneck_width, + avg_down, + avd, + avd_first, + rectify_avg, + last_gamma, + stride=1, + dilation=1, + is_first=True, + name=None): + super(ResNeStLayer, self).__init__() + self.inplanes = inplanes + self.planes = planes + self.blocks = blocks + self.radix = radix + self.cardinality = cardinality + self.bottleneck_width = bottleneck_width + self.avg_down = avg_down + self.avd = avd + self.avd_first = avd_first + self.rectify_avg = rectify_avg + self.last_gamma = last_gamma + self.is_first = is_first + + if dilation == 1 or dilation == 2: + bottleneck_func = self.add_sublayer( + name + "_bottleneck_0", + BottleneckBlock( + inplanes=self.inplanes, + planes=planes, + stride=stride, + radix=radix, + cardinality=cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + dilation=1, + is_first=is_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + name=name + "_bottleneck_0")) + elif dilation == 4: + bottleneck_func = self.add_sublayer( + name + "_bottleneck_0", + BottleneckBlock( + inplanes=self.inplanes, + planes=planes, + stride=stride, + radix=radix, + cardinality=cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + dilation=2, + is_first=is_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + name=name + "_bottleneck_0")) + else: + raise RuntimeError("=>unknown dilation size") + + self.inplanes = planes * 4 + self.bottleneck_block_list = [bottleneck_func] + for i in range(1, blocks): + curr_name = name + "_bottleneck_" + str(i) + + bottleneck_func = self.add_sublayer( + curr_name, + BottleneckBlock( + inplanes=self.inplanes, + planes=planes, + radix=radix, + cardinality=cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + dilation=dilation, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + name=curr_name)) + self.bottleneck_block_list.append(bottleneck_func) + + def forward(self, x): + for bottleneck_block in self.bottleneck_block_list: + x = bottleneck_block(x) + return x + + +class ResNeSt(nn.Layer): + def __init__(self, + layers, + radix=1, + groups=1, + bottleneck_width=64, + dilated=False, + dilation=1, + deep_stem=False, + stem_width=64, + avg_down=False, + rectify_avg=False, + avd=False, + avd_first=False, + final_drop=0.0, + last_gamma=False, + class_num=1000): + super(ResNeSt, self).__init__() + + self.cardinality = groups + self.bottleneck_width = bottleneck_width + # ResNet-D params + self.inplanes = stem_width * 2 if deep_stem else 64 + self.avg_down = avg_down + self.last_gamma = last_gamma + # ResNeSt params + self.radix = radix + self.avd = avd + self.avd_first = avd_first + + self.deep_stem = deep_stem + self.stem_width = stem_width + self.layers = layers + self.final_drop = final_drop + self.dilated = dilated + self.dilation = dilation + + self.rectify_avg = rectify_avg + + if self.deep_stem: + 
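# Deep stem (ResNet-C/D style): three stacked 3x3 convs, stem_width -> stem_width -> 2*stem_width, stand in for the single 7x7 conv of the plain stem below. +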
self.stem = nn.Sequential( + ("conv1", ConvBNLayer( + num_channels=3, + num_filters=stem_width, + filter_size=3, + stride=2, + act="relu", + name="conv1")), ("conv2", ConvBNLayer( + num_channels=stem_width, + num_filters=stem_width, + filter_size=3, + stride=1, + act="relu", + name="conv2")), ("conv3", ConvBNLayer( + num_channels=stem_width, + num_filters=stem_width * 2, + filter_size=3, + stride=1, + act="relu", + name="conv3"))) + else: + self.stem = ConvBNLayer( + num_channels=3, + num_filters=stem_width, + filter_size=7, + stride=2, + act="relu", + name="conv1") + + self.max_pool2d = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.layer1 = ResNeStLayer( + inplanes=self.stem_width * 2 + if self.deep_stem else self.stem_width, + planes=64, + blocks=self.layers[0], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=1, + dilation=1, + is_first=False, + name="layer1") + + # return + + self.layer2 = ResNeStLayer( + inplanes=256, + planes=128, + blocks=self.layers[1], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=2, + name="layer2") + + if self.dilated or self.dilation == 4: + self.layer3 = ResNeStLayer( + inplanes=512, + planes=256, + blocks=self.layers[2], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=1, + dilation=2, + name="layer3") + self.layer4 = ResNeStLayer( + inplanes=1024, + planes=512, + blocks=self.layers[3], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=1, + dilation=4, + name="layer4") + elif self.dilation == 2: + self.layer3 = ResNeStLayer( + inplanes=512, + planes=256, + blocks=self.layers[2], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=2, + dilation=1, + name="layer3") + self.layer4 = ResNeStLayer( + inplanes=1024, + planes=512, + blocks=self.layers[3], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=1, + dilation=2, + name="layer4") + else: + self.layer3 = ResNeStLayer( + inplanes=512, + planes=256, + blocks=self.layers[2], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=2, + name="layer3") + self.layer4 = ResNeStLayer( + inplanes=1024, + planes=512, + blocks=self.layers[3], + radix=radix, + cardinality=self.cardinality, + bottleneck_width=bottleneck_width, + avg_down=self.avg_down, + avd=avd, + avd_first=avd_first, + rectify_avg=rectify_avg, + last_gamma=last_gamma, + stride=2, + name="layer4") + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.out_channels = 2048 + + stdv = 1.0 / math.sqrt(self.out_channels * 1.0) + + self.out = Linear( + self.out_channels, + class_num, + 
weight_attr=ParamAttr( + initializer=nn.initializer.Uniform(-stdv, stdv), + name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, x): + x = self.stem(x) + x = self.max_pool2d(x) + x = self.layer1(x) + x = self.layer2(x) + + x = self.layer3(x) + + x = self.layer4(x) + x = self.pool2d_avg(x) + x = paddle.reshape(x, shape=[-1, self.out_channels]) + x = self.out(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeSt( + layers=[3, 4, 6, 3], + radix=1, + groups=1, + bottleneck_width=64, + deep_stem=True, + stem_width=32, + avg_down=True, + avd=True, + avd_first=True, + final_drop=0.0, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeSt50_fast_1s1x64d"], + use_ssld=use_ssld) + return model + + +def ResNeSt50(pretrained=False, use_ssld=False, **kwargs): + model = ResNeSt( + layers=[3, 4, 6, 3], + radix=2, + groups=1, + bottleneck_width=64, + deep_stem=True, + stem_width=32, + avg_down=True, + avd=True, + avd_first=False, + final_drop=0.0, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld) + return model + + +def ResNeSt101(pretrained=False, use_ssld=False, **kwargs): + model = ResNeSt( + layers=[3, 4, 23, 3], + radix=2, + groups=1, + bottleneck_width=64, + deep_stem=True, + stem_width=64, + avg_down=True, + avd=True, + avd_first=False, + final_drop=0.0, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnet_vc.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnet_vc.py new file mode 100644 index 0000000..6b972dc --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnet_vc.py @@ -0,0 +1,309 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
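A usage sketch for the ResNeSt factories defined above (a minimal example, assuming paddle >= 2.0 is installed and this repository is on PYTHONPATH; the module path is assumed by analogy with the sibling files in this diff, and the 224x224 input size is the ImageNet convention, not something this file mandates):

    import paddle
    # hypothetical module path, inferred from the neighbouring model_zoo files
    from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50

    model = ResNeSt50(pretrained=False)   # pretrained=True would fetch MODEL_URLS["ResNeSt50"]
    model.eval()
    x = paddle.randn([1, 3, 224, 224])    # NCHW, ImageNet-sized batch (illustrative)
    with paddle.no_grad():
        logits = model(x)                 # [1, 1000] with the default class_num
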
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNet50_vc": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=conv1) + y = F.relu(y) + return y + + +class 
ResNet_vc(nn.Layer): + def __init__(self, layers=50, class_num=1000): + super(ResNet_vc, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs): + model = ResNet_vc(layers=50, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext.py new file mode 100644 index 0000000..1aef811 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext.py @@ -0,0 +1,298 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNeXt50_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams", + "ResNeXt50_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams", + "ResNeXt101_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams", + "ResNeXt101_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams", + "ResNeXt152_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams", + "ResNeXt152_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, + data_format="NCHW"): + super(ConvBNLayer, self).__init__() + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + data_format=data_format) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + data_layout=data_format) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + shortcut=True, + name=None, + data_format="NCHW"): + super(BottleneckBlock, self).__init__() + self.conv0 = ConvBNLayer( + 
num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a", + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b", + data_format=data_format) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c", + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1", + data_format=data_format) + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class ResNeXt(nn.Layer): + def __init__(self, + layers=50, + class_num=1000, + cardinality=32, + input_image_channel=3, + data_format="NCHW"): + super(ResNeXt, self).__init__() + + self.layers = layers + self.data_format = data_format + self.input_image_channel = input_image_channel + self.cardinality = cardinality + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + + self.conv = ConvBNLayer( + num_channels=self.input_image_channel, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="res_conv1", + data_format=self.data_format) + self.pool2d_max = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=self.data_format) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name, + data_format=self.data_format)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + with paddle.static.amp.fp16_guard(): + if self.data_format == "NHWC": + inputs = 
paddle.tensor.transpose(inputs, [0, 2, 3, 1]) + inputs.stop_gradient = True + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=32, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=64, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=101, cardinality=32, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=101, cardinality=64, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=32, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=64, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext101_wsl.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext101_wsl.py new file mode 100644 index 0000000..e85e133 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext101_wsl.py @@ -0,0 +1,490 @@ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNeXt101_32x8d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams", + "ResNeXt101_32x16d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16_wsl_pretrained.pdparams", + "ResNeXt101_32x32d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams", + "ResNeXt101_32x48d_wsl": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + if "downsample" in name: + conv_name = name + ".0" 
+ else: + conv_name = name + self._conv = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=conv_name + ".weight"), + bias_attr=False) + if "downsample" in name: + bn_name = name[:9] + "downsample.1" + else: + if "conv1" == name: + bn_name = "bn" + name[-1] + else: + bn_name = (name[:10] if name[7:9].isdigit() else name[:9] + ) + "bn" + name[-1] + self._bn = BatchNorm( + num_channels=output_channels, + act=act, + param_attr=ParamAttr(name=bn_name + ".weight"), + bias_attr=ParamAttr(name=bn_name + ".bias"), + moving_mean_name=bn_name + ".running_mean", + moving_variance_name=bn_name + ".running_var") + + def forward(self, inputs): + x = self._conv(inputs) + x = self._bn(x) + return x + + +class ShortCut(nn.Layer): + def __init__(self, input_channels, output_channels, stride, name=None): + super(ShortCut, self).__init__() + + self.input_channels = input_channels + self.output_channels = output_channels + self.stride = stride + if input_channels != output_channels or stride != 1: + self._conv = ConvBNLayer( + input_channels, + output_channels, + filter_size=1, + stride=stride, + name=name) + + def forward(self, inputs): + if self.input_channels != self.output_channels or self.stride != 1: + return self._conv(inputs) + return inputs + + +class BottleneckBlock(nn.Layer): + def __init__(self, input_channels, output_channels, stride, cardinality, + width, name): + super(BottleneckBlock, self).__init__() + + self._conv0 = ConvBNLayer( + input_channels, + output_channels, + filter_size=1, + act="relu", + name=name + ".conv1") + self._conv1 = ConvBNLayer( + output_channels, + output_channels, + filter_size=3, + act="relu", + stride=stride, + groups=cardinality, + name=name + ".conv2") + self._conv2 = ConvBNLayer( + output_channels, + output_channels // (width // 8), + filter_size=1, + act=None, + name=name + ".conv3") + self._short = ShortCut( + input_channels, + output_channels // (width // 8), + stride=stride, + name=name + ".downsample") + + def forward(self, inputs): + x = self._conv0(inputs) + x = self._conv1(x) + x = self._conv2(x) + y = self._short(inputs) + y = paddle.add(x, y) + y = F.relu(y) + return y + + +class ResNeXt101WSL(nn.Layer): + def __init__(self, layers=101, cardinality=32, width=48, class_num=1000): + super(ResNeXt101WSL, self).__init__() + + self.class_num = class_num + + self.layers = layers + self.cardinality = cardinality + self.width = width + self.scale = width // 8 + + self.depth = [3, 4, 23, 3] + self.base_width = cardinality * width + num_filters = [self.base_width * i + for i in [1, 2, 4, 8]] # [256, 512, 1024, 2048] + self._conv_stem = ConvBNLayer( + 3, 64, 7, stride=2, act="relu", name="conv1") + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self._conv1_0 = BottleneckBlock( + 64, + num_filters[0], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer1.0") + self._conv1_1 = BottleneckBlock( + num_filters[0] // (width // 8), + num_filters[0], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer1.1") + self._conv1_2 = BottleneckBlock( + num_filters[0] // (width // 8), + num_filters[0], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer1.2") + + self._conv2_0 = BottleneckBlock( + num_filters[0] // (width // 8), + num_filters[1], + stride=2, + cardinality=self.cardinality, + width=self.width, + name="layer2.0") + self._conv2_1 = 
BottleneckBlock( + num_filters[1] // (width // 8), + num_filters[1], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer2.1") + self._conv2_2 = BottleneckBlock( + num_filters[1] // (width // 8), + num_filters[1], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer2.2") + self._conv2_3 = BottleneckBlock( + num_filters[1] // (width // 8), + num_filters[1], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer2.3") + + self._conv3_0 = BottleneckBlock( + num_filters[1] // (width // 8), + num_filters[2], + stride=2, + cardinality=self.cardinality, + width=self.width, + name="layer3.0") + self._conv3_1 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.1") + self._conv3_2 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.2") + self._conv3_3 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.3") + self._conv3_4 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.4") + self._conv3_5 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.5") + self._conv3_6 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.6") + self._conv3_7 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.7") + self._conv3_8 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.8") + self._conv3_9 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.9") + self._conv3_10 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.10") + self._conv3_11 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.11") + self._conv3_12 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.12") + self._conv3_13 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.13") + self._conv3_14 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.14") + self._conv3_15 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.15") + self._conv3_16 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.16") + self._conv3_17 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + 
name="layer3.17") + self._conv3_18 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.18") + self._conv3_19 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.19") + self._conv3_20 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.20") + self._conv3_21 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.21") + self._conv3_22 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[2], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer3.22") + + self._conv4_0 = BottleneckBlock( + num_filters[2] // (width // 8), + num_filters[3], + stride=2, + cardinality=self.cardinality, + width=self.width, + name="layer4.0") + self._conv4_1 = BottleneckBlock( + num_filters[3] // (width // 8), + num_filters[3], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer4.1") + self._conv4_2 = BottleneckBlock( + num_filters[3] // (width // 8), + num_filters[3], + stride=1, + cardinality=self.cardinality, + width=self.width, + name="layer4.2") + + self._avg_pool = AdaptiveAvgPool2D(1) + self._out = Linear( + num_filters[3] // (width // 8), + class_num, + weight_attr=ParamAttr(name="fc.weight"), + bias_attr=ParamAttr(name="fc.bias")) + + def forward(self, inputs): + x = self._conv_stem(inputs) + x = self._pool(x) + + x = self._conv1_0(x) + x = self._conv1_1(x) + x = self._conv1_2(x) + + x = self._conv2_0(x) + x = self._conv2_1(x) + x = self._conv2_2(x) + x = self._conv2_3(x) + + x = self._conv3_0(x) + x = self._conv3_1(x) + x = self._conv3_2(x) + x = self._conv3_3(x) + x = self._conv3_4(x) + x = self._conv3_5(x) + x = self._conv3_6(x) + x = self._conv3_7(x) + x = self._conv3_8(x) + x = self._conv3_9(x) + x = self._conv3_10(x) + x = self._conv3_11(x) + x = self._conv3_12(x) + x = self._conv3_13(x) + x = self._conv3_14(x) + x = self._conv3_15(x) + x = self._conv3_16(x) + x = self._conv3_17(x) + x = self._conv3_18(x) + x = self._conv3_19(x) + x = self._conv3_20(x) + x = self._conv3_21(x) + x = self._conv3_22(x) + + x = self._conv4_0(x) + x = self._conv4_1(x) + x = self._conv4_2(x) + + x = self._avg_pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._out(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt101WSL(cardinality=32, width=8, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x8d_wsl"], + use_ssld=use_ssld) + return model + + +def ResNeXt101_32x16d_wsl(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt101WSL(cardinality=32, width=16, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x16d_wsl"], + use_ssld=use_ssld) + return model + + +def ResNeXt101_32x32d_wsl(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt101WSL(cardinality=32, width=32, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x32d_wsl"], + use_ssld=use_ssld) + return model + + +def ResNeXt101_32x48d_wsl(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt101WSL(cardinality=32, width=48, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_32x48d_wsl"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext_vd.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext_vd.py new file mode 100644 index 0000000..b2bd484 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/resnext_vd.py @@ -0,0 +1,317 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
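The `_load_pretrained` helper repeated across these files dispatches on the type of `pretrained`: `False` skips loading, `True` downloads the weights registered in `MODEL_URLS`, and a string is treated as a local checkpoint path; anything else raises. A minimal sketch of the three cases (the local path is a hypothetical placeholder):

    from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl

    m = ResNeXt101_32x8d_wsl(pretrained=False)             # random initialization
    m = ResNeXt101_32x8d_wsl(pretrained=True)              # fetch released weights by URL
    m = ResNeXt101_32x8d_wsl(pretrained="local.pdparams")  # load a local .pdparams file (placeholder path)
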
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ResNeXt50_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams", + "ResNeXt50_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams", + "ResNeXt101_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams", + "ResNeXt101_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams", + "ResNeXt152_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams", + "ResNeXt152_vd_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = 
paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class ResNeXt(nn.Layer): + def __init__(self, layers=50, class_num=1000, cardinality=32): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=32, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=64, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld) + return model + + +def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=101, cardinality=32, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_vd_32x4d"], + use_ssld=use_ssld) + return model + + +def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=101, cardinality=64, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt101_vd_64x4d"], + use_ssld=use_ssld) + return model + + +def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=32, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt152_vd_32x4d"], + use_ssld=use_ssld) + return model + + +def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=64, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ResNeXt152_vd_64x4d"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/rexnet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/rexnet.py new file mode 100644 index 0000000..1556a01 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/rexnet.py @@ -0,0 +1,281 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
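The `is_vd_mode` branch in the vd variants above swaps the strided 1x1 shortcut conv for an AvgPool2D followed by a stride-1 1x1 conv, so the projection averages over every input pixel instead of sampling one in four. A shape-only sketch of the two downsampling paths (layer construction here is illustrative, not taken from this file):

    import paddle
    import paddle.nn as nn

    x = paddle.randn([1, 64, 56, 56])
    plain = nn.Conv2D(64, 256, kernel_size=1, stride=2)         # reads 1 of every 4 positions
    vd = nn.Sequential(
        nn.AvgPool2D(kernel_size=2, stride=2, ceil_mode=True),  # average first ...
        nn.Conv2D(64, 256, kernel_size=1, stride=1))            # ... then project
    print(plain(x).shape, vd(x).shape)                          # both [1, 256, 28, 28]
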
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from math import ceil + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ReXNet_1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams", + "ReXNet_1_3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams", + "ReXNet_1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams", + "ReXNet_2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams", + "ReXNet_3_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +def conv_bn_act(out, + in_channels, + channels, + kernel=1, + stride=1, + pad=0, + num_group=1, + active=True, + relu6=False): + out.append( + nn.Conv2D( + in_channels, + channels, + kernel, + stride, + pad, + groups=num_group, + bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, + in_channels, + channels, + kernel=1, + stride=1, + pad=0, + num_group=1): + out.append( + nn.Conv2D( + in_channels, + channels, + kernel, + stride, + pad, + groups=num_group, + bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D( + in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), + nn.ReLU(), + nn.Conv2D( + channels // se_ratio, channels, kernel_size=1, padding=0), + nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, + in_channels, + channels, + t, + stride, + use_se=True, + se_ratio=12, + **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act( + out, + in_channels=dw_channels, + channels=channels, + active=False, + relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +class ReXNetV1(nn.Layer): + def __init__(self, + input_ch=16, + final_ch=180, + width_mult=1.0, + depth_mult=1.0, + class_num=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + super(ReXNetV1, self).__init__() + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) + for idx, element in enumerate(strides)], 
[]) + if use_se: + use_ses = sum([[element] * layers[idx] + for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish( + features, + 3, + int(round(stem_channel * width_mult)), + kernel=3, + stride=2, + pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate( + zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append( + LinearBottleneck( + in_channels=in_c, + channels=c, + t=t, + stride=s, + use_se=se, + se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential( + nn.Dropout(dropout_ratio), + nn.Conv2D( + pen_channels, class_num, 1, bias_attr=True)) + + def forward(self, x): + x = self.features(x) + x = self.output(x).squeeze(axis=-1).squeeze(axis=-1) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs): + model = ReXNetV1(width_mult=1.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld) + return model + + +def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs): + model = ReXNetV1(width_mult=1.3, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld) + return model + + +def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs): + model = ReXNetV1(width_mult=1.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld) + return model + + +def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs): + model = ReXNetV1(width_mult=2.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld) + return model + + +def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs): + model = ReXNetV1(width_mult=3.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnet_vd.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnet_vd.py new file mode 100644 index 0000000..205feec --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnet_vd.py @@ -0,0 +1,390 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "SE_ResNet18_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams", + "SE_ResNet34_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams", + "SE_ResNet50_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + reduction_ratio=16, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + self.scale = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=reduction_ratio, + name='fc_' + name) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + scale = self.scale(conv2) 
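+ # `scale` is conv2 recalibrated channel-wise by the SE branch; it joins the (optionally projected) shortcut below.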
+ + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=scale) + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + reduction_ratio=16, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + self.scale = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=reduction_ratio, + name='fc_' + name) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + scale = self.scale(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=scale) + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = paddle.squeeze(pool, axis=[2, 3]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.unsqueeze(excitation, axis=[2, 3]) + out = input * excitation + return out + + +class SE_ResNet_vd(nn.Layer): + def __init__(self, layers=50, class_num=1000): + super(SE_ResNet_vd, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, + 1024] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + if layers >= 50: + for block in 
range(len(depth)): + shortcut = False + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + else: + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc6_weights"), + bias_attr=ParamAttr(name="fc6_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs): + model = SE_ResNet_vd(layers=18, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld) + return model + + +def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs): + model = SE_ResNet_vd(layers=34, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld) + return model + + +def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs): + model = SE_ResNet_vd(layers=50, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext.py new file mode 100644 index 0000000..8b7149e --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext.py @@ -0,0 +1,364 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "SE_ResNeXt50_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams", + "SE_ResNeXt101_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams", + "SE_ResNeXt152_64x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, + data_format='NCHW'): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + data_format=data_format) + bn_name = name + '_bn' + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + data_layout=data_format) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + reduction_ratio, + shortcut=True, + if_first=False, + name=None, + data_format="NCHW"): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name='conv' + name + '_x1', + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name='conv' + name + '_x2', + data_format=data_format) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name='conv' + name + '_x3', + data_format=data_format) + self.scale = SELayer( + num_channels=num_filters * 2 if cardinality == 32 else num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + reduction_ratio=reduction_ratio, + name='fc' + name, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name='conv' + name + '_prj', + data_format=data_format) 
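+        # `shortcut=True` means the input and output shapes already match and
+        # the identity can be added directly; otherwise `self.short` projects
+        # the input with a strided 1x1 conv so the residual add is well-defined.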
+ + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + scale = self.scale(conv2) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=scale) + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + reduction_ratio, + name=None, + data_format="NCHW"): + super(SELayer, self).__init__() + + self.data_format = data_format + self.pool2d_gap = AdaptiveAvgPool2D(1, data_format=self.data_format) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + self.relu = nn.ReLU() + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + self.sigmoid = nn.Sigmoid() + + def forward(self, input): + pool = self.pool2d_gap(input) + if self.data_format == "NHWC": + pool = paddle.squeeze(pool, axis=[1, 2]) + else: + pool = paddle.squeeze(pool, axis=[2, 3]) + squeeze = self.squeeze(pool) + squeeze = self.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = self.sigmoid(excitation) + if self.data_format == "NHWC": + excitation = paddle.unsqueeze(excitation, axis=[1, 2]) + else: + excitation = paddle.unsqueeze(excitation, axis=[2, 3]) + out = input * excitation + return out + + +class ResNeXt(nn.Layer): + def __init__(self, + layers=50, + class_num=1000, + cardinality=32, + input_image_channel=3, + data_format="NCHW"): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + self.reduction_ratio = 16 + self.data_format = data_format + self.input_image_channel = input_image_channel + + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + if layers < 152: + self.conv = ConvBNLayer( + num_channels=self.input_image_channel, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1", + data_format=self.data_format) + else: + self.conv1_1 = ConvBNLayer( + num_channels=self.input_image_channel, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="conv1", + data_format=self.data_format) + self.conv1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv2", + data_format=self.data_format) + self.conv1_3 = ConvBNLayer( + num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name="conv3", + data_format=self.data_format) + + self.pool2d_max = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=self.data_format) + + self.block_list = [] + n = 1 if layers == 50 or layers == 101 else 
3 + for block in range(len(depth)): + n += 1 + shortcut = False + for i in range(depth[block]): + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + reduction_ratio=self.reduction_ratio, + shortcut=shortcut, + if_first=block == 0, + name=str(n) + '_' + str(i + 1), + data_format=self.data_format)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc6_weights"), + bias_attr=ParamAttr(name="fc6_offset")) + + def forward(self, inputs): + with paddle.static.amp.fp16_guard(): + if self.data_format == "NHWC": + inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1]) + inputs.stop_gradient = True + if self.layers < 152: + y = self.conv(inputs) + else: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for i, block in enumerate(self.block_list): + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=32, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld) + return model + + +def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=101, cardinality=32, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt101_32x4d"], + use_ssld=use_ssld) + return model + + +def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=64, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt152_64x4d"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext_vd.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext_vd.py new file mode 100644 index 0000000..ef63025 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext_vd.py @@ -0,0 +1,309 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
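+# A minimal usage sketch (illustrative only; the 224x224 input below is the
+# conventional ImageNet resolution, an assumption rather than a constraint
+# enforced by this file):
+#
+#     import paddle
+#     model = SE_ResNeXt50_vd_32x4d(pretrained=False, class_num=1000)
+#     x = paddle.rand([1, 3, 224, 224])
+#     logits = model(x)  # expected shape: [1, 1000]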
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform + +import math + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "SE_ResNeXt50_vd_32x4d": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams", + "SENet154_vd": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + '_bn' + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + cardinality, + reduction_ratio, + shortcut=True, + if_first=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name='conv' + name + '_x1') + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name='conv' + name + '_x2') + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name='conv' + name + '_x3') + self.scale = SELayer( + num_channels=num_filters * 2 if cardinality == 32 else num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + reduction_ratio=reduction_ratio, + name='fc' + name) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 + if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name='conv' + name + '_prj') + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + scale = self.scale(conv2) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=scale) + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels 
/ reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + self.relu = nn.ReLU() + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + self.sigmoid = nn.Sigmoid() + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = paddle.squeeze(pool, axis=[2, 3]) + squeeze = self.squeeze(pool) + squeeze = self.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = self.sigmoid(excitation) + excitation = paddle.unsqueeze(excitation, axis=[2, 3]) + out = paddle.multiply(input, excitation) + return out + + +class ResNeXt(nn.Layer): + def __init__(self, layers=50, class_num=1000, cardinality=32): + super(ResNeXt, self).__init__() + + self.layers = layers + self.cardinality = cardinality + self.reduction_ratio = 16 + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + supported_cardinality = [32, 64] + assert cardinality in supported_cardinality, \ + "supported cardinality is {} but input cardinality is {}" \ + .format(supported_cardinality, cardinality) + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_channels = [128, 256, 512, 1024] + num_filters = [128, 256, 512, + 1024] if cardinality == 32 else [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + + self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + n = 1 if layers == 50 or layers == 101 else 3 + for block in range(len(depth)): + n += 1 + shortcut = False + for i in range(depth[block]): + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else + num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + reduction_ratio=self.reduction_ratio, + shortcut=shortcut, + if_first=block == 0, + name=str(n) + '_' + str(i + 1))) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2D(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_num, + weight_attr=ParamAttr( + initializer=Uniform(-stdv, stdv), name="fc6_weights"), + bias_attr=ParamAttr(name="fc6_offset")) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is 
False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def SE_ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=50, cardinality=32, **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["SE_ResNeXt50_vd_32x4d"], + use_ssld=use_ssld) + return model + + +def SENet154_vd(pretrained=False, use_ssld=False, **kwargs): + model = ResNeXt(layers=152, cardinality=64, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SENet154_vd"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/shufflenet_v2.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/shufflenet_v2.py new file mode 100644 index 0000000..d8bb69f --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/shufflenet_v2.py @@ -0,0 +1,362 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +from paddle import ParamAttr, reshape, transpose, concat, split +from paddle.nn import Layer, Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm, Linear +from paddle.nn.initializer import KaimingNormal +from paddle.nn.functional import swish + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ShuffleNetV2_x0_25": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams", + "ShuffleNetV2_x0_33": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams", + "ShuffleNetV2_x0_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams", + "ShuffleNetV2_x1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams", + "ShuffleNetV2_x1_5": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams", + "ShuffleNetV2_x2_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams", + "ShuffleNetV2_swish": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +def channel_shuffle(x, groups): + batch_size, num_channels, height, width = x.shape[0:4] + channels_per_group = num_channels // groups + + # reshape + x = reshape( + x=x, shape=[batch_size, groups, channels_per_group, height, width]) + + # transpose + x = transpose(x=x, perm=[0, 2, 1, 3, 4]) + + # flatten + x = reshape(x=x, shape=[batch_size, num_channels, height, width]) + return x + + +class ConvBNLayer(Layer): + def __init__( + self, + in_channels, + out_channels, + 
kernel_size, + stride, + padding, + groups=1, + act=None, + name=None, ): + super(ConvBNLayer, self).__init__() + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr( + initializer=KaimingNormal(), name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + out_channels, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + act=act, + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class InvertedResidual(Layer): + def __init__(self, + in_channels, + out_channels, + stride, + act="relu", + name=None): + super(InvertedResidual, self).__init__() + self._conv_pw = ConvBNLayer( + in_channels=in_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + act=None, + name='stage_' + name + '_conv2') + self._conv_linear = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv3') + + def forward(self, inputs): + x1, x2 = split( + inputs, + num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], + axis=1) + x2 = self._conv_pw(x2) + x2 = self._conv_dw(x2) + x2 = self._conv_linear(x2) + out = concat([x1, x2], axis=1) + return channel_shuffle(out, 2) + + +class InvertedResidualDS(Layer): + def __init__(self, + in_channels, + out_channels, + stride, + act="relu", + name=None): + super(InvertedResidualDS, self).__init__() + + # branch1 + self._conv_dw_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=in_channels, + act=None, + name='stage_' + name + '_conv4') + self._conv_linear_1 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv5') + # branch2 + self._conv_pw_2 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + act=None, + name='stage_' + name + '_conv2') + self._conv_linear_2 = ConvBNLayer( + in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + act=act, + name='stage_' + name + '_conv3') + + def forward(self, inputs): + x1 = self._conv_dw_1(inputs) + x1 = self._conv_linear_1(x1) + x2 = self._conv_pw_2(inputs) + x2 = self._conv_dw_2(x2) + x2 = self._conv_linear_2(x2) + out = concat([x1, x2], axis=1) + + return channel_shuffle(out, 2) + + +class ShuffleNet(Layer): + def __init__(self, class_num=1000, scale=1.0, act="relu"): + super(ShuffleNet, self).__init__() + self.scale = scale + self.class_num = class_num + stage_repeats = [4, 8, 4] + + if scale == 0.25: + stage_out_channels = [-1, 24, 24, 48, 96, 512] + elif scale == 0.33: + 
stage_out_channels = [-1, 24, 32, 64, 128, 512] + elif scale == 0.5: + stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif scale == 1.0: + stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif scale == 1.5: + stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif scale == 2.0: + stage_out_channels = [-1, 24, 224, 488, 976, 2048] + else: + raise NotImplementedError("This scale size:[" + str(scale) + + "] is not implemented!") + # 1. conv1 + self._conv1 = ConvBNLayer( + in_channels=3, + out_channels=stage_out_channels[1], + kernel_size=3, + stride=2, + padding=1, + act=act, + name='stage1_conv') + self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + # 2. bottleneck sequences + self._block_list = [] + for stage_id, num_repeat in enumerate(stage_repeats): + for i in range(num_repeat): + if i == 0: + block = self.add_sublayer( + name=str(stage_id + 2) + '_' + str(i + 1), + sublayer=InvertedResidualDS( + in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2], + stride=2, + act=act, + name=str(stage_id + 2) + '_' + str(i + 1))) + else: + block = self.add_sublayer( + name=str(stage_id + 2) + '_' + str(i + 1), + sublayer=InvertedResidual( + in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2], + stride=1, + act=act, + name=str(stage_id + 2) + '_' + str(i + 1))) + self._block_list.append(block) + # 3. last_conv + self._last_conv = ConvBNLayer( + in_channels=stage_out_channels[-2], + out_channels=stage_out_channels[-1], + kernel_size=1, + stride=1, + padding=0, + act=act, + name='conv5') + # 4. pool + self._pool2d_avg = AdaptiveAvgPool2D(1) + self._out_c = stage_out_channels[-1] + # 5. fc + self._fc = Linear( + stage_out_channels[-1], + class_num, + weight_attr=ParamAttr(name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + + def forward(self, inputs): + y = self._conv1(inputs) + y = self._max_pool(y) + for inv in self._block_list: + y = inv(y) + y = self._last_conv(y) + y = self._pool2d_avg(y) + y = paddle.flatten(y, start_axis=1, stop_axis=-1) + y = self._fc(y) + return y + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." 
+ ) + + +def ShuffleNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=0.25, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_25"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_x0_33(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=0.33, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_33"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=0.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x0_5"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_x1_0(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=1.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x1_0"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=1.5, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x1_5"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=2.0, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_x2_0"], use_ssld=use_ssld) + return model + + +def ShuffleNetV2_swish(pretrained=False, use_ssld=False, **kwargs): + model = ShuffleNet(scale=1.0, act="swish", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ShuffleNetV2_swish"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/squeezenet.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/squeezenet.py new file mode 100644 index 0000000..647cd2e --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/squeezenet.py @@ -0,0 +1,194 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
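+# A minimal usage sketch (illustrative only; input size and batch size are
+# assumptions, not requirements enforced by this file):
+#
+#     import paddle
+#     model = SqueezeNet1_1(pretrained=False, class_num=1000)
+#     x = paddle.rand([4, 3, 224, 224])
+#     logits = model(x)  # expected shape: [4, 1000]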
+ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "SqueezeNet1_0": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams", + "SqueezeNet1_1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class MakeFireConv(nn.Layer): + def __init__(self, + input_channels, + output_channels, + filter_size, + padding=0, + name=None): + super(MakeFireConv, self).__init__() + self._conv = Conv2D( + input_channels, + output_channels, + filter_size, + padding=padding, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + "_offset")) + + def forward(self, x): + x = self._conv(x) + x = F.relu(x) + return x + + +class MakeFire(nn.Layer): + def __init__(self, + input_channels, + squeeze_channels, + expand1x1_channels, + expand3x3_channels, + name=None): + super(MakeFire, self).__init__() + self._conv = MakeFireConv( + input_channels, squeeze_channels, 1, name=name + "_squeeze1x1") + self._conv_path1 = MakeFireConv( + squeeze_channels, expand1x1_channels, 1, name=name + "_expand1x1") + self._conv_path2 = MakeFireConv( + squeeze_channels, + expand3x3_channels, + 3, + padding=1, + name=name + "_expand3x3") + + def forward(self, inputs): + x = self._conv(inputs) + x1 = self._conv_path1(x) + x2 = self._conv_path2(x) + return paddle.concat([x1, x2], axis=1) + + +class SqueezeNet(nn.Layer): + def __init__(self, version, class_num=1000): + super(SqueezeNet, self).__init__() + self.version = version + + if self.version == "1.0": + self._conv = Conv2D( + 3, + 96, + 7, + stride=2, + weight_attr=ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name="conv1_offset")) + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self._conv1 = MakeFire(96, 16, 64, 64, name="fire2") + self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") + self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") + + self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") + self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") + self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") + self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") + + self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") + else: + self._conv = Conv2D( + 3, + 64, + 3, + stride=2, + padding=1, + weight_attr=ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name="conv1_offset")) + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) + self._conv1 = MakeFire(64, 16, 64, 64, name="fire2") + self._conv2 = MakeFire(128, 16, 64, 64, name="fire3") + + self._conv3 = MakeFire(128, 32, 128, 128, name="fire4") + self._conv4 = MakeFire(256, 32, 128, 128, name="fire5") + + self._conv5 = MakeFire(256, 48, 192, 192, name="fire6") + self._conv6 = MakeFire(384, 48, 192, 192, name="fire7") + self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") + self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") + + self._drop = Dropout(p=0.5, mode="downscale_in_infer") + self._conv9 = Conv2D( + 512, + class_num, + 1, + weight_attr=ParamAttr(name="conv10_weights"), + bias_attr=ParamAttr(name="conv10_offset")) + self._avg_pool = AdaptiveAvgPool2D(1) + + def forward(self, inputs): + x = self._conv(inputs) + x = F.relu(x) + x = 
self._pool(x) + if self.version == "1.0": + x = self._conv1(x) + x = self._conv2(x) + x = self._conv3(x) + x = self._pool(x) + x = self._conv4(x) + x = self._conv5(x) + x = self._conv6(x) + x = self._conv7(x) + x = self._pool(x) + x = self._conv8(x) + else: + x = self._conv1(x) + x = self._conv2(x) + x = self._pool(x) + x = self._conv3(x) + x = self._conv4(x) + x = self._pool(x) + x = self._conv5(x) + x = self._conv6(x) + x = self._conv7(x) + x = self._conv8(x) + x = self._drop(x) + x = self._conv9(x) + x = F.relu(x) + x = self._avg_pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def SqueezeNet1_0(pretrained=False, use_ssld=False, **kwargs): + model = SqueezeNet(version="1.0", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SqueezeNet1_0"], use_ssld=use_ssld) + return model + + +def SqueezeNet1_1(pretrained=False, use_ssld=False, **kwargs): + model = SqueezeNet(version="1.1", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["SqueezeNet1_1"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/swin_transformer.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/swin_transformer.py new file mode 100644 index 0000000..c783ec6 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/swin_transformer.py @@ -0,0 +1,857 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
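+# Shape sanity sketch for the windowing helpers defined below (illustrative
+# values only): window_partition and window_reverse are inverse operations.
+#
+#     import paddle
+#     x = paddle.rand([2, 56, 56, 96])         # (B, H, W, C)
+#     wins = window_partition(x, 7)            # (2*8*8, 7, 7, 96)
+#     y = window_reverse(wins, 7, 56, 56, 96)  # restores (2, 56, 56, 96)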
+ +# Code was based on https://github.com/microsoft/Swin-Transformer + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import TruncatedNormal, Constant + +from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "SwinTransformer_tiny_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_small_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_base_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams", + "SwinTransformer_base_patch4_window12_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams", + "SwinTransformer_large_patch4_window7_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams", + "SwinTransformer_large_patch4_window12_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + + +class Mlp(nn.Layer): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.reshape( + [B, H // window_size, window_size, W // window_size, window_size, C]) + windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape( + [-1, window_size, window_size, C]) + return windows + + +def window_reverse(windows, window_size, H, W, C): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + + Returns: + x: (B, H, W, C) + """ + x = windows.reshape( + [-1, H // window_size, W // window_size, window_size, window_size, C]) + x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C]) + return x + + +class WindowAttention(nn.Layer): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0., + proj_drop=0.): + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + # 2*Wh-1 * 2*Ww-1, nH + self.relative_position_bias_table = self.create_parameter( + shape=((2 * window_size[0] - 1) * (2 * window_size[1] - 1), + num_heads), + default_initializer=zeros_) + self.add_parameter("relative_position_bias_table", + self.relative_position_bias_table) + + # get pair-wise relative position index for each token inside the window + coords_h = paddle.arange(self.window_size[0]) + coords_w = paddle.arange(self.window_size[1]) + coords = paddle.stack(paddle.meshgrid( + [coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = paddle.flatten(coords, 1) # 2, Wh*Ww + + coords_flatten_1 = coords_flatten.unsqueeze(axis=2) + coords_flatten_2 = coords_flatten.unsqueeze(axis=1) + relative_coords = coords_flatten_1 - coords_flatten_2 + + relative_coords = relative_coords.transpose( + [1, 2, 0]) # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[ + 0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", + relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table) + self.softmax = nn.Softmax(axis=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape( + [B_, N, 3, self.num_heads, C // self.num_heads]).transpose( + [2, 0, 3, 1, 4]) + q, k, v = qkv[0], qkv[1], qkv[2] + + q = q * self.scale + attn = paddle.mm(q, k.transpose([0, 1, 3, 2])) + + index = self.relative_position_index.reshape([-1]) + + relative_position_bias = paddle.index_select( + self.relative_position_bias_table, index) + relative_position_bias = relative_position_bias.reshape([ + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], -1 + ]) # Wh*Ww,Wh*Ww,nH + + relative_position_bias = relative_position_bias.transpose( + [2, 0, 1]) # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.reshape([B_ // nW, nW, self.num_heads, N, N + ]) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.reshape([-1, self.num_heads, N, N]) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + # x = (attn @ v).transpose(1, 2).reshape([B_, N, C]) + x = paddle.mm(attn, v).transpose([0, 2, 1, 3]).reshape([B_, N, C]) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self): + return "dim={}, window_size={}, num_heads={}".format( + self.dim, self.window_size, self.num_heads) + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // 
self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Layer): + r""" Swin Transformer Block. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop) + + if self.shift_size > 0: + # calculate attention mask for SW-MSA + H, W = self.input_resolution + img_mask = paddle.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.reshape( + [-1, self.window_size * self.window_size]) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + + huns = -100.0 * paddle.ones_like(attn_mask) + attn_mask = huns * (attn_mask != 0).astype("float32") + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def forward(self, x): + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.reshape([B, H, W, C]) + + # cyclic shift + if self.shift_size > 0: + shifted_x = paddle.roll( + x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.reshape( + [-1, self.window_size * self.window_size, + C]) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn( + x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.reshape( + [-1, self.window_size, self.window_size, C]) + shifted_x = window_reverse(attn_windows, self.window_size, H, W, + C) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = paddle.roll( + shifted_x, + shifts=(self.shift_size, self.shift_size), + axis=(1, 2)) + else: + x = shifted_x + x = x.reshape([B, H * W, C]) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self): + return "dim={}, input_resolution={}, num_heads={}, window_size={}, shift_size={}, mlp_ratio={}".format( + self.dim, self.input_resolution, self.num_heads, self.window_size, + self.shift_size, self.mlp_ratio) + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Layer): + r""" Patch Merging Layer. + + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Layer, optional): Normalization layer. 
Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias_attr=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, "x size ({}*{}) are not even.".format( + H, W) + + x = x.reshape([B, H, W, C]) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = paddle.concat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.reshape([B, H * W // 4, 4 * C]) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self): + return "input_resolution={}, dim={}".format(self.input_resolution, + self.dim) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Layer): + """ A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 
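+
+        Note:
+            Blocks in this stage alternate between regular window attention
+            (shift_size=0 at even block indices) and shifted window attention
+            (shift_size=window_size // 2 at odd indices).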
+ """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.LayerList([ + SwinTransformerBlock( + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample( + input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x): + for blk in self.blocks: + x = blk(x) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self): + return "dim={}, input_resolution={}, depth={}".format( + self.dim, self.input_resolution, self.depth) + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class PatchEmbed(nn.Layer): + """ Image to Patch Embedding + + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. Default: None + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], img_size[1] // patch_size[1] + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.proj = nn.Conv2D( + in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + B, C, H, W = x.shape + # TODO (littletomatodonkey), uncomment the line will cause failure of jit.save + # assert [H, W] == self.img_size[:2], "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1]) + x = self.proj(x) + + x = x.flatten(2).transpose([0, 2, 1]) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + Ho, Wo = self.patches_resolution + flops = Ho * Wo * self.embed_dim * self.in_chans * ( + self.patch_size[0] * self.patch_size[1]) + if self.norm is not None: + flops += Ho * Wo * self.embed_dim + return flops + + +class SwinTransformer(nn.Layer): + """ Swin Transformer + A PaddlePaddle impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + + Args: + img_size (int | tuple(int)): Input image size. Default 224 + patch_size (int | tuple(int)): Patch size. 
Default: 4 + in_chans (int): Number of input image channels. Default: 3 + num_classes (int): Number of classes for classification head. Default: 1000 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + class_num=1000, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + **kwargs): + super(SwinTransformer, self).__init__() + + self.num_classes = num_classes = class_num + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = int(embed_dim * 2**(self.num_layers - 1)) + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=in_chans, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = self.create_parameter( + shape=(1, num_patches, embed_dim), default_initializer=zeros_) + self.add_parameter("absolute_pos_embed", self.absolute_pos_embed) + trunc_normal_(self.absolute_pos_embed) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = np.linspace(0, drop_path_rate, + sum(depths)).tolist() # stochastic depth decay rule + + # build layers + self.layers = nn.LayerList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2**i_layer), + input_resolution=(patches_resolution[0] // (2**i_layer), + patches_resolution[1] // (2**i_layer)), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging + if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint) + self.layers.append(layer) + + self.norm = norm_layer(self.num_features) + self.avgpool = nn.AdaptiveAvgPool1D(1) + self.head = nn.Linear( + self.num_features, + num_classes) if self.num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + 
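+        # Standard ViT/Swin weight init: truncated normal (std=0.02) for
+        # Linear weights, zeros for Linear and LayerNorm biases, ones for the
+        # LayerNorm scale; trunc_normal_, zeros_ and ones_ are the shared
+        # initializer instances used throughout this file.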
if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                zeros_(m.bias)
+        elif isinstance(m, nn.LayerNorm):
+            zeros_(m.bias)
+            ones_(m.weight)
+
+    def forward_features(self, x):
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+
+        for layer in self.layers:
+            x = layer(x)
+
+        x = self.norm(x)  # B L C
+        x = self.avgpool(x.transpose([0, 2, 1]))  # B C 1
+        x = paddle.flatten(x, 1)
+        return x
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+    def flops(self):
+        flops = 0
+        flops += self.patch_embed.flops()
+        for _, layer in enumerate(self.layers):
+            flops += layer.flops()
+        flops += self.num_features * self.patches_resolution[
+            0] * self.patches_resolution[1] // (2**self.num_layers)
+        flops += self.num_features * self.num_classes
+        return flops
+
+
+def _load_pretrained(pretrained, model, model_url, use_ssld=False):
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
+    elif isinstance(pretrained, str):
+        load_dygraph_pretrain(model, pretrained)
+    else:
+        raise RuntimeError(
+            "pretrained type is not available. Please use `string` or `boolean` type."
+        )
+
+
+def SwinTransformer_tiny_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
+    model = SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        drop_path_rate=0.2,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_small_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        embed_dim=96,
+        depths=[2, 2, 18, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_small_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_base_patch4_window7_224(pretrained=False,
+                                            use_ssld=False,
+                                            **kwargs):
+    model = SwinTransformer(
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=7,
+        drop_path_rate=0.5,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_base_patch4_window12_384(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        img_size=384,
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=12,
+        drop_path_rate=0.5,  # NOTE: does not appear in the official code
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_base_patch4_window12_384"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_large_patch4_window7_224(pretrained=False,
+                                             use_ssld=False,
+                                             **kwargs):
+    model = SwinTransformer(
+        embed_dim=192,
+        depths=[2, 2, 18, 2],
+        num_heads=[6, 12, 24, 48],
+        window_size=7,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window7_224"],
+        use_ssld=use_ssld)
+    return model
+
+
+def SwinTransformer_large_patch4_window12_384(pretrained=False,
+                                              use_ssld=False,
+                                              **kwargs):
+    model = SwinTransformer(
+        img_size=384,
+        embed_dim=192,
+        depths=[2, 2, 18, 2],
+        num_heads=[6, 12, 24, 48],
+        window_size=12,
+        **kwargs)
+    _load_pretrained(
+        pretrained,
+        model,
+        MODEL_URLS["SwinTransformer_large_patch4_window12_384"],
use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/tnt.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/tnt.py new file mode 100644 index 0000000..dcffcf4 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/tnt.py @@ -0,0 +1,386 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch + +import math +import numpy as np + +import paddle +import paddle.nn as nn + +from paddle.nn.initializer import TruncatedNormal, Constant + +from ppcls.arch.backbone.base.theseus_layer import Identity +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "TNT_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams" +} + +__all__ = MODEL_URLS.keys() + +trunc_normal_ = TruncatedNormal(std=.02) +zeros_ = Constant(value=0.) +ones_ = Constant(value=1.) + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... + """ + if drop_prob == 0. or not training: + return x + keep_prob = paddle.to_tensor(1 - drop_prob) + shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + random_tensor = paddle.add(keep_prob, paddle.rand(shape, dtype=x.dtype)) + random_tensor = paddle.floor(random_tensor) # binarize + output = x.divide(keep_prob) * random_tensor + return output + + +class DropPath(nn.Layer): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
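+
+    With keep probability p = 1 - drop_prob, drop_path above draws a
+    per-sample Bernoulli mask of shape (batch, 1, ..., 1), zeroes the whole
+    residual branch for dropped samples and rescales kept ones by 1 / p, so
+    the expectation is preserved: E[out] = p * (x / p) + (1 - p) * 0 = x.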
+ """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Mlp(nn.Layer): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Layer): + def __init__(self, + dim, + hidden_dim, + num_heads=8, + qkv_bias=False, + attn_drop=0., + proj_drop=0.): + super().__init__() + self.hidden_dim = hidden_dim + self.num_heads = num_heads + head_dim = hidden_dim // num_heads + self.head_dim = head_dim + self.scale = head_dim**-0.5 + + self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias) + self.v = nn.Linear(dim, dim, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qk = self.qk(x).reshape( + (B, N, 2, self.num_heads, self.head_dim)).transpose( + (2, 0, 3, 1, 4)) + + q, k = qk[0], qk[1] + v = self.v(x).reshape( + (B, N, self.num_heads, x.shape[-1] // self.num_heads)).transpose( + (0, 2, 1, 3)) + + attn = paddle.matmul(q, k.transpose((0, 1, 3, 2))) * self.scale + attn = nn.functional.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = paddle.matmul(attn, v) + x = x.transpose((0, 2, 1, 3)).reshape( + (B, N, x.shape[-1] * x.shape[-3])) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Layer): + def __init__(self, + dim, + in_dim, + num_pixel, + num_heads=12, + in_num_head=4, + mlp_ratio=4., + qkv_bias=False, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + # Inner transformer + self.norm_in = norm_layer(in_dim) + self.attn_in = Attention( + in_dim, + in_dim, + num_heads=in_num_head, + qkv_bias=qkv_bias, + attn_drop=attn_drop, + proj_drop=drop) + + self.norm_mlp_in = norm_layer(in_dim) + self.mlp_in = Mlp(in_features=in_dim, + hidden_features=int(in_dim * 4), + out_features=in_dim, + act_layer=act_layer, + drop=drop) + + self.norm1_proj = norm_layer(in_dim) + self.proj = nn.Linear(in_dim * num_pixel, dim) + # Outer transformer + self.norm_out = norm_layer(dim) + self.attn_out = Attention( + dim, + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() + + self.norm_mlp = norm_layer(dim) + self.mlp = Mlp(in_features=dim, + hidden_features=int(dim * mlp_ratio), + out_features=dim, + act_layer=act_layer, + drop=drop) + + def forward(self, pixel_embed, patch_embed): + # inner + pixel_embed = paddle.add( + pixel_embed, + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))) + pixel_embed = paddle.add( + pixel_embed, + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))) + # outer + B, N, C = patch_embed.shape + norm1_proj = self.norm1_proj(pixel_embed) + norm1_proj = norm1_proj.reshape( + (B, N - 1, norm1_proj.shape[1] * norm1_proj.shape[2])) + patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:], + self.proj(norm1_proj)) + patch_embed = paddle.add( + patch_embed, + self.drop_path(self.attn_out(self.norm_out(patch_embed)))) + patch_embed = paddle.add( + patch_embed, self.drop_path(self.mlp(self.norm_mlp(patch_embed)))) + return pixel_embed, patch_embed + + +class PixelEmbed(nn.Layer): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + in_dim=48, + stride=4): + super().__init__() + num_patches = (img_size // patch_size)**2 + self.img_size = img_size + self.num_patches = num_patches + self.in_dim = in_dim + new_patch_size = math.ceil(patch_size / stride) + self.new_patch_size = new_patch_size + + self.proj = nn.Conv2D( + in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride) + + def forward(self, x, pixel_pos): + B, C, H, W = x.shape + assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})." + + x = self.proj(x) + x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size) + x = x.transpose((0, 2, 1)).reshape( + (-1, self.in_dim, self.new_patch_size, self.new_patch_size)) + x = x + pixel_pos + x = x.reshape((-1, self.in_dim, self.new_patch_size * + self.new_patch_size)).transpose((0, 2, 1)) + return x + + +class TNT(nn.Layer): + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + embed_dim=768, + in_dim=48, + depth=12, + num_heads=12, + in_num_head=4, + mlp_ratio=4., + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer=nn.LayerNorm, + first_stride=4, + class_num=1000): + super().__init__() + self.class_num = class_num + # num_features for consistency with other models + self.num_features = self.embed_dim = embed_dim + + self.pixel_embed = PixelEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=in_chans, + in_dim=in_dim, + stride=first_stride) + num_patches = self.pixel_embed.num_patches + self.num_patches = num_patches + new_patch_size = self.pixel_embed.new_patch_size + num_pixel = new_patch_size**2 + + self.norm1_proj = norm_layer(num_pixel * in_dim) + self.proj = nn.Linear(num_pixel * in_dim, embed_dim) + self.norm2_proj = norm_layer(embed_dim) + + self.cls_token = self.create_parameter( + shape=(1, 1, embed_dim), default_initializer=zeros_) + self.add_parameter("cls_token", self.cls_token) + + self.patch_pos = self.create_parameter( + shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_) + self.add_parameter("patch_pos", self.patch_pos) + + self.pixel_pos = self.create_parameter( + shape=(1, in_dim, new_patch_size, new_patch_size), + default_initializer=zeros_) + self.add_parameter("pixel_pos", self.pixel_pos) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth decay rule + dpr = np.linspace(0, drop_path_rate, depth) + + blocks = [] + for i in range(depth): + blocks.append( + Block( + dim=embed_dim, + 
in_dim=in_dim, + num_pixel=num_pixel, + num_heads=num_heads, + in_num_head=in_num_head, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer)) + self.blocks = nn.LayerList(blocks) + self.norm = norm_layer(embed_dim) + + if class_num > 0: + self.head = nn.Linear(embed_dim, class_num) + + trunc_normal_(self.cls_token) + trunc_normal_(self.patch_pos) + trunc_normal_(self.pixel_pos) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + B = paddle.shape(x)[0] + pixel_embed = self.pixel_embed(x, self.pixel_pos) + + patch_embed = self.norm2_proj( + self.proj( + self.norm1_proj( + pixel_embed.reshape((-1, self.num_patches, pixel_embed. + shape[-1] * pixel_embed.shape[-2]))))) + patch_embed = paddle.concat( + (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1) + patch_embed = patch_embed + self.patch_pos + patch_embed = self.pos_drop(patch_embed) + + for blk in self.blocks: + pixel_embed, patch_embed = blk(pixel_embed, patch_embed) + + patch_embed = self.norm(patch_embed) + return patch_embed[:, 0] + + def forward(self, x): + x = self.forward_features(x) + + if self.class_num > 0: + x = self.head(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def TNT_small(pretrained=False, use_ssld=False, **kwargs): + model = TNT(patch_size=16, + embed_dim=384, + in_dim=24, + depth=12, + num_heads=6, + in_num_head=4, + qkv_bias=False, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["TNT_small"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/vision_transformer.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/vision_transformer.py new file mode 100644 index 0000000..c71c026 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/vision_transformer.py @@ -0,0 +1,458 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
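+
+# A minimal usage sketch of the factory functions defined at the bottom of
+# this file (pretrained=False keeps everything offline):
+#
+#     import paddle
+#     model = ViT_base_patch16_224(pretrained=False)
+#     x = paddle.randn([1, 3, 224, 224])
+#     logits = model(x)  # [1, 1000] with the default class_num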
+ +# Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py + +from collections.abc import Callable + +import numpy as np +import paddle +import paddle.nn as nn +from paddle.nn.initializer import TruncatedNormal, Constant, Normal + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "ViT_small_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_small_patch16_224_pretrained.pdparams", + "ViT_base_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_224_pretrained.pdparams", + "ViT_base_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_384_pretrained.pdparams", + "ViT_base_patch32_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch32_384_pretrained.pdparams", + "ViT_large_patch16_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_224_pretrained.pdparams", + "ViT_large_patch16_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams", + "ViT_large_patch32_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + +trunc_normal_ = TruncatedNormal(std=.02) +normal_ = Normal +zeros_ = Constant(value=0.) +ones_ = Constant(value=1.) + + +def to_2tuple(x): + return tuple([x] * 2) + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... + """ + if drop_prob == 0. or not training: + return x + keep_prob = paddle.to_tensor(1 - drop_prob) + shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) + random_tensor = paddle.floor(random_tensor) # binarize + output = x.divide(keep_prob) * random_tensor + return output + + +class DropPath(nn.Layer): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
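+
+    In the Block class below this wraps each residual branch, i.e.
+    x = x + drop_path(attn(norm1(x))); when a sample's branch is dropped its
+    skip connection is left intact, so the layer degenerates to the identity
+    for that sample.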
+ """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Identity(nn.Layer): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +class Mlp(nn.Layer): + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Layer): + def __init__(self, + dim, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop=0., + proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + # B= paddle.shape(x)[0] + N, C = x.shape[1:] + qkv = self.qkv(x).reshape((-1, N, 3, self.num_heads, C // + self.num_heads)).transpose((2, 0, 3, 1, 4)) + q, k, v = qkv[0], qkv[1], qkv[2] + + attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale + attn = nn.functional.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Layer): + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer='nn.LayerNorm', + epsilon=1e-5): + super().__init__() + if isinstance(norm_layer, str): + self.norm1 = eval(norm_layer)(dim, epsilon=epsilon) + elif isinstance(norm_layer, Callable): + self.norm1 = norm_layer(dim) + else: + raise TypeError( + "The norm_layer must be str or paddle.nn.layer.Layer class") + self.attn = Attention( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() + if isinstance(norm_layer, str): + self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) + elif isinstance(norm_layer, Callable): + self.norm2 = norm_layer(dim) + else: + raise TypeError( + "The norm_layer must be str or paddle.nn.layer.Layer class") + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop) + + def forward(self, x): + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Layer): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * \ + (img_size[0] // patch_size[0]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + + self.proj = nn.Conv2D( + in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x): + B, C, H, W = x.shape + assert H == self.img_size[0] and W == self.img_size[1], \ + f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." + + x = self.proj(x).flatten(2).transpose((0, 2, 1)) + return x + + +class VisionTransformer(nn.Layer): + """ Vision Transformer with support for patch input + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer='nn.LayerNorm', + epsilon=1e-5, + **kwargs): + super().__init__() + self.class_num = class_num + + self.num_features = self.embed_dim = embed_dim + + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=in_chans, + embed_dim=embed_dim) + num_patches = self.patch_embed.num_patches + + self.pos_embed = self.create_parameter( + shape=(1, num_patches + 1, embed_dim), default_initializer=zeros_) + self.add_parameter("pos_embed", self.pos_embed) + self.cls_token = self.create_parameter( + shape=(1, 1, embed_dim), default_initializer=zeros_) + self.add_parameter("cls_token", self.cls_token) + self.pos_drop = nn.Dropout(p=drop_rate) + + dpr = np.linspace(0, drop_path_rate, depth) + + self.blocks = nn.LayerList([ + Block( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer, + epsilon=epsilon) for i in range(depth) + ]) + + self.norm = eval(norm_layer)(embed_dim, epsilon=epsilon) + + # Classifier head + self.head = nn.Linear(embed_dim, + class_num) if class_num > 0 else Identity() + + trunc_normal_(self.pos_embed) + trunc_normal_(self.cls_token) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + # B = x.shape[0] + B = paddle.shape(x)[0] + x = self.patch_embed(x) + cls_tokens = self.cls_token.expand((B, -1, -1)) + x = paddle.concat((cls_tokens, x), axis=1) + x = x + self.pos_embed + x = self.pos_drop(x) + for blk in self.blocks: + x = blk(x) + x = self.norm(x) + return x[:, 0] + + def forward(self, x): + x = self.forward_features(x) + x 
= self.head(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ViT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=768, + depth=8, + num_heads=8, + mlp_ratio=3, + qk_scale=768**-0.5, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_small_patch16_224"], + use_ssld=use_ssld) + return model + + +def ViT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_base_patch16_224"], + use_ssld=use_ssld) + return model + + +def ViT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + img_size=384, + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_base_patch16_384"], + use_ssld=use_ssld) + return model + + +def ViT_base_patch32_384(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + img_size=384, + patch_size=32, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_base_patch32_384"], + use_ssld=use_ssld) + return model + + +def ViT_large_patch16_224(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + patch_size=16, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_large_patch16_224"], + use_ssld=use_ssld) + return model + + +def ViT_large_patch16_384(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + img_size=384, + patch_size=16, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_large_patch16_384"], + use_ssld=use_ssld) + return model + + +def ViT_large_patch32_384(pretrained=False, use_ssld=False, **kwargs): + model = VisionTransformer( + img_size=384, + patch_size=32, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + _load_pretrained( + pretrained, + model, + MODEL_URLS["ViT_large_patch32_384"], + use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception.py new file mode 100644 index 0000000..2b84378 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception.py @@ -0,0 +1,377 @@ +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D +from paddle.nn.initializer import Uniform +import math +import sys + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + 
"Xception41": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams", + "Xception65": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams", + "Xception71": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams" +} + +__all__ = list(MODEL_URLS.keys()) + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = "bn_" + name + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class SeparableConv(nn.Layer): + def __init__(self, input_channels, output_channels, stride=1, name=None): + super(SeparableConv, self).__init__() + + self._pointwise_conv = ConvBNLayer( + input_channels, output_channels, 1, name=name + "_sep") + self._depthwise_conv = ConvBNLayer( + output_channels, + output_channels, + 3, + stride=stride, + groups=output_channels, + name=name + "_dw") + + def forward(self, inputs): + x = self._pointwise_conv(inputs) + x = self._depthwise_conv(x) + return x + + +class EntryFlowBottleneckBlock(nn.Layer): + def __init__(self, + input_channels, + output_channels, + stride=2, + name=None, + relu_first=False): + super(EntryFlowBottleneckBlock, self).__init__() + self.relu_first = relu_first + + self._short = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=1, + stride=stride, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv1 = SeparableConv( + input_channels, + output_channels, + stride=1, + name=name + "_branch2a_weights") + self._conv2 = SeparableConv( + output_channels, + output_channels, + stride=1, + name=name + "_branch2b_weights") + self._pool = MaxPool2D(kernel_size=3, stride=stride, padding=1) + + def forward(self, inputs): + conv0 = inputs + short = self._short(inputs) + if self.relu_first: + conv0 = F.relu(conv0) + conv1 = self._conv1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv2(conv2) + pool = self._pool(conv2) + return paddle.add(x=short, y=pool) + + +class EntryFlow(nn.Layer): + def __init__(self, block_num=3): + super(EntryFlow, self).__init__() + + name = "entry_flow" + self.block_num = block_num + self._conv1 = ConvBNLayer( + 3, 32, 3, stride=2, act="relu", name=name + "_conv1") + self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") + if block_num == 3: + self._conv_0 = EntryFlowBottleneckBlock( + 64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock( + 128, 256, stride=2, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock( + 256, 728, stride=2, name=name + "_2", relu_first=True) + elif block_num == 5: + self._conv_0 = EntryFlowBottleneckBlock( + 64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock( + 128, 256, stride=1, name=name + "_1", relu_first=True) + self._conv_2 = 
EntryFlowBottleneckBlock( + 256, 256, stride=2, name=name + "_2", relu_first=True) + self._conv_3 = EntryFlowBottleneckBlock( + 256, 728, stride=1, name=name + "_3", relu_first=True) + self._conv_4 = EntryFlowBottleneckBlock( + 728, 728, stride=2, name=name + "_4", relu_first=True) + else: + sys.exit(-1) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + + if self.block_num == 3: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + elif self.block_num == 5: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + return x + + +class MiddleFlowBottleneckBlock(nn.Layer): + def __init__(self, input_channels, output_channels, name): + super(MiddleFlowBottleneckBlock, self).__init__() + + self._conv_0 = SeparableConv( + input_channels, + output_channels, + stride=1, + name=name + "_branch2a_weights") + self._conv_1 = SeparableConv( + output_channels, + output_channels, + stride=1, + name=name + "_branch2b_weights") + self._conv_2 = SeparableConv( + output_channels, + output_channels, + stride=1, + name=name + "_branch2c_weights") + + def forward(self, inputs): + conv0 = F.relu(inputs) + conv0 = self._conv_0(conv0) + conv1 = F.relu(conv0) + conv1 = self._conv_1(conv1) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + return paddle.add(x=inputs, y=conv2) + + +class MiddleFlow(nn.Layer): + def __init__(self, block_num=8): + super(MiddleFlow, self).__init__() + + self.block_num = block_num + self._conv_0 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_0") + self._conv_1 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_1") + self._conv_2 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_2") + self._conv_3 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_3") + self._conv_4 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_4") + self._conv_5 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_5") + self._conv_6 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_6") + self._conv_7 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_7") + if block_num == 16: + self._conv_8 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_8") + self._conv_9 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_9") + self._conv_10 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_10") + self._conv_11 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_11") + self._conv_12 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_12") + self._conv_13 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_13") + self._conv_14 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_14") + self._conv_15 = MiddleFlowBottleneckBlock( + 728, 728, name="middle_flow_15") + + def forward(self, inputs): + x = self._conv_0(inputs) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + x = self._conv_5(x) + x = self._conv_6(x) + x = self._conv_7(x) + if self.block_num == 16: + x = self._conv_8(x) + x = self._conv_9(x) + x = self._conv_10(x) + x = self._conv_11(x) + x = self._conv_12(x) + x = self._conv_13(x) + x = self._conv_14(x) + x = self._conv_15(x) + return x + + +class ExitFlowBottleneckBlock(nn.Layer): + def __init__(self, input_channels, output_channels1, output_channels2, + name): + super(ExitFlowBottleneckBlock, self).__init__() + + self._short = Conv2D( + in_channels=input_channels, + out_channels=output_channels2, + kernel_size=1, + stride=2, + padding=0, + 
weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv_1 = SeparableConv( + input_channels, + output_channels1, + stride=1, + name=name + "_branch2a_weights") + self._conv_2 = SeparableConv( + output_channels1, + output_channels2, + stride=1, + name=name + "_branch2b_weights") + self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + def forward(self, inputs): + short = self._short(inputs) + conv0 = F.relu(inputs) + conv1 = self._conv_1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + pool = self._pool(conv2) + return paddle.add(x=short, y=pool) + + +class ExitFlow(nn.Layer): + def __init__(self, class_num): + super(ExitFlow, self).__init__() + + name = "exit_flow" + + self._conv_0 = ExitFlowBottleneckBlock( + 728, 728, 1024, name=name + "_1") + self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") + self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") + self._pool = AdaptiveAvgPool2D(1) + stdv = 1.0 / math.sqrt(2048 * 1.0) + self._out = Linear( + 2048, + class_num, + weight_attr=ParamAttr( + name="fc_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs): + conv0 = self._conv_0(inputs) + conv1 = self._conv_1(conv0) + conv1 = F.relu(conv1) + conv2 = self._conv_2(conv1) + conv2 = F.relu(conv2) + pool = self._pool(conv2) + pool = paddle.flatten(pool, start_axis=1, stop_axis=-1) + out = self._out(pool) + return out + + +class Xception(nn.Layer): + def __init__(self, + entry_flow_block_num=3, + middle_flow_block_num=8, + class_num=1000): + super(Xception, self).__init__() + self.entry_flow_block_num = entry_flow_block_num + self.middle_flow_block_num = middle_flow_block_num + self._entry_flow = EntryFlow(entry_flow_block_num) + self._middle_flow = MiddleFlow(middle_flow_block_num) + self._exit_flow = ExitFlow(class_num) + + def forward(self, inputs): + x = self._entry_flow(inputs) + x = self._middle_flow(x) + x = self._exit_flow(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def Xception41(pretrained=False, use_ssld=False, **kwargs): + model = Xception(entry_flow_block_num=3, middle_flow_block_num=8, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception41"], use_ssld=use_ssld) + return model + + +def Xception65(pretrained=False, use_ssld=False, **kwargs): + model = Xception( + entry_flow_block_num=3, middle_flow_block_num=16, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld) + return model + + +def Xception71(pretrained=False, use_ssld=False, **kwargs): + model = Xception( + entry_flow_block_num=5, middle_flow_block_num=16, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception_deeplab.py b/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception_deeplab.py new file mode 100644 index 0000000..c52769b --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/model_zoo/xception_deeplab.py @@ -0,0 +1,421 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "Xception41_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams",
+    "Xception65_deeplab":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
+
+def check_data(data, number):
+    if type(data) == int:
+        return [data] * number
+    assert len(data) == number
+    return data
+
+
+def check_stride(s, os):
+    if s <= os:
+        return True
+    else:
+        return False
+
+
+def check_points(count, points):
+    if points is None:
+        return False
+    else:
+        if isinstance(points, list):
+            return (True if count in points else False)
+        else:
+            return (True if count == points else False)
+
+
+def gen_bottleneck_params(backbone='xception_65'):
+    if backbone == 'xception_65':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_41':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (8, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_71':
+        bottleneck_params = {
+            "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    else:
+        raise Exception(
+            "xception backbone only supports xception_41/xception_65/xception_71"
+        )
+    return bottleneck_params
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            in_channels=input_channels,
+            out_channels=output_channels,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            weight_attr=ParamAttr(name=name + "/weights"),
+            bias_attr=False)
+        self._bn = BatchNorm(
+            num_channels=output_channels,
+            act=act,
+            epsilon=1e-3,
+            momentum=0.99,
+            param_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"),
+            moving_mean_name=name + "/BatchNorm/moving_mean",
+            moving_variance_name=name + "/BatchNorm/moving_variance")
+
+    def forward(self, inputs):
+        return self._bn(self._conv(inputs))
+
+
+class Seperate_Conv(nn.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 stride,
+                 filter,
+                 dilation=1,
+                 act=None,
+                 name=None):
+        super(Seperate_Conv, self).__init__()
+
+        self._conv1 = Conv2D(
+            in_channels=input_channels,
+            out_channels=input_channels,
+            kernel_size=filter,
+            stride=stride,
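+            # depthwise stage: groups == in_channels gives one k x k filter
+            # per input channel; the 1x1 pointwise conv below mixes channels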
+ groups=input_channels, + padding=(filter) // 2 * dilation, + dilation=dilation, + weight_attr=ParamAttr(name=name + "/depthwise/weights"), + bias_attr=False) + self._bn1 = BatchNorm( + input_channels, + act=act, + epsilon=1e-3, + momentum=0.99, + param_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"), + moving_mean_name=name + "/depthwise/BatchNorm/moving_mean", + moving_variance_name=name + "/depthwise/BatchNorm/moving_variance") + self._conv2 = Conv2D( + input_channels, + output_channels, + 1, + stride=1, + groups=1, + padding=0, + weight_attr=ParamAttr(name=name + "/pointwise/weights"), + bias_attr=False) + self._bn2 = BatchNorm( + output_channels, + act=act, + epsilon=1e-3, + momentum=0.99, + param_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"), + moving_mean_name=name + "/pointwise/BatchNorm/moving_mean", + moving_variance_name=name + "/pointwise/BatchNorm/moving_variance") + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._bn1(x) + x = self._conv2(x) + x = self._bn2(x) + return x + + +class Xception_Block(nn.Layer): + def __init__(self, + input_channels, + output_channels, + strides=1, + filter_size=3, + dilation=1, + skip_conv=True, + has_skip=True, + activation_fn_in_separable_conv=False, + name=None): + super(Xception_Block, self).__init__() + + repeat_number = 3 + output_channels = check_data(output_channels, repeat_number) + filter_size = check_data(filter_size, repeat_number) + strides = check_data(strides, repeat_number) + + self.has_skip = has_skip + self.skip_conv = skip_conv + self.activation_fn_in_separable_conv = activation_fn_in_separable_conv + if not activation_fn_in_separable_conv: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + dilation=dilation, + name=name + "/separable_conv2") + self._conv3 = Seperate_Conv( + output_channels[1], + output_channels[2], + stride=strides[2], + filter=filter_size[2], + dilation=dilation, + name=name + "/separable_conv3") + else: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + act="relu", + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + act="relu", + dilation=dilation, + name=name + "/separable_conv2") + self._conv3 = Seperate_Conv( + output_channels[1], + output_channels[2], + stride=strides[2], + filter=filter_size[2], + act="relu", + dilation=dilation, + name=name + "/separable_conv3") + + if has_skip and skip_conv: + self._short = ConvBNLayer( + input_channels, + output_channels[-1], + 1, + stride=strides[-1], + padding=0, + name=name + "/shortcut") + + def forward(self, inputs): + if not self.activation_fn_in_separable_conv: + x = F.relu(inputs) + x = self._conv1(x) + x = F.relu(x) + x = self._conv2(x) + x = F.relu(x) + x = self._conv3(x) + else: + x = self._conv1(inputs) + x = self._conv2(x) + x = self._conv3(x) + if self.has_skip: + if self.skip_conv: + skip = self._short(inputs) + else: + skip = inputs + return paddle.add(x, skip) + else: + return x + + +class XceptionDeeplab(nn.Layer): + def __init__(self, backbone, 
class_num=1000): + super(XceptionDeeplab, self).__init__() + + bottleneck_params = gen_bottleneck_params(backbone) + self.backbone = backbone + + self._conv1 = ConvBNLayer( + 3, + 32, + 3, + stride=2, + padding=1, + act="relu", + name=self.backbone + "/entry_flow/conv1") + self._conv2 = ConvBNLayer( + 32, + 64, + 3, + stride=1, + padding=1, + act="relu", + name=self.backbone + "/entry_flow/conv2") + + self.block_num = bottleneck_params["entry_flow"][0] + self.strides = bottleneck_params["entry_flow"][1] + self.chns = bottleneck_params["entry_flow"][2] + self.strides = check_data(self.strides, self.block_num) + self.chns = check_data(self.chns, self.block_num) + + self.entry_flow = [] + self.middle_flow = [] + + self.stride = 2 + self.output_stride = 32 + s = self.stride + + for i in range(self.block_num): + stride = self.strides[i] if check_stride(s * self.strides[i], + self.output_stride) else 1 + xception_block = self.add_sublayer( + self.backbone + "/entry_flow/block" + str(i + 1), + Xception_Block( + input_channels=64 if i == 0 else self.chns[i - 1], + output_channels=self.chns[i], + strides=[1, 1, self.stride], + name=self.backbone + "/entry_flow/block" + str(i + 1))) + self.entry_flow.append(xception_block) + s = s * stride + self.stride = s + + self.block_num = bottleneck_params["middle_flow"][0] + self.strides = bottleneck_params["middle_flow"][1] + self.chns = bottleneck_params["middle_flow"][2] + self.strides = check_data(self.strides, self.block_num) + self.chns = check_data(self.chns, self.block_num) + s = self.stride + + for i in range(self.block_num): + stride = self.strides[i] if check_stride(s * self.strides[i], + self.output_stride) else 1 + xception_block = self.add_sublayer( + self.backbone + "/middle_flow/block" + str(i + 1), + Xception_Block( + input_channels=728, + output_channels=728, + strides=[1, 1, self.strides[i]], + skip_conv=False, + name=self.backbone + "/middle_flow/block" + str(i + 1))) + self.middle_flow.append(xception_block) + s = s * stride + self.stride = s + + self.block_num = bottleneck_params["exit_flow"][0] + self.strides = bottleneck_params["exit_flow"][1] + self.chns = bottleneck_params["exit_flow"][2] + self.strides = check_data(self.strides, self.block_num) + self.chns = check_data(self.chns, self.block_num) + s = self.stride + stride = self.strides[0] if check_stride(s * self.strides[0], + self.output_stride) else 1 + self._exit_flow_1 = Xception_Block( + 728, + self.chns[0], [1, 1, stride], + name=self.backbone + "/exit_flow/block1") + s = s * stride + stride = self.strides[1] if check_stride(s * self.strides[1], + self.output_stride) else 1 + self._exit_flow_2 = Xception_Block( + self.chns[0][-1], + self.chns[1], [1, 1, stride], + dilation=2, + has_skip=False, + activation_fn_in_separable_conv=True, + name=self.backbone + "/exit_flow/block2") + s = s * stride + + self.stride = s + + self._drop = Dropout(p=0.5, mode="downscale_in_infer") + self._pool = AdaptiveAvgPool2D(1) + self._fc = Linear( + self.chns[1][-1], + class_num, + weight_attr=ParamAttr(name="fc_weights"), + bias_attr=ParamAttr(name="fc_bias")) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + for ef in self.entry_flow: + x = ef(x) + for mf in self.middle_flow: + x = mf(x) + x = self._exit_flow_1(x) + x = self._exit_flow_2(x) + x = self._drop(x) + x = self._pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained 
is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def Xception41_deeplab(pretrained=False, use_ssld=False, **kwargs): + model = XceptionDeeplab('xception_41', **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception41_deeplab"], use_ssld=use_ssld) + return model + + +def Xception65_deeplab(pretrained=False, use_ssld=False, **kwargs): + model = XceptionDeeplab("xception_65", **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["Xception65_deeplab"], use_ssld=use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/__init__.py b/src/PaddleClas/ppcls/arch/backbone/variant_models/__init__.py new file mode 100644 index 0000000..75cf29f --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/variant_models/__init__.py @@ -0,0 +1,3 @@ +from .resnet_variant import ResNet50_last_stage_stride1 +from .vgg_variant import VGG19Sigmoid +from .pp_lcnet_variant import PPLCNet_x2_5_Tanh diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..429ee93 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/pp_lcnet_variant.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/pp_lcnet_variant.cpython-39.pyc new file mode 100644 index 0000000..5f60704 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/pp_lcnet_variant.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/resnet_variant.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/resnet_variant.cpython-39.pyc new file mode 100644 index 0000000..a86f989 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/resnet_variant.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/vgg_variant.cpython-39.pyc b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/vgg_variant.cpython-39.pyc new file mode 100644 index 0000000..06aa183 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/backbone/variant_models/__pycache__/vgg_variant.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py b/src/PaddleClas/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py new file mode 100644 index 0000000..dc9747a --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py @@ -0,0 +1,29 @@ +import paddle +from paddle.nn import Sigmoid +from paddle.nn import Tanh +from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x2_5 + +__all__ = ["PPLCNet_x2_5_Tanh"] + + +class TanhSuffix(paddle.nn.Layer): + def __init__(self, origin_layer): + super(TanhSuffix, self).__init__() + self.origin_layer = origin_layer + self.tanh = Tanh() + + def forward(self, input, res_dict=None, **kwargs): + x = self.origin_layer(input) + x = self.tanh(x) + return x + + +def PPLCNet_x2_5_Tanh(pretrained=False, use_ssld=False, **kwargs): + def replace_function(origin_layer, pattern): + new_layer = TanhSuffix(origin_layer) 
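+        # the wrapped layer replaces the matched "fc" sublayer when
+        # upgrade_sublayer is called below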
+ return new_layer + + pattern = "fc" + model = PPLCNet_x2_5(pretrained=pretrained, use_ssld=use_ssld, **kwargs) + model.upgrade_sublayer(pattern, replace_function) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/resnet_variant.py b/src/PaddleClas/ppcls/arch/backbone/variant_models/resnet_variant.py new file mode 100644 index 0000000..0219344 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/variant_models/resnet_variant.py @@ -0,0 +1,23 @@ +from paddle.nn import Conv2D +from ppcls.arch.backbone.legendary_models.resnet import ResNet50, MODEL_URLS, _load_pretrained + +__all__ = ["ResNet50_last_stage_stride1"] + + +def ResNet50_last_stage_stride1(pretrained=False, use_ssld=False, **kwargs): + def replace_function(conv, pattern): + new_conv = Conv2D( + in_channels=conv._in_channels, + out_channels=conv._out_channels, + kernel_size=conv._kernel_size, + stride=1, + padding=conv._padding, + groups=conv._groups, + bias_attr=conv._bias_attr) + return new_conv + + pattern = ["blocks[13].conv1.conv", "blocks[13].short.conv"] + model = ResNet50(pretrained=False, use_ssld=use_ssld, **kwargs) + model.upgrade_sublayer(pattern, replace_function) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/arch/backbone/variant_models/vgg_variant.py b/src/PaddleClas/ppcls/arch/backbone/variant_models/vgg_variant.py new file mode 100644 index 0000000..c1f75ba --- /dev/null +++ b/src/PaddleClas/ppcls/arch/backbone/variant_models/vgg_variant.py @@ -0,0 +1,28 @@ +import paddle +from paddle.nn import Sigmoid +from ppcls.arch.backbone.legendary_models.vgg import VGG19 + +__all__ = ["VGG19Sigmoid"] + + +class SigmoidSuffix(paddle.nn.Layer): + def __init__(self, origin_layer): + super().__init__() + self.origin_layer = origin_layer + self.sigmoid = Sigmoid() + + def forward(self, input, res_dict=None, **kwargs): + x = self.origin_layer(input) + x = self.sigmoid(x) + return x + + +def VGG19Sigmoid(pretrained=False, use_ssld=False, **kwargs): + def replace_function(origin_layer, pattern): + new_layer = SigmoidSuffix(origin_layer) + return new_layer + + pattern = "fc2" + model = VGG19(pretrained=pretrained, use_ssld=use_ssld, **kwargs) + model.upgrade_sublayer(pattern, replace_function) + return model diff --git a/src/PaddleClas/ppcls/arch/gears/__init__.py b/src/PaddleClas/ppcls/arch/gears/__init__.py new file mode 100644 index 0000000..75ca41d --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
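All three variant models above follow the same recipe: wrap an existing sublayer in a small suffix layer, then swap it in with `upgrade_sublayer`, which these backbones inherit from TheseusLayer. A minimal usage sketch, assuming the `ppcls` package from this diff is importable and paddle is installed:

    import paddle
    from ppcls.arch.backbone.variant_models import VGG19Sigmoid

    # The factory builds a stock VGG19, then upgrade_sublayer("fc2", ...) swaps
    # the matched sublayer for SigmoidSuffix, so fc2's output is squashed
    # through a Sigmoid before the rest of the forward pass.
    model = VGG19Sigmoid(pretrained=False)
    out = model(paddle.rand([1, 3, 224, 224]))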
+ +from .arcmargin import ArcMargin +from .cosmargin import CosMargin +from .circlemargin import CircleMargin +from .fc import FC +from .vehicle_neck import VehicleNeck + +__all__ = ['build_gear'] + + +def build_gear(config): + support_dict = [ + 'ArcMargin', 'CosMargin', 'CircleMargin', 'FC', 'VehicleNeck' + ] + module_name = config.pop('name') + assert module_name in support_dict, ( + 'build_gear only supports {}'.format(support_dict)) + module_class = eval(module_name)(**config) + return module_class diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..8e51faa Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/arcmargin.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/arcmargin.cpython-39.pyc new file mode 100644 index 0000000..02443b1 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/arcmargin.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/circlemargin.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/circlemargin.cpython-39.pyc new file mode 100644 index 0000000..f7d1261 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/circlemargin.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/cosmargin.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/cosmargin.cpython-39.pyc new file mode 100644 index 0000000..85a7df3 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/cosmargin.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/fc.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/fc.cpython-39.pyc new file mode 100644 index 0000000..361b877 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/fc.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/identity_head.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/identity_head.cpython-39.pyc new file mode 100644 index 0000000..2a99da9 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/identity_head.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/__pycache__/vehicle_neck.cpython-39.pyc b/src/PaddleClas/ppcls/arch/gears/__pycache__/vehicle_neck.cpython-39.pyc new file mode 100644 index 0000000..ede4fd6 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/gears/__pycache__/vehicle_neck.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/gears/arcmargin.py b/src/PaddleClas/ppcls/arch/gears/arcmargin.py new file mode 100644 index 0000000..22cc76e --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/arcmargin.py @@ -0,0 +1,72 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
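A sketch of how `build_gear` is meant to be fed (mirroring the `Neck`/`Head` blocks in the YAML configs later in this diff): the `name` key selects the class, and every remaining key becomes a constructor argument. Sizes below are illustrative:

    from ppcls.arch.gears import build_gear

    head_cfg = {
        "name": "ArcMargin",   # popped off and used to pick the class
        "embedding_size": 512,
        "class_num": 1000,     # illustrative sizes
        "margin": 0.2,
        "scale": 30,
    }
    head = build_gear(head_cfg)  # equivalent to ArcMargin(embedding_size=512, ...)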
+ +import paddle +import paddle.nn as nn +import math + + +class ArcMargin(nn.Layer): + def __init__(self, + embedding_size, + class_num, + margin=0.5, + scale=80.0, + easy_margin=False): + super().__init__() + self.embedding_size = embedding_size + self.class_num = class_num + self.margin = margin + self.scale = scale + self.easy_margin = easy_margin + self.weight = self.create_parameter( + shape=[self.embedding_size, self.class_num], + is_bias=False, + default_initializer=paddle.nn.initializer.XavierNormal()) + + def forward(self, input, label=None): + input_norm = paddle.sqrt( + paddle.sum(paddle.square(input), axis=1, keepdim=True)) + input = paddle.divide(input, input_norm) + + weight_norm = paddle.sqrt( + paddle.sum(paddle.square(self.weight), axis=0, keepdim=True)) + weight = paddle.divide(self.weight, weight_norm) + + cos = paddle.matmul(input, weight) + if not self.training or label is None: + return cos + sin = paddle.sqrt(1.0 - paddle.square(cos) + 1e-6) + cos_m = math.cos(self.margin) + sin_m = math.sin(self.margin) + phi = cos * cos_m - sin * sin_m + + th = math.cos(self.margin) * (-1) + mm = math.sin(self.margin) * self.margin + if self.easy_margin: + phi = self._paddle_where_more_than(cos, 0, phi, cos) + else: + phi = self._paddle_where_more_than(cos, th, phi, cos - mm) + + one_hot = paddle.nn.functional.one_hot(label, self.class_num) + one_hot = paddle.squeeze(one_hot, axis=[1]) + output = paddle.multiply(one_hot, phi) + paddle.multiply( + (1.0 - one_hot), cos) + output = output * self.scale + return output + + def _paddle_where_more_than(self, target, limit, x, y): + mask = paddle.cast(x=(target > limit), dtype='float32') + output = paddle.multiply(mask, x) + paddle.multiply((1.0 - mask), y) + return output diff --git a/src/PaddleClas/ppcls/arch/gears/circlemargin.py b/src/PaddleClas/ppcls/arch/gears/circlemargin.py new file mode 100644 index 0000000..d1bce83 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/circlemargin.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
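For intuition, ArcMargin's target-class penalty is the additive angular margin cos(theta + m), computed via the identity cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m), exactly as in the forward above. A scalar check in pure Python (values illustrative):

    import math

    m = 0.5                           # the default margin above
    cos_t = 0.7                       # suppose cos(theta) for the target class
    sin_t = math.sqrt(1.0 - cos_t ** 2)
    phi = cos_t * math.cos(m) - sin_t * math.sin(m)
    assert abs(phi - math.cos(math.acos(cos_t) + m)) < 1e-9
    # phi < cos_t, so the target logit shrinks before scaling by scale=80.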
+ +import math +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class CircleMargin(nn.Layer): + def __init__(self, embedding_size, class_num, margin, scale): + super(CircleMargin, self).__init__() + self.scale = scale + self.margin = margin + self.embedding_size = embedding_size + self.class_num = class_num + + self.weight = self.create_parameter( + shape=[self.embedding_size, self.class_num], + is_bias=False, + default_initializer=paddle.nn.initializer.XavierNormal()) + + def forward(self, input, label): + feat_norm = paddle.sqrt( + paddle.sum(paddle.square(input), axis=1, keepdim=True)) + input = paddle.divide(input, feat_norm) + + weight_norm = paddle.sqrt( + paddle.sum(paddle.square(self.weight), axis=0, keepdim=True)) + weight = paddle.divide(self.weight, weight_norm) + + logits = paddle.matmul(input, weight) + if not self.training or label is None: + return logits + + alpha_p = paddle.clip(-logits.detach() + 1 + self.margin, min=0.) + alpha_n = paddle.clip(logits.detach() + self.margin, min=0.) + delta_p = 1 - self.margin + delta_n = self.margin + + m_hot = F.one_hot(label.reshape([-1]), num_classes=logits.shape[1]) + + logits_p = alpha_p * (logits - delta_p) + logits_n = alpha_n * (logits - delta_n) + pre_logits = logits_p * m_hot + logits_n * (1 - m_hot) + pre_logits = self.scale * pre_logits + + return pre_logits diff --git a/src/PaddleClas/ppcls/arch/gears/cosmargin.py b/src/PaddleClas/ppcls/arch/gears/cosmargin.py new file mode 100644 index 0000000..578b64c --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/cosmargin.py @@ -0,0 +1,55 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
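CircleMargin's adaptive re-weighting is easiest to read on scalar scores; the sketch below mirrors the alpha/delta arithmetic in the forward above (values illustrative):

    # s_p: cosine score for the target class, s_n: for a non-target class
    m = 0.25
    s_p, s_n = 0.6, 0.3

    alpha_p = max(0.0, -s_p + 1 + m)     # grows as s_p falls short of 1
    alpha_n = max(0.0, s_n + m)          # grows as s_n exceeds -m
    logit_p = alpha_p * (s_p - (1 - m))  # pulls s_p toward delta_p = 1 - m
    logit_n = alpha_n * (s_n - m)        # pushes s_n toward delta_n = m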
+ +import paddle +import math +import paddle.nn as nn + + +class CosMargin(paddle.nn.Layer): + def __init__(self, embedding_size, class_num, margin=0.35, scale=64.0): + super(CosMargin, self).__init__() + self.scale = scale + self.margin = margin + self.embedding_size = embedding_size + self.class_num = class_num + + self.weight = self.create_parameter( + shape=[self.embedding_size, self.class_num], + is_bias=False, + default_initializer=paddle.nn.initializer.XavierNormal()) + + def forward(self, input, label=None): + if label is not None: + label.stop_gradient = True + + input_norm = paddle.sqrt( + paddle.sum(paddle.square(input), axis=1, keepdim=True)) + input = paddle.divide(input, input_norm) + + weight_norm = paddle.sqrt( + paddle.sum(paddle.square(self.weight), axis=0, keepdim=True)) + weight = paddle.divide(self.weight, weight_norm) + + cos = paddle.matmul(input, weight) + if not self.training or label is None: + return cos + + cos_m = cos - self.margin + + one_hot = paddle.nn.functional.one_hot(label, self.class_num) + one_hot = paddle.squeeze(one_hot, axis=[1]) + output = paddle.multiply(one_hot, cos_m) + paddle.multiply( + (1.0 - one_hot), cos) + output = output * self.scale + return output diff --git a/src/PaddleClas/ppcls/arch/gears/fc.py b/src/PaddleClas/ppcls/arch/gears/fc.py new file mode 100644 index 0000000..b324741 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/fc.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.nn as nn + + +class FC(nn.Layer): + def __init__(self, embedding_size, class_num): + super(FC, self).__init__() + self.embedding_size = embedding_size + self.class_num = class_num + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.XavierNormal()) + self.fc = paddle.nn.Linear( + self.embedding_size, self.class_num, weight_attr=weight_attr) + + def forward(self, input, label=None): + out = self.fc(input) + return out diff --git a/src/PaddleClas/ppcls/arch/gears/identity_head.py b/src/PaddleClas/ppcls/arch/gears/identity_head.py new file mode 100644 index 0000000..7d11e57 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/identity_head.py @@ -0,0 +1,9 @@ +from paddle import nn + + +class IdentityHead(nn.Layer): + def __init__(self): + super(IdentityHead, self).__init__() + + def forward(self, x, label=None): + return {"features": x, "logits": None} diff --git a/src/PaddleClas/ppcls/arch/gears/vehicle_neck.py b/src/PaddleClas/ppcls/arch/gears/vehicle_neck.py new file mode 100644 index 0000000..05f4e33 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/gears/vehicle_neck.py @@ -0,0 +1,52 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
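Compared with ArcMargin, CosMargin applies its margin directly in cosine space rather than in angle space; the one-hot routing of the penalized logit to the target class is the same. A scalar comparison (values illustrative):

    import math

    cos_t = 0.7
    cosface = cos_t - 0.35                      # CosMargin: cos(theta) - m
    arcface = math.cos(math.acos(cos_t) + 0.5)  # ArcMargin: cos(theta + m)
    # Both shrink the target logit; CosFace's penalty is constant in cosine
    # space, ArcFace's is constant in angle space.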
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import paddle +import paddle.nn as nn + + +class VehicleNeck(nn.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format='NCHW'): + super().__init__() + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + padding_mode=padding_mode, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + self.flatten = nn.Flatten() + + def forward(self, x): + x = self.conv(x) + x = self.flatten(x) + return x diff --git a/src/PaddleClas/ppcls/arch/slim/__init__.py b/src/PaddleClas/ppcls/arch/slim/__init__.py new file mode 100644 index 0000000..3733059 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/slim/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ppcls.arch.slim.prune import prune_model +from ppcls.arch.slim.quant import quantize_model diff --git a/src/PaddleClas/ppcls/arch/slim/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/arch/slim/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..823977a Binary files /dev/null and b/src/PaddleClas/ppcls/arch/slim/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/slim/__pycache__/prune.cpython-39.pyc b/src/PaddleClas/ppcls/arch/slim/__pycache__/prune.cpython-39.pyc new file mode 100644 index 0000000..46a6c65 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/slim/__pycache__/prune.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/slim/__pycache__/quant.cpython-39.pyc b/src/PaddleClas/ppcls/arch/slim/__pycache__/quant.cpython-39.pyc new file mode 100644 index 0000000..70884e9 Binary files /dev/null and b/src/PaddleClas/ppcls/arch/slim/__pycache__/quant.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/arch/slim/prune.py b/src/PaddleClas/ppcls/arch/slim/prune.py new file mode 100644 index 0000000..c0c9d22 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/slim/prune.py @@ -0,0 +1,65 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
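The VehicleNeck gear defined above is just a 1x1 convolution followed by a flatten; a quick shape sketch (paddle required, sizes illustrative):

    import paddle
    from ppcls.arch.gears.vehicle_neck import VehicleNeck

    neck = VehicleNeck(in_channels=2048, out_channels=256)
    feat = paddle.rand([8, 2048, 7, 7])   # e.g. a ResNet50 feature map
    out = neck(feat)                      # conv1x1 -> [8, 256, 7, 7], then flatten
    print(out.shape)                      # [8, 256 * 7 * 7] = [8, 12544]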
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function +import paddle +from ppcls.utils import logger + + +def prune_model(config, model): + if config.get("Slim", False) and config["Slim"].get("prune", False): + import paddleslim + prune_method_name = config["Slim"]["prune"]["name"].lower() + assert prune_method_name in [ + "fpgm", "l1_norm" + ], "The prune methods only support 'fpgm' and 'l1_norm'" + if prune_method_name == "fpgm": + model.pruner = paddleslim.dygraph.FPGMFilterPruner( + model, [1] + config["Global"]["image_shape"]) + else: + model.pruner = paddleslim.dygraph.L1NormFilterPruner( + model, [1] + config["Global"]["image_shape"]) + + # prune model + _prune_model(config, model) + else: + model.pruner = None + + + +def _prune_model(config, model): + from paddleslim.analysis import dygraph_flops as flops + logger.info("FLOPs before pruning: {}GFLOPs".format( + flops(model, [1] + config["Global"]["image_shape"]) / 1e9)) + model.eval() + + params = [] + for sublayer in model.sublayers(): + for param in sublayer.parameters(include_sublayers=False): + if isinstance(sublayer, paddle.nn.Conv2D): + params.append(param.name) + ratios = {} + for param in params: + ratios[param] = config["Slim"]["prune"]["pruned_ratio"] + plan = model.pruner.prune_vars(ratios, [0]) + + logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format( + flops(model, [1] + config["Global"]["image_shape"]) / 1e9, + plan.pruned_flops)) + + for param in model.parameters(): + if "conv2d" in param.name: + logger.info("{}\t{}".format(param.name, param.shape)) + + model.train() diff --git a/src/PaddleClas/ppcls/arch/slim/quant.py b/src/PaddleClas/ppcls/arch/slim/quant.py new file mode 100644 index 0000000..b8f59a7 --- /dev/null +++ b/src/PaddleClas/ppcls/arch/slim/quant.py @@ -0,0 +1,55 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function +import paddle +from ppcls.utils import logger + +QUANT_CONFIG = { + # weight preprocess type, default is None and no preprocessing is performed. + 'weight_preprocess_type': None, + # activation preprocess type, default is None and no preprocessing is performed. 
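For reference, a sketch of the `Slim` config blocks that activate `prune_model` above and `quantize_model` below (key names taken from the code; the ratio is illustrative, and paddleslim must be installed):

    from ppcls.arch.slim import prune_model, quantize_model

    config = {
        "Global": {"image_shape": [3, 224, 224]},
        "Slim": {
            "prune": {"name": "fpgm", "pruned_ratio": 0.3},  # or "l1_norm"
            # "quant": {"name": "pact"},  # would route through quantize_model
        },
    }
    # prune_model(config, model)     # attaches model.pruner, prunes conv filters
    # quantize_model(config, model)  # attaches model.quanter when quant is set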
+ 'activation_preprocess_type': None, + # weight quantize type, default is 'channel_wise_abs_max' + 'weight_quantize_type': 'channel_wise_abs_max', + # activation quantize type, default is 'moving_average_abs_max' + 'activation_quantize_type': 'moving_average_abs_max', + # weight quantize bit num, default is 8 + 'weight_bits': 8, + # activation quantize bit num, default is 8 + 'activation_bits': 8, + # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' + 'dtype': 'int8', + # window size for 'range_abs_max' quantization. default is 10000 + 'window_size': 10000, + # The decay coefficient of moving average, default is 0.9 + 'moving_rate': 0.9, + # for dygraph quantization, layers of type in quantizable_layer_type will be quantized + 'quantizable_layer_type': ['Conv2D', 'Linear'], +} + + +def quantize_model(config, model): + if config.get("Slim", False) and config["Slim"].get("quant", False): + from paddleslim.dygraph.quant import QAT + assert config["Slim"]["quant"]["name"].lower( + ) == 'pact', 'Only PACT quantization method is supported now' + QUANT_CONFIG["activation_preprocess_type"] = "PACT" + model.quanter = QAT(config=QUANT_CONFIG) + model.quanter.quantize(model) + logger.info("QAT model summary:") + paddle.summary(model, (1, 3, 224, 224)) + else: + model.quanter = None + return diff --git a/src/PaddleClas/ppcls/arch/utils.py b/src/PaddleClas/ppcls/arch/utils.py new file mode 100644 index 0000000..308475d --- /dev/null +++ b/src/PaddleClas/ppcls/arch/utils.py @@ -0,0 +1,53 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six +import types +from difflib import SequenceMatcher + +from . 
import backbone + + +def get_architectures(): + """ + get all of model architectures + """ + names = [] + for k, v in backbone.__dict__.items(): + if isinstance(v, (types.FunctionType, six.class_types)): + names.append(k) + return names + + +def get_blacklist_model_in_static_mode(): + from ppcls.arch.backbone import distilled_vision_transformer + from ppcls.arch.backbone import vision_transformer + blacklist = distilled_vision_transformer.__all__ + vision_transformer.__all__ + return blacklist + + +def similar_architectures(name='', names=[], thresh=0.1, topk=10): + """ + inferred similar architectures + """ + scores = [] + for idx, n in enumerate(names): + if n.startswith('__'): + continue + score = SequenceMatcher(None, n.lower(), name.lower()).quick_ratio() + if score > thresh: + scores.append((idx, score)) + scores.sort(key=lambda x: x[1], reverse=True) + similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]] + return similar_names diff --git a/src/PaddleClas/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml b/src/PaddleClas/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml new file mode 100644 index 0000000..3d1b993 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Cartoonface/ResNet50_icartoon.yaml @@ -0,0 +1,149 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_mode: "retrieval" + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + image_shape: [3, 224, 224] + infer_imgs: + save_inference_dir: "./inference" + feature_normalize: True + +Arch: + name: "RecModel" + Backbone: + name: "ResNet50" + pretrained: True + BackboneStopLayer: + name: "flatten" + output_dim: 2048 + Head: + name: "FC" + class_num: 5013 + embedding_size: 2048 + # margin: 0.5 + # scale: 80 + infer_output_key: "features" + infer_add_softmax: "false" + +Loss: + Train: + - CELoss: + weight: 1.0 + # - TripletLoss: + # margin: 0.1 + # weight: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + +DataLoader: + Train: + dataset: + name: ICartoonDataset + image_root: "./dataset/iCartoonFace" + cls_label_path: "./dataset/iCartoonFace/train_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + #num_instances: 2 + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + + Eval: + Query: + dataset: + name: ICartoonDataset + image_root: "./dataset/iCartoonFace" + cls_label_path: "./dataset/iCartoonFace/query.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + + Gallery: + dataset: + name: ICartoonDataset + image_root: "./dataset/iCartoonFace" + cls_label_path: "./dataset/iCartoonFace/gallery.txt" + transform_ops: + - DecodeImage: + to_rgb: True + 
channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - Recallk: + topk: [1] diff --git a/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml new file mode 100644 index 0000000..626dd7c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml @@ -0,0 +1,148 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + use_dali: False + to_static: False + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: PPLCNet_x2_5 + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ + cls_label_path: ./dataset/train_reg_all_data.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_binary.yaml 
b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_binary.yaml new file mode 100644 index 0000000..728942f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_binary.yaml @@ -0,0 +1,145 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + use_dali: False + to_static: False + + #feature postprocess + feature_normalize: False + feature_binarize: "sign" + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: PPLCNet_x2_5_Tanh + pretrained: True + use_ssld: True + class_num: 512 + Head: + name: FC + embedding_size: 512 + class_num: 185341 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/all_data + cls_label_path: ./dataset/all_data/train_reg_all_data.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml new file mode 100644 index 0000000..b6c4536 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_dml.yaml @@ -0,0 +1,188 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: true + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: 
[3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + use_dali: False + to_static: False + +# model architecture +Arch: + name: "DistillationModel" + infer_output_key: features + infer_add_softmax: False + is_rec: True + infer_model_name: "Student" + # if not null, its lengths should be same as models + pretrained_list: + # if not null, its lengths should be same as models + freeze_params_list: + - False + - False + models: + - Teacher: + name: RecModel + infer_output_key: features + infer_add_softmax: False + Backbone: + name: PPLCNet_x2_5 + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + - Student: + name: RecModel + infer_output_key: features + infer_add_softmax: False + Backbone: + name: PPLCNet_x2_5 + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - DistillationGTCELoss: + weight: 1.0 + key: "logits" + model_names: ["Student", "Teacher"] + - DistillationDMLLoss: + weight: 1.0 + key: "logits" + model_name_pairs: + - ["Student", "Teacher"] + Eval: + - DistillationGTCELoss: + weight: 1.0 + model_names: ["Student"] + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.02 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ + cls_label_path: ./dataset/train_reg_all_data.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_udml.yaml b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_udml.yaml new file mode 100644 index 0000000..bcaea03 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5_udml.yaml @@ -0,0 +1,193 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: true + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + use_dali: False + to_static: False + +# model architecture +Arch: + name: "DistillationModel" + infer_output_key: features + infer_add_softmax: False + is_rec: True + infer_model_name: "Student" + # if not null, its lengths should be same as models + pretrained_list: + # if not null, its lengths should be same as models + freeze_params_list: + - False + - False + models: + - Teacher: + name: RecModel + infer_output_key: features + infer_add_softmax: False + Backbone: + name: PPLCNet_x2_5 + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + - Student: + name: RecModel + infer_output_key: features + infer_add_softmax: False + Backbone: + name: PPLCNet_x2_5 + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - DistillationGTCELoss: + weight: 1.0 + key: "logits" + model_names: ["Student", "Teacher"] + - DistillationDMLLoss: + weight: 1.0 + key: "logits" + model_name_pairs: + - ["Student", "Teacher"] + - DistillationDistanceLoss: + weight: 1.0 + key: "backbone" + model_name_pairs: + - ["Student", "Teacher"] + Eval: + - DistillationGTCELoss: + weight: 1.0 + model_names: ["Student"] + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.02 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ + cls_label_path: ./dataset/train_reg_all_data.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + 
sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/AlexNet/AlexNet.yaml b/src/PaddleClas/ppcls/configs/ImageNet/AlexNet/AlexNet.yaml new file mode 100644 index 0000000..ea2e073 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/AlexNet/AlexNet.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: AlexNet + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + decay_epochs: [30, 60, 90] + values: [0.01, 0.001, 0.0001, 0.00001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml b/src/PaddleClas/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml new file mode 100644 index 0000000..4848cfc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 256, 256] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: CSPDarkNet53 + 
class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 256 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102.yaml new file mode 100644 index 0000000..b6033f7 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA102 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + 
dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x.yaml new file mode 100644 index 0000000..a1e2c09 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA102x + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: 
ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x2.yaml new file mode 100644 index 0000000..8bd4c46 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA102x2.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA102x2 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA169.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA169.yaml new file mode 100644 index 0000000..18c244d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA169.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA169 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + 
+Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA34.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA34.yaml new file mode 100644 index 0000000..d9218df --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA34.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA34 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + 
transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46_c.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46_c.yaml new file mode 100644 index 0000000..8d20341 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46_c.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA46_c + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] 
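[Editor's note] The DLA configs in this part of the diff are identical apart from `Arch.name`: 120 epochs of SGD (momentum 0.9, L2 coeff 1e-4) with the step ("Piecewise") LR schedule shown above. As a minimal sketch of the epoch-to-LR mapping that the `decay_epochs`/`values` lists encode (plain Python; `piecewise_lr` is an illustrative helper, not a PaddleClas API):

# Illustrative only: mirrors the Piecewise blocks in the DLA configs above.
def piecewise_lr(epoch, decay_epochs=(30, 60, 90),
                 values=(0.1, 0.01, 0.001, 0.0001)):
    """LR for a 0-based epoch; values has one more entry than decay_epochs."""
    for boundary, value in zip(decay_epochs, values):
        if epoch < boundary:
            return value
    return values[-1]

# 30 epochs at each of 0.1 / 0.01 / 0.001, then 0.0001 until epoch 120:
assert [piecewise_lr(e) for e in (0, 30, 60, 90)] == [0.1, 0.01, 0.001, 0.0001]

Inside Paddle this is presumably lowered onto a piecewise-constant scheduler such as `paddle.optimizer.lr.PiecewiseDecay`; the YAML only has to supply the boundary and value lists.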
diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46x_c.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46x_c.yaml new file mode 100644 index 0000000..e7f7d67 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA46x_c.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA46x_c + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60.yaml new file mode 100644 index 0000000..a255f05 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA60 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: 
[0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x.yaml new file mode 100644 index 0000000..143b87f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA60x + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x_c.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x_c.yaml new file mode 100644 index 0000000..7792819 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DLA/DLA60x_c.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DLA60x_c + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN107.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN107.yaml new file mode 
100644 index 0000000..7df1256 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN107.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DPN107 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN131.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN131.yaml new file mode 100644 index 0000000..88f1b57 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN131.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DPN131 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN68.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN68.yaml new file mode 100644 index 0000000..c1e2808 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN68.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DPN68 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: 
False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN92.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN92.yaml new file mode 100644 index 0000000..fb5b0ed --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN92.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DPN92 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN98.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN98.yaml new file mode 100644 index 0000000..e394710 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DPN/DPN98.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + 
eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DPN98 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DarkNet/DarkNet53.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DarkNet/DarkNet53.yaml new file mode 100644 index 0000000..ec0f822 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DarkNet/DarkNet53.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 256, 256] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DarkNet53 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 256 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 
0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 292 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 292 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_AutoAugment.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_AutoAugment.yaml new file mode 100644 index 0000000..ab4c29c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_AutoAugment.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + 
- ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Baseline.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Baseline.yaml new file mode 100644 index 0000000..d75fede --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Baseline.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutmix.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutmix.yaml new file mode 100644 index 0000000..2fefb9f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutmix.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode 
and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - CutmixOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutout.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutout.yaml new file mode 100644 index 0000000..4bf5306 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Cutout.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - Cutout: + n_holes: 1 + length: 112 + + sampler: + name: DistributedBatchSampler 
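+      # Editor's note: DistributedBatchSampler shards the dataset across
+      # trainer processes, so the batch_size below is per card and the
+      # effective global batch scales with the number of devices used.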
+ batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_GridMask.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_GridMask.yaml new file mode 100644 index 0000000..c0016aa --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_GridMask.yaml @@ -0,0 +1,134 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - GridMask: + d1: 96 + d2: 224 + rotate: 1 + ratio: 0.5 + mode: 0 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_HideAndSeek.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_HideAndSeek.yaml new file mode 100644 index 0000000..12e4ac8 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_HideAndSeek.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - HideAndSeek: + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Mixup.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Mixup.yaml new file mode 100644 index 0000000..3434cab --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_Mixup.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 
224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandAugment.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandAugment.yaml new file mode 100644 index 0000000..153451e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandAugment.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - RandAugment: + num_layers: 2 + magnitude: 5 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + 
drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandomErasing.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandomErasing.yaml new file mode 100644 index 0000000..8e89c5c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DataAugment/ResNet50_RandomErasing.yaml @@ -0,0 +1,134 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
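+        # Editor's gloss on the RandomErasing values above: EPSILON is the
+        # probability of erasing a patch, sl/sh bound the erased area to
+        # 2%-40% of the image, r1 is the minimum aspect ratio of the patch,
+        # and erased pixels are filled with the constant `mean` (zeros here).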
+ + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml new file mode 100644 index 0000000..979a04a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_base_distilled_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + 
- DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml new file mode 100644 index 0000000..859f57d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_base_distilled_patch16_384 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + 
order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml new file mode 100644 index 0000000..3cdd102 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_base_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + 
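+# to_rgb converts the decoded image to RGB, assuming the default decode backend returns BGR (the OpenCV convention).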
channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml new file mode 100644 index 0000000..88a8fba --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_base_patch16_384 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + 
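+# Topk keeps the 5 highest-scoring classes and resolves their ids to readable names through the label file below.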
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml new file mode 100644 index 0000000..54d962e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_small_distilled_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml new file mode 100644 index 0000000..05c3ac1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_small_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml new file mode 100644 index 0000000..f666176 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + 
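+# checkpoints resumes a full training state (weights plus optimizer), while pretrained_model initializes weights only; both stay null when training from scratch.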
checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_tiny_distilled_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml new file mode 100644 index 0000000..647050a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + 
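+# image_shape defines the input spec used when exporting the inference model, so it should match the eval crop size (224 here).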
save_inference_dir: ./inference + +# model architecture +Arch: + name: DeiT_tiny_patch16_224 + drop_path_rate : 0.1 + drop_rate : 0.0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet121.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet121.yaml new file mode 100644 index 0000000..42c7e78 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet121.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DenseNet121 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + 
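+# Piecewise holds each value until the next decay_epochs boundary: epochs 0-29 run at 0.1, 30-59 at 0.01, 60-89 at 0.001, and 90-119 at 0.0001.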
learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet161.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet161.yaml new file mode 100644 index 0000000..3f9bbb6 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet161.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DenseNet161 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: 
True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet169.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet169.yaml new file mode 100644 index 0000000..3a046fb --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet169.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DenseNet169 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet201.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet201.yaml new file mode 100644 index 0000000..ba62682 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet201.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DenseNet201 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet264.yaml b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet264.yaml new file mode 100644 index 0000000..a0a8193 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/DenseNet/DenseNet264.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: DenseNet264 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + 
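+# DenseNet121/161/169/201/264 all share this 120-epoch Momentum + Piecewise recipe; only Arch.name differs across the five files.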
learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml new file mode 100644 index 0000000..a7265b0 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml @@ -0,0 +1,157 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + use_dali: false + +# model architecture +Arch: + name: "DistillationModel" + class_num: &class_num 1000 + # if not null, its lengths should be same as models + pretrained_list: + # if not null, its lengths should be same as models + freeze_params_list: + - True + - False + models: + - Teacher: + name: MobileNetV3_large_x1_0 + class_num: *class_num + pretrained: True + use_ssld: True + dropout_prob: null + - Student: + name: MobileNetV3_small_x1_0 + class_num: *class_num + pretrained: False + dropout_prob: null + + infer_model_name: "Student" + + +# loss function config for traing/eval process +Loss: + Train: + - DistillationCELoss: + weight: 1.0 + model_name_pairs: + - ["Student", "Teacher"] + Eval: + - DistillationGTCELoss: + weight: 1.0 + model_names: ["Student"] + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.65 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: "./dataset/ILSVRC2012/" 
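+# each line of the list file pairs an image path (relative to image_root) with an integer label, e.g. "train/n01440764/xxx.JPEG 0" (illustrative path).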
+ cls_label_path: "./dataset/ILSVRC2012/train_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: "./dataset/ILSVRC2012/" + cls_label_path: "./dataset/ILSVRC2012/val_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Infer: + infer_imgs: "docs/images/inference_deployment/whl_demo.jpg" + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: DistillationPostProcess + func: Topk + topk: 5 + class_id_map_file: "ppcls/utils/imagenet1k_label_list.txt" + +Metric: + Train: + - DistillationTopkAcc: + model_key: "Student" + topk: [1, 5] + Eval: + - DistillationTopkAcc: + model_key: "Student" + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_25.yaml new file mode 100644 index 0000000..b34ba07 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_25.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: ESNet_x0_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + 
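+# 1.0/255.0 and the 0.00392157 written in the distillation config above denote the same scale factor; the configs accept either spelling.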
order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_5.yaml new file mode 100644 index 0000000..0b82e08 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_5.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: ESNet_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_75.yaml new file mode 100644 index 0000000..7662397 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x0_75.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: ESNet_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x1_0.yaml new file mode 100644 index 0000000..583efd2 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ESNet/ESNet_x1_0.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: ESNet_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB0.yaml new file mode 100644 index 0000000..2d5b7d0 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB0.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler 
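+# DistributedBatchSampler shards samples across trainers, so batch_size below is per card; the effective global batch grows with the number of GPUs.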
+ batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB1.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB1.yaml new file mode 100644 index 0000000..b23030f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB1.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 240, 240] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB1 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 240 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 272 + - CropImage: + size: 240 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 272 + - CropImage: + size: 240 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB2.yaml new file mode 100644 index 0000000..de48d03 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB2.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 260, 260] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB2 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 260 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 292 + - CropImage: + size: 260 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 292 + - CropImage: + size: 260 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB3.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB3.yaml new file mode 100644 index 0000000..3f0b559 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB3.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 300, 300] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB3 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train 
and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 300 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 332 + - CropImage: + size: 300 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 332 + - CropImage: + size: 300 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB4.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB4.yaml new file mode 100644 index 0000000..e3a009a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB4.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 380, 380] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB4 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 380 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 412 + - CropImage: + size: 380 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 412 + - CropImage: + size: 380 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB5.yaml new file mode 100644 index 0000000..795dfa1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB5.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 456, 456] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 456 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 488 + - CropImage: + size: 456 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 488 + - CropImage: + size: 456 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB6.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB6.yaml new file mode 100644 index 0000000..f86dd04 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB6.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 528, 528] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB6 + class_num: 1000 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 528 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 560 + - CropImage: + size: 528 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 560 + - CropImage: + size: 528 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB7.yaml b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB7.yaml new file mode 100644 index 0000000..d57d841 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/EfficientNet/EfficientNetB7.yaml @@ -0,0 +1,133 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 600, 600] + save_inference_dir: ./inference + +# model architecture +Arch: + name: EfficientNetB7 + class_num: 1000 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + 
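+# The Optimizer below repeats the RMSProp + cosine-with-warmup recipe shared by EfficientNetB2-B6 above; only the input resolution (600 for B7) and the matching resize_short (632) change. +# A minimal launch sketch, assuming the standard PaddleClas tools/ layout: +#   python3 tools/train.py -c ppcls/configs/ImageNet/EfficientNet/EfficientNetB7.yaml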
+Optimizer: + name: RMSProp + momentum: 0.9 + rho: 0.9 + epsilon: 0.001 + lr: + name: Cosine + learning_rate: 0.032 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 600 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 632 + - CropImage: + size: 600 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 632 + - CropImage: + size: 600 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x0_5.yaml new file mode 100644 index 0000000..ba44691 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x0_5.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: GhostNet_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + 
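# each line of val_list.txt is expected to hold "<relative image path> <integer label>", resolved against image_root + 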
transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_0.yaml new file mode 100644 index 0000000..a4e6e37 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_0.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: GhostNet_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 
5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_3.yaml b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_3.yaml new file mode 100644 index 0000000..69921be --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/GhostNet/GhostNet_x1_3.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: GhostNet_x1_3 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W18_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W18_C.yaml new file mode 100644 index 0000000..935b0b5 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W18_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W18_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + 
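# step schedule: the four `values` cover the intervals cut at `decay_epochs`, e.g. epochs [0, 30) run at 0.1 and epochs [90, 120) at 0.0001 + 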
decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W30_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W30_C.yaml new file mode 100644 index 0000000..5f7067c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W30_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W30_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - 
ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W32_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W32_C.yaml new file mode 100644 index 0000000..fcc6dc1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W32_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W32_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W40_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W40_C.yaml new file mode 100644 index 0000000..a709677 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W40_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W40_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W44_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W44_C.yaml new file mode 100644 index 0000000..f530cc2 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W44_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W44_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + 
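# Piecewise expects len(values) == len(decay_epochs) + 1; the schedule is identical across the HRNet widths in this patch + 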
decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml new file mode 100644 index 0000000..1c7ffc9 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W48_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - 
ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W64_C.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W64_C.yaml new file mode 100644 index 0000000..e72b0b3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HRNet/HRNet_W64_C.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HRNet_W64_C + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet39_ds.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet39_ds.yaml new file mode 100644 index 0000000..2aa8e68 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet39_ds.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HarDNet39_ds + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68.yaml new file mode 100644 index 0000000..2f0ef12 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HarDNet68 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 
0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68_ds.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68_ds.yaml new file mode 100644 index 0000000..cf8f2ed --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet68_ds.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HarDNet68_ds + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + 
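# decode to HWC RGB here; the resize/crop ops that follow assume this layout + 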
channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet85.yaml b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet85.yaml new file mode 100644 index 0000000..8512859 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/HarDNet/HarDNet85.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: HarDNet85 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/Inception/GoogLeNet.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Inception/GoogLeNet.yaml new file mode 100644 index 0000000..5bc3c9e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Inception/GoogLeNet.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: GoogLeNet + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - GoogLeNetLoss: + weight: 1.0 + Eval: + - GoogLeNetLoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - GoogLeNetTopkAcc: + topk: [1, 5] + Eval: + - GoogLeNetTopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV3.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV3.yaml new file mode 100644 index 0000000..3749ed8 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV3.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: InceptionV3 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + 
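# no warmup_epoch here (cf. the Cosine schedules with warmup_epoch: 5 elsewhere in this patch), so decay starts immediately from 0.045 + 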
learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV4.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV4.yaml new file mode 100644 index 0000000..7df00cc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Inception/InceptionV4.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: InceptionV4 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - 
ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128.yaml b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128.yaml new file mode 100644 index 0000000..a1a4f73 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: LeViT_128 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128S.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128S.yaml new file mode 100644 index 0000000..bfc6eb4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_128S.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: LeViT_128S + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_192.yaml b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_192.yaml new file mode 100644 index 0000000..9596e86 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_192.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: LeViT_192 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + 
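# this Momentum + step-decay recipe matches the generic CNN template used across this patch; the original LeViT training setup (AdamW with distillation) differs, so these values look like template defaults + 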
regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_256.yaml b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_256.yaml new file mode 100644 index 0000000..fb42700 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_256.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: LeViT_256 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_384.yaml new file mode 100644 index 0000000..8347c4a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/LeViT/LeViT_384.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: LeViT_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_L.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_L.yaml new file mode 100644 index 0000000..54bb18d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_L.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: MixNet_L + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_M.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_M.yaml new file mode 100644 index 0000000..2c2a18d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_M.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: MixNet_M + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: 
Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_S.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_S.yaml new file mode 100644 index 0000000..e0f5c6a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MixNet/MixNet_S.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: MixNet_S + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml new file mode 100644 index 0000000..281015d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: MobileNetV1 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + 
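The Topk post-process configured here turns raw logits into the five most likely classes and, via class_id_map_file, into readable names. A hedged sketch: load_label_map assumes each line of imagenet1k_label_list.txt reads "<class id> <class name>", and both helpers are hypothetical rather than the ppcls implementation:

    import numpy as np

    def load_label_map(path="ppcls/utils/imagenet1k_label_list.txt"):
        # Assumed line format: "<class id> <class name>".
        with open(path) as f:
            return {int(l.split(" ", 1)[0]): l.split(" ", 1)[1].strip()
                    for l in f if l.strip()}

    def topk_postprocess(logits, label_map, k=5):
        # Indices of the k largest logits, highest first.
        ids = np.argsort(-np.asarray(logits))[:k]
        return [(int(i), label_map.get(int(i), str(i))) for i in ids]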
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_25.yaml new file mode 100644 index 0000000..86324cf --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_25.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV1_x0_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_5.yaml new file mode 100644 index 0000000..1693e78 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_5.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV1_x0_5 + 
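The MobileNetV1_x0_25/x0_5/x0_75 variants differ from the base model only in the width multiplier encoded in the Arch name: every layer's channel count is scaled by that factor and rounded to a hardware-friendly multiple. The rounding rule commonly used for MobileNet-style networks is sketched below; make_divisible is the conventional name, and the exact rounding in this repo may differ:

    def make_divisible(v, divisor=8, min_value=None):
        # Round to the nearest multiple of divisor, never dropping more
        # than 10% of the original value.
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    assert make_divisible(64 * 0.25) == 16   # a 64-channel layer at x0_25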
class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_75.yaml new file mode 100644 index 0000000..b8b0477 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV1/MobileNetV1_x0_75.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV1_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: 
False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml new file mode 100644 index 0000000..2fe1f5c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: MobileNetV2 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + 
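Unlike the LeViT and MixNet recipes, the MobileNetV2 family anneals the learning rate with a Cosine schedule from 0.045 toward zero across the full 240 epochs instead of stepping it. A sketch of the per-epoch value; cosine_lr is illustrative, and paddle's CosineAnnealingDecay may advance per iteration rather than per epoch:

    import math

    def cosine_lr(epoch, base_lr=0.045, total_epochs=240):
        # Half-cosine from base_lr at epoch 0 down to 0 at the final epoch.
        return 0.5 * base_lr * (1 + math.cos(math.pi * epoch / total_epochs))

    assert cosine_lr(0) == 0.045
    assert abs(cosine_lr(240)) < 1e-12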
std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_25.yaml new file mode 100644 index 0000000..d9f30fd --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_25.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV2_x0_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_5.yaml new file mode 100644 index 0000000..7abddd4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_5.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + 
name: MobileNetV2_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_75.yaml new file mode 100644 index 0000000..e620d70 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x0_75.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV2_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: 
ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x1_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x1_5.yaml new file mode 100644 index 0000000..f9d6abc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x1_5.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV2_x1_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: 
ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x2_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x2_0.yaml new file mode 100644 index 0000000..fa5bf68 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x2_0.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV2_x2_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_35.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_35.yaml new file mode 100644 index 0000000..0c81ebc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_35.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x0_35 + class_num: 1000 + +# loss function config for traing/eval process 
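All of these files are plain YAML, so they can be inspected or scripted against directly; the MobileNetV2 variants differ only in Arch.name and the regularizer coefficient. One caveat worth knowing: a value like scale: 1.0/255.0 parses as the string "1.0/255.0", which the transform implementation is left to evaluate. A quick look with PyYAML:

    import yaml  # PyYAML

    with open("ppcls/configs/ImageNet/MobileNetV2/MobileNetV2_x2_0.yaml") as f:
        config = yaml.safe_load(f)

    print(config["Arch"]["name"])             # MobileNetV2_x2_0
    print(config["Optimizer"]["lr"]["name"])  # Cosine
    print(config["Global"]["epochs"])         # 240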
+Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_5.yaml new file mode 100644 index 0000000..76c7028 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_5.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset 
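The MobileNetV3 recipes add epsilon: 0.1 to the training CELoss, i.e. label smoothing: the one-hot target is mixed with a uniform distribution so the model is never pushed toward fully saturated logits (eval keeps the plain loss). A hedged paddle sketch; smoothed_ce is illustrative, not the ppcls loss class:

    import paddle.nn.functional as F

    def smoothed_ce(logits, label, epsilon=0.1):
        # label: int64 tensor of class ids; logits: [batch, num_classes].
        num_classes = logits.shape[-1]
        one_hot = F.one_hot(label, num_classes)
        # Mix the hard target with a uniform distribution.
        soft = one_hot * (1.0 - epsilon) + epsilon / num_classes
        return -(soft * F.log_softmax(logits, axis=-1)).sum(axis=-1).mean()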
+ image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_75.yaml new file mode 100644 index 0000000..a1e9126 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x0_75.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + 
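These configs also pair the Cosine schedule with warmup_epoch: 5 and a large base rate (1.3, matched to the 512 per-card batch): the learning rate ramps linearly from zero over the first five epochs, then follows the cosine curve for the remainder. A sketch under the assumption that the schedule is stepped per epoch; PaddleClas may step it per iteration:

    import math

    def warmup_cosine_lr(epoch, base_lr=1.3, warmup_epoch=5, total_epochs=360):
        if epoch < warmup_epoch:
            return base_lr * epoch / warmup_epoch  # linear ramp from 0
        t = (epoch - warmup_epoch) / (total_epochs - warmup_epoch)
        return 0.5 * base_lr * (1 + math.cos(math.pi * t))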
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml new file mode 100644 index 0000000..3e3ad70 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.65 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_25.yaml new file mode 100644 index 0000000..097c41e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_25.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: 
MobileNetV3_large_x1_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_35.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_35.yaml new file mode 100644 index 0000000..30ea2eb --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_35.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_small_x0_35 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + 
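The sampler/loader split in these configs maps directly onto paddle.io: DistributedBatchSampler shards the dataset across trainers, and its batch_size is per device, so the global batch for these 512-per-card MobileNetV3 runs is 512 times the number of cards. A minimal sketch, assuming dataset is any paddle.io.Dataset; this is not the ppcls dataloader builder itself:

    from paddle.io import DataLoader, DistributedBatchSampler

    def build_loader(dataset, batch_size=512, shuffle=True):
        # batch_size is per device; global batch = batch_size * num cards.
        sampler = DistributedBatchSampler(dataset, batch_size=batch_size,
                                          shuffle=shuffle, drop_last=False)
        return DataLoader(dataset, batch_sampler=sampler,
                          num_workers=4, use_shared_memory=True)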
loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_5.yaml new file mode 100644 index 0000000..3c13bbb --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_5.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_small_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + 
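For reference, the NormalizeImage step being spelled out in these transform lists multiplies the decoded uint8 image by scale, then standardizes each channel with the ImageNet mean/std; order: '' leaves the array HWC for the later ToCHWImage transpose. A numpy sketch, with normalize_image as an illustrative stand-in for the ppcls operator:

    import numpy as np

    def normalize_image(img, scale=1.0 / 255.0,
                        mean=(0.485, 0.456, 0.406),
                        std=(0.229, 0.224, 0.225)):
        img = img.astype("float32") * scale          # uint8 [0,255] -> [0,1]
        mean = np.array(mean, "float32").reshape((1, 1, 3))
        std = np.array(std, "float32").reshape((1, 1, 3))
        return (img - mean) / std                    # per-channel standardize

    chw = normalize_image(np.zeros((224, 224, 3), np.uint8)).transpose(2, 0, 1)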
std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_75.yaml new file mode 100644 index 0000000..45608df --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x0_75.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_small_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_0.yaml new file mode 100644 index 0000000..02a3949 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_0.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 
224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_small_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_25.yaml new file mode 100644 index 0000000..eeae690 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_small_x1_25.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_small_x1_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 1.3 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: 
DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_25.yaml new file mode 100644 index 0000000..8b0924c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_25.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x0_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + 
mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_35.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_35.yaml new file mode 100644 index 0000000..ed2501e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_35.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x0_35 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_5.yaml new file mode 100644 index 0000000..0f01d58 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_5.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: 
+ name: PPLCNet_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_75.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_75.yaml new file mode 100644 index 0000000..7857882 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x0_75.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x0_75 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + 
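NormalizeImage, as used in the transform stacks above, first multiplies the decoded HWC uint8 image by scale (written as the string '1.0/255.0', i.e. 1/255 once evaluated) and then standardizes each channel with the ImageNet mean/std. A NumPy sketch of the equivalent arithmetic, assuming HWC float32 output:

import numpy as np

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD  = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def normalize_image(img_u8, scale=1.0 / 255.0):
    # img_u8: HWC uint8 (channel_first: False); returns HWC float32.
    x = img_u8.astype(np.float32) * scale       # map [0, 255] -> [0, 1]
    return (x - IMAGENET_MEAN) / IMAGENET_STD   # per-channel standardization

The ToCHWImage step that appears in the Infer pipelines then transposes HWC to CHW, matching image_shape: [3, 224, 224].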
Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_0.yaml new file mode 100644 index 0000000..f55a044 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_0.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + 
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_5.yaml new file mode 100644 index 0000000..d654d42 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x1_5.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x1_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_0.yaml new file mode 100644 index 0000000..50b19aa --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_0.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x2_0 + class_num: 1000 +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 
+ epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_5.yaml new file mode 100644 index 0000000..4f677e5 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PPLCNet/PPLCNet_x2_5.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference +# model architecture +Arch: + name: PPLCNet_x2_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.8 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml new file mode 100644 index 0000000..6c0854c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B0 + class_num: 1000 + drop_path_rate: 0.1 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] 
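(Referring back to the Optimizer block of this PVT_V2_B0 config.) AdamW here exempts from the 0.05 weight decay any parameter whose name matches a token in no_weight_decay_name and, via one_dim_param_no_weight_decay: True, every 1-D tensor (biases, norm scales). A hedged sketch of how such parameter grouping is typically assembled; split_decay_groups is an illustrative helper, not the ppcls API:

def split_decay_groups(model,
                       skip_names=("pos_embed1", "pos_embed2", "pos_embed3",
                                   "pos_embed4", "cls_token"),
                       weight_decay=0.05):
    # Assumption: model exposes named_parameters() as paddle.nn.Layer does.
    decay, no_decay = [], []
    for name, p in model.named_parameters():
        if p.ndim == 1 or any(s in name for s in skip_names):
            no_decay.append(p)   # biases, norms, positional embeddings, cls token
        else:
            decay.append(p)
    return [{"params": decay, "weight_decay": weight_decay},
            {"params": no_decay, "weight_decay": 0.0}]

Decaying positional embeddings and normalization parameters tends to hurt transformer training, which is why these ViT-style configs carve them out while the CNN configs rely on a plain L2 regularizer instead.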
+ order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml new file mode 100644 index 0000000..42134c7 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B1 + class_num: 1000 + drop_path_rate: 0.1 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + 
- DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml new file mode 100644 index 0000000..4d0d5a4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B2 + class_num: 1000 + drop_path_rate: 0.1 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 
'' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml new file mode 100644 index 0000000..a5feb26 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B2_Linear + class_num: 1000 + drop_path_rate: 0.1 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml new file mode 100644 index 0000000..be300ac --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml @@ -0,0 +1,162 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B3 + class_num: 1000 + drop_path_rate: 0.3 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + clip_grad: 1.0 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml new file mode 100644 index 0000000..b6a8953 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml @@ -0,0 +1,162 @@ +# global configs +Global: + checkpoints: 
null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B4 + class_num: 1000 + drop_path_rate: 0.3 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + clip_grad: 1.0 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml new file mode 100644 index 0000000..9d36b28 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml @@ -0,0 +1,162 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: 
[3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: PVT_V2_B5 + class_num: 1000 + drop_path_rate: 0.3 + drop_rate: 0.0 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + clip_grad: 1.0 + no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 5e-6 + warmup_epoch: 20 + warmup_start_lr: 5e-7 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_0.yaml new file mode 100644 index 0000000..709d72f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_0.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ReXNet_1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - 
CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_3.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_3.yaml new file mode 100644 index 0000000..18607c6 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_3.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ReXNet_1_3 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + 
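Unlike the cosine policy in the configs above, the ReXNet family uses a Piecewise schedule: the rate holds at each entry of values until the next boundary in decay_epochs is crossed, so len(values) == len(decay_epochs) + 1. A small sketch of the lookup, assuming the new value takes effect at the boundary epoch itself:

import bisect

def piecewise_lr(epoch, decay_epochs=(30, 60, 90),
                 values=(0.1, 0.01, 0.001, 0.0001)):
    # values[i] applies while decay_epochs[i-1] <= epoch < decay_epochs[i].
    return values[bisect.bisect_right(decay_epochs, epoch)]

# piecewise_lr(0) -> 0.1, piecewise_lr(30) -> 0.01, piecewise_lr(100) -> 0.0001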
Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_5.yaml new file mode 100644 index 0000000..99dca8b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_1_5.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ReXNet_1_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + 
order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_2_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_2_0.yaml new file mode 100644 index 0000000..285b8df --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_2_0.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ReXNet_2_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_3_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_3_0.yaml new file mode 100644 index 0000000..a44294e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ReXNet/ReXNet_3_0.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model 
under @to_static + to_static: False + +# model architecture +Arch: + name: ReXNet_3_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet101.yaml b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet101.yaml new file mode 100644 index 0000000..95ea518 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet101.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: RedNet101 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + + sampler: + name: 
DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet152.yaml b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet152.yaml new file mode 100644 index 0000000..7d5cc03 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet152.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: RedNet152 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 
1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet26.yaml b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet26.yaml new file mode 100644 index 0000000..089db6f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet26.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: RedNet26 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet38.yaml b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet38.yaml new file mode 100644 index 0000000..c2fb863 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet38.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model 
architecture +Arch: + name: RedNet38 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet50.yaml b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet50.yaml new file mode 100644 index 0000000..02e045a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/RedNet/RedNet50.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: RedNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: 
True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0 + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net101_vd_26w_4s.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net101_vd_26w_4s.yaml new file mode 100644 index 0000000..ed16b03 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net101_vd_26w_4s.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Res2Net101_vd_26w_4s + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: 
[0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net200_vd_26w_4s.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net200_vd_26w_4s.yaml new file mode 100644 index 0000000..af1f438 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net200_vd_26w_4s.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Res2Net200_vd_26w_4s + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_14w_8s.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_14w_8s.yaml new file mode 100644 index 0000000..7824052 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_14w_8s.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model 
architecture +Arch: + name: Res2Net50_14w_8s + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_26w_4s.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_26w_4s.yaml new file mode 100644 index 0000000..60767ba --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_26w_4s.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Res2Net50_26w_4s + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: 
False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_vd_26w_4s.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_vd_26w_4s.yaml new file mode 100644 index 0000000..977c144 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Res2Net/Res2Net50_vd_26w_4s.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Res2Net50_vd_26w_4s + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 
0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt101.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt101.yaml new file mode 100644 index 0000000..d99e885 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt101.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 256, 256] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeSt101 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 256 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50.yaml new file mode 100644 index 0000000..d822c8b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeSt50 + class_num: 1000 + +# 
loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50_fast_1s1x64d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50_fast_1s1x64d.yaml new file mode 100644 index 0000000..eb973af --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeSt/ResNeSt50_fast_1s1x64d.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeSt50_fast_1s1x64d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + 
shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_32x4d.yaml new file mode 100644 index 0000000..e0d0a5b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 
0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_64x4d.yaml new file mode 100644 index 0000000..d68f5f7 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00015 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_32x4d.yaml new file mode 100644 index 0000000..eadd9ee --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + 
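Where the ReXNet, RedNet, and non-vd ResNeXt configs use a plain CELoss, the Res2Net, ResNeSt, and vd-variant configs add epsilon: 0.1 to the train loss, i.e. label smoothing. A sketch of the assumed semantics (standard smoothed cross entropy in NumPy; the actual implementation lives in ppcls's loss module):

import numpy as np

def smoothed_ce(logits, label, epsilon=0.1):
    n = logits.shape[-1]
    # mix the one-hot target with a uniform distribution over classes
    target = np.full(n, epsilon / n)
    target[label] += 1.0 - epsilon
    logits = logits - logits.max()                     # numerical stability
    log_probs = logits - np.log(np.exp(logits).sum())  # log-softmax
    return float(-(target * log_probs).sum())

With epsilon: 0.1 and class_num: 1000, the true class keeps target mass 0.9001 and every other class gets 1e-4.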
+# model architecture +Arch: + name: ResNeXt101_vd_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_64x4d.yaml new file mode 100644 index 0000000..5c59e5a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt101_vd_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_vd_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + 
batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_32x4d.yaml new file mode 100644 index 0000000..8bad3f6 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt152_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: 
[0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_64x4d.yaml new file mode 100644 index 0000000..104f37a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt152_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.00018 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_32x4d.yaml new file mode 100644 index 0000000..638feef --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] 
+ save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt152_vd_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_64x4d.yaml new file mode 100644 index 0000000..7c05197 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt152_vd_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt152_vd_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + 
name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_32x4d.yaml new file mode 100644 index 0000000..ef78f60 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt50_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + 
scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_64x4d.yaml new file mode 100644 index 0000000..b750357 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt50_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_32x4d.yaml new file mode 100644 index 0000000..baf38e3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + 
image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt50_vd_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_64x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_64x4d.yaml new file mode 100644 index 0000000..dba5f86 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt/ResNeXt50_vd_64x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt50_vd_64x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + 
alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x16d_wsl.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x16d_wsl.yaml new file mode 100644 index 0000000..71193aa --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x16d_wsl.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_32x16d_wsl + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: 
+ resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x32d_wsl.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x32d_wsl.yaml new file mode 100644 index 0000000..346d2ea --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x32d_wsl.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_32x32d_wsl + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x48d_wsl.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x48d_wsl.yaml new file mode 100644 index 0000000..2db3bd6 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x48d_wsl.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + 
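The ResNeXt101_wsl configs above all train for 120 epochs with the same Piecewise schedule: the learning rate starts at 0.1 and is stepped down at epochs 30, 60 and 90 to the listed values. A minimal sketch of that lookup in plain Python (the function name is ours, not the ppcls optimizer builder):

import bisect

def piecewise_lr(epoch, decay_epochs=(30, 60, 90),
                 values=(0.1, 0.01, 0.001, 0.0001)):
    # values has one more entry than decay_epochs; pick the segment
    # the current epoch falls into
    return values[bisect.bisect_right(decay_epochs, epoch)]

assert piecewise_lr(0) == 0.1
assert piecewise_lr(30) == 0.01
assert piecewise_lr(119) == 0.0001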
eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_32x48d_wsl + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x8d_wsl.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x8d_wsl.yaml new file mode 100644 index 0000000..bed3cc2 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNeXt101_wsl/ResNeXt101_32x8d_wsl.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNeXt101_32x8d_wsl + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + 
to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101.yaml new file mode 100644 index 0000000..2c98acf --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ResNet101 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + 
num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101_vd.yaml new file mode 100644 index 0000000..d62b7bc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet101_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet101_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152.yaml new file mode 100644 index 0000000..0dbbaf8 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + 
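The _vd configs such as ResNet101_vd above differ from the plain variants in two ways: they switch to Cosine learning-rate decay over 200 epochs, and they add epsilon: 0.1 under CELoss, which enables label smoothing: the one-hot target is blended with a uniform distribution before the cross entropy is taken. A small numpy sketch of the smoothed target (the helper name is illustrative, not a ppcls API):

import numpy as np

def smooth_labels(labels, class_num=1000, epsilon=0.1):
    # one-hot targets softened towards the uniform distribution
    one_hot = np.eye(class_num)[labels]
    return one_hot * (1.0 - epsilon) + epsilon / class_num

t = smooth_labels(np.array([3]))
# the true class keeps 0.9001, every other class gets 0.0001,
# and each row still sums to 1
assert np.isclose(t.sum(), 1.0)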
output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ResNet152 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152_vd.yaml new file mode 100644 index 0000000..735c84b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet152_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet152_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: 
True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18.yaml new file mode 100644 index 0000000..4e0e460 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet18 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + 
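MixupOperator (alpha: 0.2) in the train pipeline above is a batch-level transform: each decoded image is blended with a partner drawn from the same batch, and the loss is later taken against both partners' labels with the same blend weight. A rough numpy sketch of the idea, with illustrative names rather than the actual ppcls operator:

import numpy as np

def mixup_batch(images, labels, alpha=0.2, rng=np.random.default_rng()):
    lam = rng.beta(alpha, alpha)        # blend weight ~ Beta(alpha, alpha)
    idx = rng.permutation(len(images))  # random partner for every sample
    mixed = lam * images + (1.0 - lam) * images[idx]
    # training then combines the two terms as
    # lam * CE(pred, labels) + (1 - lam) * CE(pred, labels[idx])
    return mixed, labels, labels[idx], lam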
+Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18_vd.yaml new file mode 100644 index 0000000..0150633 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet18_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet18_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet200_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet200_vd.yaml new file mode 100644 index 0000000..c9209f1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet200_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + 
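Every Eval and Infer pipeline in these files follows the same recipe: resize so the short side is 256, center-crop 224x224, scale pixels by 1/255, normalize with the ImageNet mean/std, and move channels first. An approximation of those operators with Pillow and numpy (a sketch, not the actual ppcls preprocessing code):

import numpy as np
from PIL import Image

def eval_preprocess(path, resize_short=256, crop=224):
    img = Image.open(path).convert("RGB")
    w, h = img.size
    scale = resize_short / min(w, h)               # ResizeImage: resize_short
    img = img.resize((round(w * scale), round(h * scale)))
    w, h = img.size
    left, top = (w - crop) // 2, (h - crop) // 2   # CropImage: center crop
    img = img.crop((left, top, left + crop, top + crop))
    x = np.asarray(img, dtype="float32") / 255.0   # NormalizeImage: scale
    x = (x - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    return x.transpose(2, 0, 1)                    # ToCHWImage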
save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet200_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34.yaml new file mode 100644 index 0000000..5b90cf0 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet34 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + 
flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34_vd.yaml new file mode 100644 index 0000000..a894ea4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet34_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet34_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + 
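The Topk post-process named by these Infer sections reduces the model output to the five highest-scoring class ids and maps them to names via imagenet1k_label_list.txt. The core is a partial sort; a numpy sketch of just that part (the ppcls class additionally handles the label file and batching):

import numpy as np

def topk_ids(scores, k=5):
    # indices of the k largest scores, best first
    idx = np.argpartition(scores, -k)[-k:]
    return idx[np.argsort(scores[idx])[::-1]]

probs = np.random.rand(1000)
print(topk_ids(probs))  # five class ids, highest score first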
transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50.yaml new file mode 100644 index 0000000..c2da23f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50.yaml @@ -0,0 +1,132 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml new file mode 100644 index 0000000..53e9ae2 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml @@ -0,0 +1,147 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: 
True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + image_channel: &image_channel 4 + # used for static mode and model export + image_shape: [*image_channel, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_dali: True + +# mixed precision training +AMP: + scale_loss: 128.0 + use_dynamic_loss_scaling: True + # O1: mixed fp16 + level: O1 + +# model architecture +Arch: + name: ResNet50 + class_num: 1000 + input_image_channel: *image_channel + data_format: "NHWC" + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + multi_precision: True + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + channel_num: *image_channel + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + channel_num: *image_channel + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + channel_num: *image_channel + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O2.yaml new file mode 100644 index 0000000..6a4425b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_amp_O2.yaml @@ -0,0 +1,149 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + image_channel: &image_channel 4 + # used for static mode and model export + image_shape: [*image_channel, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + use_dali: True + +# mixed precision training +AMP: + scale_loss: 128.0 + use_dynamic_loss_scaling: True + # O2: pure fp16 + level: O2 + +# model architecture +Arch: + name: ResNet50 + 
class_num: 1000 + input_image_channel: *image_channel + data_format: "NHWC" + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + multi_precision: True + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_vd.yaml new file mode 100644 index 0000000..be7b2d9 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ResNet/ResNet50_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: 
[0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SENet154_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SENet154_vd.yaml new file mode 100644 index 0000000..6545cbf --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SENet154_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SENet154_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: 
True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d.yaml new file mode 100644 index 0000000..f97430e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNeXt101_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d_amp_O2.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d_amp_O2.yaml new file mode 100644 index 0000000..da005d3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt101_32x4d_amp_O2.yaml @@ -0,0 +1,143 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + 
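Every Infer section above ends with the same `Topk` PostProcess block. A rough NumPy sketch of what that step computes — the function name `topk_postprocess` is illustrative, not the ppcls API, and the real implementation additionally maps class ids to human-readable names via `class_id_map_file` (ppcls/utils/imagenet1k_label_list.txt):

    import numpy as np

    def topk_postprocess(logits, k=5):
        # Softmax over the class axis, then the k highest-scoring ids,
        # mirroring what the `Topk` PostProcess (topk: 5) reports.
        probs = np.exp(logits - logits.max(axis=-1, keepdims=True))
        probs /= probs.sum(axis=-1, keepdims=True)
        ids = np.argsort(-probs, axis=-1)[..., :k]
        scores = np.take_along_axis(probs, ids, axis=-1)
        return ids, scores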
print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_channel: &image_channel 4 + image_shape: [*image_channel, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNeXt101_32x4d + class_num: 1000 + input_image_channel: *image_channel + data_format: "NHWC" + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +# mixed precision training +AMP: + scale_loss: 128.0 + use_dynamic_loss_scaling: True + # O2: pure fp16 + level: O2 + +Optimizer: + name: Momentum + momentum: 0.9 + multi_precision: True + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + sampler: + name: BatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + output_fp16: True + channel_num: *image_channel + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_32x4d.yaml new file mode 100644 index 0000000..b31250b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNeXt50_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval 
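A note on the `_amp_O2` config above: `image_channel: &image_channel 4` declares a standard YAML anchor, and every `*image_channel` alias reuses that value, so the 4-channel fp16/NHWC layout is stated once and stays consistent across `Global.image_shape`, `Arch.input_image_channel`, and the `channel_num` of each `NormalizeImage` transform. A minimal sketch of how the anchor resolves, assuming PyYAML is installed; PaddleClas's own config reader under `ppcls/utils` is the authoritative loader:

    import yaml

    snippet = """
    Global:
      image_channel: &image_channel 4
      image_shape: [*image_channel, 224, 224]
    Arch:
      input_image_channel: *image_channel
    """

    cfg = yaml.safe_load(snippet)
    # Anchors and aliases resolve at parse time, so editing the single
    # anchored value updates every alias at once.
    assert cfg["Global"]["image_shape"] == [4, 224, 224]
    assert cfg["Arch"]["input_image_channel"] == 4

This is why switching such a config back to 3-channel input only requires changing the one anchored value.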
+DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_vd_32x4d.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_vd_32x4d.yaml new file mode 100644 index 0000000..292b52d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNeXt50_vd_32x4d.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNeXt50_vd_32x4d + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet18_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet18_vd.yaml new file mode 100644 index 0000000..47d1754 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet18_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNet18_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet34_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet34_vd.yaml new file mode 
100644 index 0000000..174c181 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet34_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNet34_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet50_vd.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet50_vd.yaml new file mode 100644 index 0000000..f503ea6 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SENet/SE_ResNet50_vd.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SE_ResNet50_vd + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset 
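Each of the SENet/ResNet `_vd` training pipelines above enables mixup through `batch_transform_ops` with `MixupOperator: alpha: 0.2`. As a rough NumPy sketch of the underlying idea only — the real operator is implemented in ppcls's data module, and in the usual formulation returns the label pair plus `lam` so the loss is mixed rather than the one-hot targets:

    import numpy as np

    def mixup_batch(images, one_hot_labels, alpha=0.2,
                    rng=np.random.default_rng(0)):
        # lam ~ Beta(alpha, alpha); alpha=0.2, as in the configs above,
        # concentrates lam near 0 or 1, so most mixed samples stay close
        # to one of the two source images.
        lam = rng.beta(alpha, alpha)
        perm = rng.permutation(len(images))
        mixed_images = lam * images + (1.0 - lam) * images[perm]
        mixed_labels = lam * one_hot_labels + (1.0 - lam) * one_hot_labels[perm]
        return mixed_images, mixed_labels

Note also that these mixup configs leave `Metric.Train` empty: once labels are mixed, batch top-k accuracy is no longer well defined, so only `Eval` reports TopkAcc.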
+ image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_swish.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_swish.yaml new file mode 100644 index 0000000..e01891e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_swish.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_swish + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.5 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + 
name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_25.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_25.yaml new file mode 100644 index 0000000..c2e9805 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_25.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x0_25 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.5 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_33.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_33.yaml new file mode 100644 index 0000000..dc7a5ef --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_33.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x0_33 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.5 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_5.yaml new file mode 100644 index 0000000..796fb7a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x0_5.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x0_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.5 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: 
./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0.yaml new file mode 100644 index 0000000..809fb2a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.5 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 
64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_5.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_5.yaml new file mode 100644 index 0000000..eb3e013 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_5.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x1_5 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.25 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x2_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x2_0.yaml new file mode 100644 index 0000000..730cf43 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x2_0.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 240 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x2_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.25 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_0.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_0.yaml new file mode 100644 index 0000000..28eba49 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_0.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SqueezeNet1_0 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.02 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_1.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_1.yaml new file mode 100644 index 0000000..b61a28c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SqueezeNet/SqueezeNet1_1.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: SqueezeNet1_1 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.02 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + 
use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml new file mode 100644 index 0000000..4dd0ac4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_base_patch4_window12_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + 
to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml new file mode 100644 index 0000000..a42dea1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_base_patch4_window7_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml new file mode 100644 index 0000000..36b5e5e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_large_patch4_window12_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 438 + interpolation: bicubic + backend: pil + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + 
name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml new file mode 100644 index 0000000..96a9bef --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_large_patch4_window7_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git 
a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml new file mode 100644 index 0000000..ffbbcf0 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_small_patch4_window7_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml new file mode 100644 index 0000000..066db71 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml @@ -0,0 +1,159 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: SwinTransformer_tiny_patch4_window7_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/TNT/TNT_small.yaml b/src/PaddleClas/ppcls/configs/ImageNet/TNT/TNT_small.yaml new file mode 100644 index 0000000..1eab423 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/TNT/TNT_small.yaml @@ -0,0 
+1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: TNT_small + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 248 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 248 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml new file mode 100644 index 0000000..74c402e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: alt_gvt_base + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.3 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + 
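
The transformer recipes in this batch all drive AdamW with a warmed-up cosine schedule: the learning rate climbs linearly from warmup_start_lr to learning_rate, then decays to eta_min. A minimal sketch of that schedule in plain Python, assuming epoch-level updates and the values from the surrounding Twins config (this mirrors the Optimizer.lr block, not PaddleClas's actual Cosine/warmup classes):

    import math

    def lr_at_epoch(epoch, base_lr=5e-4, eta_min=1e-5, warmup_epochs=5,
                    warmup_start_lr=1e-6, total_epochs=300):
        # linear warmup from warmup_start_lr up to base_lr
        if epoch < warmup_epochs:
            return warmup_start_lr + (base_lr - warmup_start_lr) * epoch / warmup_epochs
        # cosine decay from base_lr down to eta_min over the remaining epochs
        t = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
        return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * t))

With warmup_epoch 5 and 300 epochs total, the peak 5e-4 is reached at epoch 5 and falls smoothly to 1e-5 by the final epoch.
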
eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml new file mode 100644 index 0000000..ca66e9a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: alt_gvt_large + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.5 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: 
./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml new file mode 100644 index 0000000..9e97c0f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: alt_gvt_small + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.2 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - 
TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml new file mode 100644 index 0000000..7831e92 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: pcpvt_base + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.3 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 
0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml new file mode 100644 index 0000000..8e160b3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: pcpvt_large + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.5 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + 
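
These configs wrap Mixup and Cutmix in an OpSampler under batch_transform_ops, so each batch receives at most one of the two. A rough sketch of that per-batch choice given the prob weights above (a hypothetical helper, not the actual ppcls OpSampler implementation):

    import random

    def sample_batch_op(weighted_ops):
        # weighted_ops: list of (batch_transform_fn, prob); if the probs sum to
        # less than 1, the remaining mass leaves the batch unchanged.
        r, acc = random.random(), 0.0
        for fn, prob in weighted_ops:
            acc += prob
            if r < acc:
                return fn
        return lambda batch: batch  # identity: apply neither op

With MixupOperator and CutmixOperator both at prob 0.5, every batch gets exactly one of the two operators, never both.
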
name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml new file mode 100644 index 0000000..582382d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: pcpvt_small + class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.2 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True + lr: + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + 
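
ImageNetDataset locates its samples through image_root plus cls_label_path; each line of train_list.txt / val_list.txt is assumed here to hold a relative image path and an integer class id separated by whitespace (an assumption worth checking against the repo's data documentation). A tiny parser sketch under that assumption:

    import os

    def load_label_list(list_path, image_root):
        # yields (image_path, class_id) pairs from an assumed "path label" list file
        samples = []
        with open(list_path) as f:
            for line in f:
                rel_path, label = line.split()  # assumes exactly two fields per line
                samples.append((os.path.join(image_root, rel_path), int(label)))
        return samples
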
cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + interpolation: bicubic + backend: pil + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG11.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG11.yaml new file mode 100644 index 0000000..e55c4d0 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG11.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 90 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: VGG11 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.0002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - 
TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG13.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG13.yaml new file mode 100644 index 0000000..b4a0ee3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG13.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 90 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: VGG13 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0003 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG16.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG16.yaml new file mode 100644 index 0000000..154c468 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG16.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 90 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: VGG16 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0004 + + +# data loader for train and eval +DataLoader: + 
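
Unlike the transformer configs, the VGG recipes use plain SGD with momentum plus an L2 regularizer, rather than AdamW with decoupled weight decay. As a reference point, a sketch of one such update in NumPy terms (illustrative only, not Paddle's Momentum optimizer):

    import numpy as np

    def momentum_step(w, grad, velocity, lr=0.01, momentum=0.9, l2_coeff=0.0004):
        # the L2 regularizer folds coeff * w into the gradient before the update
        grad = grad + l2_coeff * w
        velocity = momentum * velocity + grad
        return w - lr * velocity, velocity

    # e.g. one step on a toy 10-dim weight vector
    w, v = np.zeros(10), np.zeros(10)
    w, v = momentum_step(w, np.ones(10), v)
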
Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG19.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG19.yaml new file mode 100644 index 0000000..0a7022e --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VGG/VGG19.yaml @@ -0,0 +1,128 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 150 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: VGG19 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: 
False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_224.yaml new file mode 100644 index 0000000..6d5857d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_224.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_base_patch16_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_384.yaml new file mode 100644 index 0000000..925d827 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch16_384.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_base_patch16_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch32_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch32_384.yaml new file mode 100644 index 0000000..fc4747b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_base_patch32_384.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_base_patch32_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data 
loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_224.yaml new file mode 100644 index 0000000..3882c55 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_224.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_large_patch16_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 
1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_384.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_384.yaml new file mode 100644 index 0000000..3bdb387 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch16_384.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_large_patch16_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch32_384.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch32_384.yaml new file mode 100644 index 0000000..25212dd --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_large_patch32_384.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 384, 384] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_large_patch32_384 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 384 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 384 + - CropImage: + size: 384 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_small_patch16_224.yaml b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_small_patch16_224.yaml new file mode 100644 index 0000000..0a956b4 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/VisionTransformer/ViT_small_patch16_224.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ViT_small_patch16_224 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: 
Piecewise + learning_rate: 0.1 + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41.yaml new file mode 100644 index 0000000..45e64a1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Xception41 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + 
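
The Xception configs evaluate at 299x299 after resizing the shorter image side to 320, rather than the 256/224 pair used by most other configs in this series. A small PIL sketch of that geometry (illustrative; the real ops are ppcls preprocessing transforms):

    from PIL import Image

    def resize_short_then_center_crop(img, resize_short=320, crop=299):
        w, h = img.size
        scale = resize_short / min(w, h)      # shorter side becomes resize_short
        img = img.resize((round(w * scale), round(h * scale)))
        w, h = img.size
        left, top = (w - crop) // 2, (h - crop) // 2
        return img.crop((left, top, left + crop, top + crop))
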
size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41_deeplab.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41_deeplab.yaml new file mode 100644 index 0000000..daf0598 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception41_deeplab.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Xception41_deeplab + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65.yaml 
b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65.yaml new file mode 100644 index 0000000..c6bb529 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Xception65 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65_deeplab.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65_deeplab.yaml new file mode 100644 index 0000000..1248a29 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception65_deeplab.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Xception65_deeplab + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.045 + regularizer: + name: 'L2' + coeff: 
0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception71.yaml b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception71.yaml new file mode 100644 index 0000000..7f714cc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/ImageNet/Xception/Xception71.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 299, 299] + save_inference_dir: ./inference + +# model architecture +Arch: + name: Xception71 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.0225 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 299 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 
1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 320 + - CropImage: + size: 299 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/Logo/ResNet50_ReID.yaml b/src/PaddleClas/ppcls/configs/Logo/ResNet50_ReID.yaml new file mode 100644 index 0000000..0949add --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Logo/ResNet50_ReID.yaml @@ -0,0 +1,151 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "CircleMargin" + margin: 0.35 + scale: 64 + embedding_size: 512 + class_num: 3000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - PairwiseCosface: + margin: 0.35 + gamma: 64 + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0001 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: "dataset/LogoDet-3K-crop/train/" + cls_label_path: "dataset/LogoDet-3K-crop/train_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: True + + loader: + num_workers: 6 + use_shared_memory: True + Eval: + Query: + dataset: + name: ImageNetDataset + image_root: "dataset/LogoDet-3K-crop/val/" + cls_label_path: "dataset/LogoDet-3K-crop/query_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + + Gallery: + dataset: + name: ImageNetDataset + image_root: "dataset/LogoDet-3K-crop/train/" + cls_label_path: "dataset/LogoDet-3K-crop/train_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: 
[0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml b/src/PaddleClas/ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml new file mode 100644 index 0000000..ad77ea9 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Products/MV3_Large_1x_Aliproduct_DLBHC.yaml @@ -0,0 +1,149 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output_dlbhc/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + #eval_mode: "retrieval" + print_batch_step: 10 + use_visualdl: False + + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + + #feature postprocess + feature_normalize: False + feature_binarize: "round" + +# model architecture +Arch: + name: "RecModel" + Backbone: + name: "MobileNetV3_large_x1_0" + pretrained: True + class_num: 512 + Head: + name: "FC" + class_num: 50030 + embedding_size: 512 + + infer_output_key: "features" + infer_add_softmax: "false" + +# loss function config for train/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.1 + decay_epochs: [50, 150] + values: [0.1, 0.01, 0.001] + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 256 + - RandCropImage: + size: 227 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 227 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 227 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + +# switch to metric below when eval by retrieval +# - Recallk: +# topk: [1] +# - mAP: +# - Precisionk: +# topk: [1] + diff --git a/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml new file mode 100644 index 0000000..70f8056 --- /dev/null +++ 
b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Aliproduct.yaml @@ -0,0 +1,119 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: classification + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: ResNet50_vd + pretrained: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 2048 + class_num: 512 + Head: + name: FC + embedding_size: 512 + class_num: 50030 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.05 + regularizer: + name: 'L2' + coeff: 0.00007 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Inshop.yaml b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Inshop.yaml new file mode 100644 index 0000000..18ddfa3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_Inshop.yaml @@ -0,0 +1,157 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/product_ResNet50_vd_Aliproduct_v1.0_pretrained.pdparams" + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: ResNet50_vd + pretrained: False + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 2048 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 3997 + margin: 0.15 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + 
momentum: 0.9 + lr: + name: MultiStepDecay + learning_rate: 0.04 + milestones: [30, 60, 70, 80, 90, 100] + gamma: 0.5 + verbose: False + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/Inshop/ + cls_label_path: ./dataset/Inshop/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + sampler: + name: PKSampler + batch_size: 64 + sample_per_id: 2 + drop_last: True + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: ImageNetDataset + image_root: ./dataset/Inshop/ + cls_label_path: ./dataset/Inshop/query_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: ImageNetDataset + image_root: ./dataset/Inshop/ + cls_label_path: ./dataset/Inshop/gallery_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_SOP.yaml b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_SOP.yaml new file mode 100644 index 0000000..7728a66 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Products/ResNet50_vd_SOP.yaml @@ -0,0 +1,156 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/product_ResNet50_vd_Aliproduct_v1.0_pretrained.pdparams" + output_dir: ./output/ + device: gpu + save_interval: 10 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + +# model architecture +Arch: + name: RecModel + Backbone: + name: ResNet50_vd + pretrained: False + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 2048 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 11319 + margin: 0.15 + scale: 30 + infer_output_key: features + infer_add_softmax: False + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: MultiStepDecay + learning_rate: 0.01 + milestones: [30, 60, 70, 80, 90, 100] + gamma: 0.5 + verbose: False + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: VeriWild + image_root: ./dataset/SOP/ + cls_label_path: 
./dataset/SOP/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + + sampler: + name: PKSampler + batch_size: 64 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/SOP/ + cls_label_path: ./dataset/SOP/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/SOP/ + cls_label_path: ./dataset/SOP/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} diff --git a/src/PaddleClas/ppcls/configs/Vehicle/PPLCNet_2.5x_ReID.yaml b/src/PaddleClas/ppcls/configs/Vehicle/PPLCNet_2.5x_ReID.yaml new file mode 100644 index 0000000..eb9f145 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Vehicle/PPLCNet_2.5x_ReID.yaml @@ -0,0 +1,158 @@ +# global configs +# pretrained_model: https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/vehicle_reid_PPLCNet2.5x_VERIWild_v1.0_pretrained.pdparams +# VeriWild v1 small: recall1: 0.93736, recall5: 0.98427, mAP: 0.82125 +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output_reid/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 160 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "PPLCNet_x2_5" + pretrained: True + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: "FC" + embedding_size: 1280 + class_num: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 30671 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images/" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/train_list_start0.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 
0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: True + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + Eval: + Query: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id_query.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + + Gallery: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/configs/Vehicle/ResNet50.yaml b/src/PaddleClas/ppcls/configs/Vehicle/ResNet50.yaml new file mode 100644 index 0000000..6b61724 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Vehicle/ResNet50.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 160 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 431 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0005 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + bbox_crop: True + cls_label_path: "./dataset/CompCars/train_test_split/classification/train_label.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
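As context for the sampler that follows: PKSampler builds each batch from P identities with K images each (with batch_size: 128 and sample_per_id: 2, that is 64 ids x 2 views), which is what pair-based losses such as SupConLoss with views: 2 rely on for in-batch positives. A minimal sketch of the scheme, with hypothetical helper names, assuming integer identity labels:

import random
from collections import defaultdict

def pk_batches(labels, batch_size=128, sample_per_id=2):
    # Group sample indices by identity, then emit batches of
    # (batch_size // sample_per_id) identities x sample_per_id images each.
    by_id = defaultdict(list)
    for idx, label in enumerate(labels):
        by_id[label].append(idx)
    pool = [pid for pid, idxs in by_id.items() if len(idxs) >= sample_per_id]
    random.shuffle(pool)
    p = batch_size // sample_per_id
    for i in range(0, len(pool) - p + 1, p):
        batch = []
        for pid in pool[i:i + p]:
            batch.extend(random.sample(by_id[pid], sample_per_id))
        yield batch

Configs that set drop_last: True under PKSampler do so because a short trailing batch could leave identities without a positive pair.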
+ + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + cls_label_path: "./dataset/CompCars/train_test_split/classification/test_label.txt" + bbox_crop: True + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/Vehicle/ResNet50_ReID.yaml b/src/PaddleClas/ppcls/configs/Vehicle/ResNet50_ReID.yaml new file mode 100644 index 0000000..c13d59a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/Vehicle/ResNet50_ReID.yaml @@ -0,0 +1,155 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 160 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 30671 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images/" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/train_list_start0.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
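For reference, the ArcMargin head configured above corresponds to the additive angular-margin logit s * cos(theta_y + m) on the target class (here m = 0.15, s = 32, 512-d embeddings). A numpy sketch of the forward computation, not the ppcls implementation itself:

import numpy as np

def arc_margin_logits(feat, weight, label, margin=0.15, scale=32.0):
    # feat: (N, 512) embeddings; weight: (num_classes, 512); label: (N,) ints.
    feat = feat / np.linalg.norm(feat, axis=1, keepdims=True)
    weight = weight / np.linalg.norm(weight, axis=1, keepdims=True)
    cos = np.clip(feat @ weight.T, -1.0, 1.0)          # cos(theta)
    theta = np.arccos(cos)
    logits = cos.copy()
    rows = np.arange(feat.shape[0])
    logits[rows, label] = np.cos(theta[rows, label] + margin)
    return scale * logits                              # fed to softmax CE

The margin pulls same-identity features toward their class center on the unit hypersphere, which is why retrieval can later compare the raw embeddings by plain cosine distance.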
+ + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: True + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + Eval: + Query: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id_query.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + + Gallery: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/configs/quick_start/MobileNetV1_retrieval.yaml b/src/PaddleClas/ppcls/configs/quick_start/MobileNetV1_retrieval.yaml new file mode 100644 index 0000000..f088e1c --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/MobileNetV1_retrieval.yaml @@ -0,0 +1,158 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 5 + eval_during_train: True + eval_interval: 1 + epochs: 50 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: MobileNetV1 + pretrained: False + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + embedding_size: 1024 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 101 + margin: 0.15 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.5 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: MultiStepDecay + learning_rate: 0.01 + milestones: [20, 30, 40] + gamma: 0.5 + verbose: False + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: VeriWild + image_root: ./dataset/CUB_200_2011/ + cls_label_path: ./dataset/CUB_200_2011/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
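RandomErasing, as parameterized in the transform lists above, blanks one rectangle per image with probability EPSILON, drawing the erased area from [sl, sh] of the image area and the aspect ratio from [r1, 1/r1]. A compact sketch of that sampling loop (the usual Zhong et al. formulation; the helper name is hypothetical):

import math
import random
import numpy as np

def random_erase(img, epsilon=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0., 0., 0.)):
    # img: HxWxC float array; one box is erased in place with probability epsilon.
    if random.random() > epsilon:
        return img
    h, w = img.shape[:2]
    for _ in range(100):                        # retry until a box fits
        area = random.uniform(sl, sh) * h * w
        ratio = random.uniform(r1, 1.0 / r1)
        eh, ew = int(math.sqrt(area * ratio)), int(math.sqrt(area / ratio))
        if 0 < eh < h and 0 < ew < w:
            y, x = random.randint(0, h - eh), random.randint(0, w - ew)
            img[y:y + eh, x:x + ew, :] = np.asarray(mean)
            break
    return img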
+ sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 2 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/CUB_200_2011/ + cls_label_path: ./dataset/CUB_200_2011/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/CUB_200_2011/ + cls_label_path: ./dataset/CUB_200_2011/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/configs/quick_start/MobileNetV3_large_x1_0.yaml b/src/PaddleClas/ppcls/configs/quick_start/MobileNetV3_large_x1_0.yaml new file mode 100644 index 0000000..d87dc09 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/MobileNetV3_large_x1_0.yaml @@ -0,0 +1,130 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 20 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x1_0 + class_num: 102 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.00375 + warmup_epoch: 5 + last_epoch: -1 + regularizer: + name: 'L2' + coeff: 0.000001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 
256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ./dataset/flowers102/flowers102_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/ResNet50_vd.yaml b/src/PaddleClas/ppcls/configs/quick_start/ResNet50_vd.yaml new file mode 100644 index 0000000..90b2c88 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/ResNet50_vd.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 20 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50_vd + class_num: 102 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.0125 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 32 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ./dataset/flowers102/flowers102_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/kunlun/HRNet_W18_C_finetune_kunlun.yaml b/src/PaddleClas/ppcls/configs/quick_start/kunlun/HRNet_W18_C_finetune_kunlun.yaml new file mode 100644 index 0000000..6a461cc --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/kunlun/HRNet_W18_C_finetune_kunlun.yaml @@ -0,0 +1,68 @@ +mode: 'train' +ARCHITECTURE: + name: 'HRNet_W18_C' +pretrained_model: "./pretrained/HRNet_W18_C_pretrained" +model_save_dir: "./output/" +classes_num: 102 +total_images: 1020 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 10 +topk: 5 +image_shape: 
[3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.00375 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000001 + +TRAIN: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/train_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/val_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/src/PaddleClas/ppcls/configs/quick_start/kunlun/ResNet50_vd_finetune_kunlun.yaml b/src/PaddleClas/ppcls/configs/quick_start/kunlun/ResNet50_vd_finetune_kunlun.yaml new file mode 100644 index 0000000..7fad5ee --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/kunlun/ResNet50_vd_finetune_kunlun.yaml @@ -0,0 +1,69 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50_vd' +pretrained_model: "./pretrained/ResNet50_vd_pretrained" +load_static_weights: true +model_save_dir: "./output/" +classes_num: 102 +total_images: 1020 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 20 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.00375 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000001 + +TRAIN: + batch_size: 20 + num_workers: 1 + file_list: "./dataset/flowers102/train_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 20 + num_workers: 1 + file_list: "./dataset/flowers102/val_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG16_finetune_kunlun.yaml b/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG16_finetune_kunlun.yaml new file mode 100644 index 0000000..389a5f3 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG16_finetune_kunlun.yaml @@ -0,0 +1,70 @@ +mode: 'train' +ARCHITECTURE: + name: 'VGG16' + params: + stop_grad_layers: 5 +pretrained_model: "./pretrained/VGG16_pretrained" +model_save_dir: "./output/" +classes_num: 102 +total_images: 1020 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 20 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.0005 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00001 + +TRAIN: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/train_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/val_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG19_finetune_kunlun.yaml b/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG19_finetune_kunlun.yaml new file mode 100644 index 0000000..6ba38b9 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/kunlun/VGG19_finetune_kunlun.yaml @@ -0,0 +1,70 @@ +mode: 'train' +ARCHITECTURE: + name: 'VGG19' + params: + stop_grad_layers: 5 +pretrained_model: "./pretrained/VGG19_pretrained" +model_save_dir: "./output/" +classes_num: 102 +total_images: 1020 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 20 +topk: 5 +image_shape: [3, 224, 224] + +LEARNING_RATE: + function: 'Cosine' + params: + lr: 0.0005 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.00001 + +TRAIN: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/train_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 20 + num_workers: 0 + file_list: "./dataset/flowers102/val_list.txt" + data_dir: "./dataset/flowers102/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/src/PaddleClas/ppcls/configs/quick_start/new_user/ShuffleNetV2_x0_25.yaml b/src/PaddleClas/ppcls/configs/quick_start/new_user/ShuffleNetV2_x0_25.yaml new file mode 100644 index 0000000..1246366 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/new_user/ShuffleNetV2_x0_25.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: cpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 20 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ShuffleNetV2_x0_25 + class_num: 102 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.0125 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/flowers102/ + cls_label_path: ./dataset/flowers102/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ./dataset/flowers102/flowers102_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml new file mode 100644 index 0000000..6838710 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml @@ -0,0 +1,129 @@ +# global configs +Global: + checkpoints: null + 
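A note on the two adjacent Global keys here, which every config in this diff carries: checkpoints resumes a full training run (weights plus optimizer state), while pretrained_model only initializes weights for fine-tuning. A hedged sketch of that dispatch; the helper name is hypothetical, while paddle.load and set_state_dict are real Paddle APIs and .pdparams/.pdopt are the usual checkpoint suffixes:

import paddle

def init_from_config(model, optimizer, checkpoints=None, pretrained_model=None):
    if checkpoints:                         # resume: weights + optimizer state
        model.set_state_dict(paddle.load(checkpoints + ".pdparams"))
        optimizer.set_state_dict(paddle.load(checkpoints + ".pdopt"))
    elif pretrained_model:                  # fine-tune: weights only
        model.set_state_dict(paddle.load(pretrained_model + ".pdparams"))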
pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 10 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + use_multilabel: True +# model architecture +Arch: + name: MobileNetV1 + class_num: 33 + pretrained: True + +# loss function config for traing/eval process +Loss: + Train: + - MultiLabelLoss: + weight: 1.0 + Eval: + - MultiLabelLoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00004 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: MultiLabelDataset + image_root: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/ + cls_label_path: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/multilabel_train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: MultiLabelDataset + image_root: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/ + cls_label_path: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/multilabel_test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: ./deploy/images/0517_2715693311.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: MultiLabelTopk + topk: 5 + class_id_map_file: None + +Metric: + Train: + - HammingDistance: + - AccuracyScore: + Eval: + - HammingDistance: + - AccuracyScore: diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV3_large_x1_0_CIFAR100_finetune.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV3_large_x1_0_CIFAR100_finetune.yaml new file mode 100644 index 0000000..423a453 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/MobileNetV3_large_x1_0_CIFAR100_finetune.yaml @@ -0,0 +1,127 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 32, 32] + save_inference_dir: ./inference + +# model architecture +Arch: + name: MobileNetV3_large_x1_0 + class_num: 100 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval 
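Before the data-loader block, a note on the Cosine schedule in the Optimizer section above: it anneals the rate to zero over the whole run as lr_t = 0.5 * lr_0 * (1 + cos(pi * t / T)), whereas the Piecewise schedule used elsewhere in this diff steps through fixed values at the decay_epochs boundaries. A small sketch of both rules, assuming per-epoch stepping:

import math

def cosine_lr(base_lr, epoch, total_epochs):
    # lr_t = 0.5 * lr_0 * (1 + cos(pi * t / T))
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * epoch / total_epochs))

def piecewise_lr(values, decay_epochs, epoch):
    # len(values) == len(decay_epochs) + 1; step down at each boundary
    for i, boundary in enumerate(decay_epochs):
        if epoch < boundary:
            return values[i]
    return values[-1]

# cosine_lr(0.04, 50, 100) -> 0.02
# piecewise_lr([0.1, 0.01, 0.001, 0.0001], [30, 60, 90], 45) -> 0.01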
+DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 32 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/R50_vd_distill_MV3_large_x1_0_CIFAR100.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/R50_vd_distill_MV3_large_x1_0_CIFAR100.yaml new file mode 100644 index 0000000..a27068d --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/R50_vd_distill_MV3_large_x1_0_CIFAR100.yaml @@ -0,0 +1,151 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 32, 32] + save_inference_dir: "./inference" + +# model architecture +Arch: + name: "DistillationModel" + # if not null, its lengths should be same as models + pretrained_list: + # if not null, its lengths should be same as models + freeze_params_list: + - True + - False + models: + - Teacher: + name: ResNet50_vd + class_num: 100 + pretrained: "./pretrained/best_model" + - Student: + name: MobileNetV3_large_x1_0 + class_num: 100 + pretrained: True + + infer_model_name: "Student" + + +# loss function config for traing/eval process +Loss: + Train: + - DistillationCELoss: + weight: 1.0 + model_name_pairs: + - ["Student", "Teacher"] + Eval: + - DistillationGTCELoss: + weight: 1.0 + model_names: ["Student"] + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: "./dataset/CIFAR100/" + cls_label_path: "./dataset/CIFAR100/train_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 32 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: 
DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: "./dataset/CIFAR100/" + cls_label_path: "./dataset/CIFAR100/test_list.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Infer: + infer_imgs: "docs/images/inference_deployment/whl_demo.jpg" + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: DistillationPostProcess + func: Topk + topk: 5 + +Metric: + Train: + - DistillationTopkAcc: + model_key: "Student" + topk: [1, 5] + Eval: + - DistillationTopkAcc: + model_key: "Student" + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_CIFAR100.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_CIFAR100.yaml new file mode 100644 index 0000000..ca0794f --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_CIFAR100.yaml @@ -0,0 +1,127 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: cpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 32, 32] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50_vd + class_num: 100 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 32 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - 
NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_mixup_CIFAR100_finetune.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_mixup_CIFAR100_finetune.yaml new file mode 100644 index 0000000..d8ff817 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/ResNet50_vd_mixup_CIFAR100_finetune.yaml @@ -0,0 +1,127 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 100 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 32, 32] + save_inference_dir: ./inference + +# model architecture +Arch: + name: ResNet50_vd + class_num: 100 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.04 + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 32 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/CIFAR100/ + cls_label_path: ./dataset/CIFAR100/test_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 36 + - CropImage: + size: 32 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + +Metric: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/quick_start/professional/VGG19_CIFAR10_DeepHash.yaml b/src/PaddleClas/ppcls/configs/quick_start/professional/VGG19_CIFAR10_DeepHash.yaml new file mode 100644 index 0000000..9722882 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/quick_start/professional/VGG19_CIFAR10_DeepHash.yaml @@ -0,0 +1,147 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + eval_mode: "retrieval" + epochs: 128 + print_batch_step: 10 + use_visualdl: False + + # used for static mode and model export + image_shape: [3, 224, 224] + 
save_inference_dir: ./inference + + #feature postprocess + feature_normalize: False + feature_binarize: "round" + +# model architecture +Arch: + name: "RecModel" + Backbone: + name: "VGG19Sigmoid" + pretrained: True + class_num: 48 + Head: + name: "FC" + class_num: 10 + embedding_size: 48 + + infer_output_key: "features" + infer_add_softmax: "false" + +# loss function config for train/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + learning_rate: 0.01 + decay_epochs: [200] + values: [0.01, 0.001] + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/cifar10/ + cls_label_path: ./dataset/cifar10/cifar10-2/train.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 256 + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: ImageNetDataset + image_root: ./dataset/cifar10/ + cls_label_path: ./dataset/cifar10/cifar10-2/test.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: ImageNetDataset + image_root: ./dataset/cifar10/ + cls_label_path: ./dataset/cifar10/cifar10-2/database.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.4914, 0.4822, 0.4465] + std: [0.2023, 0.1994, 0.2010] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 512 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - mAP: + - Precisionk: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/slim/GeneralRecognition_PPLCNet_x2_5_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/GeneralRecognition_PPLCNet_x2_5_quantization.yaml new file mode 100644 index 0000000..7b21d0b --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/GeneralRecognition_PPLCNet_x2_5_quantization.yaml @@ -0,0 +1,154 @@ +# global configs +Global: + checkpoints: null + pretrained_model: https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/general_PPLCNet_x2_5_pretrained_v1.0.pdparams + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 30 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + eval_mode: retrieval + use_dali: False + to_static: False + +# for quantizaiton or prune model +Slim: + ## for prune + quant: + name: pact + +# model architecture +Arch: + name: RecModel + infer_output_key: features + infer_add_softmax: False + + Backbone: + name: PPLCNet_x2_5 + pretrained: False + use_ssld: True + BackboneStopLayer: + name: "flatten" + Neck: + name: FC + 
embedding_size: 1280 + class_num: 512 + Head: + name: ArcMargin + embedding_size: 512 + class_num: 185341 + margin: 0.2 + scale: 30 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.002 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ + cls_label_path: ./dataset/train_reg_all_data.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + Query: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: VeriWild + image_root: ./dataset/Aliproduct/ + cls_label_path: ./dataset/Aliproduct/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml b/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml new file mode 100644 index 0000000..6655c3a --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml @@ -0,0 +1,139 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 360 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# for quantization or prune model +Slim: + ## for prune + prune: + name: fpgm + pruned_ratio: 0.3 + +# model architecture +Arch: + name: MobileNetV3_large_x1_0 + class_num: 1000 + pretrained: True + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.65 + warmup_epoch: 5 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 
0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml new file mode 100644 index 0000000..517c467 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml @@ -0,0 +1,138 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 60 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# for quantalization or prune model +Slim: + ## for quantization + quant: + name: pact + +# model architecture +Arch: + name: MobileNetV3_large_x1_0 + class_num: 1000 + pretrained: True + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.065 + warmup_epoch: 0 + regularizer: + name: 'L2' + coeff: 0.00002 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AutoAugment: + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 256 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: 
docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
diff --git a/src/PaddleClas/ppcls/configs/slim/PPLCNet_x1_0_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/PPLCNet_x1_0_quantization.yaml
new file mode 100644
index 0000000..40111a0
--- /dev/null
+++ b/src/PaddleClas/ppcls/configs/slim/PPLCNet_x1_0_quantization.yaml
@@ -0,0 +1,138 @@
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 60
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+
+# for quantization or prune model
+Slim:
+  ## for quantization
+  quant:
+    name: pact
+
+# model architecture
+Arch:
+  name: PPLCNet_x1_0
+  class_num: 1000
+  pretrained: True
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: Cosine
+    learning_rate: 0.02
+    warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    coeff: 0.00003
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - AutoAugment:
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 128
+      drop_last: False
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+  Eval:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+    - ResizeImage:
+        resize_short: 256
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+
+Metric:
+  Train:
+    - TopkAcc:
+        topk: [1, 5]
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_prune.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_prune.yaml
new file mode 100644
index 0000000..7bfc537
--- /dev/null
+++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_prune.yaml
@@ -0,0 +1,138 @@
+# global configs
+Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 200 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# for quantization or prune model +Slim: + ## for prune + prune: + name: fpgm + pruned_ratio: 0.3 + +# model architecture +Arch: + name: ResNet50_vd + class_num: 1000 + pretrained: True + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.1 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_quantization.yaml new file mode 100644 index 0000000..f9db410 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vd_quantization.yaml @@ -0,0 +1,137 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 30 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + +# for quantalization or prune model +Slim: + ## for quantization + quant: + name: pact + +# model architecture +Arch: + name: ResNet50_vd + class_num: 1000 + pretrained: True + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.00007 + + +# data loader for 
train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + batch_transform_ops: + - MixupOperator: + alpha: 0.2 + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/inference_deployment/whl_demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + Eval: + - TopkAcc: + topk: [1, 5] diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_prune.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_prune.yaml new file mode 100644 index 0000000..1f6fea8 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_prune.yaml @@ -0,0 +1,135 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/vehicle_cls_ResNet50_CompCars_v1.2_pretrained.pdparams" + output_dir: "./output_vehicle_cls_prune/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 160 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + +Slim: + prune: + name: fpgm + pruned_ratio: 0.3 + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 431 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0005 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + bbox_crop: True + cls_label_path: "./dataset/CompCars/train_test_split/classification/train_label.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - 
NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] + + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + cls_label_path: "./dataset/CompCars/train_test_split/classification/test_label.txt" + bbox_crop: True + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_quantization.yaml new file mode 100644 index 0000000..026b865 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_cls_quantization.yaml @@ -0,0 +1,134 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/vehicle_cls_ResNet50_CompCars_v1.2_pretrained.pdparams" + output_dir: "./output_vehicle_cls_pact/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 80 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + +Slim: + quant: + name: pact + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 431 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + coeff: 0.0005 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + bbox_crop: True + cls_label_path: "./dataset/CompCars/train_test_split/classification/train_label.txt" + transform_ops: + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
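The RandomErasing entries in these vehicle recipes (EPSILON is the apply probability, sl/sh bound the erased-area fraction, r1 bounds the aspect ratio, and mean is the fill value) follow the usual Zhong et al. formulation. As a minimal illustrative sketch of what those parameters control, assuming an HWC NumPy image, and not the exact PaddleClas operator:

import math
import random

def random_erasing(img, epsilon=0.5, sl=0.02, sh=0.4, r1=0.3,
                   mean=(0.0, 0.0, 0.0)):
    """Erase a random patch of an HWC NumPy image with probability epsilon."""
    if random.random() > epsilon:
        return img
    h, w = img.shape[:2]
    for _ in range(100):  # retry until a patch fits inside the image
        area = random.uniform(sl, sh) * h * w   # erased area, a fraction of h*w
        ratio = random.uniform(r1, 1.0 / r1)    # aspect ratio of the patch
        eh = int(round(math.sqrt(area * ratio)))
        ew = int(round(math.sqrt(area / ratio)))
        if 0 < eh < h and 0 < ew < w:
            top = random.randint(0, h - eh)
            left = random.randint(0, w - ew)
            img[top:top + eh, left:left + ew, :] = mean  # fill with `mean`
            return img
    return img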
+ + sampler: + name: PKSampler + batch_size: 64 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 8 + use_shared_memory: True + + Eval: + dataset: + name: "CompCars" + image_root: "./dataset/CompCars/image/" + label_root: "./dataset/CompCars/label/" + cls_label_path: "./dataset/CompCars/train_test_split/classification/test_label.txt" + bbox_crop: True + transform_ops: + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 8 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_prune.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_prune.yaml new file mode 100644 index 0000000..63b87f1 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_prune.yaml @@ -0,0 +1,162 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/vehicle_reid_ResNet50_VERIWild_v1.1_pretrained.pdparams" + output_dir: "./output_vehicle_reid_prune/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 160 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# for quantizaiton or prune model +Slim: + ## for prune + prune: + name: fpgm + pruned_ratio: 0.3 + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 30671 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.01 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images/" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/train_list_start0.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
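The PKSampler used by these recipes builds each batch from batch_size // sample_per_id identities with sample_per_id images each, which is what SupConLoss (views: 2) needs to form positive pairs. A simplified single-process sketch of that batching logic, assuming integer labels and a hypothetical helper name:

import random
from collections import defaultdict

def pk_batches(labels, batch_size=128, sample_per_id=2):
    """Yield index batches holding batch_size // sample_per_id identities,
    with sample_per_id images per identity (single-process sketch)."""
    ids_per_batch = batch_size // sample_per_id
    buckets = defaultdict(list)
    for idx, pid in enumerate(labels):
        buckets[pid].append(idx)
    pids = list(buckets)
    random.shuffle(pids)
    for i in range(0, len(pids) - ids_per_batch + 1, ids_per_batch):
        batch = []
        for pid in pids[i:i + ids_per_batch]:
            # sample with replacement when an identity has < sample_per_id images
            batch.extend(random.choices(buckets[pid], k=sample_per_id))
        yield batch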
+ + sampler: + name: PKSampler + batch_size: 128 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + Eval: + Query: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id_query.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + + Gallery: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_quantization.yaml b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_quantization.yaml new file mode 100644 index 0000000..cca9915 --- /dev/null +++ b/src/PaddleClas/ppcls/configs/slim/ResNet50_vehicle_reid_quantization.yaml @@ -0,0 +1,161 @@ +# global configs +Global: + checkpoints: null + pretrained_model: "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/vehicle_reid_ResNet50_VERIWild_v1.1_pretrained.pdparams" + output_dir: "./output_vehicle_reid_pact/" + device: "gpu" + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 40 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: "./inference" + eval_mode: "retrieval" + +# for quantizaiton or prune model +Slim: + ## for prune + quant: + name: pact + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + BackboneStopLayer: + name: "avg_pool" + Neck: + name: "VehicleNeck" + in_channels: 2048 + out_channels: 512 + Head: + name: "ArcMargin" + embedding_size: 512 + class_num: 30671 + margin: 0.15 + scale: 32 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - SupConLoss: + weight: 1.0 + views: 2 + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images/" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/train_list_start0.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - AugMix: + prob: 0.5 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0., 0., 0.] 
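With eval_mode: "retrieval", the Query split is embedded and ranked against the Gallery split, and the Recallk/mAP metrics above are computed over that ranking. A rough NumPy sketch of Recall@K under cosine similarity, as a hypothetical helper rather than the PaddleClas metric code:

import numpy as np

def recall_at_k(q_feats, q_labels, g_feats, g_labels, k=1):
    """Fraction of queries whose top-k gallery matches contain the query label."""
    q = q_feats / np.linalg.norm(q_feats, axis=1, keepdims=True)
    g = g_feats / np.linalg.norm(g_feats, axis=1, keepdims=True)
    sim = q @ g.T                           # cosine similarity, (num_q, num_g)
    topk = np.argsort(-sim, axis=1)[:, :k]  # indices of the k best matches
    hits = (g_labels[topk] == q_labels[:, None]).any(axis=1)
    return float(hits.mean())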
+ + sampler: + name: PKSampler + batch_size: 64 + sample_per_id: 2 + drop_last: False + shuffle: True + loader: + num_workers: 6 + use_shared_memory: True + Eval: + Query: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id_query.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + + Gallery: + dataset: + name: "VeriWild" + image_root: "./dataset/VeRI-Wild/images" + cls_label_path: "./dataset/VeRI-Wild/train_test_split/test_3000_id.txt" + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + size: 224 + - NormalizeImage: + scale: 0.00392157 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 6 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} + diff --git a/src/PaddleClas/ppcls/data/__init__.py b/src/PaddleClas/ppcls/data/__init__.py new file mode 100644 index 0000000..cffac81 --- /dev/null +++ b/src/PaddleClas/ppcls/data/__init__.py @@ -0,0 +1,144 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
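# NOTE: create_operators() below turns the transform_ops lists from the YAML
# configs into operator instances: each single-key dict names a class in
# ppcls.data.preprocess and carries its constructor kwargs. A hedged usage
# sketch (values mirror the configs above; raw_jpeg_bytes is a placeholder):
#
#     ops = create_operators([
#         {"DecodeImage": {"to_rgb": True, "channel_first": False}},
#         {"ResizeImage": {"resize_short": 256}},
#         {"CropImage": {"size": 224}},
#         {"NormalizeImage": {"scale": 1.0 / 255.0,
#                             "mean": [0.485, 0.456, 0.406],
#                             "std": [0.229, 0.224, 0.225]}},
#     ])
#     img = transform(raw_jpeg_bytes, ops)  # apply each op in order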
+ +import inspect +import copy +import paddle +import numpy as np +from paddle.io import DistributedBatchSampler, BatchSampler, DataLoader +from ppcls.utils import logger + +from ppcls.data import dataloader +# dataset +from ppcls.data.dataloader.imagenet_dataset import ImageNetDataset +from ppcls.data.dataloader.multilabel_dataset import MultiLabelDataset +from ppcls.data.dataloader.common_dataset import create_operators +from ppcls.data.dataloader.vehicle_dataset import CompCars, VeriWild +from ppcls.data.dataloader.logo_dataset import LogoDataset +from ppcls.data.dataloader.icartoon_dataset import ICartoonDataset +from ppcls.data.dataloader.mix_dataset import MixDataset + +# sampler +from ppcls.data.dataloader.DistributedRandomIdentitySampler import DistributedRandomIdentitySampler +from ppcls.data.dataloader.pk_sampler import PKSampler +from ppcls.data.dataloader.mix_sampler import MixSampler +from ppcls.data import preprocess +from ppcls.data.preprocess import transform + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + for operator in params: + assert isinstance(operator, + dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(preprocess, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +def build_dataloader(config, mode, device, use_dali=False, seed=None): + assert mode in [ + 'Train', 'Eval', 'Test', 'Gallery', 'Query' + ], "Dataset mode should be Train, Eval, Test, Gallery, Query" + # build dataset + if use_dali: + from ppcls.data.dataloader.dali import dali_dataloader + return dali_dataloader(config, mode, paddle.device.get_device(), seed) + + class_num = config.get("class_num", None) + config_dataset = config[mode]['dataset'] + config_dataset = copy.deepcopy(config_dataset) + dataset_name = config_dataset.pop('name') + if 'batch_transform_ops' in config_dataset: + batch_transform = config_dataset.pop('batch_transform_ops') + else: + batch_transform = None + + dataset = eval(dataset_name)(**config_dataset) + + logger.debug("build dataset({}) success...".format(dataset)) + + # build sampler + config_sampler = config[mode]['sampler'] + if "name" not in config_sampler: + batch_sampler = None + batch_size = config_sampler["batch_size"] + drop_last = config_sampler["drop_last"] + shuffle = config_sampler["shuffle"] + else: + sampler_name = config_sampler.pop("name") + batch_sampler = eval(sampler_name)(dataset, **config_sampler) + + logger.debug("build batch_sampler({}) success...".format(batch_sampler)) + + # build batch operator + def mix_collate_fn(batch): + batch = transform(batch, batch_ops) + # batch each field + slots = [] + for items in batch: + for i, item in enumerate(items): + if len(slots) < len(items): + slots.append([item]) + else: + slots[i].append(item) + return [np.stack(slot, axis=0) for slot in slots] + + if isinstance(batch_transform, list): + batch_ops = create_operators(batch_transform, class_num) + batch_collate_fn = mix_collate_fn + else: + batch_collate_fn = None + + # build dataloader + config_loader = config[mode]['loader'] + num_workers = config_loader["num_workers"] + use_shared_memory = config_loader["use_shared_memory"] + + if 
batch_sampler is None: + data_loader = DataLoader( + dataset=dataset, + places=device, + num_workers=num_workers, + return_list=True, + use_shared_memory=use_shared_memory, + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last, + collate_fn=batch_collate_fn) + else: + data_loader = DataLoader( + dataset=dataset, + places=device, + num_workers=num_workers, + return_list=True, + use_shared_memory=use_shared_memory, + batch_sampler=batch_sampler, + collate_fn=batch_collate_fn) + + logger.debug("build data_loader({}) success...".format(data_loader)) + return data_loader diff --git a/src/PaddleClas/ppcls/data/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..a93f9c4 Binary files /dev/null and b/src/PaddleClas/ppcls/data/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/.imagenet_dataset.py.un~ b/src/PaddleClas/ppcls/data/dataloader/.imagenet_dataset.py.un~ new file mode 100644 index 0000000..f7f916d Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/.imagenet_dataset.py.un~ differ diff --git a/src/PaddleClas/ppcls/data/dataloader/DistributedRandomIdentitySampler.py b/src/PaddleClas/ppcls/data/dataloader/DistributedRandomIdentitySampler.py new file mode 100644 index 0000000..1203803 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/DistributedRandomIdentitySampler.py @@ -0,0 +1,90 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from collections import defaultdict +import numpy as np +import copy +import random +from paddle.io import DistributedBatchSampler, Sampler + + +class DistributedRandomIdentitySampler(DistributedBatchSampler): + """ + Randomly sample N identities, then for each identity, + randomly sample K instances, therefore batch size is N*K. + Args: + - data_source (list): list of (img_path, pid, camid). + - num_instances (int): number of instances per identity in a batch. + - batch_size (int): number of examples in a batch. 
+ """ + + def __init__(self, dataset, batch_size, num_instances, drop_last, **args): + self.dataset = dataset + self.batch_size = batch_size + self.num_instances = num_instances + self.drop_last = drop_last + self.num_pids_per_batch = self.batch_size // self.num_instances + self.index_dic = defaultdict(list) + for index, pid in enumerate(self.dataset.labels): + self.index_dic[pid].append(index) + self.pids = list(self.index_dic.keys()) + # estimate number of examples in an epoch + self.length = 0 + for pid in self.pids: + idxs = self.index_dic[pid] + num = len(idxs) + if num < self.num_instances: + num = self.num_instances + self.length += num - num % self.num_instances + + def __iter__(self): + batch_idxs_dict = defaultdict(list) + for pid in self.pids: + idxs = copy.deepcopy(self.index_dic[pid]) + if len(idxs) < self.num_instances: + idxs = np.random.choice( + idxs, size=self.num_instances, replace=True) + random.shuffle(idxs) + batch_idxs = [] + for idx in idxs: + batch_idxs.append(idx) + if len(batch_idxs) == self.num_instances: + batch_idxs_dict[pid].append(batch_idxs) + batch_idxs = [] + avai_pids = copy.deepcopy(self.pids) + final_idxs = [] + while len(avai_pids) >= self.num_pids_per_batch: + selected_pids = random.sample(avai_pids, self.num_pids_per_batch) + for pid in selected_pids: + batch_idxs = batch_idxs_dict[pid].pop(0) + final_idxs.extend(batch_idxs) + if len(batch_idxs_dict[pid]) == 0: + avai_pids.remove(pid) + _sample_iter = iter(final_idxs) + batch_indices = [] + for idx in _sample_iter: + batch_indices.append(idx) + if len(batch_indices) == self.batch_size: + yield batch_indices + batch_indices = [] + if not self.drop_last and len(batch_indices) > 0: + yield batch_indices + + def __len__(self): + if self.drop_last: + return self.length // self.batch_size + else: + return (self.length + self.batch_size - 1) // self.batch_size diff --git a/src/PaddleClas/ppcls/data/dataloader/__init__.py b/src/PaddleClas/ppcls/data/dataloader/__init__.py new file mode 100644 index 0000000..8f81921 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/__init__.py @@ -0,0 +1,9 @@ +from ppcls.data.dataloader.imagenet_dataset import ImageNetDataset +from ppcls.data.dataloader.multilabel_dataset import MultiLabelDataset +from ppcls.data.dataloader.common_dataset import create_operators +from ppcls.data.dataloader.vehicle_dataset import CompCars, VeriWild +from ppcls.data.dataloader.logo_dataset import LogoDataset +from ppcls.data.dataloader.icartoon_dataset import ICartoonDataset +from ppcls.data.dataloader.mix_dataset import MixDataset +from ppcls.data.dataloader.mix_sampler import MixSampler +from ppcls.data.dataloader.pk_sampler import PKSampler diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/DistributedRandomIdentitySampler.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/DistributedRandomIdentitySampler.cpython-39.pyc new file mode 100644 index 0000000..297a252 Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/DistributedRandomIdentitySampler.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..94af5ef Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/common_dataset.cpython-39.pyc 
b/src/PaddleClas/ppcls/data/dataloader/__pycache__/common_dataset.cpython-39.pyc new file mode 100644 index 0000000..a35db70 Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/common_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/icartoon_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/icartoon_dataset.cpython-39.pyc new file mode 100644 index 0000000..4ce7f8f Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/icartoon_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/imagenet_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/imagenet_dataset.cpython-39.pyc new file mode 100644 index 0000000..669c776 Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/imagenet_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/logo_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/logo_dataset.cpython-39.pyc new file mode 100644 index 0000000..4044dcd Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/logo_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_dataset.cpython-39.pyc new file mode 100644 index 0000000..cb07e0a Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_sampler.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_sampler.cpython-39.pyc new file mode 100644 index 0000000..06d9c7f Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/mix_sampler.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/multilabel_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/multilabel_dataset.cpython-39.pyc new file mode 100644 index 0000000..7d7b90f Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/multilabel_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/pk_sampler.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/pk_sampler.cpython-39.pyc new file mode 100644 index 0000000..70d4598 Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/pk_sampler.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/__pycache__/vehicle_dataset.cpython-39.pyc b/src/PaddleClas/ppcls/data/dataloader/__pycache__/vehicle_dataset.cpython-39.pyc new file mode 100644 index 0000000..fbc8c11 Binary files /dev/null and b/src/PaddleClas/ppcls/data/dataloader/__pycache__/vehicle_dataset.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/dataloader/common_dataset.py b/src/PaddleClas/ppcls/data/dataloader/common_dataset.py new file mode 100644 index 0000000..b7b03d8 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/common_dataset.py @@ -0,0 +1,84 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+
+from paddle.io import Dataset
+import cv2
+
+from ppcls.data import preprocess
+from ppcls.data.preprocess import transform
+from ppcls.utils import logger
+
+
+def create_operators(params):
+    """
+    create operators based on the config
+    Args:
+        params(list): a dict list, used to create some operators
+    """
+    assert isinstance(params, list), ('operator config should be a list')
+    ops = []
+    for operator in params:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        param = {} if operator[op_name] is None else operator[op_name]
+        op = getattr(preprocess, op_name)(**param)
+        ops.append(op)
+
+    return ops
+
+
+class CommonDataset(Dataset):
+    def __init__(
+            self,
+            image_root,
+            cls_label_path,
+            transform_ops=None, ):
+        self._img_root = image_root
+        self._cls_path = cls_label_path
+        # default to None so __getitem__ does not raise an AttributeError
+        # (and retry forever) when no transform_ops are configured
+        self._transform_ops = None
+        if transform_ops:
+            self._transform_ops = create_operators(transform_ops)
+
+        self.images = []
+        self.labels = []
+        self._load_anno()
+
+    def _load_anno(self):
+        pass
+
+    def __getitem__(self, idx):
+        try:
+            with open(self.images[idx], 'rb') as f:
+                img = f.read()
+            if self._transform_ops:
+                img = transform(img, self._transform_ops)
+            img = img.transpose((2, 0, 1))
+            return (img, self.labels[idx])
+
+        except Exception as ex:
+            logger.error("Exception occurred when parsing line: {} with msg: {}".
+                         format(self.images[idx], ex))
+            rnd_idx = np.random.randint(self.__len__())
+            return self.__getitem__(rnd_idx)
+
+    def __len__(self):
+        return len(self.images)
+
+    @property
+    def class_num(self):
+        return len(set(self.labels))
diff --git a/src/PaddleClas/ppcls/data/dataloader/dali.py b/src/PaddleClas/ppcls/data/dataloader/dali.py
new file mode 100644
index 0000000..a15c231
--- /dev/null
+++ b/src/PaddleClas/ppcls/data/dataloader/dali.py
@@ -0,0 +1,319 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
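# NOTE: DALI's CropMirrorNormalize consumes raw pixels in [0, 255], while the
# YAML configs express mean/std for inputs already multiplied by
# scale = 1/255. dali_dataloader() below therefore rescales them back to
# pixel units before building the pipelines:
#
#     mean = [v / scale for v in mean]  # e.g. 0.485 / (1 / 255) = 123.675
#     std = [v / scale for v in std]    # e.g. 0.229 / (1 / 255) = 58.395
#
# so (x - 123.675) / 58.395 on uint8 pixels equals (x / 255 - 0.485) / 0.229.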
+ +from __future__ import division + +import copy +import os + +import numpy as np +import nvidia.dali.ops as ops +import nvidia.dali.types as types +import paddle +from nvidia.dali import fn +from nvidia.dali.pipeline import Pipeline +from nvidia.dali.plugin.base_iterator import LastBatchPolicy +from nvidia.dali.plugin.paddle import DALIGenericIterator + + +class HybridTrainPipe(Pipeline): + def __init__(self, + file_root, + file_list, + batch_size, + resize_shorter, + crop, + min_area, + lower, + upper, + interp, + mean, + std, + device_id, + shard_id=0, + num_shards=1, + random_shuffle=True, + num_threads=4, + seed=42, + pad_output=False, + output_dtype=types.FLOAT, + dataset='Train'): + super(HybridTrainPipe, self).__init__( + batch_size, num_threads, device_id, seed=seed) + self.input = ops.readers.File( + file_root=file_root, + file_list=file_list, + shard_id=shard_id, + num_shards=num_shards, + random_shuffle=random_shuffle) + # set internal nvJPEG buffers size to handle full-sized ImageNet images + # without additional reallocations + device_memory_padding = 211025920 + host_memory_padding = 140544512 + self.decode = ops.decoders.ImageRandomCrop( + device='mixed', + output_type=types.DALIImageType.RGB, + device_memory_padding=device_memory_padding, + host_memory_padding=host_memory_padding, + random_aspect_ratio=[lower, upper], + random_area=[min_area, 1.0], + num_attempts=100) + self.res = ops.Resize( + device='gpu', resize_x=crop, resize_y=crop, interp_type=interp) + self.cmnp = ops.CropMirrorNormalize( + device="gpu", + dtype=output_dtype, + output_layout='CHW', + crop=(crop, crop), + mean=mean, + std=std, + pad_output=pad_output) + self.coin = ops.random.CoinFlip(probability=0.5) + self.to_int64 = ops.Cast(dtype=types.DALIDataType.INT64, device="gpu") + + def define_graph(self): + rng = self.coin() + jpegs, labels = self.input(name="Reader") + images = self.decode(jpegs) + images = self.res(images) + output = self.cmnp(images.gpu(), mirror=rng) + return [output, self.to_int64(labels.gpu())] + + def __len__(self): + return self.epoch_size("Reader") + + +class HybridValPipe(Pipeline): + def __init__(self, + file_root, + file_list, + batch_size, + resize_shorter, + crop, + interp, + mean, + std, + device_id, + shard_id=0, + num_shards=1, + random_shuffle=False, + num_threads=4, + seed=42, + pad_output=False, + output_dtype=types.FLOAT): + super(HybridValPipe, self).__init__( + batch_size, num_threads, device_id, seed=seed) + self.input = ops.readers.File( + file_root=file_root, + file_list=file_list, + shard_id=shard_id, + num_shards=num_shards, + random_shuffle=random_shuffle) + self.decode = ops.decoders.Image(device="mixed") + self.res = ops.Resize( + device="gpu", resize_shorter=resize_shorter, interp_type=interp) + self.cmnp = ops.CropMirrorNormalize( + device="gpu", + dtype=output_dtype, + output_layout='CHW', + crop=(crop, crop), + mean=mean, + std=std, + pad_output=pad_output) + self.to_int64 = ops.Cast(dtype=types.DALIDataType.INT64, device="gpu") + + def define_graph(self): + jpegs, labels = self.input(name="Reader") + images = self.decode(jpegs) + images = self.res(images) + output = self.cmnp(images) + return [output, self.to_int64(labels.gpu())] + + def __len__(self): + return self.epoch_size("Reader") + + +def dali_dataloader(config, mode, device, seed=None): + assert "gpu" in device, "gpu training is required for DALI" + device_id = int(device.split(':')[1]) + config_dataloader = config[mode] + seed = 42 if seed is None else seed + ops = [ + list(x.keys())[0] + for 
x in config_dataloader["dataset"]["transform_ops"] + ] + support_ops_train = [ + "DecodeImage", "NormalizeImage", "RandFlipImage", "RandCropImage" + ] + support_ops_eval = [ + "DecodeImage", "ResizeImage", "CropImage", "NormalizeImage" + ] + + if mode.lower() == 'train': + assert set(ops) == set( + support_ops_train + ), "The supported trasform_ops for train_dataset in dali is : {}".format( + ",".join(support_ops_train)) + else: + assert set(ops) == set( + support_ops_eval + ), "The supported trasform_ops for eval_dataset in dali is : {}".format( + ",".join(support_ops_eval)) + + normalize_ops = [ + op for op in config_dataloader["dataset"]["transform_ops"] + if "NormalizeImage" in op + ][0]["NormalizeImage"] + channel_num = normalize_ops.get("channel_num", 3) + output_dtype = types.FLOAT16 if normalize_ops.get("output_fp16", + False) else types.FLOAT + + env = os.environ + # assert float(env.get('FLAGS_fraction_of_gpu_memory_to_use', 0.92)) < 0.9, \ + # "Please leave enough GPU memory for DALI workspace, e.g., by setting" \ + # " `export FLAGS_fraction_of_gpu_memory_to_use=0.8`" + + gpu_num = paddle.distributed.get_world_size() + + batch_size = config_dataloader["sampler"]["batch_size"] + + file_root = config_dataloader["dataset"]["image_root"] + file_list = config_dataloader["dataset"]["cls_label_path"] + + interp = 1 # settings.interpolation or 1 # default to linear + interp_map = { + 0: types.DALIInterpType.INTERP_NN, # cv2.INTER_NEAREST + 1: types.DALIInterpType.INTERP_LINEAR, # cv2.INTER_LINEAR + 2: types.DALIInterpType.INTERP_CUBIC, # cv2.INTER_CUBIC + 3: types.DALIInterpType. + INTERP_LANCZOS3, # XXX use LANCZOS3 for cv2.INTER_LANCZOS4 + } + + assert interp in interp_map, "interpolation method not supported by DALI" + interp = interp_map[interp] + pad_output = channel_num == 4 + + transforms = { + k: v + for d in config_dataloader["dataset"]["transform_ops"] + for k, v in d.items() + } + + scale = transforms["NormalizeImage"].get("scale", 1.0 / 255) + scale = eval(scale) if isinstance(scale, str) else scale + mean = transforms["NormalizeImage"].get("mean", [0.485, 0.456, 0.406]) + std = transforms["NormalizeImage"].get("std", [0.229, 0.224, 0.225]) + mean = [v / scale for v in mean] + std = [v / scale for v in std] + + sampler_name = config_dataloader["sampler"].get("name", + "DistributedBatchSampler") + assert sampler_name in ["DistributedBatchSampler", "BatchSampler"] + + if mode.lower() == "train": + resize_shorter = 256 + crop = transforms["RandCropImage"]["size"] + scale = transforms["RandCropImage"].get("scale", [0.08, 1.]) + ratio = transforms["RandCropImage"].get("ratio", [3.0 / 4, 4.0 / 3]) + min_area = scale[0] + lower = ratio[0] + upper = ratio[1] + + if 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env: + shard_id = int(env['PADDLE_TRAINER_ID']) + num_shards = int(env['PADDLE_TRAINERS_NUM']) + device_id = int(env['FLAGS_selected_gpus']) + pipe = HybridTrainPipe( + file_root, + file_list, + batch_size, + resize_shorter, + crop, + min_area, + lower, + upper, + interp, + mean, + std, + device_id, + shard_id, + num_shards, + seed=seed + shard_id, + pad_output=pad_output, + output_dtype=output_dtype) + pipe.build() + pipelines = [pipe] + # sample_per_shard = len(pipe) // num_shards + else: + pipe = HybridTrainPipe( + file_root, + file_list, + batch_size, + resize_shorter, + crop, + min_area, + lower, + upper, + interp, + mean, + std, + device_id=device_id, + shard_id=0, + num_shards=1, + seed=seed, + pad_output=pad_output, + output_dtype=output_dtype) + 
pipe.build() + pipelines = [pipe] + # sample_per_shard = len(pipelines[0]) + return DALIGenericIterator( + pipelines, ['data', 'label'], reader_name='Reader') + else: + resize_shorter = transforms["ResizeImage"].get("resize_short", 256) + crop = transforms["CropImage"]["size"] + if 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env and sampler_name == "DistributedBatchSampler": + shard_id = int(env['PADDLE_TRAINER_ID']) + num_shards = int(env['PADDLE_TRAINERS_NUM']) + device_id = int(env['FLAGS_selected_gpus']) + + pipe = HybridValPipe( + file_root, + file_list, + batch_size, + resize_shorter, + crop, + interp, + mean, + std, + device_id=device_id, + shard_id=shard_id, + num_shards=num_shards, + pad_output=pad_output, + output_dtype=output_dtype) + else: + pipe = HybridValPipe( + file_root, + file_list, + batch_size, + resize_shorter, + crop, + interp, + mean, + std, + device_id=device_id, + pad_output=pad_output, + output_dtype=output_dtype) + pipe.build() + return DALIGenericIterator( + [pipe], ['data', 'label'], reader_name="Reader") diff --git a/src/PaddleClas/ppcls/data/dataloader/icartoon_dataset.py b/src/PaddleClas/ppcls/data/dataloader/icartoon_dataset.py new file mode 100644 index 0000000..18e3b4b --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/icartoon_dataset.py @@ -0,0 +1,36 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import os + +from .common_dataset import CommonDataset + + +class ICartoonDataset(CommonDataset): + def _load_anno(self, seed=None): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + self.images = [] + self.labels = [] + + with open(self._cls_path) as fd: + lines = fd.readlines() + for l in lines: + l = l.strip().split("\t") + self.images.append(os.path.join(self._img_root, l[0])) + self.labels.append(np.int64(l[1])) + assert os.path.exists(self.images[-1]) diff --git a/src/PaddleClas/ppcls/data/dataloader/imagenet_dataset.py b/src/PaddleClas/ppcls/data/dataloader/imagenet_dataset.py new file mode 100644 index 0000000..1166ab3 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/imagenet_dataset.py @@ -0,0 +1,38 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
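
For reference, a minimal config that satisfies the assertions in `dali_dataloader` above for `mode="Train"` might look like the hedged sketch below; the paths and batch size are placeholders, and the keys mirror the PaddleClas YAML layout. Note that `mean` and `std` are divided by `scale` inside `dali_dataloader` because DALI's `CropMirrorNormalize` normalizes raw 0-255 pixel values, while the config expresses them for images already scaled to [0, 1].

```python
# Hypothetical config sketch for dali_dataloader() above (mode="Train").
# The train branch asserts transform_ops contains exactly DecodeImage,
# RandCropImage, RandFlipImage and NormalizeImage.
config = {
    "Train": {
        "dataset": {
            "image_root": "./dataset/ILSVRC2012/",         # placeholder path
            "cls_label_path": "./dataset/train_list.txt",  # placeholder path
            "transform_ops": [
                {"DecodeImage": {"to_rgb": True}},
                {"RandCropImage": {"size": 224,
                                   "scale": [0.08, 1.0],
                                   "ratio": [0.75, 1.3333]}},
                {"RandFlipImage": {"flip_code": 1}},
                {"NormalizeImage": {"scale": "1.0/255.0",  # eval()-ed string
                                    "mean": [0.485, 0.456, 0.406],
                                    "std": [0.229, 0.224, 0.225]}},
            ],
        },
        "sampler": {"name": "DistributedBatchSampler", "batch_size": 64},
    },
}

# loader = dali_dataloader(config, "Train", "gpu:0", seed=42)
# for batch in loader:
#     data, label = batch[0]["data"], batch[0]["label"]
```
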
+ +from __future__ import print_function + +import numpy as np +import os + +from .common_dataset import CommonDataset + + +class ImageNetDataset(CommonDataset): + def _load_anno(self, seed=None): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + self.images = [] + self.labels = [] + + with open(self._cls_path) as fd: + lines = fd.readlines() + if seed is not None: + np.random.RandomState(seed).shuffle(lines) + for l in lines: + l = l.strip().split(" ") + self.images.append(os.path.join(self._img_root, l[0])) + self.labels.append(np.int64(l[1])) + assert os.path.exists(self.images[-1]) diff --git a/src/PaddleClas/ppcls/data/dataloader/logo_dataset.py b/src/PaddleClas/ppcls/data/dataloader/logo_dataset.py new file mode 100644 index 0000000..132ead9 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/logo_dataset.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import io +import tarfile +import numpy as np +from PIL import Image #all use default backend + +import paddle +from paddle.io import Dataset +import pickle +import os +import cv2 +import random + +from .common_dataset import CommonDataset + + +class LogoDataset(CommonDataset): + def _load_anno(self): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + self.images = [] + self.labels = [] + with open(self._cls_path) as fd: + lines = fd.readlines() + for l in lines: + l = l.strip().split("\t") + if l[0] == 'image_id': + continue + self.images.append(os.path.join(self._img_root, l[3])) + self.labels.append(np.int64(l[1]) - 1) + assert os.path.exists(self.images[-1]) diff --git a/src/PaddleClas/ppcls/data/dataloader/mix_dataset.py b/src/PaddleClas/ppcls/data/dataloader/mix_dataset.py new file mode 100644 index 0000000..cbf4b40 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/mix_dataset.py @@ -0,0 +1,49 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import os + +from paddle.io import Dataset +from .. 
import dataloader + + +class MixDataset(Dataset): + def __init__(self, datasets_config): + super().__init__() + self.dataset_list = [] + start_idx = 0 + end_idx = 0 + for config_i in datasets_config: + dataset_name = config_i.pop('name') + dataset = getattr(dataloader, dataset_name)(**config_i) + end_idx += len(dataset) + self.dataset_list.append([end_idx, start_idx, dataset]) + start_idx = end_idx + + self.length = end_idx + + def __getitem__(self, idx): + for dataset_i in self.dataset_list: + if dataset_i[0] > idx: + dataset_i_idx = idx - dataset_i[1] + return dataset_i[2][dataset_i_idx] + + def __len__(self): + return self.length + + def get_dataset_list(self): + return self.dataset_list diff --git a/src/PaddleClas/ppcls/data/dataloader/mix_sampler.py b/src/PaddleClas/ppcls/data/dataloader/mix_sampler.py new file mode 100644 index 0000000..2df3109 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/mix_sampler.py @@ -0,0 +1,79 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division + +from paddle.io import DistributedBatchSampler, Sampler + +from ppcls.utils import logger +from ppcls.data.dataloader.mix_dataset import MixDataset +from ppcls.data import dataloader + + +class MixSampler(DistributedBatchSampler): + def __init__(self, dataset, batch_size, sample_configs, iter_per_epoch): + super().__init__(dataset, batch_size) + assert isinstance(dataset, + MixDataset), "MixSampler only support MixDataset" + self.sampler_list = [] + self.batch_size = batch_size + self.start_list = [] + self.length = iter_per_epoch + dataset_list = dataset.get_dataset_list() + batch_size_left = self.batch_size + self.iter_list = [] + for i, config_i in enumerate(sample_configs): + self.start_list.append(dataset_list[i][1]) + sample_method = config_i.pop("name") + ratio_i = config_i.pop("ratio") + if i < len(sample_configs) - 1: + batch_size_i = int(self.batch_size * ratio_i) + batch_size_left -= batch_size_i + else: + batch_size_i = batch_size_left + assert batch_size_i <= len(dataset_list[i][2]) + config_i["batch_size"] = batch_size_i + if sample_method == "DistributedBatchSampler": + sampler_i = DistributedBatchSampler(dataset_list[i][2], + **config_i) + else: + sampler_i = getattr(dataloader, sample_method)( + dataset_list[i][2], **config_i) + self.sampler_list.append(sampler_i) + self.iter_list.append(iter(sampler_i)) + self.length += len(dataset_list[i][2]) * ratio_i + self.iter_counter = 0 + + def __iter__(self): + while self.iter_counter < self.length: + batch = [] + for i, iter_i in enumerate(self.iter_list): + batch_i = next(iter_i, None) + if batch_i is None: + iter_i = iter(self.sampler_list[i]) + self.iter_list[i] = iter_i + batch_i = next(iter_i, None) + assert batch_i is not None, "dataset {} return None".format( + i) + batch += [idx + self.start_list[i] for idx in batch_i] + if len(batch) == self.batch_size: + self.iter_counter += 1 + yield batch + else: + 
logger.info("Some dataset reaches end") + self.iter_counter = 0 + + def __len__(self): + return self.length diff --git a/src/PaddleClas/ppcls/data/dataloader/multilabel_dataset.py b/src/PaddleClas/ppcls/data/dataloader/multilabel_dataset.py new file mode 100644 index 0000000..2c1ed77 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/multilabel_dataset.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import os +import cv2 + +from ppcls.data.preprocess import transform +from ppcls.utils import logger + +from .common_dataset import CommonDataset + + +class MultiLabelDataset(CommonDataset): + def _load_anno(self): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + self.images = [] + self.labels = [] + with open(self._cls_path) as fd: + lines = fd.readlines() + for l in lines: + l = l.strip().split("\t") + self.images.append(os.path.join(self._img_root, l[0])) + + labels = l[1].split(',') + labels = [np.int64(i) for i in labels] + + self.labels.append(labels) + assert os.path.exists(self.images[-1]) + + def __getitem__(self, idx): + try: + with open(self.images[idx], 'rb') as f: + img = f.read() + if self._transform_ops: + img = transform(img, self._transform_ops) + img = img.transpose((2, 0, 1)) + label = np.array(self.labels[idx]).astype("float32") + return (img, label) + + except Exception as ex: + logger.error("Exception occured when parse line: {} with msg: {}". + format(self.images[idx], ex)) + rnd_idx = np.random.randint(self.__len__()) + return self.__getitem__(rnd_idx) diff --git a/src/PaddleClas/ppcls/data/dataloader/pk_sampler.py b/src/PaddleClas/ppcls/data/dataloader/pk_sampler.py new file mode 100644 index 0000000..bf563a6 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/pk_sampler.py @@ -0,0 +1,105 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from collections import defaultdict +import numpy as np +import random +from paddle.io import DistributedBatchSampler + +from ppcls.utils import logger + + +class PKSampler(DistributedBatchSampler): + """ + First, randomly sample P identities. + Then for each identity randomly sample K instances. + Therefore batch size is P*K, and the sampler called PKSampler. 
+ Args: + dataset (paddle.io.Dataset): list of (img_path, pid, cam_id). + sample_per_id(int): number of instances per identity in a batch. + batch_size (int): number of examples in a batch. + shuffle(bool): whether to shuffle indices order before generating + batch indices. Default True. + """ + + def __init__(self, + dataset, + batch_size, + sample_per_id, + shuffle=True, + drop_last=True, + sample_method="sample_avg_prob"): + super().__init__( + dataset, batch_size, shuffle=shuffle, drop_last=drop_last) + assert batch_size % sample_per_id == 0, \
 "PKSampler config error: sample_per_id must be a divisor of batch_size." + assert hasattr(self.dataset, + "labels"), "Dataset must have labels attribute." + self.sample_per_label = sample_per_id + self.label_dict = defaultdict(list) + self.sample_method = sample_method + for idx, label in enumerate(self.dataset.labels): + self.label_dict[label].append(idx) + self.label_list = list(self.label_dict) + assert len(self.label_list) * self.sample_per_label > self.batch_size, \
 "batch size should be smaller than the number of labels times sample_per_id." + if self.sample_method == "id_avg_prob": + self.prob_list = np.array([1 / len(self.label_list)] * + len(self.label_list)) + elif self.sample_method == "sample_avg_prob": + counter = [] + for label_i in self.label_list: + counter.append(len(self.label_dict[label_i])) + self.prob_list = np.array(counter) / sum(counter) + else: + logger.error( + "PKSampler only supports the id_avg_prob and sample_avg_prob sample methods, " + "but received {}.".format(self.sample_method)) + diff = np.abs(sum(self.prob_list) - 1) + if diff > 0.00000001: + self.prob_list[-1] = 1 - sum(self.prob_list[:-1]) + if self.prob_list[-1] > 1 or self.prob_list[-1] < 0: + logger.error("PKSampler prob list error") + else: + logger.info( + "PKSampler: sum of prob list is not equal to 1, diff is {}; adjusting the last prob".format(diff) + ) + + def __iter__(self): + label_per_batch = self.batch_size // self.sample_per_label + for _ in range(len(self)): + batch_index = [] + batch_label_list = np.random.choice( + self.label_list, + size=label_per_batch, + replace=False, + p=self.prob_list) + for label_i in batch_label_list: + label_i_indexes = self.label_dict[label_i] + if self.sample_per_label <= len(label_i_indexes): + batch_index.extend( + np.random.choice( + label_i_indexes, + size=self.sample_per_label, + replace=False)) + else: + batch_index.extend( + np.random.choice( + label_i_indexes, + size=self.sample_per_label, + replace=True)) + if not self.drop_last or len(batch_index) == self.batch_size: + yield batch_index diff --git a/src/PaddleClas/ppcls/data/dataloader/vehicle_dataset.py b/src/PaddleClas/ppcls/data/dataloader/vehicle_dataset.py new file mode 100644 index 0000000..2981a57 --- /dev/null +++ b/src/PaddleClas/ppcls/data/dataloader/vehicle_dataset.py @@ -0,0 +1,138 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
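
A minimal single-card sketch of how `PKSampler` above composes identity-balanced batches. `ToyRecDataset` is a hypothetical stand-in for a real recognition dataset; it exists only to satisfy the `labels`-attribute assertion.

```python
# Toy usage sketch for PKSampler (defined above); nothing here is from the
# PaddleClas configs -- the dataset and numbers are illustrative only.
import numpy as np
from paddle.io import Dataset


class ToyRecDataset(Dataset):
    """Hypothetical dataset exposing the `labels` attribute PKSampler requires."""

    def __init__(self, labels):
        super().__init__()
        self.labels = labels

    def __getitem__(self, idx):
        # dummy image tensor; a real dataset would decode and transform here
        return np.zeros((3, 224, 224), dtype="float32"), self.labels[idx]

    def __len__(self):
        return len(self.labels)


labels = [i // 10 for i in range(200)]        # 20 identities, 10 images each
sampler = PKSampler(ToyRecDataset(labels), batch_size=32, sample_per_id=4)
batch = next(iter(sampler))                   # 32 dataset indices
```

With `batch_size=32` and `sample_per_id=4`, each batch draws P=8 identities and K=4 instances per identity, which is the P*K layout metric-learning losses such as triplet loss expect.
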
+ +from __future__ import print_function + +import numpy as np +import paddle +from paddle.io import Dataset +import os +import cv2 + +from ppcls.data import preprocess +from ppcls.data.preprocess import transform +from ppcls.utils import logger +from .common_dataset import create_operators + + +class CompCars(Dataset): + def __init__(self, + image_root, + cls_label_path, + label_root=None, + transform_ops=None, + bbox_crop=False): + self._img_root = image_root + self._cls_path = cls_label_path + self._label_root = label_root + if transform_ops: + self._transform_ops = create_operators(transform_ops) + self._bbox_crop = bbox_crop + self._dtype = paddle.get_default_dtype() + self._load_anno() + + def _load_anno(self): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + if self._bbox_crop: + assert os.path.exists(self._label_root) + self.images = [] + self.labels = [] + self.bboxes = [] + with open(self._cls_path) as fd: + lines = fd.readlines() + for l in lines: + l = l.strip().split() + if not self._bbox_crop: + self.images.append(os.path.join(self._img_root, l[0])) + self.labels.append(int(l[1])) + else: + label_path = os.path.join(self._label_root, + l[0].split('.')[0] + '.txt') + assert os.path.exists(label_path) + with open(label_path) as f: + bbox = f.readlines()[-1].strip().split() + bbox = [int(x) for x in bbox] + self.images.append(os.path.join(self._img_root, l[0])) + self.labels.append(int(l[1])) + self.bboxes.append(bbox) + assert os.path.exists(self.images[-1]) + + def __getitem__(self, idx): + img = cv2.imread(self.images[idx]) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if self._bbox_crop: + bbox = self.bboxes[idx] + img = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :] + if self._transform_ops: + img = transform(img, self._transform_ops) + img = img.transpose((2, 0, 1)) + return (img, self.labels[idx]) + + def __len__(self): + return len(self.images) + + @property + def class_num(self): + return len(set(self.labels)) + + +class VeriWild(Dataset): + def __init__( + self, + image_root, + cls_label_path, + transform_ops=None, ): + self._img_root = image_root + self._cls_path = cls_label_path + if transform_ops: + self._transform_ops = create_operators(transform_ops) + self._dtype = paddle.get_default_dtype() + self._load_anno() + + def _load_anno(self): + assert os.path.exists(self._cls_path) + assert os.path.exists(self._img_root) + self.images = [] + self.labels = [] + self.cameras = [] + with open(self._cls_path) as fd: + lines = fd.readlines() + for l in lines: + l = l.strip().split() + self.images.append(os.path.join(self._img_root, l[0])) + self.labels.append(np.int64(l[1])) + self.cameras.append(np.int64(l[2])) + assert os.path.exists(self.images[-1]) + + def __getitem__(self, idx): + try: + with open(self.images[idx], 'rb') as f: + img = f.read() + if self._transform_ops: + img = transform(img, self._transform_ops) + img = img.transpose((2, 0, 1)) + return (img, self.labels[idx], self.cameras[idx]) + except Exception as ex: + logger.error("Exception occured when parse line: {} with msg: {}". 
+ format(self.images[idx], ex)) + rnd_idx = np.random.randint(self.__len__()) + return self.__getitem__(rnd_idx) + + def __len__(self): + return len(self.images) + + @property + def class_num(self): + return len(set(self.labels)) diff --git a/src/PaddleClas/ppcls/data/postprocess/__init__.py b/src/PaddleClas/ppcls/data/postprocess/__init__.py new file mode 100644 index 0000000..831a4da --- /dev/null +++ b/src/PaddleClas/ppcls/data/postprocess/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import importlib + +from . import topk + +from .topk import Topk, MultiLabelTopk + + +def build_postprocess(config): + config = copy.deepcopy(config) + model_name = config.pop("name") + mod = importlib.import_module(__name__) + postprocess_func = getattr(mod, model_name)(**config) + return postprocess_func + + +class DistillationPostProcess(object): + def __init__(self, model_name="Student", key=None, func="Topk", **kargs): + super().__init__() + self.func = eval(func)(**kargs) + self.model_name = model_name + self.key = key + + def __call__(self, x, file_names=None): + x = x[self.model_name] + if self.key is not None: + x = x[self.key] + return self.func(x, file_names=file_names) diff --git a/src/PaddleClas/ppcls/data/postprocess/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/postprocess/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..ff25208 Binary files /dev/null and b/src/PaddleClas/ppcls/data/postprocess/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/postprocess/__pycache__/topk.cpython-39.pyc b/src/PaddleClas/ppcls/data/postprocess/__pycache__/topk.cpython-39.pyc new file mode 100644 index 0000000..a88a23e Binary files /dev/null and b/src/PaddleClas/ppcls/data/postprocess/__pycache__/topk.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/postprocess/topk.py b/src/PaddleClas/ppcls/data/postprocess/topk.py new file mode 100644 index 0000000..9c1371b --- /dev/null +++ b/src/PaddleClas/ppcls/data/postprocess/topk.py @@ -0,0 +1,85 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
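
A hedged usage sketch for `build_postprocess` above; the config follows the same pop-the-`name`-key convention used elsewhere in this patch. One quirk worth noting: when no `class_id_map_file` is configured, `Topk` (defined in `topk.py` below) still emits `label_names` as an empty list, because its `is not None` check never fails for a list.

```python
# Assumed usage sketch, not from the patch itself.
import paddle

postprocess = build_postprocess({"name": "Topk", "topk": 5})
logits = paddle.randn([2, 1000])              # stand-in for model output
results = postprocess(logits, file_names=["a.jpg", "b.jpg"])
# each entry looks like:
#   {"class_ids": [...5 ids...], "scores": [...], "file_name": "a.jpg",
#    "label_names": []}   # empty without a class_id_map_file
```

For distillation outputs, `DistillationPostProcess` simply selects one sub-model's result dict (e.g. `x["Student"]`) before delegating to the wrapped `Topk`.
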
+ +import os +import numpy as np +import paddle +import paddle.nn.functional as F + + +class Topk(object): + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" + ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype( + "int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around( + score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y + + +class MultiLabelTopk(Topk): + def __init__(self, topk=1, class_id_map_file=None): + super().__init__() + + def __call__(self, x, file_names=None): + return super().__call__(x, file_names, multilabel=True) diff --git a/src/PaddleClas/ppcls/data/preprocess/__init__.py b/src/PaddleClas/ppcls/data/preprocess/__init__.py new file mode 100644 index 0000000..075ee89 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/__init__.py @@ -0,0 +1,100 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
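
`parse_class_id_map` above partitions each line on the first space, so the label-map file is expected to look like the hypothetical snippet below. Note also that `MultiLabelTopk.__init__` calls `super().__init__()` without forwarding its arguments, so `topk` and `class_id_map_file` passed to it fall back to the defaults.

```python
# Assumed label-map layout: "<class id><space><label name>" per line,
# e.g. the file ./imagenet1k_label_list.txt (hypothetical path) containing:
#
#   0 tench
#   1 goldfish
#   2 great white shark
#
topk = Topk(topk=5, class_id_map_file="./imagenet1k_label_list.txt")
```
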
+ +from ppcls.data.preprocess.ops.autoaugment import ImageNetPolicy as RawImageNetPolicy +from ppcls.data.preprocess.ops.randaugment import RandAugment as RawRandAugment +from ppcls.data.preprocess.ops.timm_autoaugment import RawTimmAutoAugment +from ppcls.data.preprocess.ops.cutout import Cutout + +from ppcls.data.preprocess.ops.hide_and_seek import HideAndSeek +from ppcls.data.preprocess.ops.random_erasing import RandomErasing +from ppcls.data.preprocess.ops.grid import GridMask + +from ppcls.data.preprocess.ops.operators import DecodeImage +from ppcls.data.preprocess.ops.operators import ResizeImage +from ppcls.data.preprocess.ops.operators import CropImage +from ppcls.data.preprocess.ops.operators import RandCropImage +from ppcls.data.preprocess.ops.operators import RandFlipImage +from ppcls.data.preprocess.ops.operators import NormalizeImage +from ppcls.data.preprocess.ops.operators import ToCHWImage +from ppcls.data.preprocess.ops.operators import AugMix + +from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator + +import numpy as np +from PIL import Image + + +def transform(data, ops=[]): + """ transform """ + for op in ops: + data = op(data) + return data + + +class AutoAugment(RawImageNetPolicy): + """ ImageNetPolicy wrapper to auto fit different img types """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + + img = super().__call__(img) + + if isinstance(img, Image.Image): + img = np.asarray(img) + + return img + + +class RandAugment(RawRandAugment): + """ RandAugment wrapper to auto fit different img types """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + + img = super().__call__(img) + + if isinstance(img, Image.Image): + img = np.asarray(img) + + return img + + +class TimmAutoAugment(RawTimmAutoAugment): + """ TimmAutoAugment wrapper to auto fit different img tyeps. 
""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + + img = super().__call__(img) + + if isinstance(img, Image.Image): + img = np.asarray(img) + + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..aad9c48 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/batch_ops/__init__.py b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__init__.py @@ -0,0 +1 @@ + diff --git a/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..b641a17 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/batch_operators.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/batch_operators.cpython-39.pyc new file mode 100644 index 0000000..fb04253 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/batch_ops/__pycache__/batch_operators.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/batch_ops/batch_operators.py b/src/PaddleClas/ppcls/data/preprocess/batch_ops/batch_operators.py new file mode 100644 index 0000000..6f0abb8 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/batch_ops/batch_operators.py @@ -0,0 +1,231 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import random + +import numpy as np + +from ppcls.utils import logger +from ppcls.data.preprocess.ops.fmix import sample_mask + + +class BatchOperator(object): + """ BatchOperator """ + + def __init__(self, *args, **kwargs): + pass + + def _unpack(self, batch): + """ _unpack """ + assert isinstance(batch, list), \ + 'batch should be a list filled with tuples (img, label)' + bs = len(batch) + assert bs > 0, 'size of the batch data should > 0' + #imgs, labels = list(zip(*batch)) + imgs = [] + labels = [] + for item in batch: + imgs.append(item[0]) + labels.append(item[1]) + return np.array(imgs), np.array(labels), bs + + def _one_hot(self, targets): + return np.eye(self.class_num, dtype="float32")[targets] + + def _mix_target(self, targets0, targets1, lam): + one_hots0 = self._one_hot(targets0) + one_hots1 = self._one_hot(targets1) + return one_hots0 * lam + one_hots1 * (1 - lam) + + def __call__(self, batch): + return batch + + +class MixupOperator(BatchOperator): + """ Mixup operator + reference: https://arxiv.org/abs/1710.09412 + + """ + + def __init__(self, class_num, alpha: float=1.): + """Build Mixup operator + + Args: + alpha (float, optional): The parameter alpha of mixup. Defaults to 1.. + + Raises: + Exception: The value of parameter is illegal. + """ + if alpha <= 0: + raise Exception( + f"Parameter \"alpha\" of Mixup should be greater than 0. \"alpha\": {alpha}." + ) + if not class_num: + msg = "Please set \"Arch.class_num\" in config if use \"MixupOperator\"." + logger.error(Exception(msg)) + raise Exception(msg) + + self._alpha = alpha + self.class_num = class_num + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + lam = np.random.beta(self._alpha, self._alpha) + imgs = lam * imgs + (1 - lam) * imgs[idx] + targets = self._mix_target(labels, labels[idx], lam) + return list(zip(imgs, targets)) + + +class CutmixOperator(BatchOperator): + """ Cutmix operator + reference: https://arxiv.org/abs/1905.04899 + + """ + + def __init__(self, class_num, alpha=0.2): + """Build Cutmix operator + + Args: + alpha (float, optional): The parameter alpha of cutmix. Defaults to 0.2. + + Raises: + Exception: The value of parameter is illegal. + """ + if alpha <= 0: + raise Exception( + f"Parameter \"alpha\" of Cutmix should be greater than 0. \"alpha\": {alpha}." + ) + if not class_num: + msg = "Please set \"Arch.class_num\" in config if use \"CutmixOperator\"." + logger.error(Exception(msg)) + raise Exception(msg) + + self._alpha = alpha + self.class_num = class_num + + def _rand_bbox(self, size, lam): + """ _rand_bbox """ + w = size[2] + h = size[3] + cut_rat = np.sqrt(1. 
- lam) + cut_w = int(w * cut_rat) + cut_h = int(h * cut_rat) + + # uniform + cx = np.random.randint(w) + cy = np.random.randint(h) + + bbx1 = np.clip(cx - cut_w // 2, 0, w) + bby1 = np.clip(cy - cut_h // 2, 0, h) + bbx2 = np.clip(cx + cut_w // 2, 0, w) + bby2 = np.clip(cy + cut_h // 2, 0, h) + + return bbx1, bby1, bbx2, bby2 + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + lam = np.random.beta(self._alpha, self._alpha) + + bbx1, bby1, bbx2, bby2 = self._rand_bbox(imgs.shape, lam) + imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[idx, :, bbx1:bbx2, bby1:bby2] + lam = 1 - (float(bbx2 - bbx1) * (bby2 - bby1) / + (imgs.shape[-2] * imgs.shape[-1])) + targets = self._mix_target(labels, labels[idx], lam) + return list(zip(imgs, targets)) + + +class FmixOperator(BatchOperator): + """ Fmix operator + reference: https://arxiv.org/abs/2002.12047 + + """ + + def __init__(self, + class_num, + alpha=1, + decay_power=3, + max_soft=0., + reformulate=False): + if not class_num: + msg = "Please set \"Arch.class_num\" in config if use \"FmixOperator\"." + logger.error(Exception(msg)) + raise Exception(msg) + + self._alpha = alpha + self._decay_power = decay_power + self._max_soft = max_soft + self._reformulate = reformulate + self.class_num = class_num + + def __call__(self, batch): + imgs, labels, bs = self._unpack(batch) + idx = np.random.permutation(bs) + size = (imgs.shape[2], imgs.shape[3]) + lam, mask = sample_mask(self._alpha, self._decay_power, \ + size, self._max_soft, self._reformulate) + imgs = mask * imgs + (1 - mask) * imgs[idx] + targets = self._mix_target(labels, labels[idx], lam) + return list(zip(imgs, targets)) + + +class OpSampler(object): + """ Sample a operator from """ + + def __init__(self, class_num, **op_dict): + """Build OpSampler + + Raises: + Exception: The parameter \"prob\" of operator(s) are be set error. + """ + if not class_num: + msg = "Please set \"Arch.class_num\" in config if use \"OpSampler\"." + logger.error(Exception(msg)) + raise Exception(msg) + + if len(op_dict) < 1: + msg = f"ConfigWarning: No operator in \"OpSampler\". \"OpSampler\" has been skipped." + logger.warning(msg) + + self.ops = {} + total_prob = 0 + for op_name in op_dict: + param = op_dict[op_name] + if "prob" not in param: + msg = f"ConfigWarning: Parameter \"prob\" should be set when use operator in \"OpSampler\". The operator \"{op_name}\"'s prob has been set \"0\"." + logger.warning(msg) + prob = param.pop("prob", 0) + total_prob += prob + param.update({"class_num": class_num}) + op = eval(op_name)(**param) + self.ops.update({op: prob}) + + if total_prob > 1: + msg = f"ConfigError: The total prob of operators in \"OpSampler\" should be less 1." 
+ logger.error(Exception(msg)) + raise Exception(msg) + + # add "None Op" when total_prob < 1, "None Op" do nothing + self.ops[None] = 1 - total_prob + + def __call__(self, batch): + op = random.choices( + list(self.ops.keys()), weights=list(self.ops.values()), k=1)[0] + # return batch directly when None Op + return op(batch) if op else batch diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__init__.py b/src/PaddleClas/ppcls/data/preprocess/ops/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/__init__.py @@ -0,0 +1 @@ + diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..44a6c65 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/autoaugment.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/autoaugment.cpython-39.pyc new file mode 100644 index 0000000..6bc1a31 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/autoaugment.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/cutout.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/cutout.cpython-39.pyc new file mode 100644 index 0000000..087e231 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/cutout.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/fmix.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/fmix.cpython-39.pyc new file mode 100644 index 0000000..a0d5b32 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/fmix.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/functional.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/functional.cpython-39.pyc new file mode 100644 index 0000000..b8c9e26 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/functional.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/grid.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/grid.cpython-39.pyc new file mode 100644 index 0000000..b45f8b6 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/grid.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/hide_and_seek.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/hide_and_seek.cpython-39.pyc new file mode 100644 index 0000000..3fd0a2d Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/hide_and_seek.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/operators.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/operators.cpython-39.pyc new file mode 100644 index 0000000..5f3acbd Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/operators.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/randaugment.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/randaugment.cpython-39.pyc new file mode 100644 index 0000000..e599957 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/randaugment.cpython-39.pyc differ diff 
--git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/random_erasing.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/random_erasing.cpython-39.pyc new file mode 100644 index 0000000..a86492f Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/random_erasing.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/timm_autoaugment.cpython-39.pyc b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/timm_autoaugment.cpython-39.pyc new file mode 100644 index 0000000..5f8e988 Binary files /dev/null and b/src/PaddleClas/ppcls/data/preprocess/ops/__pycache__/timm_autoaugment.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/autoaugment.py b/src/PaddleClas/ppcls/data/preprocess/ops/autoaugment.py new file mode 100644 index 0000000..330220a --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/autoaugment.py @@ -0,0 +1,264 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py + +from PIL import Image, ImageEnhance, ImageOps +import numpy as np +import random + + +class ImageNetPolicy(object): + """ Randomly choose one of the best 24 Sub-policies on ImageNet. 
+ + Example: + >>> policy = ImageNetPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> ImageNetPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor), + SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor), + SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor), + SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor), + SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor), + SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor), + SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor), + SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor), + SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor), + SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor), + SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor), + SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor), + SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor), + SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment ImageNet Policy" + + +class CIFAR10Policy(object): + """ Randomly choose one of the best 25 Sub-policies on CIFAR10. 
+ + Example: + >>> policy = CIFAR10Policy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> CIFAR10Policy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor), + SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor), + SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor), + SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor), + SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor), + SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor), + SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor), + SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor), + SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor), + SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor), + SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor), + SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor), + SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor), + SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor), + SubPolicy(0.2, "equalize", 8, 0.8, "equalize", 4, fillcolor), + SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor), + SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor), + SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor), + SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor), + SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor), + SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor), + SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment CIFAR10 Policy" + + +class SVHNPolicy(object): + """ Randomly choose one of the best 25 Sub-policies on SVHN. 
+ + Example: + >>> policy = SVHNPolicy() + >>> transformed = policy(image) + + Example as a PyTorch Transform: + >>> transform=transforms.Compose([ + >>> transforms.Resize(256), + >>> SVHNPolicy(), + >>> transforms.ToTensor()]) + """ + + def __init__(self, fillcolor=(128, 128, 128)): + self.policies = [ + SubPolicy(0.9, "shearX", 4, 0.2, "invert", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.7, "invert", 5, fillcolor), + SubPolicy(0.6, "equalize", 5, 0.6, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 3, 0.6, "equalize", 3, fillcolor), + SubPolicy(0.6, "equalize", 1, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.8, "autocontrast", 3, fillcolor), + SubPolicy(0.9, "shearY", 8, 0.4, "invert", 5, fillcolor), + SubPolicy(0.9, "shearY", 5, 0.2, "solarize", 6, fillcolor), + SubPolicy(0.9, "invert", 6, 0.8, "autocontrast", 1, fillcolor), + SubPolicy(0.6, "equalize", 3, 0.9, "rotate", 3, fillcolor), + SubPolicy(0.9, "shearX", 4, 0.3, "solarize", 3, fillcolor), + SubPolicy(0.8, "shearY", 8, 0.7, "invert", 4, fillcolor), + SubPolicy(0.9, "equalize", 5, 0.6, "translateY", 6, fillcolor), + SubPolicy(0.9, "invert", 4, 0.6, "equalize", 7, fillcolor), + SubPolicy(0.3, "contrast", 3, 0.8, "rotate", 4, fillcolor), + SubPolicy(0.8, "invert", 5, 0.0, "translateY", 2, fillcolor), + SubPolicy(0.7, "shearY", 6, 0.4, "solarize", 8, fillcolor), + SubPolicy(0.6, "invert", 4, 0.8, "rotate", 4, fillcolor), + SubPolicy( + 0.3, "shearY", 7, 0.9, "translateX", 3, fillcolor), SubPolicy( + 0.1, "shearX", 6, 0.6, "invert", 5, fillcolor), SubPolicy( + 0.7, "solarize", 2, 0.6, "translateY", 7, + fillcolor), SubPolicy(0.8, "shearY", 4, 0.8, "invert", + 8, fillcolor), SubPolicy( + 0.7, "shearX", 9, 0.8, + "translateY", 3, + fillcolor), SubPolicy( + 0.8, "shearY", 5, 0.7, + "autocontrast", 3, + fillcolor), + SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor) + ] + + def __call__(self, img, policy_idx=None): + if policy_idx is None or not isinstance(policy_idx, int): + policy_idx = random.randint(0, len(self.policies) - 1) + else: + policy_idx = policy_idx % len(self.policies) + return self.policies[policy_idx](img) + + def __repr__(self): + return "AutoAugment SVHN Policy" + + +class SubPolicy(object): + def __init__(self, + p1, + operation1, + magnitude_idx1, + p2, + operation2, + magnitude_idx2, + fillcolor=(128, 128, 128)): + ranges = { + "shearX": np.linspace(0, 0.3, 10), + "shearY": np.linspace(0, 0.3, 10), + "translateX": np.linspace(0, 150 / 331, 10), + "translateY": np.linspace(0, 150 / 331, 10), + "rotate": np.linspace(0, 30, 10), + "color": np.linspace(0.0, 0.9, 10), + "posterize": np.round(np.linspace(8, 4, 10), 0).astype(np.int), + "solarize": np.linspace(256, 0, 10), + "contrast": np.linspace(0.0, 0.9, 10), + "sharpness": np.linspace(0.0, 0.9, 10), + "brightness": np.linspace(0.0, 0.9, 10), + "autocontrast": [0] * 10, + "equalize": [0] * 10, + "invert": [0] * 10 + } + + # from https://stackoverflow.com/questions/5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite(rot, + Image.new("RGBA", rot.size, (128, ) * 4), + rot).convert(img.mode) + + func = { + "shearX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, fillcolor=fillcolor), + "shearY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), 
+ Image.BICUBIC, fillcolor=fillcolor), + "translateX": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0), + fillcolor=fillcolor), + "translateY": lambda img, magnitude: img.transform( + img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])), + fillcolor=fillcolor), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), + "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( + 1 + magnitude * random.choice([-1, 1])), + "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img) + } + + self.p1 = p1 + self.operation1 = func[operation1] + self.magnitude1 = ranges[operation1][magnitude_idx1] + self.p2 = p2 + self.operation2 = func[operation2] + self.magnitude2 = ranges[operation2][magnitude_idx2] + + def __call__(self, img): + if random.random() < self.p1: + img = self.operation1(img, self.magnitude1) + if random.random() < self.p2: + img = self.operation2(img, self.magnitude2) + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/cutout.py b/src/PaddleClas/ppcls/data/preprocess/ops/cutout.py new file mode 100644 index 0000000..b906e14 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/cutout.py @@ -0,0 +1,41 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This code is based on https://github.com/uoguelph-mlrg/Cutout + +import numpy as np +import random + + +class Cutout(object): + def __init__(self, n_holes=1, length=112): + self.n_holes = n_holes + self.length = length + + def __call__(self, img): + """ cutout_image """ + h, w = img.shape[:2] + mask = np.ones((h, w), np.float32) + + for n in range(self.n_holes): + y = np.random.randint(h) + x = np.random.randint(w) + + y1 = np.clip(y - self.length // 2, 0, h) + y2 = np.clip(y + self.length // 2, 0, h) + x1 = np.clip(x - self.length // 2, 0, w) + x2 = np.clip(x + self.length // 2, 0, w) + + img[y1:y2, x1:x2] = 0 + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/fmix.py b/src/PaddleClas/ppcls/data/preprocess/ops/fmix.py new file mode 100644 index 0000000..dc2ef91 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/fmix.py @@ -0,0 +1,217 @@ +# Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import random + +import numpy as np +from scipy.stats import beta + + +def fftfreqnd(h, w=None, z=None): + """ Get bin values for discrete fourier transform of size (h, w, z) + + :param h: Required, first dimension size + :param w: Optional, second dimension size + :param z: Optional, third dimension size + """ + fz = fx = 0 + fy = np.fft.fftfreq(h) + + if w is not None: + fy = np.expand_dims(fy, -1) + + if w % 2 == 1: + fx = np.fft.fftfreq(w)[:w // 2 + 2] + else: + fx = np.fft.fftfreq(w)[:w // 2 + 1] + + if z is not None: + fy = np.expand_dims(fy, -1) + if z % 2 == 1: + fz = np.fft.fftfreq(z)[:, None] + else: + fz = np.fft.fftfreq(z)[:, None] + + return np.sqrt(fx * fx + fy * fy + fz * fz) + + +def get_spectrum(freqs, decay_power, ch, h, w=0, z=0): + """ Samples a fourier image with given size and frequencies decayed by decay power + + :param freqs: Bin values for the discrete fourier transform + :param decay_power: Decay power for frequency decay prop 1/f**d + :param ch: Number of channels for the resulting mask + :param h: Required, first dimension size + :param w: Optional, second dimension size + :param z: Optional, third dimension size + """ + scale = np.ones(1) / (np.maximum(freqs, np.array([1. / max(w, h, z)])) + **decay_power) + + param_size = [ch] + list(freqs.shape) + [2] + param = np.random.randn(*param_size) + + scale = np.expand_dims(scale, -1)[None, :] + + return scale * param + + +def make_low_freq_image(decay, shape, ch=1): + """ Sample a low frequency image from fourier space + + :param decay_power: Decay power for frequency decay prop 1/f**d + :param shape: Shape of desired mask, list up to 3 dims + :param ch: Number of channels for desired mask + """ + freqs = fftfreqnd(*shape) + spectrum = get_spectrum(freqs, decay, ch, + *shape) #.reshape((1, *shape[:-1], -1)) + spectrum = spectrum[:, 0] + 1j * spectrum[:, 1] + mask = np.real(np.fft.irfftn(spectrum, shape)) + + if len(shape) == 1: + mask = mask[:1, :shape[0]] + if len(shape) == 2: + mask = mask[:1, :shape[0], :shape[1]] + if len(shape) == 3: + mask = mask[:1, :shape[0], :shape[1], :shape[2]] + + mask = mask + mask = (mask - mask.min()) + mask = mask / mask.max() + return mask + + +def sample_lam(alpha, reformulate=False): + """ Sample a lambda from symmetric beta distribution with given alpha + + :param alpha: Alpha value for beta distribution + :param reformulate: If True, uses the reformulation of [1]. + """ + if reformulate: + lam = beta.rvs(alpha + 1, alpha) + else: + lam = beta.rvs(alpha, alpha) + + return lam + + +def binarise_mask(mask, lam, in_shape, max_soft=0.0): + """ Binarises a given low frequency image such that it has mean lambda. + + :param mask: Low frequency image, usually the result of `make_low_freq_image` + :param lam: Mean value of final mask + :param in_shape: Shape of inputs + :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask. 
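+    :note: pixels are ranked by low-frequency mask intensity; roughly the top
+        ``lam`` fraction is set to 1 and the rest to 0, and when ``max_soft`` > 0
+        a linear ramp of ``2 * soft`` pixels smooths the boundary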
+ :return: + """ + idx = mask.reshape(-1).argsort()[::-1] + mask = mask.reshape(-1) + num = math.ceil(lam * mask.size) if random.random() > 0.5 else math.floor( + lam * mask.size) + + eff_soft = max_soft + if max_soft > lam or max_soft > (1 - lam): + eff_soft = min(lam, 1 - lam) + + soft = int(mask.size * eff_soft) + num_low = int(num - soft) + num_high = int(num + soft) + + mask[idx[:num_high]] = 1 + mask[idx[num_low:]] = 0 + mask[idx[num_low:num_high]] = np.linspace(1, 0, (num_high - num_low)) + + mask = mask.reshape((1, 1, in_shape[0], in_shape[1])) + return mask + + +def sample_mask(alpha, decay_power, shape, max_soft=0.0, reformulate=False): + """ Samples a mean lambda from beta distribution parametrised by alpha, creates a low frequency image and binarises + it based on this lambda + + :param alpha: Alpha value for beta distribution from which to sample mean of mask + :param decay_power: Decay power for frequency decay prop 1/f**d + :param shape: Shape of desired mask, list up to 3 dims + :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask. + :param reformulate: If True, uses the reformulation of [1]. + """ + if isinstance(shape, int): + shape = (shape, ) + + # Choose lambda + lam = sample_lam(alpha, reformulate) + + # Make mask, get mean / std + mask = make_low_freq_image(decay_power, shape) + mask = binarise_mask(mask, lam, shape, max_soft) + + return float(lam), mask + + +def sample_and_apply(x, + alpha, + decay_power, + shape, + max_soft=0.0, + reformulate=False): + """ + + :param x: Image batch on which to apply fmix of shape [b, c, shape*] + :param alpha: Alpha value for beta distribution from which to sample mean of mask + :param decay_power: Decay power for frequency decay prop 1/f**d + :param shape: Shape of desired mask, list up to 3 dims + :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask. + :param reformulate: If True, uses the reformulation of [1]. + :return: mixed input, permutation indices, lambda value of mix, + """ + lam, mask = sample_mask(alpha, decay_power, shape, max_soft, reformulate) + index = np.random.permutation(x.shape[0]) + + x1, x2 = x * mask, x[index] * (1 - mask) + return x1 + x2, index, lam + + +class FMixBase: + """ FMix augmentation + + Args: + decay_power (float): Decay power for frequency decay prop 1/f**d + alpha (float): Alpha value for beta distribution from which to sample mean of mask + size ([int] | [int, int] | [int, int, int]): Shape of desired mask, list up to 3 dims + max_soft (float): Softening value between 0 and 0.5 which smooths hard edges in the mask. + reformulate (bool): If True, uses the reformulation of [1]. + """ + + def __init__(self, + decay_power=3, + alpha=1, + size=(32, 32), + max_soft=0.0, + reformulate=False): + super().__init__() + self.decay_power = decay_power + self.reformulate = reformulate + self.size = size + self.alpha = alpha + self.max_soft = max_soft + self.index = None + self.lam = None + + def __call__(self, x): + raise NotImplementedError + + def loss(self, *args, **kwargs): + raise NotImplementedError diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/functional.py b/src/PaddleClas/ppcls/data/preprocess/ops/functional.py new file mode 100644 index 0000000..9f1369e --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/functional.py @@ -0,0 +1,138 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# encoding: utf-8 + +import numpy as np +from PIL import Image, ImageOps, ImageEnhance + + + +def int_parameter(level, maxval): + """Helper function to scale `val` between 0 and maxval . + Args: + level: Level of the operation that will be between [0, `PARAMETER_MAX`]. + maxval: Maximum value that the operation can have. This will be scaled to + level/PARAMETER_MAX. + Returns: + An int that results from scaling `maxval` according to `level`. + """ + return int(level * maxval / 10) + + +def float_parameter(level, maxval): + """Helper function to scale `val` between 0 and maxval. + Args: + level: Level of the operation that will be between [0, `PARAMETER_MAX`]. + maxval: Maximum value that the operation can have. This will be scaled to + level/PARAMETER_MAX. + Returns: + A float that results from scaling `maxval` according to `level`. + """ + return float(level) * maxval / 10. + + +def sample_level(n): + return np.random.uniform(low=0.1, high=n) + + +def autocontrast(pil_img, *args): + return ImageOps.autocontrast(pil_img) + + +def equalize(pil_img, *args): + return ImageOps.equalize(pil_img) + + +def posterize(pil_img, level, *args): + level = int_parameter(sample_level(level), 4) + return ImageOps.posterize(pil_img, 4 - level) + + +def rotate(pil_img, level, *args): + degrees = int_parameter(sample_level(level), 30) + if np.random.uniform() > 0.5: + degrees = -degrees + return pil_img.rotate(degrees, resample=Image.BILINEAR) + + +def solarize(pil_img, level, *args): + level = int_parameter(sample_level(level), 256) + return ImageOps.solarize(pil_img, 256 - level) + + +def shear_x(pil_img, level): + level = float_parameter(sample_level(level), 0.3) + if np.random.uniform() > 0.5: + level = -level + return pil_img.transform(pil_img.size, + Image.AFFINE, (1, level, 0, 0, 1, 0), + resample=Image.BILINEAR) + + +def shear_y(pil_img, level): + level = float_parameter(sample_level(level), 0.3) + if np.random.uniform() > 0.5: + level = -level + return pil_img.transform(pil_img.size, + Image.AFFINE, (1, 0, 0, level, 1, 0), + resample=Image.BILINEAR) + + +def translate_x(pil_img, level): + level = int_parameter(sample_level(level), pil_img.size[0] / 3) + if np.random.random() > 0.5: + level = -level + return pil_img.transform(pil_img.size, + Image.AFFINE, (1, 0, level, 0, 1, 0), + resample=Image.BILINEAR) + + +def translate_y(pil_img, level): + level = int_parameter(sample_level(level), pil_img.size[1] / 3) + if np.random.random() > 0.5: + level = -level + return pil_img.transform(pil_img.size, + Image.AFFINE, (1, 0, 0, 0, 1, level), + resample=Image.BILINEAR) + + +# operation that overlaps with ImageNet-C's test set +def color(pil_img, level, *args): + level = float_parameter(sample_level(level), 1.8) + 0.1 + return ImageEnhance.Color(pil_img).enhance(level) + + +# operation that overlaps with ImageNet-C's test set +def contrast(pil_img, level, *args): + level = float_parameter(sample_level(level), 1.8) + 0.1 + return 
ImageEnhance.Contrast(pil_img).enhance(level) + + +# operation that overlaps with ImageNet-C's test set +def brightness(pil_img, level, *args): + level = float_parameter(sample_level(level), 1.8) + 0.1 + return ImageEnhance.Brightness(pil_img).enhance(level) + + +# operation that overlaps with ImageNet-C's test set +def sharpness(pil_img, level, *args): + level = float_parameter(sample_level(level), 1.8) + 0.1 + return ImageEnhance.Sharpness(pil_img).enhance(level) + + +augmentations = [ + autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y, + translate_x, translate_y +] diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/grid.py b/src/PaddleClas/ppcls/data/preprocess/ops/grid.py new file mode 100644 index 0000000..6f0b2dc --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/grid.py @@ -0,0 +1,89 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This code is based on https://github.com/akuxcw/GridMask + +import numpy as np +from PIL import Image +import pdb + +# curr +CURR_EPOCH = 0 +# epoch for the prob to be the upper limit +NUM_EPOCHS = 240 + + +class GridMask(object): + def __init__(self, d1=96, d2=224, rotate=1, ratio=0.5, mode=0, prob=1.): + self.d1 = d1 + self.d2 = d2 + self.rotate = rotate + self.ratio = ratio + self.mode = mode + self.st_prob = prob + self.prob = prob + self.last_prob = -1 + + def set_prob(self): + global CURR_EPOCH + global NUM_EPOCHS + self.prob = self.st_prob * min(1, 1.0 * CURR_EPOCH / NUM_EPOCHS) + + def __call__(self, img): + self.set_prob() + if abs(self.last_prob - self.prob) > 1e-10: + global CURR_EPOCH + global NUM_EPOCHS + print( + "self.prob is updated, self.prob={}, CURR_EPOCH: {}, NUM_EPOCHS: {}". 
+ format(self.prob, CURR_EPOCH, NUM_EPOCHS)) + self.last_prob = self.prob + # print("CURR_EPOCH: {}, NUM_EPOCHS: {}, self.prob is set as: {}".format(CURR_EPOCH, NUM_EPOCHS, self.prob) ) + if np.random.rand() > self.prob: + return img + _, h, w = img.shape + hh = int(1.5 * h) + ww = int(1.5 * w) + d = np.random.randint(self.d1, self.d2) + #d = self.d + self.l = int(d * self.ratio + 0.5) + mask = np.ones((hh, ww), np.float32) + st_h = np.random.randint(d) + st_w = np.random.randint(d) + for i in range(-1, hh // d + 1): + s = d * i + st_h + t = s + self.l + s = max(min(s, hh), 0) + t = max(min(t, hh), 0) + mask[s:t, :] *= 0 + for i in range(-1, ww // d + 1): + s = d * i + st_w + t = s + self.l + s = max(min(s, ww), 0) + t = max(min(t, ww), 0) + mask[:, s:t] *= 0 + r = np.random.randint(self.rotate) + mask = Image.fromarray(np.uint8(mask)) + mask = mask.rotate(r) + mask = np.asarray(mask) + mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // + 2 + w] + + if self.mode == 1: + mask = 1 - mask + + mask = np.expand_dims(mask, axis=0) + img = (img * mask).astype(img.dtype) + + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/hide_and_seek.py b/src/PaddleClas/ppcls/data/preprocess/ops/hide_and_seek.py new file mode 100644 index 0000000..33f25f2 --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/hide_and_seek.py @@ -0,0 +1,44 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This code is based on https://github.com/kkanshul/Hide-and-Seek + +import numpy as np +import random + + +class HideAndSeek(object): + def __init__(self): + # possible grid size, 0 means no hiding + self.grid_sizes = [0, 16, 32, 44, 56] + # hiding probability + self.hide_prob = 0.5 + + def __call__(self, img): + # randomly choose one grid size + grid_size = np.random.choice(self.grid_sizes) + + _, h, w = img.shape + + # hide the patches + if grid_size == 0: + return img + for x in range(0, w, grid_size): + for y in range(0, h, grid_size): + x_end = min(w, x + grid_size) + y_end = min(h, y + grid_size) + if (random.random() <= self.hide_prob): + img[:, x:x_end, y:y_end] = 0 + + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/operators.py b/src/PaddleClas/ppcls/data/preprocess/ops/operators.py new file mode 100644 index 0000000..9cdc58b --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/operators.py @@ -0,0 +1,384 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from functools import partial
+import six
+import math
+import random
+import cv2
+import numpy as np
+from PIL import Image
+from paddle.vision.transforms import ColorJitter as RawColorJitter
+
+from .autoaugment import ImageNetPolicy
+from .functional import augmentations
+from ppcls.utils import logger
+
+
+class UnifiedResize(object):
+    def __init__(self, interpolation=None, backend="cv2"):
+        _cv2_interp_from_str = {
+            'nearest': cv2.INTER_NEAREST,
+            'bilinear': cv2.INTER_LINEAR,
+            'area': cv2.INTER_AREA,
+            'bicubic': cv2.INTER_CUBIC,
+            'lanczos': cv2.INTER_LANCZOS4
+        }
+        _pil_interp_from_str = {
+            'nearest': Image.NEAREST,
+            'bilinear': Image.BILINEAR,
+            'bicubic': Image.BICUBIC,
+            'box': Image.BOX,
+            'lanczos': Image.LANCZOS,
+            'hamming': Image.HAMMING
+        }
+
+        def _pil_resize(src, size, resample):
+            pil_img = Image.fromarray(src)
+            pil_img = pil_img.resize(size, resample)
+            return np.asarray(pil_img)
+
+        if backend.lower() == "cv2":
+            if isinstance(interpolation, str):
+                interpolation = _cv2_interp_from_str[interpolation.lower()]
+            # compatible with opencv < version 4.4.0
+            elif interpolation is None:
+                interpolation = cv2.INTER_LINEAR
+            self.resize_func = partial(cv2.resize, interpolation=interpolation)
+        elif backend.lower() == "pil":
+            if isinstance(interpolation, str):
+                interpolation = _pil_interp_from_str[interpolation.lower()]
+            self.resize_func = partial(_pil_resize, resample=interpolation)
+        else:
+            logger.warning(
+                f"The backend of Resize only supports \"cv2\" or \"PIL\". \"{backend}\" is unavailable. Use \"cv2\" instead."
+ ) + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len( + img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( + img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, + size=None, + resize_short=None, + interpolation=None, + backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize( + interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, + size, + scale=None, + ratio=None, + interpolation=None, + backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize( + interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. 
/ aspect_ratio
+
+        img_h, img_w = img.shape[:2]
+
+        bound = min((float(img_w) / img_h) / (w**2),
+                    (float(img_h) / img_w) / (h**2))
+        scale_max = min(scale[1], bound)
+        scale_min = min(scale[0], bound)
+
+        target_area = img_w * img_h * random.uniform(scale_min, scale_max)
+        target_size = math.sqrt(target_area)
+        w = int(target_size * w)
+        h = int(target_size * h)
+
+        i = random.randint(0, img_w - w)
+        j = random.randint(0, img_h - h)
+
+        img = img[j:j + h, i:i + w, :]
+
+        return self._resize_func(img, size)
+
+
+class RandFlipImage(object):
+    """ random flip image
+        flip_code:
+            1: Flipped Horizontally
+            0: Flipped Vertically
+            -1: Flipped Horizontally & Vertically
+    """
+
+    def __init__(self, flip_code=1):
+        assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]"
+        self.flip_code = flip_code
+
+    def __call__(self, img):
+        if random.randint(0, 1) == 1:
+            return cv2.flip(img, self.flip_code)
+        else:
+            return img
+
+
+class AutoAugment(object):
+    def __init__(self):
+        self.policy = ImageNetPolicy()
+
+    def __call__(self, img):
+        from PIL import Image
+        img = np.ascontiguousarray(img)
+        img = Image.fromarray(img)
+        img = self.policy(img)
+        img = np.asarray(img)
+        # the converted array must be returned; without this the op yields None
+        return img
+
+
+class NormalizeImage(object):
+    """ normalize image, i.e. subtract mean and divide by std
+    """
+
+    def __init__(self,
+                 scale=None,
+                 mean=None,
+                 std=None,
+                 order='chw',
+                 output_fp16=False,
+                 channel_num=3):
+        if isinstance(scale, str):
+            scale = eval(scale)
+        assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4."
+        self.channel_num = channel_num
+        self.output_dtype = 'float16' if output_fp16 else 'float32'
+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        self.order = order
+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
+        std = std if std is not None else [0.229, 0.224, 0.225]
+
+        shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3)
+        self.mean = np.array(mean).reshape(shape).astype('float32')
+        self.std = np.array(std).reshape(shape).astype('float32')
+
+    def __call__(self, img):
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+
+        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
+
+        img = (img.astype('float32') * self.scale - self.mean) / self.std
+
+        if self.channel_num == 4:
+            img_h = img.shape[1] if self.order == 'chw' else img.shape[0]
+            img_w = img.shape[2] if self.order == 'chw' else img.shape[1]
+            pad_zeros = np.zeros(
+                (1, img_h, img_w)) if self.order == 'chw' else np.zeros(
+                    (img_h, img_w, 1))
+            img = (np.concatenate(
+                (img, pad_zeros), axis=0)
+                   if self.order == 'chw' else np.concatenate(
+                       (img, pad_zeros), axis=2))
+        return img.astype(self.output_dtype)
+
+
+class ToCHWImage(object):
+    """ convert hwc image to chw image
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, img):
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+
+        return img.transpose((2, 0, 1))
+
+
+class AugMix(object):
+    """ Perform AugMix augmentation and compute mixture.
+    """
+
+    def __init__(self,
+                 prob=0.5,
+                 aug_prob_coeff=0.1,
+                 mixture_width=3,
+                 mixture_depth=1,
+                 aug_severity=1):
+        """
+        Args:
+            prob: Probability of taking AugMix.
+            aug_prob_coeff: Probability distribution coefficients.
+            mixture_width: Number of augmentation chains to mix per augmented example.
+            mixture_depth: Depth of augmentation chains; -1 denotes stochastic depth in [1, 3].
+            aug_severity: Severity of underlying augmentation operators (between 1 and 10).
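+            The final output is ``(1 - m) * image + m * mix``, where ``m`` is drawn
+            from Beta(aug_prob_coeff, aug_prob_coeff) and the per-chain weights
+            come from a Dirichlet with the same coefficient.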
+ """ + # fmt: off + self.prob = prob + self.aug_prob_coeff = aug_prob_coeff + self.mixture_width = mixture_width + self.mixture_depth = mixture_depth + self.aug_severity = aug_severity + self.augmentations = augmentations + # fmt: on + + def __call__(self, image): + """Perform AugMix augmentations and compute mixture. + Returns: + mixed: Augmented and mixed image. + """ + if random.random() > self.prob: + # Avoid the warning: the given NumPy array is not writeable + return np.asarray(image).copy() + + ws = np.float32( + np.random.dirichlet([self.aug_prob_coeff] * self.mixture_width)) + m = np.float32( + np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff)) + + # image = Image.fromarray(image) + mix = np.zeros(image.shape) + for i in range(self.mixture_width): + image_aug = image.copy() + image_aug = Image.fromarray(image_aug) + depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint( + 1, 4) + for _ in range(depth): + op = np.random.choice(self.augmentations) + image_aug = op(image_aug, self.aug_severity) + mix += ws[i] * np.asarray(image_aug) + + mixed = (1 - m) * image + m * mix + return mixed.astype(np.uint8) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/randaugment.py b/src/PaddleClas/ppcls/data/preprocess/ops/randaugment.py new file mode 100644 index 0000000..cca59da --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/randaugment.py @@ -0,0 +1,106 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
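+
+# A minimal usage sketch (assuming an input PIL.Image named `img`):
+#
+#     aug = RandAugment(num_layers=2, magnitude=5)
+#     img = aug(img)  # applies num_layers randomly chosen ops at the fixed magnitude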
+ +# This code is based on https://github.com/heartInsert/randaugment + +from PIL import Image, ImageEnhance, ImageOps +import numpy as np +import random + + +class RandAugment(object): + def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)): + self.num_layers = num_layers + self.magnitude = magnitude + self.max_level = 10 + + abso_level = self.magnitude / self.max_level + self.level_map = { + "shearX": 0.3 * abso_level, + "shearY": 0.3 * abso_level, + "translateX": 150.0 / 331 * abso_level, + "translateY": 150.0 / 331 * abso_level, + "rotate": 30 * abso_level, + "color": 0.9 * abso_level, + "posterize": int(4.0 * abso_level), + "solarize": 256.0 * abso_level, + "contrast": 0.9 * abso_level, + "sharpness": 0.9 * abso_level, + "brightness": 0.9 * abso_level, + "autocontrast": 0, + "equalize": 0, + "invert": 0 + } + + # from https://stackoverflow.com/questions/5252170/ + # specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand + def rotate_with_fill(img, magnitude): + rot = img.convert("RGBA").rotate(magnitude) + return Image.composite(rot, + Image.new("RGBA", rot.size, (128, ) * 4), + rot).convert(img.mode) + + rnd_ch_op = random.choice + + self.func = { + "shearX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, magnitude * rnd_ch_op([-1, 1]), 0, 0, 1, 0), + Image.BICUBIC, + fillcolor=fillcolor), + "shearY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, magnitude * rnd_ch_op([-1, 1]), 1, 0), + Image.BICUBIC, + fillcolor=fillcolor), + "translateX": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, magnitude * img.size[0] * rnd_ch_op([-1, 1]), 0, 1, 0), + fillcolor=fillcolor), + "translateY": lambda img, magnitude: img.transform( + img.size, + Image.AFFINE, + (1, 0, 0, 0, 1, magnitude * img.size[1] * rnd_ch_op([-1, 1])), + fillcolor=fillcolor), + "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), + "color": lambda img, magnitude: ImageEnhance.Color(img).enhance( + 1 + magnitude * rnd_ch_op([-1, 1])), + "posterize": lambda img, magnitude: + ImageOps.posterize(img, magnitude), + "solarize": lambda img, magnitude: + ImageOps.solarize(img, magnitude), + "contrast": lambda img, magnitude: + ImageEnhance.Contrast(img).enhance( + 1 + magnitude * rnd_ch_op([-1, 1])), + "sharpness": lambda img, magnitude: + ImageEnhance.Sharpness(img).enhance( + 1 + magnitude * rnd_ch_op([-1, 1])), + "brightness": lambda img, magnitude: + ImageEnhance.Brightness(img).enhance( + 1 + magnitude * rnd_ch_op([-1, 1])), + "autocontrast": lambda img, magnitude: + ImageOps.autocontrast(img), + "equalize": lambda img, magnitude: ImageOps.equalize(img), + "invert": lambda img, magnitude: ImageOps.invert(img) + } + + def __call__(self, img): + avaiable_op_names = list(self.level_map.keys()) + for layer_num in range(self.num_layers): + op_name = np.random.choice(avaiable_op_names) + img = self.func[op_name](img, self.level_map[op_name]) + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/random_erasing.py b/src/PaddleClas/ppcls/data/preprocess/ops/random_erasing.py new file mode 100644 index 0000000..f234abb --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/random_erasing.py @@ -0,0 +1,90 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm. + +from functools import partial + +import math +import random + +import numpy as np + + +class Pixels(object): + def __init__(self, mode="const", mean=[0., 0., 0.]): + self._mode = mode + self._mean = mean + + def __call__(self, h=224, w=224, c=3): + if self._mode == "rand": + return np.random.normal(size=(1, 1, 3)) + elif self._mode == "pixel": + return np.random.normal(size=(h, w, c)) + elif self._mode == "const": + return self._mean + else: + raise Exception( + "Invalid mode in RandomErasing, only support \"const\", \"rand\", \"pixel\"" + ) + + +class RandomErasing(object): + """RandomErasing. + """ + + def __init__(self, + EPSILON=0.5, + sl=0.02, + sh=0.4, + r1=0.3, + mean=[0., 0., 0.], + attempt=100, + use_log_aspect=False, + mode='const'): + self.EPSILON = eval(EPSILON) if isinstance(EPSILON, str) else EPSILON + self.sl = eval(sl) if isinstance(sl, str) else sl + self.sh = eval(sh) if isinstance(sh, str) else sh + r1 = eval(r1) if isinstance(r1, str) else r1 + self.r1 = (math.log(r1), math.log(1 / r1)) if use_log_aspect else ( + r1, 1 / r1) + self.use_log_aspect = use_log_aspect + self.attempt = attempt + self.get_pixels = Pixels(mode, mean) + + def __call__(self, img): + if random.random() > self.EPSILON: + return img + + for _ in range(self.attempt): + area = img.shape[0] * img.shape[1] + + target_area = random.uniform(self.sl, self.sh) * area + aspect_ratio = random.uniform(*self.r1) + if self.use_log_aspect: + aspect_ratio = math.exp(aspect_ratio) + + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + + if w < img.shape[1] and h < img.shape[0]: + pixels = self.get_pixels(h, w, img.shape[2]) + x1 = random.randint(0, img.shape[0] - h) + y1 = random.randint(0, img.shape[1] - w) + if img.shape[2] == 3: + img[x1:x1 + h, y1:y1 + w, :] = pixels + else: + img[x1:x1 + h, y1:y1 + w, 0] = pixels[0] + return img + return img diff --git a/src/PaddleClas/ppcls/data/preprocess/ops/timm_autoaugment.py b/src/PaddleClas/ppcls/data/preprocess/ops/timm_autoaugment.py new file mode 100644 index 0000000..dd2994d --- /dev/null +++ b/src/PaddleClas/ppcls/data/preprocess/ops/timm_autoaugment.py @@ -0,0 +1,877 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
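+
+# A minimal usage sketch (assuming an input PIL.Image named `img`); the
+# RawTimmAutoAugment class at the bottom of this file is the main entry point:
+#
+#     aug = RawTimmAutoAugment(config_str="rand-m9-mstd0.5-inc1", img_size=224)
+#     img = aug(img)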
+ +# Code was heavily based on https://github.com/rwightman/pytorch-image-models + +import random +import math +import re +from PIL import Image, ImageOps, ImageEnhance, ImageChops +import PIL +import numpy as np + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) + +_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) + +_FILL = (128, 128, 128) + +# This signifies the max integer that the controller RNN could predict for the +# augmentation scheme. +_MAX_LEVEL = 10. + +_HPARAMS_DEFAULT = dict( + translate_const=250, + img_mean=_FILL, ) + +_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) + + +def _pil_interp(method): + if method == 'bicubic': + return Image.BICUBIC + elif method == 'lanczos': + return Image.LANCZOS + elif method == 'hamming': + return Image.HAMMING + else: + # default bilinear, do we want to allow nearest? + return Image.BILINEAR + + +def _interpolation(kwargs): + interpolation = kwargs.pop('resample', Image.BILINEAR) + if isinstance(interpolation, (list, tuple)): + return random.choice(interpolation) + else: + return interpolation + + +def _check_args_tf(kwargs): + if 'fillcolor' in kwargs and _PIL_VER < (5, 0): + kwargs.pop('fillcolor') + kwargs['resample'] = _interpolation(kwargs) + + +def shear_x(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, factor, 0, 0, 1, 0), + **kwargs) + + +def shear_y(img, factor, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, factor, 1, 0), + **kwargs) + + +def translate_x_rel(img, pct, **kwargs): + pixels = pct * img.size[0] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), + **kwargs) + + +def translate_y_rel(img, pct, **kwargs): + pixels = pct * img.size[1] + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), + **kwargs) + + +def translate_x_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0), + **kwargs) + + +def translate_y_abs(img, pixels, **kwargs): + _check_args_tf(kwargs) + return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels), + **kwargs) + + +def rotate(img, degrees, **kwargs): + _check_args_tf(kwargs) + if _PIL_VER >= (5, 2): + return img.rotate(degrees, **kwargs) + elif _PIL_VER >= (5, 0): + w, h = img.size + post_trans = (0, 0) + rotn_center = (w / 2.0, h / 2.0) + angle = -math.radians(degrees) + matrix = [ + round(math.cos(angle), 15), + round(math.sin(angle), 15), + 0.0, + round(-math.sin(angle), 15), + round(math.cos(angle), 15), + 0.0, + ] + + def transform(x, y, matrix): + (a, b, c, d, e, f) = matrix + return a * x + b * y + c, d * x + e * y + f + + matrix[2], matrix[5] = transform(-rotn_center[0] - post_trans[0], + -rotn_center[1] - post_trans[1], + matrix) + matrix[2] += rotn_center[0] + matrix[5] += rotn_center[1] + return img.transform(img.size, Image.AFFINE, matrix, **kwargs) + else: + return img.rotate(degrees, resample=kwargs['resample']) + + +def auto_contrast(img, **__): + return ImageOps.autocontrast(img) + + +def invert(img, **__): + return ImageOps.invert(img) + + +def equalize(img, **__): + return ImageOps.equalize(img) + + +def solarize(img, thresh, **__): + return ImageOps.solarize(img, thresh) + + +def solarize_add(img, add, thresh=128, **__): + lut = [] + for i in range(256): + if i < thresh: + lut.append(min(255, i + add)) + else: + lut.append(i) + if img.mode in ("L", "RGB"): + if img.mode == "RGB" and len(lut) == 
256: + lut = lut + lut + lut + return img.point(lut) + else: + return img + + +def posterize(img, bits_to_keep, **__): + if bits_to_keep >= 8: + return img + return ImageOps.posterize(img, bits_to_keep) + + +def contrast(img, factor, **__): + return ImageEnhance.Contrast(img).enhance(factor) + + +def color(img, factor, **__): + return ImageEnhance.Color(img).enhance(factor) + + +def brightness(img, factor, **__): + return ImageEnhance.Brightness(img).enhance(factor) + + +def sharpness(img, factor, **__): + return ImageEnhance.Sharpness(img).enhance(factor) + + +def _randomly_negate(v): + """With 50% prob, negate the value""" + return -v if random.random() > 0.5 else v + + +def _rotate_level_to_arg(level, _hparams): + # range [-30, 30] + level = (level / _MAX_LEVEL) * 30. + level = _randomly_negate(level) + return level, + + +def _enhance_level_to_arg(level, _hparams): + # range [0.1, 1.9] + return (level / _MAX_LEVEL) * 1.8 + 0.1, + + +def _enhance_increasing_level_to_arg(level, _hparams): + # the 'no change' level is 1.0, moving away from that towards 0. or 2.0 increases the enhancement blend + # range [0.1, 1.9] + level = (level / _MAX_LEVEL) * .9 + level = 1.0 + _randomly_negate(level) + return level, + + +def _shear_level_to_arg(level, _hparams): + # range [-0.3, 0.3] + level = (level / _MAX_LEVEL) * 0.3 + level = _randomly_negate(level) + return level, + + +def _translate_abs_level_to_arg(level, hparams): + translate_const = hparams['translate_const'] + level = (level / _MAX_LEVEL) * float(translate_const) + level = _randomly_negate(level) + return level, + + +def _translate_rel_level_to_arg(level, hparams): + # default range [-0.45, 0.45] + translate_pct = hparams.get('translate_pct', 0.45) + level = (level / _MAX_LEVEL) * translate_pct + level = _randomly_negate(level) + return level, + + +def _posterize_level_to_arg(level, _hparams): + # As per Tensorflow TPU EfficientNet impl + # range [0, 4], 'keep 0 up to 4 MSB of original image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4), + + +def _posterize_increasing_level_to_arg(level, hparams): + # As per Tensorflow models research and UDA impl + # range [4, 0], 'keep 4 down to 0 MSB of original image', + # intensity/severity of augmentation increases with level + return 4 - _posterize_level_to_arg(level, hparams)[0], + + +def _posterize_original_level_to_arg(level, _hparams): + # As per original AutoAugment paper description + # range [4, 8], 'keep 4 up to 8 MSB of image' + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 4) + 4, + + +def _solarize_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation decreases with level + return int((level / _MAX_LEVEL) * 256), + + +def _solarize_increasing_level_to_arg(level, _hparams): + # range [0, 256] + # intensity/severity of augmentation increases with level + return 256 - _solarize_level_to_arg(level, _hparams)[0], + + +def _solarize_add_level_to_arg(level, _hparams): + # range [0, 110] + return int((level / _MAX_LEVEL) * 110), + + +LEVEL_TO_ARG = { + 'AutoContrast': None, + 'Equalize': None, + 'Invert': None, + 'Rotate': _rotate_level_to_arg, + # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers + 'Posterize': _posterize_level_to_arg, + 'PosterizeIncreasing': _posterize_increasing_level_to_arg, + 'PosterizeOriginal': _posterize_original_level_to_arg, + 'Solarize': _solarize_level_to_arg, + 
'SolarizeIncreasing': _solarize_increasing_level_to_arg, + 'SolarizeAdd': _solarize_add_level_to_arg, + 'Color': _enhance_level_to_arg, + 'ColorIncreasing': _enhance_increasing_level_to_arg, + 'Contrast': _enhance_level_to_arg, + 'ContrastIncreasing': _enhance_increasing_level_to_arg, + 'Brightness': _enhance_level_to_arg, + 'BrightnessIncreasing': _enhance_increasing_level_to_arg, + 'Sharpness': _enhance_level_to_arg, + 'SharpnessIncreasing': _enhance_increasing_level_to_arg, + 'ShearX': _shear_level_to_arg, + 'ShearY': _shear_level_to_arg, + 'TranslateX': _translate_abs_level_to_arg, + 'TranslateY': _translate_abs_level_to_arg, + 'TranslateXRel': _translate_rel_level_to_arg, + 'TranslateYRel': _translate_rel_level_to_arg, +} + +NAME_TO_OP = { + 'AutoContrast': auto_contrast, + 'Equalize': equalize, + 'Invert': invert, + 'Rotate': rotate, + 'Posterize': posterize, + 'PosterizeIncreasing': posterize, + 'PosterizeOriginal': posterize, + 'Solarize': solarize, + 'SolarizeIncreasing': solarize, + 'SolarizeAdd': solarize_add, + 'Color': color, + 'ColorIncreasing': color, + 'Contrast': contrast, + 'ContrastIncreasing': contrast, + 'Brightness': brightness, + 'BrightnessIncreasing': brightness, + 'Sharpness': sharpness, + 'SharpnessIncreasing': sharpness, + 'ShearX': shear_x, + 'ShearY': shear_y, + 'TranslateX': translate_x_abs, + 'TranslateY': translate_y_abs, + 'TranslateXRel': translate_x_rel, + 'TranslateYRel': translate_y_rel, +} + + +class AugmentOp(object): + def __init__(self, name, prob=0.5, magnitude=10, hparams=None): + hparams = hparams or _HPARAMS_DEFAULT + self.aug_fn = NAME_TO_OP[name] + self.level_fn = LEVEL_TO_ARG[name] + self.prob = prob + self.magnitude = magnitude + self.hparams = hparams.copy() + self.kwargs = dict( + fillcolor=hparams['img_mean'] if 'img_mean' in hparams else _FILL, + resample=hparams['interpolation'] + if 'interpolation' in hparams else _RANDOM_INTERPOLATION, ) + + # If magnitude_std is > 0, we introduce some randomness + # in the usually fixed policy and sample magnitude from a normal distribution + # with mean `magnitude` and std-dev of `magnitude_std`. + # NOTE This is my own hack, being tested, not in papers or reference impls. + self.magnitude_std = self.hparams.get('magnitude_std', 0) + + def __call__(self, img): + if self.prob < 1.0 and random.random() > self.prob: + return img + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn( + magnitude, self.hparams) if self.level_fn is not None else tuple() + return self.aug_fn(img, *level_args, **self.kwargs) + + +def auto_augment_policy_v0(hparams): + # ImageNet v0 policy from TPU EfficientNet impl, cannot find a paper reference. 
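+    # Each sub-policy is a pair of ('OpName', probability, magnitude 0-10)
+    # tuples; one sub-policy is sampled per image and both ops are applied in
+    # order (see the AutoAugment class below). Ops mapped to None in
+    # LEVEL_TO_ARG ignore the magnitude.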
+ policy = [ + [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Color', 0.4, 1), ('Rotate', 0.6, 8)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], + [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], + [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], + [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], + [('ShearY', 0.8, 0), ('Color', 0.6, 4)], + [('Color', 1.0, 0), ('Rotate', 0.6, 2)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], + [('Posterize', 0.8, 2), ('Solarize', 0.6, 10) + ], # This results in black image with Tpu posterize + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + [('Color', 0.8, 6), ('Rotate', 0.4, 5)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_v0r(hparams): + # ImageNet v0 policy from TPU EfficientNet impl, with variation of Posterize used + # in Google research implementation (number of bits discarded increases with magnitude) + policy = [ + [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Color', 0.4, 1), ('Rotate', 0.6, 8)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], + [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], + [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)], + [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], + [('ShearY', 0.8, 0), ('Color', 0.6, 4)], + [('Color', 1.0, 0), ('Rotate', 0.6, 2)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], + [('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)], + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + [('Color', 0.8, 6), ('Rotate', 0.4, 5)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_original(hparams): + # ImageNet policy from https://arxiv.org/abs/1805.09501 + policy = [ + [('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + [('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], + [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], + [('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)], + [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], + [('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)], + [('Rotate', 0.8, 8), ('Color', 0.4, 0)], + [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], 
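+        # NOTE: several sub-policies in this 25-entry list repeat verbatim;
+        # duplicates simply receive a proportionally higher sampling weight.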
+ [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Rotate', 0.8, 8), ('Color', 1.0, 2)], + [('Color', 0.8, 8), ('Solarize', 0.8, 7)], + [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], + [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], + [('Color', 0.4, 0), ('Equalize', 0.6, 3)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy_originalr(hparams): + # ImageNet policy from https://arxiv.org/abs/1805.09501 with research posterize variation + policy = [ + [('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + [('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], + [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], + [('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)], + [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], + [('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)], + [('Rotate', 0.8, 8), ('Color', 0.4, 0)], + [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], + [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Rotate', 0.8, 8), ('Color', 1.0, 2)], + [('Color', 0.8, 8), ('Solarize', 0.8, 7)], + [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], + [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], + [('Color', 0.4, 0), ('Equalize', 0.6, 3)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + +def auto_augment_policy(name='v0', hparams=None): + hparams = hparams or _HPARAMS_DEFAULT + if name == 'original': + return auto_augment_policy_original(hparams) + elif name == 'originalr': + return auto_augment_policy_originalr(hparams) + elif name == 'v0': + return auto_augment_policy_v0(hparams) + elif name == 'v0r': + return auto_augment_policy_v0r(hparams) + else: + assert False, 'Unknown AA policy (%s)' % name + + +class AutoAugment(object): + def __init__(self, policy): + self.policy = policy + + def __call__(self, img): + sub_policy = random.choice(self.policy) + for op in sub_policy: + img = op(img) + return img + + +def auto_augment_transform(config_str, hparams): + """ + Create a AutoAugment transform + + :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr'). 
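+        (the 'r' variants 'v0r'/'originalr' swap in the Posterize scaling used in the Google research implementation).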
+        The remaining sections, which are not order specific, determine
+        'mstd' - float std deviation of magnitude noise applied
+        Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5
+
+    :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme
+
+    :return: A callable Transform Op
+    """
+    config = config_str.split('-')
+    policy_name = config[0]
+    config = config[1:]
+    for c in config:
+        cs = re.split(r'(\d.*)', c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == 'mstd':
+            # noise param injected via hparams for now
+            hparams.setdefault('magnitude_std', float(val))
+        else:
+            assert False, 'Unknown AutoAugment config section'
+    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
+    return AutoAugment(aa_policy)
+
+
+_RAND_TRANSFORMS = [
+    'AutoContrast',
+    'Equalize',
+    'Invert',
+    'Rotate',
+    'Posterize',
+    'Solarize',
+    'SolarizeAdd',
+    'Color',
+    'Contrast',
+    'Brightness',
+    'Sharpness',
+    'ShearX',
+    'ShearY',
+    'TranslateXRel',
+    'TranslateYRel',
+    #'Cutout'  # NOTE I've implemented this as random erasing separately
+]
+
+_RAND_INCREASING_TRANSFORMS = [
+    'AutoContrast',
+    'Equalize',
+    'Invert',
+    'Rotate',
+    'PosterizeIncreasing',
+    'SolarizeIncreasing',
+    'SolarizeAdd',
+    'ColorIncreasing',
+    'ContrastIncreasing',
+    'BrightnessIncreasing',
+    'SharpnessIncreasing',
+    'ShearX',
+    'ShearY',
+    'TranslateXRel',
+    'TranslateYRel',
+    #'Cutout'  # NOTE I've implemented this as random erasing separately
+]
+
+# These experimental weights are based loosely on the relative improvements mentioned in the paper.
+# They may not result in increased performance, but could likely be tuned to do so.
+_RAND_CHOICE_WEIGHTS_0 = {
+    'Rotate': 0.3,
+    'ShearX': 0.2,
+    'ShearY': 0.2,
+    'TranslateXRel': 0.1,
+    'TranslateYRel': 0.1,
+    'Color': .025,
+    'Sharpness': 0.025,
+    'AutoContrast': 0.025,
+    'Solarize': .005,
+    'SolarizeAdd': .005,
+    'Contrast': .005,
+    'Brightness': .005,
+    'Equalize': .005,
+    'Posterize': 0,
+    'Invert': 0,
+}
+
+
+def _select_rand_weights(weight_idx=0, transforms=None):
+    transforms = transforms or _RAND_TRANSFORMS
+    assert weight_idx == 0  # only one set of weights currently
+    rand_weights = _RAND_CHOICE_WEIGHTS_0
+    probs = [rand_weights[k] for k in transforms]
+    probs /= np.sum(probs)
+    return probs
+
+
+def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
+    hparams = hparams or _HPARAMS_DEFAULT
+    transforms = transforms or _RAND_TRANSFORMS
+    return [
+        AugmentOp(
+            name, prob=0.5, magnitude=magnitude, hparams=hparams)
+        for name in transforms
+    ]
+
+
+class RandAugment(object):
+    def __init__(self, ops, num_layers=2, choice_weights=None):
+        self.ops = ops
+        self.num_layers = num_layers
+        self.choice_weights = choice_weights
+
+    def __call__(self, img):
+        # no replacement when using weighted choice
+        ops = np.random.choice(
+            self.ops,
+            self.num_layers,
+            replace=self.choice_weights is None,
+            p=self.choice_weights)
+        for op in ops:
+            img = op(img)
+        return img
+
+
+def rand_augment_transform(config_str, hparams):
+    """
+    Create a RandAugment transform
+
+    :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
+        dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand').
+        The remaining sections, which are not order specific, determine
+        'm' - integer magnitude of rand augment
+        'n' - integer num layers (number of transform ops selected per image)
+        'w' - integer probability weight index (index of a set of weights to influence choice of op)
+        'mstd' - float std deviation of magnitude noise applied
+        'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
+        Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
+        'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2
+
+    :param hparams: Other hparams (kwargs) for the RandAugmentation scheme
+
+    :return: A callable Transform Op
+    """
+    magnitude = _MAX_LEVEL  # default to _MAX_LEVEL for magnitude (currently 10)
+    num_layers = 2  # default to 2 ops per image
+    weight_idx = None  # default to no probability weights for op choice
+    transforms = _RAND_TRANSFORMS
+    config = config_str.split('-')
+    assert config[0] == 'rand'
+    config = config[1:]
+    for c in config:
+        cs = re.split(r'(\d.*)', c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == 'mstd':
+            # noise param injected via hparams for now
+            hparams.setdefault('magnitude_std', float(val))
+        elif key == 'inc':
+            if bool(val):
+                transforms = _RAND_INCREASING_TRANSFORMS
+        elif key == 'm':
+            magnitude = int(val)
+        elif key == 'n':
+            num_layers = int(val)
+        elif key == 'w':
+            weight_idx = int(val)
+        else:
+            assert False, 'Unknown RandAugment config section'
+    ra_ops = rand_augment_ops(
+        magnitude=magnitude, hparams=hparams, transforms=transforms)
+    choice_weights = None if weight_idx is None else _select_rand_weights(
+        weight_idx)
+    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
+
+
+_AUGMIX_TRANSFORMS = [
+    'AutoContrast',
+    'ColorIncreasing',  # not in paper
+    'ContrastIncreasing',  # not in paper
+    'BrightnessIncreasing',  # not in paper
+    'SharpnessIncreasing',  # not in paper
+    'Equalize',
+    'Rotate',
+    'PosterizeIncreasing',
+    'SolarizeIncreasing',
+    'ShearX',
+    'ShearY',
+    'TranslateXRel',
+    'TranslateYRel',
+]
+
+
+def augmix_ops(magnitude=10, hparams=None, transforms=None):
+    hparams = hparams or _HPARAMS_DEFAULT
+    transforms = transforms or _AUGMIX_TRANSFORMS
+    return [
+        AugmentOp(
+            name, prob=1.0, magnitude=magnitude, hparams=hparams)
+        for name in transforms
+    ]
+
+
+class AugMixAugment(object):
+    """ AugMix Transform
+    Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py
+    From paper: AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty -
+    https://arxiv.org/abs/1912.02781
+    """
+
+    def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False):
+        self.ops = ops
+        self.alpha = alpha
+        self.width = width
+        self.depth = depth
+        self.blended = blended  # blended mode is faster but not well tested
+
+    def _calc_blended_weights(self, ws, m):
+        ws = ws * m
+        cump = 1.
+        rws = []
+        for w in ws[::-1]:
+            alpha = w / cump
+            cump *= (1 - alpha)
+            rws.append(alpha)
+        return np.array(rws[::-1], dtype=np.float32)
+
+    def _apply_blended(self, img, mixing_weights, m):
+        # This is my first crack at implementing a slightly faster mixed augmentation. Instead
+        # of accumulating the mix for each chain in a Numpy array and then blending with original,
+        # it recomputes the blending coefficients and applies one PIL image blend per chain.
+        # TODO the results appear in the right ballpark but they differ by more than rounding.
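+        # _calc_blended_weights converts the Dirichlet chain weights (pre-scaled
+        # by m) into sequential alphas chosen so that the chained Image.blend
+        # calls below reproduce the weighted sum (1 - m)*img + sum_i w_i*chain_i(img).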
+ img_orig = img.copy() + ws = self._calc_blended_weights(mixing_weights, m) + for w in ws: + depth = self.depth if self.depth > 0 else np.random.randint(1, 4) + ops = np.random.choice(self.ops, depth, replace=True) + img_aug = img_orig # no ops are in-place, deep copy not necessary + for op in ops: + img_aug = op(img_aug) + img = Image.blend(img, img_aug, w) + return img + + def _apply_basic(self, img, mixing_weights, m): + # This is a literal adaptation of the paper/official implementation without normalizations and + # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the + # typical augmentation transforms, and could use a GPU / Kornia implementation. + img_shape = img.size[0], img.size[1], len(img.getbands()) + mixed = np.zeros(img_shape, dtype=np.float32) + for mw in mixing_weights: + depth = self.depth if self.depth > 0 else np.random.randint(1, 4) + ops = np.random.choice(self.ops, depth, replace=True) + img_aug = img # no ops are in-place, deep copy not necessary + for op in ops: + img_aug = op(img_aug) + mixed += mw * np.asarray(img_aug, dtype=np.float32) + np.clip(mixed, 0, 255., out=mixed) + mixed = Image.fromarray(mixed.astype(np.uint8)) + return Image.blend(img, mixed, m) + + def __call__(self, img): + mixing_weights = np.float32( + np.random.dirichlet([self.alpha] * self.width)) + m = np.float32(np.random.beta(self.alpha, self.alpha)) + if self.blended: + mixed = self._apply_blended(img, mixing_weights, m) + else: + mixed = self._apply_basic(img, mixing_weights, m) + return mixed + + +def augment_and_mix_transform(config_str, hparams): + """ Create AugMix transform + + :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by + dashes ('-'). The first section defines the specific variant (currently only 'augmix'). The remaining + sections, which are not order specific, determine + 'm' - integer magnitude (severity) of augmentation mix (default: 3) + 'w' - integer width of augmentation chain (default: 3) + 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) + 'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0) + 'mstd' - float std deviation of magnitude noise applied (default: 0) + Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2 + + :param hparams: Other hparams (kwargs) for the Augmentation transforms + + :return: A callable Transform Op + """ + magnitude = 3 + width = 3 + depth = -1 + alpha = 1.
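+ # editorial note: 'alpha' is used twice in AugMixAugment.__call__, as the Dirichlet + # concentration over chain weights and as the Beta concentration of the final blend factor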
+ blended = False + config = config_str.split('-') + assert config[0] == 'augmix' + config = config[1:] + for c in config: + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param injected via hparams for now + hparams.setdefault('magnitude_std', float(val)) + elif key == 'm': + magnitude = int(val) + elif key == 'w': + width = int(val) + elif key == 'd': + depth = int(val) + elif key == 'a': + alpha = float(val) + elif key == 'b': + blended = bool(int(val)) # parse as int so that 'b0' stays False + else: + assert False, 'Unknown AugMix config section' + ops = augmix_ops(magnitude=magnitude, hparams=hparams) + return AugMixAugment( + ops, alpha=alpha, width=width, depth=depth, blended=blended) + + +class RawTimmAutoAugment(object): + """TimmAutoAugment API for PaddleClas.""" + + def __init__(self, + config_str="rand-m9-mstd0.5-inc1", + interpolation="bicubic", + img_size=224, + mean=IMAGENET_DEFAULT_MEAN): + if isinstance(img_size, (tuple, list)): + img_size_min = min(img_size) + else: + img_size_min = img_size + + aa_params = dict( + translate_const=int(img_size_min * 0.45), + img_mean=tuple([min(255, round(255 * x)) for x in mean]), ) + if interpolation and interpolation != 'random': + aa_params['interpolation'] = _pil_interp(interpolation) + if config_str.startswith('rand'): + self.augment_func = rand_augment_transform(config_str, aa_params) + elif config_str.startswith('augmix'): + aa_params['translate_pct'] = 0.3 + self.augment_func = augment_and_mix_transform(config_str, + aa_params) + elif config_str.startswith('auto'): + self.augment_func = auto_augment_transform(config_str, aa_params) + else: + raise Exception( + "ConfigError: The TimmAutoAugment Op only supports RandAugment, AutoAugment and AugMix, so the config_str must start with \"rand\", \"augmix\" or \"auto\"." + ) + + def __call__(self, img): + return self.augment_func(img) diff --git a/src/PaddleClas/ppcls/data/utils/__init__.py b/src/PaddleClas/ppcls/data/utils/__init__.py new file mode 100644 index 0000000..61d5aa2 --- /dev/null +++ b/src/PaddleClas/ppcls/data/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
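A minimal usage sketch for the RawTimmAutoAugment op above (editor's illustration; the image file name is hypothetical, and in PaddleClas this class is normally invoked through the ppcls.data.preprocess pipeline rather than directly):

    from PIL import Image

    # build the op from a RandAugment-style config string
    aug = RawTimmAutoAugment(config_str="rand-m9-n3-mstd0.5", img_size=224)
    img = Image.open("demo.jpg").convert("RGB")  # hypothetical input image
    img_aug = aug(img)  # returns the augmented PIL.Image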
\ No newline at end of file diff --git a/src/PaddleClas/ppcls/data/utils/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/data/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..55982e8 Binary files /dev/null and b/src/PaddleClas/ppcls/data/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/utils/__pycache__/get_image_list.cpython-39.pyc b/src/PaddleClas/ppcls/data/utils/__pycache__/get_image_list.cpython-39.pyc new file mode 100644 index 0000000..7a158c4 Binary files /dev/null and b/src/PaddleClas/ppcls/data/utils/__pycache__/get_image_list.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/data/utils/get_image_list.py b/src/PaddleClas/ppcls/data/utils/get_image_list.py new file mode 100644 index 0000000..6f10935 --- /dev/null +++ b/src/PaddleClas/ppcls/data/utils/get_image_list.py @@ -0,0 +1,49 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import base64 +import numpy as np + + +def get_image_list(img_file): + imgs_lists = [] + if img_file is None or not os.path.exists(img_file): + raise Exception("no image file found in {}".format(img_file)) + + img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp'] + if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end: + imgs_lists.append(img_file) + elif os.path.isdir(img_file): + for single_file in os.listdir(img_file): + if single_file.split('.')[-1] in img_end: + imgs_lists.append(os.path.join(img_file, single_file)) + if len(imgs_lists) == 0: + raise Exception("no image file found in {}".format(img_file)) + imgs_lists = sorted(imgs_lists) + return imgs_lists + + +def get_image_list_from_label_file(image_path, label_file_path): + imgs_lists = [] + gt_labels = [] + with open(label_file_path, "r") as fin: + lines = fin.readlines() + for line in lines: + image_name, label = line.strip("\n").split() + label = int(label) + imgs_lists.append(os.path.join(image_path, image_name)) + gt_labels.append(label) + return imgs_lists, gt_labels diff --git a/src/PaddleClas/ppcls/engine/__init__.py b/src/PaddleClas/ppcls/engine/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/PaddleClas/ppcls/engine/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/engine/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..22fdcbb Binary files /dev/null and b/src/PaddleClas/ppcls/engine/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/__pycache__/engine.cpython-39.pyc b/src/PaddleClas/ppcls/engine/__pycache__/engine.cpython-39.pyc new file mode 100644 index 0000000..3281cbb Binary files /dev/null and b/src/PaddleClas/ppcls/engine/__pycache__/engine.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/engine.py b/src/PaddleClas/ppcls/engine/engine.py new file mode 100644 index 0000000..61d09ff --- /dev/null +++ b/src/PaddleClas/ppcls/engine/engine.py @@ -0,0 +1,465 @@ +# Copyright (c) 2021
PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import platform +import paddle +import paddle.distributed as dist +from visualdl import LogWriter +from paddle import nn +import numpy as np +import random + +from ppcls.utils.check import check_gpu +from ppcls.utils.misc import AverageMeter +from ppcls.utils import logger +from ppcls.utils.logger import init_logger +from ppcls.utils.config import print_config +from ppcls.data import build_dataloader +from ppcls.arch import build_model, RecModel, DistillationModel, TheseusLayer +from ppcls.arch import apply_to_static +from ppcls.loss import build_loss +from ppcls.metric import build_metrics +from ppcls.optimizer import build_optimizer +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url +from ppcls.utils.save_load import init_model +from ppcls.utils import save_load + +from ppcls.data.utils.get_image_list import get_image_list +from ppcls.data.postprocess import build_postprocess +from ppcls.data import create_operators +from ppcls.engine.train import train_epoch +from ppcls.engine import evaluation +from ppcls.arch.gears.identity_head import IdentityHead + + +class Engine(object): + def __init__(self, config, mode="train"): + assert mode in ["train", "eval", "infer", "export"] + self.mode = mode + self.config = config + self.eval_mode = self.config["Global"].get("eval_mode", + "classification") + if "Head" in self.config["Arch"] or self.config["Arch"].get("is_rec", + False): + self.is_rec = True + else: + self.is_rec = False + + # set seed + seed = self.config["Global"].get("seed", False) + if seed or seed == 0: + assert isinstance(seed, int), "The 'seed' must be an integer!"
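+ # editorial note: all three RNG sources used below (paddle, numpy, random) are seeded so runs are reproducible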
+ paddle.seed(seed) + np.random.seed(seed) + random.seed(seed) + + # init logger + self.output_dir = self.config['Global']['output_dir'] + log_file = os.path.join(self.output_dir, self.config["Arch"]["name"], + f"{mode}.log") + init_logger(name='root', log_file=log_file) + print_config(config) + + # init train_func and eval_func + assert self.eval_mode in ["classification", "retrieval"], logger.error( + "Invalid eval mode: {}".format(self.eval_mode)) + self.train_epoch_func = train_epoch + self.eval_func = getattr(evaluation, self.eval_mode + "_eval") + + self.use_dali = self.config['Global'].get("use_dali", False) + + # for visualdl + self.vdl_writer = None + if self.config['Global'][ + 'use_visualdl'] and mode == "train" and dist.get_rank() == 0: + vdl_writer_path = os.path.join(self.output_dir, "vdl") + if not os.path.exists(vdl_writer_path): + os.makedirs(vdl_writer_path) + self.vdl_writer = LogWriter(logdir=vdl_writer_path) + + # set device + assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu", "npu"] + self.device = paddle.set_device(self.config["Global"]["device"]) + logger.info('train with paddle {} and device {}'.format( + paddle.__version__, self.device)) + + # AMP training + self.amp = True if "AMP" in self.config and self.mode == "train" else False + if self.amp and self.config["AMP"] is not None: + self.scale_loss = self.config["AMP"].get("scale_loss", 1.0) + self.use_dynamic_loss_scaling = self.config["AMP"].get( + "use_dynamic_loss_scaling", False) + else: + self.scale_loss = 1.0 + self.use_dynamic_loss_scaling = False + if self.amp: + AMP_RELATED_FLAGS_SETTING = { + 'FLAGS_cudnn_batchnorm_spatial_persistent': 1, + 'FLAGS_max_inplace_grad_add': 8, + } + paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING) + + if "class_num" in config["Global"]: + global_class_num = config["Global"]["class_num"] + if "class_num" not in config["Arch"]: + config["Arch"]["class_num"] = global_class_num + msg = f"The Global.class_num will be deprecated. Please use Arch.class_num instead. Arch.class_num has been set to {global_class_num}." + else: + msg = "The Global.class_num will be deprecated. Please use Arch.class_num instead. The Global.class_num has been ignored." 
+ logger.warning(msg) + #TODO(gaotingquan): support rec + class_num = config["Arch"].get("class_num", None) + self.config["DataLoader"].update({"class_num": class_num}) + # build dataloader + if self.mode == 'train': + self.train_dataloader = build_dataloader( + self.config["DataLoader"], "Train", self.device, self.use_dali) + if self.mode == "eval" or (self.mode == "train" and + self.config["Global"]["eval_during_train"]): + if self.eval_mode == "classification": + self.eval_dataloader = build_dataloader( + self.config["DataLoader"], "Eval", self.device, + self.use_dali) + elif self.eval_mode == "retrieval": + self.gallery_query_dataloader = None + if len(self.config["DataLoader"]["Eval"].keys()) == 1: + key = list(self.config["DataLoader"]["Eval"].keys())[0] + self.gallery_query_dataloader = build_dataloader( + self.config["DataLoader"]["Eval"], key, self.device, + self.use_dali) + else: + self.gallery_dataloader = build_dataloader( + self.config["DataLoader"]["Eval"], "Gallery", + self.device, self.use_dali) + self.query_dataloader = build_dataloader( + self.config["DataLoader"]["Eval"], "Query", + self.device, self.use_dali) + + # build loss + if self.mode == "train": + loss_info = self.config["Loss"]["Train"] + self.train_loss_func = build_loss(loss_info) + if self.mode == "eval" or (self.mode == "train" and + self.config["Global"]["eval_during_train"]): + loss_config = self.config.get("Loss", None) + if loss_config is not None: + loss_config = loss_config.get("Eval") + if loss_config is not None: + self.eval_loss_func = build_loss(loss_config) + else: + self.eval_loss_func = None + else: + self.eval_loss_func = None + + # build metric + if self.mode == 'train': + metric_config = self.config.get("Metric") + if metric_config is not None: + metric_config = metric_config.get("Train") + if metric_config is not None: + if hasattr( + self.train_dataloader, "collate_fn" + ) and self.train_dataloader.collate_fn is not None: + for m_idx, m in enumerate(metric_config): + if "TopkAcc" in m: + msg = f"'TopkAcc' metric can not be used when setting 'batch_transform_ops' in config. The 'TopkAcc' metric has been removed." + logger.warning(msg) + break + metric_config.pop(m_idx) + self.train_metric_func = build_metrics(metric_config) + else: + self.train_metric_func = None + else: + self.train_metric_func = None + + if self.mode == "eval" or (self.mode == "train" and + self.config["Global"]["eval_during_train"]): + metric_config = self.config.get("Metric") + if self.eval_mode == "classification": + if metric_config is not None: + metric_config = metric_config.get("Eval") + if metric_config is not None: + self.eval_metric_func = build_metrics(metric_config) + elif self.eval_mode == "retrieval": + if metric_config is None: + metric_config = [{"name": "Recallk", "topk": (1, 5)}] + else: + metric_config = metric_config["Eval"] + self.eval_metric_func = build_metrics(metric_config) + else: + self.eval_metric_func = None + + # build model + self.model = build_model(self.config) + # set @to_static for benchmark, skip this by default. 
+ apply_to_static(self.config, self.model) + + # load_pretrain + if self.config["Global"]["pretrained_model"] is not None: + if self.config["Global"]["pretrained_model"].startswith("http"): + load_dygraph_pretrain_from_url( + self.model, self.config["Global"]["pretrained_model"]) + else: + load_dygraph_pretrain( + self.model, self.config["Global"]["pretrained_model"]) + + # build optimizer + if self.mode == 'train': + self.optimizer, self.lr_sch = build_optimizer( + self.config["Optimizer"], self.config["Global"]["epochs"], + len(self.train_dataloader), [self.model]) + + # for amp training + if self.amp: + self.scaler = paddle.amp.GradScaler( + init_loss_scaling=self.scale_loss, + use_dynamic_loss_scaling=self.use_dynamic_loss_scaling) + amp_level = self.config['AMP'].get("level", "O1") + if amp_level not in ["O1", "O2"]: + msg = "[Parameter Error]: The optimization level of AMP only supports 'O1' and 'O2'. The level has been set to 'O1'." + logger.warning(msg) + self.config['AMP']["level"] = "O1" + amp_level = "O1" + self.model, self.optimizer = paddle.amp.decorate( + models=self.model, + optimizers=self.optimizer, + level=amp_level, + save_dtype='float32') + + # for distributed + world_size = dist.get_world_size() + self.config["Global"]["distributed"] = world_size != 1 + if world_size != 4 and self.mode == "train": + msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in the current training. Please modify the strategy (learning rate, batch size and so on) if you use config files in PaddleClas to train." + logger.warning(msg) + if self.config["Global"]["distributed"]: + dist.init_parallel_env() + self.model = paddle.DataParallel(self.model) + + # build postprocess for infer + if self.mode == 'infer': + self.preprocess_func = create_operators(self.config["Infer"][ + "transforms"]) + self.postprocess_func = build_postprocess(self.config["Infer"][ + "PostProcess"]) + + def train(self): + assert self.mode == "train" + print_batch_step = self.config['Global']['print_batch_step'] + save_interval = self.config["Global"]["save_interval"] + best_metric = { + "metric": 0.0, + "epoch": 0, + } + # key: metric name, val: AverageMeter for that metric + self.output_info = dict() + self.time_info = { + "batch_cost": AverageMeter( + "batch_cost", '.5f', postfix=" s,"), + "reader_cost": AverageMeter( + "reader_cost", ".5f", postfix=" s,"), + } + # global iter counter + self.global_step = 0 + + if self.config["Global"]["checkpoints"] is not None: + metric_info = init_model(self.config["Global"], self.model, + self.optimizer) + if metric_info is not None: + best_metric.update(metric_info) + + self.max_iter = len(self.train_dataloader) - 1 if platform.system( + ) == "Windows" else len(self.train_dataloader) + for epoch_id in range(best_metric["epoch"] + 1, + self.config["Global"]["epochs"] + 1): + acc = 0.0 + # train for one epoch + self.train_epoch_func(self, epoch_id, print_batch_step) + + if self.use_dali: + self.train_dataloader.reset() + metric_msg = ", ".join([ + "{}: {:.5f}".format(key, self.output_info[key].avg) + for key in self.output_info + ]) + logger.info("[Train][Epoch {}/{}][Avg]{}".format( + epoch_id, self.config["Global"]["epochs"], metric_msg)) + self.output_info.clear() + + # eval model and save model if possible + if self.config["Global"][ + "eval_during_train"] and epoch_id % self.config["Global"][ + "eval_interval"] == 0: + acc = self.eval(epoch_id) + if acc > best_metric["metric"]: + best_metric["metric"] = acc + best_metric["epoch"] = epoch_id +
save_load.save_model( + self.model, + self.optimizer, + best_metric, + self.output_dir, + model_name=self.config["Arch"]["name"], + prefix="best_model") + logger.info("[Eval][Epoch {}][best metric: {}]".format( + epoch_id, best_metric["metric"])) + logger.scaler( + name="eval_acc", + value=acc, + step=epoch_id, + writer=self.vdl_writer) + + self.model.train() + + # save model + if epoch_id % save_interval == 0: + save_load.save_model( + self.model, + self.optimizer, {"metric": acc, + "epoch": epoch_id}, + self.output_dir, + model_name=self.config["Arch"]["name"], + prefix="epoch_{}".format(epoch_id)) + # save the latest model + save_load.save_model( + self.model, + self.optimizer, {"metric": acc, + "epoch": epoch_id}, + self.output_dir, + model_name=self.config["Arch"]["name"], + prefix="latest") + + if self.vdl_writer is not None: + self.vdl_writer.close() + + @paddle.no_grad() + def eval(self, epoch_id=0): + assert self.mode in ["train", "eval"] + self.model.eval() + eval_result = self.eval_func(self, epoch_id) + self.model.train() + return eval_result + + @paddle.no_grad() + def infer(self): + assert self.mode == "infer" and self.eval_mode == "classification" + total_trainer = dist.get_world_size() + local_rank = dist.get_rank() + image_list = get_image_list(self.config["Infer"]["infer_imgs"]) + # data split + image_list = image_list[local_rank::total_trainer] + + batch_size = self.config["Infer"]["batch_size"] + self.model.eval() + batch_data = [] + image_file_list = [] + for idx, image_file in enumerate(image_list): + with open(image_file, 'rb') as f: + x = f.read() + for process in self.preprocess_func: + x = process(x) + batch_data.append(x) + image_file_list.append(image_file) + if len(batch_data) >= batch_size or idx == len(image_list) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = self.postprocess_func(out, image_file_list) + print(result) + batch_data.clear() + image_file_list.clear() + + def export(self): + assert self.mode == "export" + use_multilabel = self.config["Global"].get("use_multilabel", False) + model = ExportModel(self.config["Arch"], self.model, use_multilabel) + if self.config["Global"]["pretrained_model"] is not None: + load_dygraph_pretrain(model.base_model, + self.config["Global"]["pretrained_model"]) + + model.eval() + save_path = os.path.join(self.config["Global"]["save_inference_dir"], + "inference") + if model.quanter: + model.quanter.save_quantized_model( + model.base_model, + save_path, + input_spec=[ + paddle.static.InputSpec( + shape=[None] + self.config["Global"]["image_shape"], + dtype='float32') + ]) + else: + model = paddle.jit.to_static( + model, + input_spec=[ + paddle.static.InputSpec( + shape=[None] + self.config["Global"]["image_shape"], + dtype='float32') + ]) + paddle.jit.save(model, save_path) + + +class ExportModel(TheseusLayer): + """ + ExportModel: add softmax onto the model + """ + + def __init__(self, config, model, use_multilabel): + super().__init__() + self.base_model = model + # we should choose a final model to export + if isinstance(self.base_model, DistillationModel): + self.infer_model_name = config["infer_model_name"] + else: + self.infer_model_name = None + + self.infer_output_key = config.get("infer_output_key", None) + if self.infer_output_key == "features" and isinstance(self.base_model, + 
RecModel): + self.base_model.head = IdentityHead() + if use_multilabel: + self.out_act = nn.Sigmoid() + else: + if config.get("infer_add_softmax", True): + self.out_act = nn.Softmax(axis=-1) + else: + self.out_act = None + + def eval(self): + self.training = False + for layer in self.sublayers(): + layer.training = False + layer.eval() + + def forward(self, x): + x = self.base_model(x) + if isinstance(x, list): + x = x[0] + if self.infer_model_name is not None: + x = x[self.infer_model_name] + if self.infer_output_key is not None: + x = x[self.infer_output_key] + if self.out_act is not None: + x = self.out_act(x) + return x diff --git a/src/PaddleClas/ppcls/engine/evaluation/__init__.py b/src/PaddleClas/ppcls/engine/evaluation/__init__.py new file mode 100644 index 0000000..e0cd778 --- /dev/null +++ b/src/PaddleClas/ppcls/engine/evaluation/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ppcls.engine.evaluation.classification import classification_eval +from ppcls.engine.evaluation.retrieval import retrieval_eval diff --git a/src/PaddleClas/ppcls/engine/evaluation/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..11ecf66 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/evaluation/__pycache__/classification.cpython-39.pyc b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/classification.cpython-39.pyc new file mode 100644 index 0000000..089bcd4 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/classification.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/evaluation/__pycache__/retrieval.cpython-39.pyc b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/retrieval.cpython-39.pyc new file mode 100644 index 0000000..61d40e3 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/evaluation/__pycache__/retrieval.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/evaluation/classification.py b/src/PaddleClas/ppcls/engine/evaluation/classification.py new file mode 100644 index 0000000..d7b5c47 --- /dev/null +++ b/src/PaddleClas/ppcls/engine/evaluation/classification.py @@ -0,0 +1,169 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
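Engine.export above drives paddle.jit.to_static with a dynamic-batch InputSpec before saving. A minimal standalone sketch of the same mechanism (editor's illustration on a toy layer, not PaddleClas code):

    import paddle

    layer = paddle.nn.Linear(8, 2)  # stand-in for the trained model
    static_layer = paddle.jit.to_static(
        layer,
        input_spec=[paddle.static.InputSpec(shape=[None, 8], dtype="float32")])
    paddle.jit.save(static_layer, "output/inference")  # writes *.pdmodel / *.pdiparams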
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import time +import platform +import paddle + +from ppcls.utils.misc import AverageMeter +from ppcls.utils import logger + + +def classification_eval(engine, epoch_id=0): + output_info = dict() + time_info = { + "batch_cost": AverageMeter( + "batch_cost", '.5f', postfix=" s,"), + "reader_cost": AverageMeter( + "reader_cost", ".5f", postfix=" s,"), + } + print_batch_step = engine.config["Global"]["print_batch_step"] + + metric_key = None + tic = time.time() + accum_samples = 0 + total_samples = len( + engine.eval_dataloader. + dataset) if not engine.use_dali else engine.eval_dataloader.size + max_iter = len(engine.eval_dataloader) - 1 if platform.system( + ) == "Windows" else len(engine.eval_dataloader) + for iter_id, batch in enumerate(engine.eval_dataloader): + if iter_id >= max_iter: + break + if iter_id == 5: + for key in time_info: + time_info[key].reset() + if engine.use_dali: + batch = [ + paddle.to_tensor(batch[0]['data']), + paddle.to_tensor(batch[0]['label']) + ] + time_info["reader_cost"].update(time.time() - tic) + batch_size = batch[0].shape[0] + batch[0] = paddle.to_tensor(batch[0]).astype("float32") + if not engine.config["Global"].get("use_multilabel", False): + batch[1] = batch[1].reshape([-1, 1]).astype("int64") + + # image input + if engine.amp: + amp_level = engine.config['AMP'].get("level", "O1").upper() + with paddle.amp.auto_cast( + custom_black_list={ + "flatten_contiguous_range", "greater_than" + }, + level=amp_level): + out = engine.model(batch[0]) + # calc loss + if engine.eval_loss_func is not None: + loss_dict = engine.eval_loss_func(out, batch[1]) + for key in loss_dict: + if key not in output_info: + output_info[key] = AverageMeter(key, '7.5f') + output_info[key].update(loss_dict[key].numpy()[0], + batch_size) + else: + out = engine.model(batch[0]) + # calc loss + if engine.eval_loss_func is not None: + loss_dict = engine.eval_loss_func(out, batch[1]) + for key in loss_dict: + if key not in output_info: + output_info[key] = AverageMeter(key, '7.5f') + output_info[key].update(loss_dict[key].numpy()[0], + batch_size) + + # just for DistributedBatchSampler issue: repeat sampling + current_samples = batch_size * paddle.distributed.get_world_size() + accum_samples += current_samples + + # calc metric + if engine.eval_metric_func is not None: + if paddle.distributed.get_world_size() > 1: + label_list = [] + paddle.distributed.all_gather(label_list, batch[1]) + labels = paddle.concat(label_list, 0) + + if isinstance(out, dict): + if "Student" in out: + out = out["Student"] + elif "logits" in out: + out = out["logits"] + else: + msg = "Error: Wrong key in out!" 
+ raise Exception(msg) + if isinstance(out, list): + pred = [] + for x in out: + pred_list = [] + paddle.distributed.all_gather(pred_list, x) + pred_x = paddle.concat(pred_list, 0) + pred.append(pred_x) + else: + pred_list = [] + paddle.distributed.all_gather(pred_list, out) + pred = paddle.concat(pred_list, 0) + + if accum_samples > total_samples and not engine.use_dali: + pred = pred[:total_samples + current_samples - + accum_samples] + labels = labels[:total_samples + current_samples - + accum_samples] + current_samples = total_samples + current_samples - accum_samples + metric_dict = engine.eval_metric_func(pred, labels) + else: + metric_dict = engine.eval_metric_func(out, batch[1]) + + for key in metric_dict: + if metric_key is None: + metric_key = key + if key not in output_info: + output_info[key] = AverageMeter(key, '7.5f') + + output_info[key].update(metric_dict[key].numpy()[0], + current_samples) + + time_info["batch_cost"].update(time.time() - tic) + + if iter_id % print_batch_step == 0: + time_msg = "s, ".join([ + "{}: {:.5f}".format(key, time_info[key].avg) + for key in time_info + ]) + + ips_msg = "ips: {:.5f} images/sec".format( + batch_size / time_info["batch_cost"].avg) + + metric_msg = ", ".join([ + "{}: {:.5f}".format(key, output_info[key].val) + for key in output_info + ]) + logger.info("[Eval][Epoch {}][Iter: {}/{}]{}, {}, {}".format( + epoch_id, iter_id, + len(engine.eval_dataloader), metric_msg, time_msg, ips_msg)) + + tic = time.time() + if engine.use_dali: + engine.eval_dataloader.reset() + metric_msg = ", ".join([ + "{}: {:.5f}".format(key, output_info[key].avg) for key in output_info + ]) + logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg)) + + # do not try to save best eval.model + if engine.eval_metric_func is None: + return -1 + # return 1st metric in the dict + return output_info[metric_key].avg diff --git a/src/PaddleClas/ppcls/engine/evaluation/retrieval.py b/src/PaddleClas/ppcls/engine/evaluation/retrieval.py new file mode 100644 index 0000000..8471a42 --- /dev/null +++ b/src/PaddleClas/ppcls/engine/evaluation/retrieval.py @@ -0,0 +1,171 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import platform +import paddle +from ppcls.utils import logger + + +def retrieval_eval(engine, epoch_id=0): + engine.model.eval() + # step1. build gallery + if engine.gallery_query_dataloader is not None: + gallery_feas, gallery_img_id, gallery_unique_id = cal_feature( + engine, name='gallery_query') + query_feas, query_img_id, query_query_id = gallery_feas, gallery_img_id, gallery_unique_id + else: + gallery_feas, gallery_img_id, gallery_unique_id = cal_feature( + engine, name='gallery') + query_feas, query_img_id, query_query_id = cal_feature( + engine, name='query') + + # step2. 
do evaluation + sim_block_size = engine.config["Global"].get("sim_block_size", 64) + sections = [sim_block_size] * (len(query_feas) // sim_block_size) + if len(query_feas) % sim_block_size: + sections.append(len(query_feas) % sim_block_size) + fea_blocks = paddle.split(query_feas, num_or_sections=sections) + if query_query_id is not None: + query_id_blocks = paddle.split( + query_query_id, num_or_sections=sections) + image_id_blocks = paddle.split(query_img_id, num_or_sections=sections) + metric_key = None + + if engine.eval_loss_func is None: + metric_dict = {metric_key: 0.} + else: + metric_dict = dict() + for block_idx, block_fea in enumerate(fea_blocks): + similarity_matrix = paddle.matmul( + block_fea, gallery_feas, transpose_y=True) + if query_query_id is not None: + query_id_block = query_id_blocks[block_idx] + query_id_mask = (query_id_block != gallery_unique_id.t()) + + image_id_block = image_id_blocks[block_idx] + image_id_mask = (image_id_block != gallery_img_id.t()) + + keep_mask = paddle.logical_or(query_id_mask, image_id_mask) + similarity_matrix = similarity_matrix * keep_mask.astype( + "float32") + else: + keep_mask = None + + metric_tmp = engine.eval_metric_func(similarity_matrix, + image_id_blocks[block_idx], + gallery_img_id, keep_mask) + + for key in metric_tmp: + if key not in metric_dict: + metric_dict[key] = metric_tmp[key] * block_fea.shape[ + 0] / len(query_feas) + else: + metric_dict[key] += metric_tmp[key] * block_fea.shape[ + 0] / len(query_feas) + + metric_info_list = [] + for key in metric_dict: + if metric_key is None: + metric_key = key + metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key])) + metric_msg = ", ".join(metric_info_list) + logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg)) + + return metric_dict[metric_key] + + +def cal_feature(engine, name='gallery'): + all_feas = None + all_image_id = None + all_unique_id = None + has_unique_id = False + + if name == 'gallery': + dataloader = engine.gallery_dataloader + elif name == 'query': + dataloader = engine.query_dataloader + elif name == 'gallery_query': + dataloader = engine.gallery_query_dataloader + else: + raise RuntimeError("Only support gallery or query dataset") + + max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len( + dataloader) + for idx, batch in enumerate(dataloader): # load is very time-consuming + if idx >= max_iter: + break + if idx % engine.config["Global"]["print_batch_step"] == 0: + logger.info( + f"{name} feature calculation process: [{idx}/{len(dataloader)}]" + ) + if engine.use_dali: + batch = [ + paddle.to_tensor(batch[0]['data']), + paddle.to_tensor(batch[0]['label']) + ] + batch = [paddle.to_tensor(x) for x in batch] + batch[1] = batch[1].reshape([-1, 1]).astype("int64") + if len(batch) == 3: + has_unique_id = True + batch[2] = batch[2].reshape([-1, 1]).astype("int64") + out = engine.model(batch[0], batch[1]) + if "Student" in out: + out = out["Student"] + batch_feas = out["features"] + + # do norm + if engine.config["Global"].get("feature_normalize", True): + feas_norm = paddle.sqrt( + paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True)) + batch_feas = paddle.divide(batch_feas, feas_norm) + + # do binarize + if engine.config["Global"].get("feature_binarize") == "round": + batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0 + + if engine.config["Global"].get("feature_binarize") == "sign": + batch_feas = paddle.sign(batch_feas).astype("float32") + + if all_feas is None: + all_feas = batch_feas + if 
has_unique_id: + all_unique_id = batch[2] + all_image_id = batch[1] + else: + all_feas = paddle.concat([all_feas, batch_feas]) + all_image_id = paddle.concat([all_image_id, batch[1]]) + if has_unique_id: + all_unique_id = paddle.concat([all_unique_id, batch[2]]) + + if engine.use_dali: + dataloader.reset() + + if paddle.distributed.get_world_size() > 1: + feat_list = [] + img_id_list = [] + unique_id_list = [] + paddle.distributed.all_gather(feat_list, all_feas) + paddle.distributed.all_gather(img_id_list, all_image_id) + all_feas = paddle.concat(feat_list, axis=0) + all_image_id = paddle.concat(img_id_list, axis=0) + if has_unique_id: + paddle.distributed.all_gather(unique_id_list, all_unique_id) + all_unique_id = paddle.concat(unique_id_list, axis=0) + + logger.info("Build {} done, all feat shape: {}, begin to eval..".format( + name, all_feas.shape)) + return all_feas, all_image_id, all_unique_id diff --git a/src/PaddleClas/ppcls/engine/train/__init__.py b/src/PaddleClas/ppcls/engine/train/__init__.py new file mode 100644 index 0000000..800d3a4 --- /dev/null +++ b/src/PaddleClas/ppcls/engine/train/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ppcls.engine.train.train import train_epoch diff --git a/src/PaddleClas/ppcls/engine/train/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/engine/train/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..01cb834 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/train/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/train/__pycache__/train.cpython-39.pyc b/src/PaddleClas/ppcls/engine/train/__pycache__/train.cpython-39.pyc new file mode 100644 index 0000000..ea695c6 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/train/__pycache__/train.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/train/__pycache__/utils.cpython-39.pyc b/src/PaddleClas/ppcls/engine/train/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000..ac0f4e0 Binary files /dev/null and b/src/PaddleClas/ppcls/engine/train/__pycache__/utils.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/engine/train/train.py b/src/PaddleClas/ppcls/engine/train/train.py new file mode 100644 index 0000000..3b02bac --- /dev/null +++ b/src/PaddleClas/ppcls/engine/train/train.py @@ -0,0 +1,83 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import, division, print_function + +import time +import paddle +from ppcls.engine.train.utils import update_loss, update_metric, log_info +from ppcls.utils import profiler + + +def train_epoch(engine, epoch_id, print_batch_step): + tic = time.time() + v_current = [int(i) for i in paddle.__version__.split(".")] + for iter_id, batch in enumerate(engine.train_dataloader): + if iter_id >= engine.max_iter: + break + profiler.add_profiler_step(engine.config["profiler_options"]) + if iter_id == 5: + for key in engine.time_info: + engine.time_info[key].reset() + engine.time_info["reader_cost"].update(time.time() - tic) + if engine.use_dali: + batch = [ + paddle.to_tensor(batch[0]['data']), + paddle.to_tensor(batch[0]['label']) + ] + batch_size = batch[0].shape[0] + if not engine.config["Global"].get("use_multilabel", False): + batch[1] = batch[1].reshape([batch_size, -1]) + engine.global_step += 1 + + # image input + if engine.amp: + amp_level = engine.config['AMP'].get("level", "O1").upper() + with paddle.amp.auto_cast( + custom_black_list={ + "flatten_contiguous_range", "greater_than" + }, + level=amp_level): + out = forward(engine, batch) + loss_dict = engine.train_loss_func(out, batch[1]) + else: + out = forward(engine, batch) + loss_dict = engine.train_loss_func(out, batch[1]) + + # step opt and lr + if engine.amp: + scaled = engine.scaler.scale(loss_dict["loss"]) + scaled.backward() + engine.scaler.minimize(engine.optimizer, scaled) + else: + loss_dict["loss"].backward() + engine.optimizer.step() + engine.optimizer.clear_grad() + engine.lr_sch.step() + + # below code just for logging + # update metric_for_logger + update_metric(engine, out, batch, batch_size) + # update_loss_for_logger + update_loss(engine, loss_dict, batch_size) + engine.time_info["batch_cost"].update(time.time() - tic) + if iter_id % print_batch_step == 0: + log_info(engine, batch_size, epoch_id, iter_id) + tic = time.time() + + +def forward(engine, batch): + if not engine.is_rec: + return engine.model(batch[0]) + else: + return engine.model(batch[0], batch[1]) diff --git a/src/PaddleClas/ppcls/engine/train/utils.py b/src/PaddleClas/ppcls/engine/train/utils.py new file mode 100644 index 0000000..92eb35d --- /dev/null +++ b/src/PaddleClas/ppcls/engine/train/utils.py @@ -0,0 +1,72 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
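The AMP branch in train_epoch above follows Paddle's scale-then-minimize pattern. A condensed sketch (editor's illustration; model, loss_fn, optimizer and the batch tensors x, y are assumed to exist):

    import paddle

    scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
    with paddle.amp.auto_cast(level="O1"):
        out = model(x)           # forward runs under auto mixed precision
        loss = loss_fn(out, y)
    scaled = scaler.scale(loss)  # scale the loss to avoid fp16 underflow
    scaled.backward()
    scaler.minimize(optimizer, scaled)  # unscales grads, then steps the optimizer
    optimizer.clear_grad()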
+from __future__ import absolute_import, division, print_function + +import datetime +from ppcls.utils import logger +from ppcls.utils.misc import AverageMeter + + +def update_metric(trainer, out, batch, batch_size): + # calc metric + if trainer.train_metric_func is not None: + metric_dict = trainer.train_metric_func(out, batch[-1]) + for key in metric_dict: + if key not in trainer.output_info: + trainer.output_info[key] = AverageMeter(key, '7.5f') + trainer.output_info[key].update(metric_dict[key].numpy()[0], + batch_size) + + +def update_loss(trainer, loss_dict, batch_size): + # update_output_info + for key in loss_dict: + if key not in trainer.output_info: + trainer.output_info[key] = AverageMeter(key, '7.5f') + trainer.output_info[key].update(loss_dict[key].numpy()[0], batch_size) + + +def log_info(trainer, batch_size, epoch_id, iter_id): + lr_msg = "lr: {:.5f}".format(trainer.lr_sch.get_lr()) + metric_msg = ", ".join([ + "{}: {:.5f}".format(key, trainer.output_info[key].avg) + for key in trainer.output_info + ]) + time_msg = "s, ".join([ + "{}: {:.5f}".format(key, trainer.time_info[key].avg) + for key in trainer.time_info + ]) + + ips_msg = "ips: {:.5f} images/sec".format( + batch_size / trainer.time_info["batch_cost"].avg) + eta_sec = ((trainer.config["Global"]["epochs"] - epoch_id + 1 + ) * len(trainer.train_dataloader) - iter_id + ) * trainer.time_info["batch_cost"].avg + eta_msg = "eta: {:s}".format(str(datetime.timedelta(seconds=int(eta_sec)))) + logger.info("[Train][Epoch {}/{}][Iter: {}/{}]{}, {}, {}, {}, {}".format( + epoch_id, trainer.config["Global"]["epochs"], iter_id, + len(trainer.train_dataloader), lr_msg, metric_msg, time_msg, ips_msg, + eta_msg)) + + logger.scaler( + name="lr", + value=trainer.lr_sch.get_lr(), + step=trainer.global_step, + writer=trainer.vdl_writer) + for key in trainer.output_info: + logger.scaler( + name="train_{}".format(key), + value=trainer.output_info[key].avg, + step=trainer.global_step, + writer=trainer.vdl_writer) diff --git a/src/PaddleClas/ppcls/loss/__init__.py b/src/PaddleClas/ppcls/loss/__init__.py new file mode 100644 index 0000000..d15dab9 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/__init__.py @@ -0,0 +1,67 @@ +import copy + +import paddle +import paddle.nn as nn +from ppcls.utils import logger + +from .celoss import CELoss, MixCELoss +from .googlenetloss import GoogLeNetLoss +from .centerloss import CenterLoss +from .emlloss import EmlLoss +from .msmloss import MSMLoss +from .npairsloss import NpairsLoss +from .trihardloss import TriHardLoss +from .triplet import TripletLoss, TripletLossV2 +from .supconloss import SupConLoss +from .pairwisecosface import PairwiseCosface +from .dmlloss import DMLLoss +from .distanceloss import DistanceLoss + +from .distillationloss import DistillationCELoss +from .distillationloss import DistillationGTCELoss +from .distillationloss import DistillationDMLLoss +from .distillationloss import DistillationDistanceLoss +from .distillationloss import DistillationRKDLoss +from .multilabelloss import MultiLabelLoss + +from .deephashloss import DSHSDLoss, LCDSHLoss + + +class CombinedLoss(nn.Layer): + def __init__(self, config_list): + super().__init__() + self.loss_func = [] + self.loss_weight = [] + assert isinstance(config_list, list), ( + 'operator config should be a list') + for config in config_list: + assert isinstance(config, + dict) and len(config) == 1, "yaml format error" + name = list(config)[0] + param = config[name] + assert "weight" in param, "weight must be in param, but param just contains 
{}".format( + param.keys()) + self.loss_weight.append(param.pop("weight")) + self.loss_func.append(eval(name)(**param)) + + def __call__(self, input, batch): + loss_dict = {} + # just for accelerate classification traing speed + if len(self.loss_func) == 1: + loss = self.loss_func[0](input, batch) + loss_dict.update(loss) + loss_dict["loss"] = list(loss.values())[0] + else: + for idx, loss_func in enumerate(self.loss_func): + loss = loss_func(input, batch) + weight = self.loss_weight[idx] + loss = {key: loss[key] * weight for key in loss} + loss_dict.update(loss) + loss_dict["loss"] = paddle.add_n(list(loss_dict.values())) + return loss_dict + + +def build_loss(config): + module_class = CombinedLoss(copy.deepcopy(config)) + logger.debug("build loss {} success.".format(module_class)) + return module_class diff --git a/src/PaddleClas/ppcls/loss/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..cc3358b Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/celoss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/celoss.cpython-39.pyc new file mode 100644 index 0000000..794a040 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/celoss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/centerloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/centerloss.cpython-39.pyc new file mode 100644 index 0000000..43e5df7 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/centerloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/comfunc.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/comfunc.cpython-39.pyc new file mode 100644 index 0000000..0baf00e Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/comfunc.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/deephashloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/deephashloss.cpython-39.pyc new file mode 100644 index 0000000..9260c26 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/deephashloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/distanceloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/distanceloss.cpython-39.pyc new file mode 100644 index 0000000..20e799b Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/distanceloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/distillationloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/distillationloss.cpython-39.pyc new file mode 100644 index 0000000..9a669ee Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/distillationloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/dmlloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/dmlloss.cpython-39.pyc new file mode 100644 index 0000000..79557bd Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/dmlloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/emlloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/emlloss.cpython-39.pyc new file mode 100644 index 0000000..ac8b739 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/emlloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/googlenetloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/googlenetloss.cpython-39.pyc new file mode 100644 
index 0000000..cf905df Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/googlenetloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/msmloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/msmloss.cpython-39.pyc new file mode 100644 index 0000000..4591ee7 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/msmloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/multilabelloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/multilabelloss.cpython-39.pyc new file mode 100644 index 0000000..385d408 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/multilabelloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/npairsloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/npairsloss.cpython-39.pyc new file mode 100644 index 0000000..a31cb1c Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/npairsloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/pairwisecosface.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/pairwisecosface.cpython-39.pyc new file mode 100644 index 0000000..86cee45 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/pairwisecosface.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/rkdloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/rkdloss.cpython-39.pyc new file mode 100644 index 0000000..3ef7b9f Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/rkdloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/supconloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/supconloss.cpython-39.pyc new file mode 100644 index 0000000..e537144 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/supconloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/trihardloss.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/trihardloss.cpython-39.pyc new file mode 100644 index 0000000..56e2651 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/trihardloss.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/__pycache__/triplet.cpython-39.pyc b/src/PaddleClas/ppcls/loss/__pycache__/triplet.cpython-39.pyc new file mode 100644 index 0000000..8554871 Binary files /dev/null and b/src/PaddleClas/ppcls/loss/__pycache__/triplet.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/loss/celoss.py b/src/PaddleClas/ppcls/loss/celoss.py new file mode 100644 index 0000000..a789261 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/celoss.py @@ -0,0 +1,67 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
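The CombinedLoss/build_loss pair above consumes the YAML "Loss" section as a list of single-key dicts, each carrying a mandatory "weight". A hedged in-Python equivalent of such a config (editor's illustration; the placeholder tensors are arbitrary):

    loss_config = [
        {"CELoss": {"weight": 1.0, "epsilon": 0.1}},  # cross entropy with label smoothing
    ]
    loss_fn = build_loss(loss_config)  # returns a CombinedLoss instance
    # loss_dict = loss_fn(model_output, labels)
    # loss_dict["loss"] holds the weighted sum that gets backpropagated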
+ +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from ppcls.utils import logger + + +class CELoss(nn.Layer): + """ + Cross entropy loss + """ + + def __init__(self, epsilon=None): + super().__init__() + if epsilon is not None and (epsilon <= 0 or epsilon >= 1): + epsilon = None + self.epsilon = epsilon + + def _labelsmoothing(self, target, class_num): + if len(target.shape) == 1 or target.shape[-1] != class_num: + one_hot_target = F.one_hot(target, class_num) + else: + one_hot_target = target + soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon) + soft_target = paddle.reshape(soft_target, shape=[-1, class_num]) + return soft_target + + def forward(self, x, label): + if isinstance(x, dict): + x = x["logits"] + if self.epsilon is not None: + class_num = x.shape[-1] + label = self._labelsmoothing(label, class_num) + x = -F.log_softmax(x, axis=-1) + loss = paddle.sum(x * label, axis=-1) + else: + if label.shape[-1] == x.shape[-1]: + label = F.softmax(label, axis=-1) + soft_label = True + else: + soft_label = False + loss = F.cross_entropy(x, label=label, soft_label=soft_label) + loss = loss.mean() + return {"CELoss": loss} + + +class MixCELoss(object): + def __init__(self, *args, **kwargs): + msg = "\"MixCELoss\" is deprecated, please use \"CELoss\" instead." + logger.error(DeprecationWarning(msg)) + raise DeprecationWarning(msg) diff --git a/src/PaddleClas/ppcls/loss/centerloss.py b/src/PaddleClas/ppcls/loss/centerloss.py new file mode 100644 index 0000000..d85b3f2 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/centerloss.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class CenterLoss(nn.Layer): + def __init__(self, num_classes=5013, feat_dim=2048): + super(CenterLoss, self).__init__() + self.num_classes = num_classes + self.feat_dim = feat_dim + self.centers = paddle.randn( + shape=[self.num_classes, self.feat_dim]).astype( + "float64") # random init of class centers + + def __call__(self, input, target): + """ + input: network output dict: {"features": xxx, "logits": xxx} + target: image label + """ + feats = input["features"] + labels = target + batch_size = feats.shape[0] + + # calc feat * feat + dist1 = paddle.sum(paddle.square(feats), axis=1, keepdim=True) + dist1 = paddle.expand(dist1, [batch_size, self.num_classes]) + + # dist2 of centers + dist2 = paddle.sum(paddle.square(self.centers), axis=1, + keepdim=True) # num_classes + dist2 = paddle.expand(dist2, + [self.num_classes, batch_size]).astype("float64") + dist2 = paddle.transpose(dist2, [1, 0]) + + # first x * x + y * y + distmat = paddle.add(dist1, dist2) + tmp = paddle.matmul(feats, paddle.transpose(self.centers, [1, 0])) + distmat = distmat - 2.0 * tmp + + # generate the mask + classes = paddle.arange(self.num_classes).astype("int64") + labels = paddle.expand( + paddle.unsqueeze(labels, 1), (batch_size, self.num_classes)) + mask = paddle.equal( + paddle.expand(classes, [batch_size, self.num_classes]), + labels).astype("float64") # get mask + + dist = paddle.multiply(distmat, mask) + loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size + + return {'CenterLoss': loss} diff --git a/src/PaddleClas/ppcls/loss/comfunc.py b/src/PaddleClas/ppcls/loss/comfunc.py new file mode 100644 index 0000000..277bdd6 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/comfunc.py @@ -0,0 +1,45 @@ +# Copyright (c) 2018 PaddlePaddle Authors.
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +def rerange_index(batch_size, samples_each_class): + tmp = np.arange(0, batch_size * batch_size) + tmp = tmp.reshape(-1, batch_size) + rerange_index = [] + + for i in range(batch_size): + step = i // samples_each_class + start = step * samples_each_class + end = (step + 1) * samples_each_class + + pos_idx = [] + neg_idx = [] + for j, k in enumerate(tmp[i]): + if j >= start and j < end: + if j == i: + pos_idx.insert(0, k) + else: + pos_idx.append(k) + else: + neg_idx.append(k) + rerange_index += (pos_idx + neg_idx) + + rerange_index = np.array(rerange_index).astype(np.int32) + return rerange_index diff --git a/src/PaddleClas/ppcls/loss/deephashloss.py b/src/PaddleClas/ppcls/loss/deephashloss.py new file mode 100644 index 0000000..c9a58dc --- /dev/null +++ b/src/PaddleClas/ppcls/loss/deephashloss.py @@ -0,0 +1,92 @@ +#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
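The rerange_index helper above permutes a flattened batch_size x batch_size distance matrix so that each row leads with the anchor's own entry, then the other samples of its class, then all negatives. A minimal sketch of what it produces, assuming batch_size=4 and samples_each_class=2 (values chosen purely for illustration):

import numpy as np
from ppcls.loss.comfunc import rerange_index  # module path as added in this diff

idx = rerange_index(batch_size=4, samples_each_class=2)
print(idx.reshape(4, 4))
# row 0 -> [ 0,  1,  2,  3]  (self, positive, then negatives)
# row 1 -> [ 5,  4,  6,  7]  (entry 5 = d(1,1) is the self term)
# row 2 -> [10, 11,  8,  9]
# row 3 -> [15, 14, 12, 13]

The EML, MSM and TriHard losses below gather a flattened distance matrix with this index, so a single paddle.split can slice off the self column, the positives, and the negatives.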
+ +import paddle +import paddle.nn as nn + +class DSHSDLoss(nn.Layer): + """ + # DSHSD(IEEE ACCESS 2019) + # paper [Deep Supervised Hashing Based on Stable Distribution](https://ieeexplore.ieee.org/document/8648432/) + # [DSHSD] epoch:70, bit:48, dataset:cifar10-1, MAP:0.809, Best MAP: 0.809 + # [DSHSD] epoch:250, bit:48, dataset:nuswide_21, MAP:0.809, Best MAP: 0.815 + # [DSHSD] epoch:135, bit:48, dataset:imagenet, MAP:0.647, Best MAP: 0.647 + """ + def __init__(self, alpha, multi_label=False): + super(DSHSDLoss, self).__init__() + self.alpha = alpha + self.multi_label = multi_label + + def forward(self, input, label): + feature = input["features"] + logits = input["logits"] + + dist = paddle.sum(paddle.square( + (paddle.unsqueeze(feature, 1) - paddle.unsqueeze(feature, 0))), + axis=2) + + # convert label to one-hot + label = paddle.flatten(label) + n_class = logits.shape[1] + label = paddle.nn.functional.one_hot(label, n_class).astype("float32") + + s = (paddle.matmul( + label, label, transpose_y=True) == 0).astype("float32") + margin = 2 * feature.shape[1] + Ld = (1 - s) / 2 * dist + s / 2 * (margin - dist).clip(min=0) + Ld = Ld.mean() + + if self.multi_label: + # multi-label classification loss + Lc = (logits - label * logits + ( + (1 + (-logits).exp()).log())).sum(axis=1).mean() + else: + # single-label classification loss + Lc = (-paddle.nn.functional.softmax(logits).log() * label).sum( + axis=1).mean() + + return {"dshsdloss": Lc + Ld * self.alpha} + + +class LCDSHLoss(nn.Layer): + """ + # paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf) + # [LCDSH] epoch:145, bit:48, dataset:cifar10-1, MAP:0.798, Best MAP: 0.798 + # [LCDSH] epoch:183, bit:48, dataset:nuswide_21, MAP:0.833, Best MAP: 0.834 + """ + def __init__(self, n_class, _lambda): + super(LCDSHLoss, self).__init__() + self._lambda = _lambda + self.n_class = n_class + + def forward(self, input, label): + feature = input["features"] + + # convert label to one-hot + label = paddle.flatten(label) + label = paddle.nn.functional.one_hot(label, self.n_class).astype("float32") + + s = 2 * (paddle.matmul(label, label, transpose_y=True) > 0).astype("float32") - 1 + inner_product = paddle.matmul(feature, feature, transpose_y=True) * 0.5 + + inner_product = inner_product.clip(min=-50, max=50) + L1 = paddle.log(1 + paddle.exp(-s * inner_product)).mean() + + b = feature.sign() + inner_product_ = paddle.matmul(b, b, transpose_y=True) * 0.5 + sigmoid = paddle.nn.Sigmoid() + L2 = (sigmoid(inner_product) - sigmoid(inner_product_)).pow(2).mean() + + return {"lcdshloss": L1 + self._lambda * L2} +
diff --git a/src/PaddleClas/ppcls/loss/distanceloss.py b/src/PaddleClas/ppcls/loss/distanceloss.py new file mode 100644 index 0000000..0a09f0c --- /dev/null +++ b/src/PaddleClas/ppcls/loss/distanceloss.py @@ -0,0 +1,43 @@ +#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License.
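As a quick sanity check of the DSHSDLoss just shown, one can feed it random tensors in the {"features", "logits"} format used throughout this loss package. A minimal sketch, with batch size 8, 48-bit hash features and 10 classes as illustrative assumptions:

import paddle
from ppcls.loss.deephashloss import DSHSDLoss

feats = paddle.randn([8, 48])           # hash features; margin becomes 2 * 48
logits = paddle.randn([8, 10])          # classification head output
labels = paddle.randint(0, 10, [8, 1])  # integer labels, flattened internally
loss_fn = DSHSDLoss(alpha=0.05)         # alpha value is an arbitrary choice here
print(loss_fn({"features": feats, "logits": logits}, labels)["dshsdloss"])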
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddle.nn import L1Loss +from paddle.nn import MSELoss as L2Loss +from paddle.nn import SmoothL1Loss + + +class DistanceLoss(nn.Layer): + """ + DistanceLoss: + mode: loss mode + """ + + def __init__(self, mode="l2", **kargs): + super().__init__() + assert mode in ["l1", "l2", "smooth_l1"] + if mode == "l1": + self.loss_func = nn.L1Loss(**kargs) + elif mode == "l2": + self.loss_func = nn.MSELoss(**kargs) + elif mode == "smooth_l1": + self.loss_func = nn.SmoothL1Loss(**kargs) + self.mode = mode + + def forward(self, x, y): + loss = self.loss_func(x, y) + return {"loss_{}".format(self.mode): loss} diff --git a/src/PaddleClas/ppcls/loss/distillationloss.py b/src/PaddleClas/ppcls/loss/distillationloss.py new file mode 100644 index 0000000..0340234 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/distillationloss.py @@ -0,0 +1,174 @@ +#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import paddle +import paddle.nn as nn + +from .celoss import CELoss +from .dmlloss import DMLLoss +from .distanceloss import DistanceLoss +from .rkdloss import RKdAngle, RkdDistance + + +class DistillationCELoss(CELoss): + """ + DistillationCELoss + """ + + def __init__(self, + model_name_pairs=[], + epsilon=None, + key=None, + name="loss_ce"): + super().__init__(epsilon=epsilon) + assert isinstance(model_name_pairs, list) + self.key = key + self.model_name_pairs = model_name_pairs + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, pair in enumerate(self.model_name_pairs): + out1 = predicts[pair[0]] + out2 = predicts[pair[1]] + if self.key is not None: + out1 = out1[self.key] + out2 = out2[self.key] + loss = super().forward(out1, out2) + for key in loss: + loss_dict["{}_{}_{}".format(key, pair[0], pair[1])] = loss[key] + return loss_dict + + +class DistillationGTCELoss(CELoss): + """ + DistillationGTCELoss + """ + + def __init__(self, + model_names=[], + epsilon=None, + key=None, + name="loss_gt_ce"): + super().__init__(epsilon=epsilon) + assert isinstance(model_names, list) + self.key = key + self.model_names = model_names + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for name in self.model_names: + out = predicts[name] + if self.key is not None: + out = out[self.key] + loss = super().forward(out, batch) + for key in loss: + loss_dict["{}_{}".format(key, name)] = loss[key] + return loss_dict + + +class DistillationDMLLoss(DMLLoss): + """ + """ + + def __init__(self, + model_name_pairs=[], + act="softmax", + key=None, + name="loss_dml"): + super().__init__(act=act) + assert isinstance(model_name_pairs, list) + self.key = key + self.model_name_pairs = model_name_pairs + self.name = name + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, pair in enumerate(self.model_name_pairs): + out1 = predicts[pair[0]] + out2 = predicts[pair[1]] + if self.key is not None: + out1 = out1[self.key] + out2 = 
out2[self.key] + loss = super().forward(out1, out2) + if isinstance(loss, dict): + for key in loss: + loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1], + idx)] = loss[key] + else: + loss_dict["{}_{}".format(self.name, idx)] = loss + return loss_dict + + +class DistillationDistanceLoss(DistanceLoss): + """ + """ + + def __init__(self, + mode="l2", + model_name_pairs=[], + key=None, + name="loss_", + **kargs): + super().__init__(mode=mode, **kargs) + assert isinstance(model_name_pairs, list) + self.key = key + self.model_name_pairs = model_name_pairs + self.name = name + mode + + def forward(self, predicts, batch): + loss_dict = dict() + for idx, pair in enumerate(self.model_name_pairs): + out1 = predicts[pair[0]] + out2 = predicts[pair[1]] + if self.key is not None: + out1 = out1[self.key] + out2 = out2[self.key] + loss = super().forward(out1, out2) + for key in loss: + loss_dict["{}_{}_{}".format(self.name, key, idx)] = loss[key] + return loss_dict + + +class DistillationRKDLoss(nn.Layer): + def __init__(self, + target_size=None, + model_name_pairs=(["Student", "Teacher"], ), + student_keepkeys=[], + teacher_keepkeys=[]): + super().__init__() + self.student_keepkeys = student_keepkeys + self.teacher_keepkeys = teacher_keepkeys + self.model_name_pairs = model_name_pairs + assert len(self.student_keepkeys) == len(self.teacher_keepkeys) + + self.rkd_angle_loss = RKdAngle(target_size=target_size) + self.rkd_dist_loss = RkdDistance(target_size=target_size) + + def __call__(self, predicts, batch): + loss_dict = {} + for m1, m2 in self.model_name_pairs: + for idx, ( + student_name, teacher_name + ) in enumerate(zip(self.student_keepkeys, self.teacher_keepkeys)): + student_out = predicts[m1][student_name] + teacher_out = predicts[m2][teacher_name] + + loss_dict[f"loss_angle_{idx}_{m1}_{m2}"] = self.rkd_angle_loss( + student_out, teacher_out) + loss_dict[f"loss_dist_{idx}_{m1}_{m2}"] = self.rkd_dist_loss( + student_out, teacher_out) + + return loss_dict diff --git a/src/PaddleClas/ppcls/loss/dmlloss.py b/src/PaddleClas/ppcls/loss/dmlloss.py new file mode 100644 index 0000000..48bf6c0 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/dmlloss.py @@ -0,0 +1,50 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
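The distillation losses above all follow the same pattern: they receive the dict returned by DistillationModel (one entry per sub-model), optionally descend into a sub-key, and delegate to the wrapped loss for each model pair. A minimal sketch with a hand-built predicts dict (the "Student"/"Teacher" names and shapes are illustrative):

import paddle
from ppcls.loss.distillationloss import DistillationDMLLoss

predicts = {
    "Student": {"logits": paddle.randn([4, 10])},
    "Teacher": {"logits": paddle.randn([4, 10])},
}
loss_fn = DistillationDMLLoss(
    model_name_pairs=[["Student", "Teacher"]], key="logits")
print(loss_fn(predicts, batch=None))
# -> {"DMLLoss_Student_Teacher_0": Tensor(...)}, since DMLLoss returns a dict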
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class DMLLoss(nn.Layer): + """ + DMLLoss + """ + + def __init__(self, act="softmax", eps=1e-12): + super().__init__() + if act is not None: + assert act in ["softmax", "sigmoid"] + if act == "softmax": + self.act = nn.Softmax(axis=-1) + elif act == "sigmoid": + self.act = nn.Sigmoid() + else: + self.act = None + self.eps = eps + + def _kldiv(self, x, target): + class_num = x.shape[-1] + cost = target * paddle.log( + (target + self.eps) / (x + self.eps)) * class_num + return cost + + def forward(self, x, target): + if self.act is not None: + x = self.act(x) + target = self.act(target) + loss = self._kldiv(x, target) + self._kldiv(target, x) + loss = loss / 2 + loss = paddle.mean(loss) + return {"DMLLoss": loss} diff --git a/src/PaddleClas/ppcls/loss/emlloss.py b/src/PaddleClas/ppcls/loss/emlloss.py new file mode 100644 index 0000000..9735703 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/emlloss.py @@ -0,0 +1,97 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +import numpy as np +from .comfunc import rerange_index + + +class EmlLoss(paddle.nn.Layer): + def __init__(self, batch_size=40, samples_each_class=2): + super(EmlLoss, self).__init__() + assert (batch_size % samples_each_class == 0) + self.samples_each_class = samples_each_class + self.batch_size = batch_size + self.rerange_index = rerange_index(batch_size, samples_each_class) + self.thresh = 20.0 + self.beta = 100000 + + def surrogate_function(self, beta, theta, bias): + x = theta * paddle.exp(bias) + output = paddle.log(1 + beta * x) / math.log(1 + beta) + return output + + def surrogate_function_approximate(self, beta, theta, bias): + output = ( + paddle.log(theta) + bias + math.log(beta)) / math.log(1 + beta) + return output + + def surrogate_function_stable(self, beta, theta, target, thresh): + max_gap = paddle.to_tensor(thresh, dtype='float32') + max_gap.stop_gradient = True + + target_max = paddle.maximum(target, max_gap) + target_min = paddle.minimum(target, max_gap) + + loss1 = self.surrogate_function(beta, theta, target_min) + loss2 = self.surrogate_function_approximate(beta, theta, target_max) + bias = self.surrogate_function(beta, theta, max_gap) + loss = loss1 + loss2 - bias + return loss + + def forward(self, input, target=None): + features = input["features"] + samples_each_class = self.samples_each_class + batch_size = self.batch_size + rerange_index = self.rerange_index + + #calc distance + diffs = paddle.unsqueeze( + features, axis=1) - paddle.unsqueeze( + features, axis=0) + similary_matrix = paddle.sum(paddle.square(diffs), axis=-1) + + tmp = paddle.reshape(similary_matrix, shape=[-1, 1]) + rerange_index = paddle.to_tensor(rerange_index) + tmp = paddle.gather(tmp, index=rerange_index) + similary_matrix = paddle.reshape(tmp, 
shape=[-1, batch_size]) + + ignore, pos, neg = paddle.split( + similary_matrix, + num_or_sections=[ + 1, samples_each_class - 1, batch_size - samples_each_class + ], + axis=1) + ignore.stop_gradient = True + + pos_max = paddle.max(pos, axis=1, keepdim=True) + pos = paddle.exp(pos - pos_max) + pos_mean = paddle.mean(pos, axis=1, keepdim=True) + + neg_min = paddle.min(neg, axis=1, keepdim=True) + neg = paddle.exp(neg_min - neg) + neg_mean = paddle.mean(neg, axis=1, keepdim=True) + + bias = pos_max - neg_min + theta = paddle.multiply(neg_mean, pos_mean) + + loss = self.surrogate_function_stable(self.beta, theta, bias, + self.thresh) + loss = paddle.mean(loss) + return {"emlloss": loss}
diff --git a/src/PaddleClas/ppcls/loss/googlenetloss.py b/src/PaddleClas/ppcls/loss/googlenetloss.py new file mode 100644 index 0000000..c580aa6 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/googlenetloss.py @@ -0,0 +1,41 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class GoogLeNetLoss(nn.Layer): + """ + Cross entropy loss used after GoogLeNet + """ + def __init__(self, epsilon=None): + super().__init__() + assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "GoogLeNetLoss does not support label smoothing" + + + def forward(self, inputs, label): + input0, input1, input2 = inputs + if isinstance(input0, dict): + input0 = input0["logits"] + if isinstance(input1, dict): + input1 = input1["logits"] + if isinstance(input2, dict): + input2 = input2["logits"] + + loss0 = F.cross_entropy(input0, label=label, soft_label=False) + loss1 = F.cross_entropy(input1, label=label, soft_label=False) + loss2 = F.cross_entropy(input2, label=label, soft_label=False) + loss = loss0 + 0.3 * loss1 + 0.3 * loss2  # auxiliary heads weighted as in the GoogLeNet paper + loss = loss.mean() + return {"GoogLeNetLoss": loss}
diff --git a/src/PaddleClas/ppcls/loss/msmloss.py b/src/PaddleClas/ppcls/loss/msmloss.py new file mode 100644 index 0000000..3aa0dd8 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/msmloss.py @@ -0,0 +1,78 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +from .comfunc import rerange_index + + +class MSMLoss(paddle.nn.Layer): + """ + MSM loss, built on triplet loss. Uses P * K samples per batch. + The batch size is fixed at batch_size = P * K, while K may vary between batches. + Samples sharing a label are assumed to be grouped together in the batch. + + supported_metrics = [ + 'euclidean', + 'sqeuclidean', + 'cityblock', + ] + only samples_each_class = 2 is considered + """ + + def __init__(self, batch_size=120, samples_each_class=2, margin=0.1): + super(MSMLoss, self).__init__() + self.margin = margin + self.samples_each_class = samples_each_class + self.batch_size = batch_size + self.rerange_index = rerange_index(batch_size, samples_each_class) + + def forward(self, input, target=None): + # normalize features + features = input["features"] + features = self._normalize(features) + samples_each_class = self.samples_each_class + rerange_index = paddle.to_tensor(self.rerange_index) + + # pairwise squared-distance matrix + diffs = paddle.unsqueeze( + features, axis=1) - paddle.unsqueeze( + features, axis=0) + similary_matrix = paddle.sum(paddle.square(diffs), axis=-1) + + # rerange so positives lead each row + tmp = paddle.reshape(similary_matrix, shape=[-1, 1]) + tmp = paddle.gather(tmp, index=rerange_index) + similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size]) + + # split into self, positives and negatives + ignore, pos, neg = paddle.split( + similary_matrix, + num_or_sections=[1, samples_each_class - 1, -1], + axis=1) + ignore.stop_gradient = True + + hard_pos = paddle.max(pos) + hard_neg = paddle.min(neg) + + loss = hard_pos + self.margin - hard_neg + loss = paddle.nn.ReLU()(loss) + return {"msmloss": loss} + + def _normalize(self, input): + input_norm = paddle.sqrt( + paddle.sum(paddle.square(input), axis=1, keepdim=True)) + return paddle.divide(input, input_norm)
diff --git a/src/PaddleClas/ppcls/loss/multilabelloss.py b/src/PaddleClas/ppcls/loss/multilabelloss.py new file mode 100644 index 0000000..d30d5b8 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/multilabelloss.py @@ -0,0 +1,43 @@ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class MultiLabelLoss(nn.Layer): + """ + Multi-label loss + """ + + def __init__(self, epsilon=None): + super().__init__() + if epsilon is not None and (epsilon <= 0 or epsilon >= 1): + epsilon = None + self.epsilon = epsilon + + def _labelsmoothing(self, target, class_num): + if target.ndim == 1 or target.shape[-1] != class_num: + one_hot_target = F.one_hot(target, class_num) + else: + one_hot_target = target + soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon) + soft_target = paddle.reshape(soft_target, shape=[-1, class_num]) + return soft_target + + def _binary_crossentropy(self, input, target, class_num): + if self.epsilon is not None: + target = self._labelsmoothing(target, class_num) + cost = F.binary_cross_entropy_with_logits( + logit=input, label=target) + else: + cost = F.binary_cross_entropy_with_logits( + logit=input, label=target) + + return cost + + def forward(self, x, target): + if isinstance(x, dict): + x = x["logits"] + class_num = x.shape[-1] + loss = self._binary_crossentropy(x, target, class_num) + loss = loss.mean() + return {"MultiLabelLoss": loss}
diff --git a/src/PaddleClas/ppcls/loss/npairsloss.py b/src/PaddleClas/ppcls/loss/npairsloss.py new file mode 100644 index 0000000..d4b359e --- /dev/null +++ b/src/PaddleClas/ppcls/loss/npairsloss.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle + + +class NpairsLoss(paddle.nn.Layer): + def __init__(self, reg_lambda=0.01): + super(NpairsLoss, self).__init__() + self.reg_lambda = reg_lambda + + def forward(self, input, target=None): + """ + batch is arranged as anchor/positive pairs (labels are implied by ordering) + """ + features = input["features"] + reg_lambda = self.reg_lambda +
batch_size = features.shape[0] + fea_dim = features.shape[1] + num_class = batch_size // 2 + + #reshape + out_feas = paddle.reshape(features, shape=[-1, 2, fea_dim]) + anc_feas, pos_feas = paddle.split(out_feas, num_or_sections=2, axis=1) + anc_feas = paddle.squeeze(anc_feas, axis=1) + pos_feas = paddle.squeeze(pos_feas, axis=1) + + #get simi matrix + similarity_matrix = paddle.matmul( + anc_feas, pos_feas, transpose_y=True) #get similarity matrix + sparse_labels = paddle.arange(0, num_class, dtype='int64') + xentloss = paddle.nn.CrossEntropyLoss()( + similarity_matrix, sparse_labels) #by default: mean + + #l2 norm + reg = paddle.mean(paddle.sum(paddle.square(features), axis=1)) + l2loss = 0.5 * reg_lambda * reg + return {"npairsloss": xentloss + l2loss} diff --git a/src/PaddleClas/ppcls/loss/pairwisecosface.py b/src/PaddleClas/ppcls/loss/pairwisecosface.py new file mode 100644 index 0000000..beb8068 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/pairwisecosface.py @@ -0,0 +1,55 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class PairwiseCosface(nn.Layer): + def __init__(self, margin, gamma): + super(PairwiseCosface, self).__init__() + self.margin = margin + self.gamma = gamma + + def forward(self, embedding, targets): + if isinstance(embedding, dict): + embedding = embedding['features'] + # Normalize embedding features + embedding = F.normalize(embedding, axis=1) + dist_mat = paddle.matmul(embedding, embedding, transpose_y=True) + + N = dist_mat.shape[0] + is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') + is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') + + # Mask scores related to itself + is_pos = is_pos - paddle.eye(N, N) + + s_p = dist_mat * is_pos + s_n = dist_mat * is_neg + + logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos) + logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg) + + loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean() + + return {"PairwiseCosface": loss} + + diff --git a/src/PaddleClas/ppcls/loss/rkdloss.py b/src/PaddleClas/ppcls/loss/rkdloss.py new file mode 100644 index 0000000..e6ffea2 --- /dev/null +++ b/src/PaddleClas/ppcls/loss/rkdloss.py @@ -0,0 +1,97 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def pdist(e, squared=False, eps=1e-12): + e_square = e.pow(2).sum(axis=1) + prod = paddle.mm(e, e.t()) + res = (e_square.unsqueeze(1) + e_square.unsqueeze(0) - 2 * prod).clip( + min=eps) + + if not squared: + res = res.sqrt() + return res + + +class RKdAngle(nn.Layer): + # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py + def __init__(self, target_size=None): + super().__init__() + if target_size is not None: + self.avgpool = paddle.nn.AdaptiveAvgPool2D(target_size) + else: + self.avgpool = None + + def forward(self, student, teacher): + # GAP to reduce memory + if self.avgpool is not None: + # NxC1xH1xW1 -> NxC1x1x1 + student = self.avgpool(student) + # NxC2xH2xW2 -> NxC2x1x1 + teacher = self.avgpool(teacher) + + # reshape for feature map distillation + bs = student.shape[0] + student = student.reshape([bs, -1]) + teacher = teacher.reshape([bs, -1]) + + td = (teacher.unsqueeze(0) - teacher.unsqueeze(1)) + norm_td = F.normalize(td, p=2, axis=2) + t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape( + [-1, 1]) + + sd = (student.unsqueeze(0) - student.unsqueeze(1)) + norm_sd = F.normalize(sd, p=2, axis=2) + s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape( + [-1, 1]) + loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean') + return loss + + +class RkdDistance(nn.Layer): + # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py + def __init__(self, eps=1e-12, target_size=1): + super().__init__() + self.eps = eps + if target_size is not None: + self.avgpool = paddle.nn.AdaptiveAvgPool2D(target_size) + else: + self.avgpool = None + + def forward(self, student, teacher): + # GAP to reduce memory + if self.avgpool is not None: + # NxC1xH1xW1 -> NxC1x1x1 + student = self.avgpool(student) + # NxC2xH2xW2 -> NxC2x1x1 + teacher = self.avgpool(teacher) + + bs = student.shape[0] + student = student.reshape([bs, -1]) + teacher = teacher.reshape([bs, -1]) + + t_d = pdist(teacher, squared=False) + mean_td = t_d.mean() + t_d = t_d / (mean_td + self.eps) + + d = pdist(student, squared=False) + mean_d = d.mean() + d = d / (mean_d + self.eps) + + loss = F.smooth_l1_loss(d, t_d, reduction="mean") + return loss diff --git a/src/PaddleClas/ppcls/loss/supconloss.py b/src/PaddleClas/ppcls/loss/supconloss.py new file mode 100644 index 0000000..3dd33bc --- /dev/null +++ b/src/PaddleClas/ppcls/loss/supconloss.py @@ -0,0 +1,108 @@ +import paddle +from paddle import nn + + +class SupConLoss(nn.Layer): + """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf. 
+ It also supports the unsupervised contrastive loss in SimCLR""" + + def __init__(self, + views=16, + temperature=0.07, + contrast_mode='all', + base_temperature=0.07, + normalize_feature=True): + super(SupConLoss, self).__init__() + self.temperature = paddle.to_tensor(temperature) + self.contrast_mode = contrast_mode + self.base_temperature = paddle.to_tensor(base_temperature) + self.num_ids = None + self.views = views + self.normalize_feature = normalize_feature + + def forward(self, features, labels, mask=None): + """Compute loss for model. If both `labels` and `mask` are None, + it degenerates to SimCLR unsupervised loss: + https://arxiv.org/pdf/2002.05709.pdf + Args: + features: hidden vector of shape [bsz, n_views, ...]. + labels: ground truth of shape [bsz]. + mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j + has the same class as sample i. Can be asymmetric. + Returns: + A loss scalar. + """ + features = features["features"] + if self.num_ids is None: + self.num_ids = int(features.shape[0] / self.views) + + if self.normalize_feature: + features = 1. * features / (paddle.expand_as( + paddle.norm( + features, p=2, axis=-1, keepdim=True), features) + 1e-12) + features = features.reshape([self.num_ids, self.views, -1]) + labels = labels.reshape([self.num_ids, self.views])[:, 0] + + if len(features.shape) < 3: + raise ValueError('`features` needs to be [bsz, n_views, ...],' + 'at least 3 dimensions are required') + if len(features.shape) > 3: + features = features.reshape( + [features.shape[0], features.shape[1], -1]) + + batch_size = features.shape[0] + if labels is not None and mask is not None: + raise ValueError('Cannot define both `labels` and `mask`') + elif labels is None and mask is None: + mask = paddle.eye(batch_size, dtype='float32') + elif labels is not None: + labels = labels.reshape([-1, 1]) + if labels.shape[0] != batch_size: + raise ValueError( + 'Num of labels does not match num of features') + mask = paddle.cast( + paddle.equal(labels, paddle.t(labels)), 'float32') + else: + mask = paddle.cast(mask, 'float32') + + contrast_count = features.shape[1] + contrast_feature = paddle.concat( + paddle.unbind( + features, axis=1), axis=0) + if self.contrast_mode == 'one': + anchor_feature = features[:, 0] + anchor_count = 1 + elif self.contrast_mode == 'all': + anchor_feature = contrast_feature + anchor_count = contrast_count + else: + raise ValueError('Unknown mode: {}'.format(self.contrast_mode)) + + # compute logits + anchor_dot_contrast = paddle.divide( + paddle.matmul(anchor_feature, paddle.t(contrast_feature)), + self.temperature) + # for numerical stability + logits_max = paddle.max(anchor_dot_contrast, axis=1, keepdim=True) + logits = anchor_dot_contrast - logits_max.detach() + + # tile mask + mask = paddle.tile(mask, [anchor_count, contrast_count]) + + logits_mask = 1 - paddle.eye(batch_size * anchor_count) + mask = mask * logits_mask + + # compute log_prob + exp_logits = paddle.exp(logits) * logits_mask + log_prob = logits - paddle.log( + paddle.sum(exp_logits, axis=1, keepdim=True)) + + # compute mean of log-likelihood over positive + mean_log_prob_pos = paddle.sum((mask * log_prob), + axis=1) / paddle.sum(mask, axis=1) + + # loss + loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos + loss = paddle.mean(loss.reshape([anchor_count, batch_size])) + + return {"SupConLoss": loss} diff --git a/src/PaddleClas/ppcls/loss/trihardloss.py b/src/PaddleClas/ppcls/loss/trihardloss.py new file mode 100644 index 0000000..132c604 --- 
/dev/null +++ b/src/PaddleClas/ppcls/loss/trihardloss.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +from .comfunc import rerange_index + + +class TriHardLoss(paddle.nn.Layer): + """ + TriHard loss, built on triplet loss. Uses P * K samples per batch. + The batch size is fixed at batch_size = P * K, while K may vary between batches. + Samples sharing a label are assumed to be grouped together in the batch. + + supported_metrics = [ + 'euclidean', + 'sqeuclidean', + 'cityblock', + ] + only samples_each_class = 2 is considered + """ + + def __init__(self, batch_size=120, samples_each_class=2, margin=0.1): + super(TriHardLoss, self).__init__() + self.margin = margin + self.samples_each_class = samples_each_class + self.batch_size = batch_size + self.rerange_index = rerange_index(batch_size, samples_each_class) + + def forward(self, input, target=None): + features = input["features"] + assert (self.batch_size == features.shape[0]) + + # normalize features + features = self._normalize(features) + samples_each_class = self.samples_each_class + rerange_index = paddle.to_tensor(self.rerange_index) + + # pairwise squared-distance matrix + diffs = paddle.unsqueeze( + features, axis=1) - paddle.unsqueeze( + features, axis=0) + similary_matrix = paddle.sum(paddle.square(diffs), axis=-1) + + # rerange so positives lead each row + tmp = paddle.reshape(similary_matrix, shape=[-1, 1]) + tmp = paddle.gather(tmp, index=rerange_index) + similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size]) + + # split into self, positives and negatives + ignore, pos, neg = paddle.split( + similary_matrix, + num_or_sections=[1, samples_each_class - 1, -1], + axis=1) + + ignore.stop_gradient = True + hard_pos = paddle.max(pos, axis=1) + hard_neg = paddle.min(neg, axis=1) + + loss = hard_pos + self.margin - hard_neg + loss = paddle.nn.ReLU()(loss) + loss = paddle.mean(loss) + return {"trihardloss": loss} + + def _normalize(self, input): + input_norm = paddle.sqrt( + paddle.sum(paddle.square(input), axis=1, keepdim=True)) + return paddle.divide(input, input_norm)
diff --git a/src/PaddleClas/ppcls/loss/triplet.py b/src/PaddleClas/ppcls/loss/triplet.py new file mode 100644 index 0000000..d1c7eec --- /dev/null +++ b/src/PaddleClas/ppcls/loss/triplet.py @@ -0,0 +1,137 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.nn as nn + + +class TripletLossV2(nn.Layer): + """Triplet loss with hard positive/negative mining. + Args: + margin (float): margin for triplet.
+ """ + + def __init__(self, margin=0.5, normalize_feature=True): + super(TripletLossV2, self).__init__() + self.margin = margin + self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin) + self.normalize_feature = normalize_feature + + def forward(self, input, target): + """ + Args: + inputs: feature matrix with shape (batch_size, feat_dim) + target: ground truth labels with shape (num_classes) + """ + inputs = input["features"] + + if self.normalize_feature: + inputs = 1. * inputs / (paddle.expand_as( + paddle.norm( + inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12) + + bs = inputs.shape[0] + + # compute distance + dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs]) + dist = dist + dist.t() + dist = paddle.addmm( + input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0) + dist = paddle.clip(dist, min=1e-12).sqrt() + + # hard negative mining + is_pos = paddle.expand(target, ( + bs, bs)).equal(paddle.expand(target, (bs, bs)).t()) + is_neg = paddle.expand(target, ( + bs, bs)).not_equal(paddle.expand(target, (bs, bs)).t()) + + # `dist_ap` means distance(anchor, positive) + ## both `dist_ap` and `relative_p_inds` with shape [N, 1] + ''' + dist_ap, relative_p_inds = paddle.max( + paddle.reshape(dist[is_pos], (bs, -1)), axis=1, keepdim=True) + # `dist_an` means distance(anchor, negative) + # both `dist_an` and `relative_n_inds` with shape [N, 1] + dist_an, relative_n_inds = paddle.min( + paddle.reshape(dist[is_neg], (bs, -1)), axis=1, keepdim=True) + ''' + dist_ap = paddle.max(paddle.reshape( + paddle.masked_select(dist, is_pos), (bs, -1)), + axis=1, + keepdim=True) + # `dist_an` means distance(anchor, negative) + # both `dist_an` and `relative_n_inds` with shape [N, 1] + dist_an = paddle.min(paddle.reshape( + paddle.masked_select(dist, is_neg), (bs, -1)), + axis=1, + keepdim=True) + # shape [N] + dist_ap = paddle.squeeze(dist_ap, axis=1) + dist_an = paddle.squeeze(dist_an, axis=1) + + # Compute ranking hinge loss + y = paddle.ones_like(dist_an) + loss = self.ranking_loss(dist_an, dist_ap, y) + return {"TripletLossV2": loss} + + +class TripletLoss(nn.Layer): + """Triplet loss with hard positive/negative mining. + Reference: + Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. + Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. + Args: + margin (float): margin for triplet. 
+ """ + + def __init__(self, margin=1.0): + super(TripletLoss, self).__init__() + self.margin = margin + self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin) + + def forward(self, input, target): + """ + Args: + inputs: feature matrix with shape (batch_size, feat_dim) + target: ground truth labels with shape (num_classes) + """ + inputs = input["features"] + + bs = inputs.shape[0] + # Compute pairwise distance, replace by the official when merged + dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs]) + dist = dist + dist.t() + dist = paddle.addmm( + input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0) + dist = paddle.clip(dist, min=1e-12).sqrt() + + mask = paddle.equal( + target.expand([bs, bs]), target.expand([bs, bs]).t()) + mask_numpy_idx = mask.numpy() + dist_ap, dist_an = [], [] + for i in range(bs): + # dist_ap_i = paddle.to_tensor(dist[i].numpy()[mask_numpy_idx[i]].max(),dtype='float64').unsqueeze(0) + # dist_ap_i.stop_gradient = False + # dist_ap.append(dist_ap_i) + dist_ap.append( + max([ + dist[i][j] if mask_numpy_idx[i][j] == True else float( + "-inf") for j in range(bs) + ]).unsqueeze(0)) + # dist_an_i = paddle.to_tensor(dist[i].numpy()[mask_numpy_idx[i] == False].min(), dtype='float64').unsqueeze(0) + # dist_an_i.stop_gradient = False + # dist_an.append(dist_an_i) + dist_an.append( + min([ + dist[i][k] if mask_numpy_idx[i][k] == False else float( + "inf") for k in range(bs) + ]).unsqueeze(0)) + + dist_ap = paddle.concat(dist_ap, axis=0) + dist_an = paddle.concat(dist_an, axis=0) + + # Compute ranking hinge loss + y = paddle.ones_like(dist_an) + loss = self.ranking_loss(dist_an, dist_ap, y) + return {"TripletLoss": loss} diff --git a/src/PaddleClas/ppcls/metric/__init__.py b/src/PaddleClas/ppcls/metric/__init__.py new file mode 100644 index 0000000..9472123 --- /dev/null +++ b/src/PaddleClas/ppcls/metric/__init__.py @@ -0,0 +1,51 @@ +#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from paddle import nn +import copy +from collections import OrderedDict + +from .metrics import TopkAcc, mAP, mINP, Recallk, Precisionk +from .metrics import DistillationTopkAcc +from .metrics import GoogLeNetTopkAcc +from .metrics import HammingDistance, AccuracyScore + + +class CombinedMetrics(nn.Layer): + def __init__(self, config_list): + super().__init__() + self.metric_func_list = [] + assert isinstance(config_list, list), ( + 'operator config should be a list') + for config in config_list: + assert isinstance(config, + dict) and len(config) == 1, "yaml format error" + metric_name = list(config)[0] + metric_params = config[metric_name] + if metric_params is not None: + self.metric_func_list.append( + eval(metric_name)(**metric_params)) + else: + self.metric_func_list.append(eval(metric_name)()) + + def __call__(self, *args, **kwargs): + metric_dict = OrderedDict() + for idx, metric_func in enumerate(self.metric_func_list): + metric_dict.update(metric_func(*args, **kwargs)) + return metric_dict + + +def build_metrics(config): + metrics_list = CombinedMetrics(copy.deepcopy(config)) + return metrics_list diff --git a/src/PaddleClas/ppcls/metric/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/metric/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..c075b76 Binary files /dev/null and b/src/PaddleClas/ppcls/metric/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/metric/__pycache__/metrics.cpython-39.pyc b/src/PaddleClas/ppcls/metric/__pycache__/metrics.cpython-39.pyc new file mode 100644 index 0000000..d7ce7f4 Binary files /dev/null and b/src/PaddleClas/ppcls/metric/__pycache__/metrics.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/metric/metrics.py b/src/PaddleClas/ppcls/metric/metrics.py new file mode 100644 index 0000000..7c6407e --- /dev/null +++ b/src/PaddleClas/ppcls/metric/metrics.py @@ -0,0 +1,309 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
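CombinedMetrics above resolves each single-key dict in the config list to one of the metric classes defined in metrics.py below, instantiates it with the given parameters, and merges all outputs into one OrderedDict. A minimal sketch of the expected config shape, mirroring the YAML lists PaddleClas configs use (values illustrative):

import paddle
from ppcls.metric import build_metrics

metrics = build_metrics([{"TopkAcc": {"topk": [1, 5]}}])
logits = paddle.randn([4, 10])
labels = paddle.randint(0, 10, [4, 1])  # int64 column vector, as paddle.metric.accuracy expects
print(metrics(logits, labels))          # e.g. {"top1": Tensor(...), "top5": Tensor(...)}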
+ +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from sklearn.metrics import hamming_loss +from sklearn.metrics import accuracy_score as accuracy_metric +from sklearn.metrics import multilabel_confusion_matrix +from sklearn.preprocessing import binarize + + +class TopkAcc(nn.Layer): + def __init__(self, topk=(1, 5)): + super().__init__() + assert isinstance(topk, (int, list, tuple)) + if isinstance(topk, int): + topk = [topk] + self.topk = topk + + def forward(self, x, label): + if isinstance(x, dict): + x = x["logits"] + + metric_dict = dict() + for k in self.topk: + metric_dict["top{}".format(k)] = paddle.metric.accuracy( + x, label, k=k) + return metric_dict + + +class mAP(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, similarities_matrix, query_img_id, gallery_img_id, + keep_mask): + metric_dict = dict() + + choosen_indices = paddle.argsort( + similarities_matrix, axis=1, descending=True) + gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0]) + gallery_labels_transpose = paddle.broadcast_to( + gallery_labels_transpose, + shape=[ + choosen_indices.shape[0], gallery_labels_transpose.shape[1] + ]) + choosen_label = paddle.index_sample(gallery_labels_transpose, + choosen_indices) + equal_flag = paddle.equal(choosen_label, query_img_id) + if keep_mask is not None: + keep_mask = paddle.index_sample( + keep_mask.astype('float32'), choosen_indices) + equal_flag = paddle.logical_and(equal_flag, + keep_mask.astype('bool')) + equal_flag = paddle.cast(equal_flag, 'float32') + + num_rel = paddle.sum(equal_flag, axis=1) + num_rel = paddle.greater_than(num_rel, paddle.to_tensor(0.)) + num_rel_index = paddle.nonzero(num_rel.astype("int")) + num_rel_index = paddle.reshape(num_rel_index, [num_rel_index.shape[0]]) + equal_flag = paddle.index_select(equal_flag, num_rel_index, axis=0) + + acc_sum = paddle.cumsum(equal_flag, axis=1) + div = paddle.arange(acc_sum.shape[1]).astype("float32") + 1 + precision = paddle.divide(acc_sum, div) + + #calc map + precision_mask = paddle.multiply(equal_flag, precision) + ap = paddle.sum(precision_mask, axis=1) / paddle.sum(equal_flag, + axis=1) + metric_dict["mAP"] = paddle.mean(ap).numpy()[0] + return metric_dict + + +class mINP(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, similarities_matrix, query_img_id, gallery_img_id, + keep_mask): + metric_dict = dict() + + choosen_indices = paddle.argsort( + similarities_matrix, axis=1, descending=True) + gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0]) + gallery_labels_transpose = paddle.broadcast_to( + gallery_labels_transpose, + shape=[ + choosen_indices.shape[0], gallery_labels_transpose.shape[1] + ]) + choosen_label = paddle.index_sample(gallery_labels_transpose, + choosen_indices) + equal_flag = paddle.equal(choosen_label, query_img_id) + if keep_mask is not None: + keep_mask = paddle.index_sample( + keep_mask.astype('float32'), choosen_indices) + equal_flag = paddle.logical_and(equal_flag, + keep_mask.astype('bool')) + equal_flag = paddle.cast(equal_flag, 'float32') + + num_rel = paddle.sum(equal_flag, axis=1) + num_rel = paddle.greater_than(num_rel, paddle.to_tensor(0.)) + num_rel_index = paddle.nonzero(num_rel.astype("int")) + num_rel_index = paddle.reshape(num_rel_index, [num_rel_index.shape[0]]) + equal_flag = paddle.index_select(equal_flag, num_rel_index, axis=0) + + #do accumulative sum + div = paddle.arange(equal_flag.shape[1]).astype("float32") + 2 + minus = 
paddle.divide(equal_flag, div) + auxilary = paddle.subtract(equal_flag, minus) + hard_index = paddle.argmax(auxilary, axis=1).astype("float32") + all_INP = paddle.divide(paddle.sum(equal_flag, axis=1), hard_index) + mINP = paddle.mean(all_INP) + metric_dict["mINP"] = mINP.numpy()[0] + return metric_dict + + +class Recallk(nn.Layer): + def __init__(self, topk=(1, 5)): + super().__init__() + assert isinstance(topk, (int, list, tuple)) + if isinstance(topk, int): + topk = [topk] + self.topk = topk + + def forward(self, similarities_matrix, query_img_id, gallery_img_id, + keep_mask): + metric_dict = dict() + + #get cmc + choosen_indices = paddle.argsort( + similarities_matrix, axis=1, descending=True) + gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0]) + gallery_labels_transpose = paddle.broadcast_to( + gallery_labels_transpose, + shape=[ + choosen_indices.shape[0], gallery_labels_transpose.shape[1] + ]) + choosen_label = paddle.index_sample(gallery_labels_transpose, + choosen_indices) + equal_flag = paddle.equal(choosen_label, query_img_id) + if keep_mask is not None: + keep_mask = paddle.index_sample( + keep_mask.astype('float32'), choosen_indices) + equal_flag = paddle.logical_and(equal_flag, + keep_mask.astype('bool')) + equal_flag = paddle.cast(equal_flag, 'float32') + real_query_num = paddle.sum(equal_flag, axis=1) + real_query_num = paddle.sum( + paddle.greater_than(real_query_num, paddle.to_tensor(0.)).astype( + "float32")) + + acc_sum = paddle.cumsum(equal_flag, axis=1) + mask = paddle.greater_than(acc_sum, + paddle.to_tensor(0.)).astype("float32") + all_cmc = (paddle.sum(mask, axis=0) / real_query_num).numpy() + + for k in self.topk: + metric_dict["recall{}".format(k)] = all_cmc[k - 1] + return metric_dict + + +class Precisionk(nn.Layer): + def __init__(self, topk=(1, 5)): + super().__init__() + assert isinstance(topk, (int, list, tuple)) + if isinstance(topk, int): + topk = [topk] + self.topk = topk + + def forward(self, similarities_matrix, query_img_id, gallery_img_id, + keep_mask): + metric_dict = dict() + + #get cmc + choosen_indices = paddle.argsort( + similarities_matrix, axis=1, descending=True) + gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0]) + gallery_labels_transpose = paddle.broadcast_to( + gallery_labels_transpose, + shape=[ + choosen_indices.shape[0], gallery_labels_transpose.shape[1] + ]) + choosen_label = paddle.index_sample(gallery_labels_transpose, + choosen_indices) + equal_flag = paddle.equal(choosen_label, query_img_id) + if keep_mask is not None: + keep_mask = paddle.index_sample( + keep_mask.astype('float32'), choosen_indices) + equal_flag = paddle.logical_and(equal_flag, + keep_mask.astype('bool')) + equal_flag = paddle.cast(equal_flag, 'float32') + + Ns = paddle.arange(gallery_img_id.shape[0]) + 1 + equal_flag_cumsum = paddle.cumsum(equal_flag, axis=1) + Precision_at_k = (paddle.mean(equal_flag_cumsum, axis=0) / Ns).numpy() + + for k in self.topk: + metric_dict["precision@{}".format(k)] = Precision_at_k[k - 1] + + return metric_dict + + +class DistillationTopkAcc(TopkAcc): + def __init__(self, model_key, feature_key=None, topk=(1, 5)): + super().__init__(topk=topk) + self.model_key = model_key + self.feature_key = feature_key + + def forward(self, x, label): + if isinstance(x, dict): + x = x[self.model_key] + if self.feature_key is not None: + x = x[self.feature_key] + return super().forward(x, label) + + +class GoogLeNetTopkAcc(TopkAcc): + def __init__(self, topk=(1, 5)): + super().__init__() + assert isinstance(topk, 
(int, list, tuple)) + if isinstance(topk, int): + topk = [topk] + self.topk = topk + + def forward(self, x, label): + return super().forward(x[0], label) + + +class MultiLabelMetric(object): + def __init__(self): + pass + + def _multi_hot_encode(self, logits, threshold=0.5): + return binarize(logits, threshold=threshold) + + def __call__(self, output): + output = F.sigmoid(output) + preds = self._multi_hot_encode(logits=output.numpy(), threshold=0.5) + return preds + + +class HammingDistance(MultiLabelMetric): + """ + Soft, label-based metric for multi-label classification + Returns: + The smaller the return value is, the better the model is. + """ + + def __init__(self): + super().__init__() + + def __call__(self, output, target): + preds = super().__call__(output) + metric_dict = dict() + metric_dict["HammingDistance"] = paddle.to_tensor( + hamming_loss(target, preds)) + return metric_dict + + +class AccuracyScore(MultiLabelMetric): + """ + Hard metric for multi-label classification + Args: + base: ["sample", "label"], default="label" + if "sample", the score is computed per sample; + if "label", the score is computed per label. + Returns: + accuracy: + """ + + def __init__(self, base="label"): + super().__init__() + assert base in ["sample", "label" + ], 'must be one of ["sample", "label"]' + self.base = base + + def __call__(self, output, target): + preds = super().__call__(output) + metric_dict = dict() + if self.base == "sample": + accuracy = accuracy_metric(target, preds) + elif self.base == "label": + mcm = multilabel_confusion_matrix(target, preds) + tns = mcm[:, 0, 0] + fns = mcm[:, 1, 0] + tps = mcm[:, 1, 1] + fps = mcm[:, 0, 1] + accuracy = (sum(tps) + sum(tns)) / ( + sum(tps) + sum(tns) + sum(fns) + sum(fps)) + precision = sum(tps) / (sum(tps) + sum(fps)) + recall = sum(tps) / (sum(tps) + sum(fns)) + F1 = 2 * (precision * recall) / (precision + recall) + metric_dict["AccuracyScore"] = paddle.to_tensor(accuracy) + return metric_dict
diff --git a/src/PaddleClas/ppcls/optimizer/__init__.py b/src/PaddleClas/ppcls/optimizer/__init__.py new file mode 100644 index 0000000..61db39f --- /dev/null +++ b/src/PaddleClas/ppcls/optimizer/__init__.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import paddle + +from ppcls.utils import logger + +from . import optimizer + +__all__ = ['build_optimizer'] + + +def build_lr_scheduler(lr_config, epochs, step_each_epoch): + from .
import learning_rate + lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch}) + if 'name' in lr_config: + lr_name = lr_config.pop('name') + lr = getattr(learning_rate, lr_name)(**lr_config) + if isinstance(lr, paddle.optimizer.lr.LRScheduler): + return lr + else: + return lr() + else: + lr = lr_config['learning_rate'] + return lr + + +# model_list is None in static graph +def build_optimizer(config, epochs, step_each_epoch, model_list=None): + config = copy.deepcopy(config) + # step1 build lr + lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch) + logger.debug("build lr ({}) success..".format(lr)) + # step2 build regularization + if 'regularizer' in config and config['regularizer'] is not None: + if 'weight_decay' in config: + logger.warning( + "ConfigError: Only one of regularizer and weight_decay can be set in Optimizer Config. \"weight_decay\" has been ignored." + ) + reg_config = config.pop('regularizer') + reg_name = reg_config.pop('name') + 'Decay' + reg = getattr(paddle.regularizer, reg_name)(**reg_config) + config["weight_decay"] = reg + logger.debug("build regularizer ({}) success..".format(reg)) + # step3 build optimizer + optim_name = config.pop('name') + if 'clip_norm' in config: + clip_norm = config.pop('clip_norm') + grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm) + else: + grad_clip = None + optim = getattr(optimizer, optim_name)(learning_rate=lr, + grad_clip=grad_clip, + **config)(model_list=model_list) + logger.debug("build optimizer ({}) success..".format(optim)) + return optim, lr diff --git a/src/PaddleClas/ppcls/optimizer/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/optimizer/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..282c4ce Binary files /dev/null and b/src/PaddleClas/ppcls/optimizer/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/optimizer/__pycache__/learning_rate.cpython-39.pyc b/src/PaddleClas/ppcls/optimizer/__pycache__/learning_rate.cpython-39.pyc new file mode 100644 index 0000000..d3b715c Binary files /dev/null and b/src/PaddleClas/ppcls/optimizer/__pycache__/learning_rate.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/optimizer/__pycache__/optimizer.cpython-39.pyc b/src/PaddleClas/ppcls/optimizer/__pycache__/optimizer.cpython-39.pyc new file mode 100644 index 0000000..7d244ac Binary files /dev/null and b/src/PaddleClas/ppcls/optimizer/__pycache__/optimizer.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/optimizer/learning_rate.py b/src/PaddleClas/ppcls/optimizer/learning_rate.py new file mode 100644 index 0000000..b59387d --- /dev/null +++ b/src/PaddleClas/ppcls/optimizer/learning_rate.py @@ -0,0 +1,326 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
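build_optimizer, then, expects a config dict whose 'lr' sub-dict names one of the schedulers defined in learning_rate.py below, plus an optimizer name resolved via getattr against the optimizer module. A minimal sketch, assuming a Momentum wrapper exists in ppcls/optimizer/optimizer.py (that file is not part of this excerpt, so the exact optimizer keys are assumptions inferred from the code above):

import paddle
from ppcls.optimizer import build_optimizer

config = {
    "name": "Momentum",  # assumed wrapper class in ppcls.optimizer.optimizer
    "momentum": 0.9,
    "lr": {"name": "Cosine", "learning_rate": 0.1},
    "regularizer": {"name": "L2", "coeff": 1e-4},  # becomes paddle.regularizer.L2Decay
}
model = paddle.nn.Linear(10, 10)
optim, lr = build_optimizer(
    config, epochs=10, step_each_epoch=100, model_list=[model])

Note that 'regularizer' and 'weight_decay' are mutually exclusive here: when both are present, the code above ignores 'weight_decay' and logs a warning.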
+ +from __future__ import (absolute_import, division, print_function, + unicode_literals) + +from paddle.optimizer import lr +from paddle.optimizer.lr import LRScheduler + +from ppcls.utils import logger + + +class Linear(object): + """ + Linear learning rate decay + Args: + lr (float): The initial learning rate. It is a python float number. + epochs(int): The decay step size. It determines the decay cycle. + end_lr(float, optional): The minimum final learning rate. Default: 0.0001. + power(float, optional): Power of polynomial. Default: 1.0. + warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0. + warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__(self, + learning_rate, + epochs, + step_each_epoch, + end_lr=0.0, + power=1.0, + warmup_epoch=0, + warmup_start_lr=0.0, + last_epoch=-1, + **kwargs): + super().__init__() + if warmup_epoch >= epochs: + msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}." + logger.warning(msg) + warmup_epoch = epochs + self.learning_rate = learning_rate + self.steps = (epochs - warmup_epoch) * step_each_epoch + self.end_lr = end_lr + self.power = power + self.last_epoch = last_epoch + self.warmup_steps = round(warmup_epoch * step_each_epoch) + self.warmup_start_lr = warmup_start_lr + + def __call__(self): + learning_rate = lr.PolynomialDecay( + learning_rate=self.learning_rate, + decay_steps=self.steps, + end_lr=self.end_lr, + power=self.power, + last_epoch=self. + last_epoch) if self.steps > 0 else self.learning_rate + if self.warmup_steps > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_steps, + start_lr=self.warmup_start_lr, + end_lr=self.learning_rate, + last_epoch=self.last_epoch) + return learning_rate + + +class Cosine(object): + """ + Cosine learning rate decay + lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + eta_min(float): Minimum learning rate. Default: 0.0. + warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0. + warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__(self, + learning_rate, + step_each_epoch, + epochs, + eta_min=0.0, + warmup_epoch=0, + warmup_start_lr=0.0, + last_epoch=-1, + **kwargs): + super().__init__() + if warmup_epoch >= epochs: + msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}." + logger.warning(msg) + warmup_epoch = epochs + self.learning_rate = learning_rate + self.T_max = (epochs - warmup_epoch) * step_each_epoch + self.eta_min = eta_min + self.last_epoch = last_epoch + self.warmup_steps = round(warmup_epoch * step_each_epoch) + self.warmup_start_lr = warmup_start_lr + + def __call__(self): + learning_rate = lr.CosineAnnealingDecay( + learning_rate=self.learning_rate, + T_max=self.T_max, + eta_min=self.eta_min, + last_epoch=self. 
+ last_epoch) if self.T_max > 0 else self.learning_rate + if self.warmup_steps > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_steps, + start_lr=self.warmup_start_lr, + end_lr=self.learning_rate, + last_epoch=self.last_epoch) + return learning_rate + + +class Step(object): + """ + Piecewise learning rate decay + Args: + step_each_epoch(int): steps each epoch + learning_rate (float): The initial learning rate. It is a python float number. + step_size (int): the interval to update. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0. + warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__(self, + learning_rate, + step_size, + step_each_epoch, + epochs, + gamma, + warmup_epoch=0, + warmup_start_lr=0.0, + last_epoch=-1, + **kwargs): + super().__init__() + if warmup_epoch >= epochs: + msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}." + logger.warning(msg) + warmup_epoch = epochs + self.step_size = step_each_epoch * step_size + self.learning_rate = learning_rate + self.gamma = gamma + self.last_epoch = last_epoch + self.warmup_steps = round(warmup_epoch * step_each_epoch) + self.warmup_start_lr = warmup_start_lr + + def __call__(self): + learning_rate = lr.StepDecay( + learning_rate=self.learning_rate, + step_size=self.step_size, + gamma=self.gamma, + last_epoch=self.last_epoch) + if self.warmup_steps > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_steps, + start_lr=self.warmup_start_lr, + end_lr=self.learning_rate, + last_epoch=self.last_epoch) + return learning_rate + + +class Piecewise(object): + """ + Piecewise learning rate decay + Args: + boundaries(list): A list of steps numbers. The type of element in the list is python int. + values(list): A list of learning rate values that will be picked during different epoch boundaries. + The type of element in the list is python float. + warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0. + warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__(self, + step_each_epoch, + decay_epochs, + values, + epochs, + warmup_epoch=0, + warmup_start_lr=0.0, + last_epoch=-1, + **kwargs): + super().__init__() + if warmup_epoch >= epochs: + msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}." 
+ logger.warning(msg) + warmup_epoch = epochs + self.boundaries = [step_each_epoch * e for e in decay_epochs] + self.values = values + self.last_epoch = last_epoch + self.warmup_steps = round(warmup_epoch * step_each_epoch) + self.warmup_start_lr = warmup_start_lr + + def __call__(self): + learning_rate = lr.PiecewiseDecay( + boundaries=self.boundaries, + values=self.values, + last_epoch=self.last_epoch) + if self.warmup_steps > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_steps, + start_lr=self.warmup_start_lr, + end_lr=self.values[0], + last_epoch=self.last_epoch) + return learning_rate + + +class MultiStepDecay(LRScheduler): + """ + Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones. + The algorithm can be described as the code below. + .. code-block:: text + learning_rate = 0.5 + milestones = [30, 50] + gamma = 0.1 + if epoch < 30: + learning_rate = 0.5 + elif epoch < 50: + learning_rate = 0.05 + else: + learning_rate = 0.005 + Args: + learning_rate (float): The initial learning rate. It is a python float number. + milestones (tuple|list): List or tuple of each boundaries. Must be increasing. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``MultiStepDecay`` instance to schedule learning rate. + Examples: + + .. code-block:: python + import paddle + import numpy as np + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + # train on static graph mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + """ + + def __init__(self, + learning_rate, + milestones, + epochs, + step_each_epoch, + gamma=0.1, + last_epoch=-1, + verbose=False): + if not isinstance(milestones, (tuple, list)): + raise TypeError( + "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received 
%s."
+                % type(milestones))
+        if not all([
+            milestones[i] < milestones[i + 1]
+            for i in range(len(milestones) - 1)
+        ]):
+            raise ValueError('The elements of milestones must be increasing')
+        if gamma >= 1.0:
+            raise ValueError('gamma should be < 1.0.')
+        self.milestones = [x * step_each_epoch for x in milestones]
+        self.gamma = gamma
+        super().__init__(learning_rate, last_epoch, verbose)
+
+    def get_lr(self):
+        for i in range(len(self.milestones)):
+            if self.last_epoch < self.milestones[i]:
+                return self.base_lr * (self.gamma**i)
+        return self.base_lr * (self.gamma**len(self.milestones))
diff --git a/src/PaddleClas/ppcls/optimizer/optimizer.py b/src/PaddleClas/ppcls/optimizer/optimizer.py
new file mode 100644
index 0000000..4422ea7
--- /dev/null
+++ b/src/PaddleClas/ppcls/optimizer/optimizer.py
@@ -0,0 +1,217 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import optimizer as optim
+import paddle
+
+from ppcls.utils import logger
+
+
+class Momentum(object):
+    """
+    Simple Momentum optimizer with velocity state.
+    Args:
+        learning_rate (float|Variable) - The learning rate used to update parameters.
+            Can be a float value or a Variable with one float value as data element.
+        momentum (float) - Momentum factor.
+        weight_decay (WeightDecayRegularizer, optional) - The strategy of weight-decay regularization.
+ """ + + def __init__(self, + learning_rate, + momentum, + weight_decay=None, + grad_clip=None, + multi_precision=True): + super().__init__() + self.learning_rate = learning_rate + self.momentum = momentum + self.weight_decay = weight_decay + self.grad_clip = grad_clip + self.multi_precision = multi_precision + + def __call__(self, model_list): + # model_list is None in static graph + parameters = sum([m.parameters() for m in model_list], + []) if model_list else None + opt = optim.Momentum( + learning_rate=self.learning_rate, + momentum=self.momentum, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + multi_precision=self.multi_precision, + parameters=parameters) + if hasattr(opt, '_use_multi_tensor'): + opt = optim.Momentum( + learning_rate=self.learning_rate, + momentum=self.momentum, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + multi_precision=self.multi_precision, + parameters=parameters, + use_multi_tensor=True) + return opt + + +class Adam(object): + def __init__(self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + parameter_list=None, + weight_decay=None, + grad_clip=None, + name=None, + lazy_mode=False, + multi_precision=False): + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.parameter_list = parameter_list + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.grad_clip = grad_clip + self.name = name + self.lazy_mode = lazy_mode + self.multi_precision = multi_precision + + def __call__(self, model_list): + # model_list is None in static graph + parameters = sum([m.parameters() for m in model_list], + []) if model_list else None + opt = optim.Adam( + learning_rate=self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + name=self.name, + lazy_mode=self.lazy_mode, + multi_precision=self.multi_precision, + parameters=parameters) + return opt + + +class RMSProp(object): + """ + Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. + Args: + learning_rate (float|Variable) - The learning rate used to update parameters. + Can be a float value or a Variable with one float value as data element. + momentum (float) - Momentum factor. + rho (float) - rho value in equation. + epsilon (float) - avoid division by zero, default is 1e-6. + regularization (WeightDecayRegularizer, optional) - The strategy of regularization. 
+ """ + + def __init__(self, + learning_rate, + momentum=0.0, + rho=0.95, + epsilon=1e-6, + weight_decay=None, + grad_clip=None, + multi_precision=False): + super().__init__() + self.learning_rate = learning_rate + self.momentum = momentum + self.rho = rho + self.epsilon = epsilon + self.weight_decay = weight_decay + self.grad_clip = grad_clip + + def __call__(self, model_list): + # model_list is None in static graph + parameters = sum([m.parameters() for m in model_list], + []) if model_list else None + opt = optim.RMSProp( + learning_rate=self.learning_rate, + momentum=self.momentum, + rho=self.rho, + epsilon=self.epsilon, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + parameters=parameters) + return opt + + +class AdamW(object): + def __init__(self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + weight_decay=None, + multi_precision=False, + grad_clip=None, + no_weight_decay_name=None, + one_dim_param_no_weight_decay=False, + **args): + super().__init__() + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.grad_clip = grad_clip + self.weight_decay = weight_decay + self.multi_precision = multi_precision + self.no_weight_decay_name_list = no_weight_decay_name.split( + ) if no_weight_decay_name else [] + self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay + + def __call__(self, model_list): + # model_list is None in static graph + parameters = sum([m.parameters() for m in model_list], + []) if model_list else None + + # TODO(gaotingquan): model_list is None when in static graph, "no_weight_decay" not work. + if model_list is None: + if self.one_dim_param_no_weight_decay or len( + self.no_weight_decay_name_list) != 0: + msg = "\"AdamW\" does not support setting \"no_weight_decay\" in static graph. Please use dynamic graph." + logger.error(Exception(msg)) + raise Exception(msg) + + self.no_weight_decay_param_name_list = [ + p.name for model in model_list for n, p in model.named_parameters() + if any(nd in n for nd in self.no_weight_decay_name_list) + ] if model_list else [] + + if self.one_dim_param_no_weight_decay: + self.no_weight_decay_param_name_list += [ + p.name for model in model_list + for n, p in model.named_parameters() if len(p.shape) == 1 + ] if model_list else [] + + opt = optim.AdamW( + learning_rate=self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon, + parameters=parameters, + weight_decay=self.weight_decay, + multi_precision=self.multi_precision, + grad_clip=self.grad_clip, + apply_decay_param_fun=self._apply_decay_param_fun) + return opt + + def _apply_decay_param_fun(self, name): + return name not in self.no_weight_decay_param_name_list diff --git a/src/PaddleClas/ppcls/static/program.py b/src/PaddleClas/ppcls/static/program.py new file mode 100644 index 0000000..b3534a2 --- /dev/null +++ b/src/PaddleClas/ppcls/static/program.py @@ -0,0 +1,449 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import numpy as np + +from collections import OrderedDict + +import paddle +import paddle.nn.functional as F + +from paddle.distributed import fleet +from paddle.distributed.fleet import DistributedStrategy + +# from ppcls.optimizer import OptimizerBuilder +# from ppcls.optimizer.learning_rate import LearningRateBuilder + +from ppcls.arch import build_model +from ppcls.loss import build_loss +from ppcls.metric import build_metrics +from ppcls.optimizer import build_optimizer +from ppcls.optimizer import build_lr_scheduler + +from ppcls.utils.misc import AverageMeter +from ppcls.utils import logger, profiler + + +def create_feeds(image_shape, use_mix=False, class_num=None, dtype="float32"): + """ + Create feeds as model input + + Args: + image_shape(list[int]): model input shape, such as [3, 224, 224] + use_mix(bool): whether to use mix(include mixup, cutmix, fmix) + class_num(int): the class number of network, required if use_mix + + Returns: + feeds(dict): dict of model input variables + """ + feeds = OrderedDict() + feeds['data'] = paddle.static.data( + name="data", shape=[None] + image_shape, dtype=dtype) + + if use_mix: + if class_num is None: + msg = "When use MixUp, CutMix and so on, you must set class_num." + logger.error(msg) + raise Exception(msg) + feeds['target'] = paddle.static.data( + name="target", shape=[None, class_num], dtype="float32") + else: + feeds['label'] = paddle.static.data( + name="label", shape=[None, 1], dtype="int64") + + return feeds + + +def create_fetchs(out, + feeds, + architecture, + topk=5, + epsilon=None, + class_num=None, + use_mix=False, + config=None, + mode="Train"): + """ + Create fetchs as model outputs(included loss and measures), + will call create_loss and create_metric(if use_mix). + Args: + out(variable): model output variable + feeds(dict): dict of model input variables. + If use mix_up, it will not include label. + architecture(dict): architecture information, + name(such as ResNet50) is needed + topk(int): usually top5 + epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0 + class_num(int): the class number of network, required if use_mix + use_mix(bool): whether to use mix(include mixup, cutmix, fmix) + config(dict): model config + + Returns: + fetchs(dict): dict of model outputs(included loss and measures) + """ + fetchs = OrderedDict() + # build loss + if use_mix: + if class_num is None: + msg = "When use MixUp, CutMix and so on, you must set class_num." 
+ logger.error(msg) + raise Exception(msg) + target = paddle.reshape(feeds['target'], [-1, class_num]) + else: + target = paddle.reshape(feeds['label'], [-1, 1]) + + loss_func = build_loss(config["Loss"][mode]) + loss_dict = loss_func(out, target) + + loss_out = loss_dict["loss"] + fetchs['loss'] = (loss_out, AverageMeter('loss', '7.4f', need_avg=True)) + + # build metric + if not use_mix: + metric_func = build_metrics(config["Metric"][mode]) + + metric_dict = metric_func(out, target) + + for key in metric_dict: + if mode != "Train" and paddle.distributed.get_world_size() > 1: + paddle.distributed.all_reduce( + metric_dict[key], op=paddle.distributed.ReduceOp.SUM) + metric_dict[key] = metric_dict[ + key] / paddle.distributed.get_world_size() + + fetchs[key] = (metric_dict[key], AverageMeter( + key, '7.4f', need_avg=True)) + + return fetchs + + +def create_optimizer(config, step_each_epoch): + # create learning_rate instance + optimizer, lr_sch = build_optimizer( + config["Optimizer"], config["Global"]["epochs"], step_each_epoch) + return optimizer, lr_sch + + +def create_strategy(config): + """ + Create build strategy and exec strategy. + + Args: + config(dict): config + + Returns: + build_strategy: build strategy + exec_strategy: exec strategy + """ + build_strategy = paddle.static.BuildStrategy() + exec_strategy = paddle.static.ExecutionStrategy() + + exec_strategy.num_threads = 1 + exec_strategy.num_iteration_per_drop_scope = ( + 10000 + if 'AMP' in config and config.AMP.get("level", "O1") == "O2" else 10) + + fuse_op = True if 'AMP' in config else False + + fuse_bn_act_ops = config.get('fuse_bn_act_ops', fuse_op) + fuse_elewise_add_act_ops = config.get('fuse_elewise_add_act_ops', fuse_op) + fuse_bn_add_act_ops = config.get('fuse_bn_add_act_ops', fuse_op) + enable_addto = config.get('enable_addto', fuse_op) + + build_strategy.fuse_bn_act_ops = fuse_bn_act_ops + build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops + build_strategy.fuse_bn_add_act_ops = fuse_bn_add_act_ops + build_strategy.enable_addto = enable_addto + + return build_strategy, exec_strategy + + +def dist_optimizer(config, optimizer): + """ + Create a distributed optimizer based on a normal optimizer + + Args: + config(dict): + optimizer(): a normal optimizer + + Returns: + optimizer: a distributed optimizer + """ + build_strategy, exec_strategy = create_strategy(config) + + dist_strategy = DistributedStrategy() + dist_strategy.execution_strategy = exec_strategy + dist_strategy.build_strategy = build_strategy + + dist_strategy.nccl_comm_num = 1 + dist_strategy.fuse_all_reduce_ops = True + dist_strategy.fuse_grad_size_in_MB = 16 + optimizer = fleet.distributed_optimizer(optimizer, strategy=dist_strategy) + + return optimizer + + +def mixed_precision_optimizer(config, optimizer): + if 'AMP' in config: + amp_cfg = config.AMP if config.AMP else dict() + scale_loss = amp_cfg.get('scale_loss', 1.0) + use_dynamic_loss_scaling = amp_cfg.get('use_dynamic_loss_scaling', + False) + use_pure_fp16 = amp_cfg.get("level", "O1") == "O2" + optimizer = paddle.static.amp.decorate( + optimizer, + init_loss_scaling=scale_loss, + use_dynamic_loss_scaling=use_dynamic_loss_scaling, + use_pure_fp16=use_pure_fp16, + use_fp16_guard=True) + + return optimizer + + +def build(config, + main_prog, + startup_prog, + class_num=None, + step_each_epoch=100, + is_train=True, + is_distributed=True): + """ + Build a program using a model and an optimizer + 1. create feeds + 2. create a dataloader + 3. create a model + 4. create fetchs + 5. 
create an optimizer + + Args: + config(dict): config + main_prog(): main program + startup_prog(): startup program + class_num(int): the class number of network, required if use_mix + is_train(bool): train or eval + is_distributed(bool): whether to use distributed training method + + Returns: + dataloader(): a bridge between the model and the data + fetchs(dict): dict of model outputs(included loss and measures) + """ + with paddle.static.program_guard(main_prog, startup_prog): + with paddle.utils.unique_name.guard(): + mode = "Train" if is_train else "Eval" + use_mix = "batch_transform_ops" in config["DataLoader"][mode][ + "dataset"] + feeds = create_feeds( + config["Global"]["image_shape"], + use_mix, + class_num=class_num, + dtype="float32") + + # build model + # data_format should be assigned in arch-dict + input_image_channel = config["Global"]["image_shape"][ + 0] # default as [3, 224, 224] + model = build_model(config) + out = model(feeds["data"]) + # end of build model + + fetchs = create_fetchs( + out, + feeds, + config["Arch"], + epsilon=config.get('ls_epsilon'), + class_num=class_num, + use_mix=use_mix, + config=config, + mode=mode) + lr_scheduler = None + optimizer = None + if is_train: + optimizer, lr_scheduler = build_optimizer( + config["Optimizer"], config["Global"]["epochs"], + step_each_epoch) + optimizer = mixed_precision_optimizer(config, optimizer) + if is_distributed: + optimizer = dist_optimizer(config, optimizer) + optimizer.minimize(fetchs['loss'][0]) + return fetchs, lr_scheduler, feeds, optimizer + + +def compile(config, program, loss_name=None, share_prog=None): + """ + Compile the program + + Args: + config(dict): config + program(): the program which is wrapped by + loss_name(str): loss name + share_prog(): the shared program, used for evaluation during training + + Returns: + compiled_program(): a compiled program + """ + build_strategy, exec_strategy = create_strategy(config) + + compiled_program = paddle.static.CompiledProgram( + program).with_data_parallel( + share_vars_from=share_prog, + loss_name=loss_name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + + return compiled_program + + +total_step = 0 + + +def run(dataloader, + exe, + program, + feeds, + fetchs, + epoch=0, + mode='train', + config=None, + vdl_writer=None, + lr_scheduler=None, + profiler_options=None): + """ + Feed data to the model and fetch the measures and loss + + Args: + dataloader(paddle io dataloader): + exe(): + program(): + fetchs(dict): dict of measures and the loss + epoch(int): epoch of training or evaluation + model(str): log only + + Returns: + """ + fetch_list = [f[0] for f in fetchs.values()] + metric_dict = OrderedDict([("lr", AverageMeter( + 'lr', 'f', postfix=",", need_avg=False))]) + + for k in fetchs: + metric_dict[k] = fetchs[k][1] + + metric_dict["batch_time"] = AverageMeter( + 'batch_cost', '.5f', postfix=" s,") + metric_dict["reader_time"] = AverageMeter( + 'reader_cost', '.5f', postfix=" s,") + + for m in metric_dict.values(): + m.reset() + + use_dali = config["Global"].get('use_dali', False) + tic = time.time() + + if not use_dali: + dataloader = dataloader() + + idx = 0 + batch_size = None + while True: + # The DALI maybe raise RuntimeError for some particular images, such as ImageNet1k/n04418357_26036.JPEG + try: + batch = next(dataloader) + except StopIteration: + break + except RuntimeError: + logger.warning( + "Except RuntimeError when reading data from dataloader, try to read once again..." 
+ ) + continue + idx += 1 + # ignore the warmup iters + if idx == 5: + metric_dict["batch_time"].reset() + metric_dict["reader_time"].reset() + + metric_dict['reader_time'].update(time.time() - tic) + + profiler.add_profiler_step(profiler_options) + + if use_dali: + batch_size = batch[0]["data"].shape()[0] + feed_dict = batch[0] + else: + batch_size = batch[0].shape()[0] + feed_dict = { + key.name: batch[idx] + for idx, key in enumerate(feeds.values()) + } + + metrics = exe.run(program=program, + feed=feed_dict, + fetch_list=fetch_list) + + for name, m in zip(fetchs.keys(), metrics): + metric_dict[name].update(np.mean(m), batch_size) + metric_dict["batch_time"].update(time.time() - tic) + if mode == "train": + metric_dict['lr'].update(lr_scheduler.get_lr()) + + fetchs_str = ' '.join([ + str(metric_dict[key].mean) + if "time" in key else str(metric_dict[key].value) + for key in metric_dict + ]) + ips_info = " ips: {:.5f} images/sec.".format( + batch_size / metric_dict["batch_time"].avg) + fetchs_str += ips_info + + if lr_scheduler is not None: + lr_scheduler.step() + + if vdl_writer: + global total_step + logger.scaler('loss', metrics[0][0], total_step, vdl_writer) + total_step += 1 + if mode == 'eval': + if idx % config.get('print_interval', 10) == 0: + logger.info("{:s} step:{:<4d} {:s}".format(mode, idx, + fetchs_str)) + else: + epoch_str = "epoch:{:<3d}".format(epoch) + step_str = "{:s} step:{:<4d}".format(mode, idx) + + if idx % config.get('print_interval', 10) == 0: + logger.info("{:s} {:s} {:s}".format(epoch_str, step_str, + fetchs_str)) + + tic = time.time() + + end_str = ' '.join([str(m.mean) for m in metric_dict.values()] + + [metric_dict["batch_time"].total]) + ips_info = "ips: {:.5f} images/sec.".format(batch_size / + metric_dict["batch_time"].avg) + if mode == 'eval': + logger.info("END {:s} {:s} {:s}".format(mode, end_str, ips_info)) + else: + end_epoch_str = "END epoch:{:<3d}".format(epoch) + logger.info("{:s} {:s} {:s} {:s}".format(end_epoch_str, mode, end_str, + ips_info)) + if use_dali: + dataloader.reset() + + # return top1_acc in order to save the best model + if mode == 'eval': + return fetchs["top1"][1].avg diff --git a/src/PaddleClas/ppcls/static/run_dali.sh b/src/PaddleClas/ppcls/static/run_dali.sh new file mode 100644 index 0000000..5bf0ef4 --- /dev/null +++ b/src/PaddleClas/ppcls/static/run_dali.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +python3.7 -m paddle.distributed.launch \ + --gpus="0,1,2,3" \ + ppcls/static/train.py \ + -c ./ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml diff --git a/src/PaddleClas/ppcls/static/save_load.py b/src/PaddleClas/ppcls/static/save_load.py new file mode 100644 index 0000000..13badfd --- /dev/null +++ b/src/PaddleClas/ppcls/static/save_load.py @@ -0,0 +1,139 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
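Note: program.run() above assumes every fetchs entry is a (fetch_var, AverageMeter) pair and reads the meters' avg/mean/value attributes when building the log line. A rough stand-in for ppcls.utils.misc.AverageMeter (not the real implementation, which lives in ppcls/utils/misc.py), showing only the contract run() relies on:

class AverageMeterSketch:
    # Illustrative only; the actual class also carries string formatting
    # (mean/value/total) used by the log messages above.
    def __init__(self, name, fmt='f', postfix='', need_avg=True):
        self.name, self.fmt, self.postfix, self.need_avg = name, fmt, postfix, need_avg
        self.reset()

    def reset(self):
        self.val, self.sum, self.count = 0, 0, 0

    def update(self, val, n=1):
        self.val = val          # last observed value
        self.sum += val * n     # weighted by batch size n
        self.count += n

    @property
    def avg(self):              # running average, e.g. for the ips computation
        return self.sum / max(self.count, 1)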
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import errno
+import os
+import re
+import shutil
+import tempfile
+
+import paddle
+
+from ppcls.utils import logger
+
+__all__ = ['init_model', 'save_model']
+
+
+def _mkdir_if_not_exist(path):
+    """
+    mkdir if not exists; ignore the exception when multiple processes mkdir at the same time
+    """
+    if not os.path.exists(path):
+        try:
+            os.makedirs(path)
+        except OSError as e:
+            if e.errno == errno.EEXIST and os.path.isdir(path):
+                logger.warning(
+                    'another process has already created {}'.format(path))
+            else:
+                raise OSError('Failed to mkdir {}'.format(path))
+
+
+def _load_state(path):
+    if os.path.exists(path + '.pdopt'):
+        # XXX another hack to ignore the optimizer state
+        tmp = tempfile.mkdtemp()
+        dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
+        shutil.copy(path + '.pdparams', dst + '.pdparams')
+        state = paddle.static.load_program_state(dst)
+        shutil.rmtree(tmp)
+    else:
+        state = paddle.static.load_program_state(path)
+    return state
+
+
+def load_params(exe, prog, path, ignore_params=None):
+    """
+    Load model from the given path.
+    Args:
+        exe (fluid.Executor): The fluid.Executor object.
+        prog (fluid.Program): the Program object to load weights into.
+        path (string): URL string or local model path.
+        ignore_params (list): variables to skip when finetuning.
+            It can be specified by finetune_exclude_pretrained_params
+            and the usage can refer to the document
+            docs/advanced_tutorials/TRANSFER_LEARNING.md
+    """
+    if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
+        raise ValueError("Model pretrain path {} does not "
+                         "exist.".format(path))
+
+    logger.info("Loading parameters from {}...".format(path))
+
+    ignore_set = set()
+    state = _load_state(path)
+
+    # ignore parameters whose shape mismatches between
+    # the model and the pretrained weights.
+    all_var_shape = {}
+    for block in prog.blocks:
+        for param in block.all_parameters():
+            all_var_shape[param.name] = param.shape
+    ignore_set.update([
+        name for name, shape in all_var_shape.items()
+        if name in state and shape != state[name].shape
+    ])
+
+    if ignore_params:
+        all_var_names = [var.name for var in prog.list_vars()]
+        ignore_list = filter(
+            lambda var: any([re.match(name, var) for name in ignore_params]),
+            all_var_names)
+        ignore_set.update(list(ignore_list))
+
+    if len(ignore_set) > 0:
+        for k in ignore_set:
+            if k in state:
+                logger.warning(
+                    'variable {} is already excluded automatically'.format(k))
+                del state[k]
+
+    paddle.static.set_program_state(prog, state)
+
+
+def init_model(config, program, exe):
+    """
+    load model from checkpoint or pretrained_model
+    """
+    checkpoints = config.get('checkpoints')
+    if checkpoints:
+        paddle.static.load(program, checkpoints, exe)
+        logger.info("Finished initializing model from {}".format(checkpoints))
+        return
+
+    pretrained_model = config.get('pretrained_model')
+    if pretrained_model:
+        if not isinstance(pretrained_model, list):
+            pretrained_model = [pretrained_model]
+        for pretrain in pretrained_model:
+            load_params(exe, program, pretrain)
+        logger.info("Finished initializing model from {}".format(
+            pretrained_model))
+
+
+def save_model(program, model_path, epoch_id, prefix='ppcls'):
+    """
+    save model to the target path
+    """
+    if paddle.distributed.get_rank() != 0:
+        return
+    model_path = os.path.join(model_path, str(epoch_id))
+    _mkdir_if_not_exist(model_path)
+    model_prefix = os.path.join(model_path, prefix)
+    paddle.static.save(program, model_prefix)
+    logger.info("Saved model in {}".format(model_path))
diff --git a/src/PaddleClas/ppcls/static/train.py b/src/PaddleClas/ppcls/static/train.py
new file mode 100644
index 0000000..9c03598
--- /dev/null
+++ b/src/PaddleClas/ppcls/static/train.py
@@ -0,0 +1,209 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
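Note: init_model and save_model above implement the resume-vs-finetune split: 'checkpoints' restores a full paddle.static.save snapshot, while 'pretrained_model' goes through load_params with shape checking. A hedged usage sketch (paths are hypothetical, train_prog/exe come from the caller):

global_cfg = {
    'checkpoints': None,  # set to a prefix written by save_model to resume training
    'pretrained_model': './pretrained/ResNet50_pretrained',  # hypothetical path
}
init_model(global_cfg, train_prog, exe)         # no checkpoint, so falls back to load_params()
save_model(train_prog, './output/ResNet50', 0)  # writes ./output/ResNet50/0/ppcls.* on rank 0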
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import sys +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '../../'))) + +import paddle +from paddle.distributed import fleet +from visualdl import LogWriter + +from ppcls.data import build_dataloader +from ppcls.utils.config import get_config, print_config +from ppcls.utils import logger +from ppcls.utils.logger import init_logger +from ppcls.static.save_load import init_model, save_model +from ppcls.static import program + + +def parse_args(): + parser = argparse.ArgumentParser("PaddleClas train script") + parser.add_argument( + '-c', + '--config', + type=str, + default='configs/ResNet/ResNet50.yaml', + help='config file path') + parser.add_argument( + '-p', + '--profiler_options', + type=str, + default=None, + help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".' + ) + parser.add_argument( + '-o', + '--override', + action='append', + default=[], + help='config options to be overridden') + args = parser.parse_args() + return args + + +def main(args): + """ + all the config of training paradigm should be in config["Global"] + """ + config = get_config(args.config, overrides=args.override, show=False) + global_config = config["Global"] + + mode = "train" + + log_file = os.path.join(global_config['output_dir'], + config["Arch"]["name"], f"{mode}.log") + init_logger(name='root', log_file=log_file) + print_config(config) + + if global_config.get("is_distributed", True): + fleet.init(is_collective=True) + # assign the device + use_gpu = global_config.get("use_gpu", True) + # amp related config + if 'AMP' in config: + AMP_RELATED_FLAGS_SETTING = { + 'FLAGS_cudnn_exhaustive_search': 1, + 'FLAGS_conv_workspace_size_limit': 1500, + 'FLAGS_cudnn_batchnorm_spatial_persistent': 1, + 'FLAGS_max_inplace_grad_add': 8, + } + os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1' + paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING) + + use_xpu = global_config.get("use_xpu", False) + use_npu = global_config.get("use_npu", False) + assert ( + use_gpu and use_xpu and use_npu + ) is not True, "gpu, xpu and npu can not be true in the same time in static mode!" 
+
+    if use_gpu:
+        device = paddle.set_device('gpu')
+    elif use_xpu:
+        device = paddle.set_device('xpu')
+    elif use_npu:
+        device = paddle.set_device('npu')
+    else:
+        device = paddle.set_device('cpu')
+
+    # visualDL
+    vdl_writer = None
+    if global_config["use_visualdl"]:
+        vdl_dir = os.path.join(global_config["output_dir"], "vdl")
+        vdl_writer = LogWriter(vdl_dir)
+
+    # build dataloader
+    eval_dataloader = None
+    use_dali = global_config.get('use_dali', False)
+
+    class_num = config["Arch"].get("class_num", None)
+    config["DataLoader"].update({"class_num": class_num})
+    train_dataloader = build_dataloader(
+        config["DataLoader"], "Train", device=device, use_dali=use_dali)
+    if global_config["eval_during_train"]:
+        eval_dataloader = build_dataloader(
+            config["DataLoader"], "Eval", device=device, use_dali=use_dali)
+
+    step_each_epoch = len(train_dataloader)
+
+    # startup_prog is used to do some parameter init work,
+    # and train prog is used to hold the network
+    startup_prog = paddle.static.Program()
+    train_prog = paddle.static.Program()
+
+    best_top1_acc = 0.0  # best top1 acc record
+
+    train_fetchs, lr_scheduler, train_feeds, optimizer = program.build(
+        config,
+        train_prog,
+        startup_prog,
+        class_num,
+        step_each_epoch=step_each_epoch,
+        is_train=True,
+        is_distributed=global_config.get("is_distributed", True))
+
+    if global_config["eval_during_train"]:
+        eval_prog = paddle.static.Program()
+        eval_fetchs, _, eval_feeds, _ = program.build(
+            config,
+            eval_prog,
+            startup_prog,
+            is_train=False,
+            is_distributed=global_config.get("is_distributed", True))
+        # clone to prune some content which is irrelevant in eval_prog
+        eval_prog = eval_prog.clone(for_test=True)
+
+    # create the Executor on the specified device
+    exe = paddle.static.Executor(device)
+    # Parameter initialization
+    exe.run(startup_prog)
+    # load pretrained models or checkpoints
+    init_model(global_config, train_prog, exe)
+
+    if 'AMP' in config and config.AMP.get("level", "O1") == "O2":
+        optimizer.amp_init(
+            device,
+            scope=paddle.static.global_scope(),
+            test_program=eval_prog
+            if global_config["eval_during_train"] else None)
+
+    if not global_config.get("is_distributed", True):
+        compiled_train_prog = program.compile(
+            config, train_prog, loss_name=train_fetchs["loss"][0].name)
+    else:
+        compiled_train_prog = train_prog
+
+    if eval_dataloader is not None:
+        compiled_eval_prog = program.compile(config, eval_prog)
+
+    for epoch_id in range(global_config["epochs"]):
+        # 1. train with train dataset
+        program.run(train_dataloader, exe, compiled_train_prog, train_feeds,
+                    train_fetchs, epoch_id, 'train', config, vdl_writer,
+                    lr_scheduler, args.profiler_options)
+        # 2. evaluate with eval dataset
+        if global_config["eval_during_train"] and epoch_id % global_config[
+                "eval_interval"] == 0:
+            top1_acc = program.run(eval_dataloader, exe, compiled_eval_prog,
+                                   eval_feeds, eval_fetchs, epoch_id, "eval",
+                                   config)
+            if top1_acc > best_top1_acc:
+                best_top1_acc = top1_acc
+                message = "The best top1 acc {:.5f}, in epoch: {:d}".format(
+                    best_top1_acc, epoch_id)
+                logger.info(message)
+                if epoch_id % global_config["save_interval"] == 0:
+
+                    model_path = os.path.join(global_config["output_dir"],
                                              config["Arch"]["name"])
+                    save_model(train_prog, model_path, "best_model")
+
+        # 3.
save the persistable model + if epoch_id % global_config["save_interval"] == 0: + model_path = os.path.join(global_config["output_dir"], + config["Arch"]["name"]) + save_model(train_prog, model_path, epoch_id) + + +if __name__ == '__main__': + paddle.enable_static() + args = parse_args() + main(args) diff --git a/src/PaddleClas/ppcls/utils/__init__.py b/src/PaddleClas/ppcls/utils/__init__.py new file mode 100644 index 0000000..632cc78 --- /dev/null +++ b/src/PaddleClas/ppcls/utils/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import logger +from . import misc +from . import model_zoo +from . import metrics + +from .save_load import init_model, save_model +from .config import get_config +from .misc import AverageMeter +from .metrics import multi_hot_encode +from .metrics import hamming_distance +from .metrics import accuracy_score +from .metrics import precision_recall_fscore +from .metrics import mean_average_precision diff --git a/src/PaddleClas/ppcls/utils/__pycache__/__init__.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..742ad34 Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/check.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/check.cpython-39.pyc new file mode 100644 index 0000000..5537a77 Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/check.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/config.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000..bb62e36 Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/config.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/download.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/download.cpython-39.pyc new file mode 100644 index 0000000..7aafa18 Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/download.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/logger.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/logger.cpython-39.pyc new file mode 100644 index 0000000..1a80880 Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/logger.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/metrics.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/metrics.cpython-39.pyc new file mode 100644 index 0000000..d0d59db Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/metrics.cpython-39.pyc differ diff --git a/src/PaddleClas/ppcls/utils/__pycache__/misc.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/misc.cpython-39.pyc new file mode 100644 index 0000000..bc2f9ff Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/misc.cpython-39.pyc differ diff --git 
a/src/PaddleClas/ppcls/utils/__pycache__/model_zoo.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/model_zoo.cpython-39.pyc
new file mode 100644
index 0000000..52849be
Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/model_zoo.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/utils/__pycache__/profiler.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/profiler.cpython-39.pyc
new file mode 100644
index 0000000..87676dd
Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/profiler.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/utils/__pycache__/save_load.cpython-39.pyc b/src/PaddleClas/ppcls/utils/__pycache__/save_load.cpython-39.pyc
new file mode 100644
index 0000000..621cfc1
Binary files /dev/null and b/src/PaddleClas/ppcls/utils/__pycache__/save_load.cpython-39.pyc differ
diff --git a/src/PaddleClas/ppcls/utils/check.py b/src/PaddleClas/ppcls/utils/check.py
new file mode 100644
index 0000000..bc70308
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/check.py
@@ -0,0 +1,151 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+
+import paddle
+from paddle import is_compiled_with_cuda
+
+from ppcls.arch import get_architectures
+from ppcls.arch import similar_architectures
+from ppcls.arch import get_blacklist_model_in_static_mode
+from ppcls.utils import logger
+
+
+def check_version():
+    """
+    Log error and exit when the installed version of paddlepaddle is
+    not satisfied.
+    """
+    err = "PaddlePaddle version 1.8.0 or higher is required, " \
+          "or a suitable develop version. \n" \
+          "Please make sure the installed version matches your code."
+    try:
+        pass
+        # paddle.utils.require_version('0.0.0')
+    except Exception:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_gpu():
+    """
+    Log error and exit when using paddlepaddle cpu version.
+    """
+    err = "You are using paddlepaddle cpu version! Please try to " \
+          "install paddlepaddle-gpu to run model on GPU."
+
+    try:
+        assert is_compiled_with_cuda()
+    except AssertionError:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_architecture(architecture):
+    """
+    check architecture and recommend similar architectures
+    """
+    assert isinstance(architecture, dict), \
+        ("the type of architecture({}) should be dict".format(architecture))
+    assert "name" in architecture, \
+        ("\"name\" must be in the architecture keys; got: {}".format(
+            architecture.keys()))
+
+    similar_names = similar_architectures(architecture["name"],
+                                          get_architectures())
+    model_list = ', '.join(similar_names)
+    err = "Architecture [{}] does not exist! Maybe you want: [{}]" \
+          "".format(architecture["name"], model_list)
+    try:
+        assert architecture["name"] in similar_names
+    except AssertionError:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_model_with_running_mode(architecture):
+    """
+    check whether the model is consistent with the operating mode
+    """
+    # some models are not supported in static mode
+    blacklist = get_blacklist_model_in_static_mode()
+    if not paddle.in_dynamic_mode() and architecture["name"] in blacklist:
+        logger.error("Model: {} is not supported in static mode.".format(
+            architecture["name"]))
+        sys.exit(1)
+    return
+
+
+def check_mix(architecture, use_mix=False):
+    """
+    check mix parameter
+    """
+    err = "Cannot use mix processing in GoogLeNet, " \
+          "please set use_mix = False."
+    try:
+        if architecture["name"] == "GoogLeNet":
+            assert use_mix is not True
+    except AssertionError:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_classes_num(classes_num):
+    """
+    check classes_num
+    """
+    err = "classes_num({}) should be a positive integer " \
+          "and larger than 1".format(classes_num)
+    try:
+        assert isinstance(classes_num, int)
+        assert classes_num > 1
+    except AssertionError:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_data_dir(path):
+    """
+    check data_dir
+    """
+    err = "Data path {} does not exist, please provide a correct path" \
+          "".format(path)
+    try:
+        assert os.path.isdir(path)
+    except AssertionError:
+        logger.error(err)
+        sys.exit(1)
+
+
+def check_function_params(config, key):
+    """
+    check the specified config section
+    """
+    k_config = config.get(key)
+    assert k_config is not None, \
+        ('{} is required in config'.format(key))
+
+    assert k_config.get('function'), \
+        ('function is required in {} config'.format(key))
+    params = k_config.get('params')
+    assert params is not None, \
+        ('params is required in {} config'.format(key))
+    assert isinstance(params, dict), \
+        ('the params in {} config should be a dict'.format(key))
diff --git a/src/PaddleClas/ppcls/utils/config.py b/src/PaddleClas/ppcls/utils/config.py
new file mode 100644
index 0000000..e3277c4
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/config.py
@@ -0,0 +1,210 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
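Note: override_config, defined just below, is what backs the -o/--override command-line flag; each option is a dotted key path, and str2num coerces the string value. A small sketch with illustrative keys:

cfg = {'Global': {'epochs': 120}, 'Optimizer': {'lr': {'learning_rate': 0.1}}}
override_config(cfg, ['Global.epochs=200', 'Optimizer.lr.learning_rate=0.01'])
assert cfg['Global']['epochs'] == 200                    # '200' coerced to int
assert cfg['Optimizer']['lr']['learning_rate'] == 0.01   # '0.01' coerced to float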
+
+import os
+import copy
+import argparse
+import yaml
+from ppcls.utils import logger
+from ppcls.utils import check
+__all__ = ['get_config']
+
+
+class AttrDict(dict):
+    def __getattr__(self, key):
+        return self[key]
+
+    def __setattr__(self, key, value):
+        if key in self.__dict__:
+            self.__dict__[key] = value
+        else:
+            self[key] = value
+
+    def __deepcopy__(self, content):
+        return copy.deepcopy(dict(self))
+
+
+def create_attr_dict(yaml_config):
+    from ast import literal_eval
+    for key, value in yaml_config.items():
+        if type(value) is dict:
+            yaml_config[key] = value = AttrDict(value)
+        if isinstance(value, str):
+            try:
+                value = literal_eval(value)
+            except BaseException:
+                pass
+        if isinstance(value, AttrDict):
+            create_attr_dict(yaml_config[key])
+        else:
+            yaml_config[key] = value
+
+
+def parse_config(cfg_file):
+    """Load a config file into AttrDict"""
+    with open(cfg_file, 'r') as fopen:
+        yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader))
+    create_attr_dict(yaml_config)
+    return yaml_config
+
+
+def print_dict(d, delimiter=0):
+    """
+    Recursively visualize a dict,
+    indenting according to the nesting of its keys.
+    """
+    placeholder = "-" * 60
+    for k, v in sorted(d.items()):
+        if isinstance(v, dict):
+            logger.info("{}{} : ".format(delimiter * " ", k))
+            print_dict(v, delimiter + 4)
+        elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict):
+            logger.info("{}{} : ".format(delimiter * " ", k))
+            for value in v:
+                print_dict(value, delimiter + 4)
+        else:
+            logger.info("{}{} : {}".format(delimiter * " ", k, v))
+        if k.isupper():
+            logger.info(placeholder)
+
+
+def print_config(config):
+    """
+    visualize configs
+    Arguments:
+        config: configs
+    """
+    logger.advertise()
+    print_dict(config)
+
+
+def check_config(config):
+    """
+    Check config
+    """
+    check.check_version()
+    use_gpu = config.get('use_gpu', True)
+    if use_gpu:
+        check.check_gpu()
+    architecture = config.get('ARCHITECTURE')
+    #check.check_architecture(architecture)
+    use_mix = config.get('use_mix', False)
+    check.check_mix(architecture, use_mix)
+    classes_num = config.get('classes_num')
+    check.check_classes_num(classes_num)
+    mode = config.get('mode', 'train')
+    if mode.lower() == 'train':
+        check.check_function_params(config, 'LEARNING_RATE')
+        check.check_function_params(config, 'OPTIMIZER')
+
+
+def override(dl, ks, v):
+    """
+    Recursively replace a value in a nested dict or list
+    Args:
+        dl(dict or list): dict or list to be replaced
+        ks(list): list of keys
+        v(str): value to be replaced
+    """
+
+    def str2num(v):
+        try:
+            return eval(v)
+        except Exception:
+            return v
+
+    assert isinstance(dl, (list, dict)), ("{} should be a list or a dict".format(dl))
+    assert len(ks) > 0, ('length of keys should be larger than 0')
+    if isinstance(dl, list):
+        k = str2num(ks[0])
+        if len(ks) == 1:
+            assert k < len(dl), ('index({}) out of range({})'.format(k, dl))
+            dl[k] = str2num(v)
+        else:
+            override(dl[k], ks[1:], v)
+    else:
+        if len(ks) == 1:
+            # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
+            if not ks[0] in dl:
+                print('A new field ({}) detected!'.format(ks[0]))
+            dl[ks[0]] = str2num(v)
+        else:
+            override(dl[ks[0]], ks[1:], v)
+
+
+def override_config(config, options=None):
+    """
+    Recursively override the config
+    Args:
+        config(dict): dict to be replaced
+        options(list): list of pairs(key0.key1.idx.key2=value)
+            such as: [
+                'topk=2',
+                'VALID.transforms.1.ResizeImage.resize_short=300'
+            ]
+    Returns:
+        config(dict): replaced config
+    """
+    if options is not None:
+        for opt in options:
+            assert isinstance(opt, str), (
+                "option({}) should be a str".format(opt))
+            assert "=" in opt, (
+                "option({}) should contain a '=' "
+                "to distinguish between key and value".format(opt))
+            pair = opt.split('=')
+            assert len(pair) == 2, ("there can be only one '=' in the option")
+            key, value = pair
+            keys = key.split('.')
+            override(config, keys, value)
+    return config
+
+
+def get_config(fname, overrides=None, show=False):
+    """
+    Read config from file
+    """
+    assert os.path.exists(fname), (
+        'config file ({}) does not exist'.format(fname))
+    config = parse_config(fname)
+    override_config(config, overrides)
+    if show:
+        print_config(config)
+    # check_config(config)
+    return config
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("generic-image-rec train script")
+    parser.add_argument(
+        '-c',
+        '--config',
+        type=str,
+        default='configs/config.yaml',
+        help='config file path')
+    parser.add_argument(
+        '-o',
+        '--override',
+        action='append',
+        default=[],
+        help='config options to be overridden')
+    parser.add_argument(
+        '-p',
+        '--profiler_options',
+        type=str,
+        default=None,
+        help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".'
+    )
+    args = parser.parse_args()
+    return args
diff --git a/src/PaddleClas/ppcls/utils/download.py b/src/PaddleClas/ppcls/utils/download.py
new file mode 100644
index 0000000..9c45750
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/download.py
@@ -0,0 +1,319 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import os.path as osp
+import shutil
+import requests
+import hashlib
+import tarfile
+import zipfile
+import time
+from collections import OrderedDict
+from tqdm import tqdm
+
+from ppcls.utils import logger
+
+__all__ = ['get_weights_path_from_url']
+
+WEIGHTS_HOME = osp.expanduser("~/.paddleclas/weights")
+
+DOWNLOAD_RETRY_LIMIT = 3
+
+
+def is_url(path):
+    """
+    Whether path is URL.
+    Args:
+        path (string): URL string or not.
+    """
+    return path.startswith('http://') or path.startswith('https://')
+
+
+def get_weights_path_from_url(url, md5sum=None):
+    """Get weights path from WEIGHTS_HOME; if not exists,
+    download it from url.
+
+    Args:
+        url (str): download url
+        md5sum (str): md5 sum of download package
+
+    Returns:
+        str: a local path to save downloaded weights.
+
+    Examples:
+        ..
code-block:: python + + from paddle.utils.download import get_weights_path_from_url + + resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams' + local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url) + + """ + path = get_path_from_url(url, WEIGHTS_HOME, md5sum) + return path + + +def _map_path(url, root_dir): + # parse path after download under root_dir + fname = osp.split(url)[-1] + fpath = fname + return osp.join(root_dir, fpath) + + +def _get_unique_endpoints(trainer_endpoints): + # Sorting is to avoid different environmental variables for each card + trainer_endpoints.sort() + ips = set() + unique_endpoints = set() + for endpoint in trainer_endpoints: + ip = endpoint.split(":")[0] + if ip in ips: + continue + ips.add(ip) + unique_endpoints.add(endpoint) + logger.info("unique_endpoints {}".format(unique_endpoints)) + return unique_endpoints + + +def get_path_from_url(url, + root_dir, + md5sum=None, + check_exist=True, + decompress=True): + """ Download from given url to root_dir. + if file or directory specified by url is exists under + root_dir, return the path directly, otherwise download + from url and decompress it, return the path. + + Args: + url (str): download url + root_dir (str): root dir for downloading, it should be + WEIGHTS_HOME or DATASET_HOME + md5sum (str): md5 sum of download package + + Returns: + str: a local path to save downloaded models & weights & datasets. + """ + + from paddle.fluid.dygraph.parallel import ParallelEnv + + assert is_url(url), "downloading from {} not a url".format(url) + # parse path after download to decompress under root_dir + fullpath = _map_path(url, root_dir) + # Mainly used to solve the problem of downloading data from different + # machines in the case of multiple machines. Different ips will download + # data, and the same ip will only download data once. + unique_endpoints = _get_unique_endpoints(ParallelEnv() + .trainer_endpoints[:]) + if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): + logger.info("Found {}".format(fullpath)) + else: + if ParallelEnv().current_endpoint in unique_endpoints: + fullpath = _download(url, root_dir, md5sum) + else: + while not os.path.exists(fullpath): + time.sleep(1) + + if ParallelEnv().current_endpoint in unique_endpoints: + if decompress and (tarfile.is_tarfile(fullpath) or + zipfile.is_zipfile(fullpath)): + fullpath = _decompress(fullpath) + + return fullpath + + +def _download(url, path, md5sum=None): + """ + Download from url, save to path. + + url (str): download url + path (str): download to given path + """ + if not osp.exists(path): + os.makedirs(path) + + fname = osp.split(url)[-1] + fullname = osp.join(path, fname) + retry_cnt = 0 + + while not (osp.exists(fullname) and _md5check(fullname, md5sum)): + if retry_cnt < DOWNLOAD_RETRY_LIMIT: + retry_cnt += 1 + else: + raise RuntimeError("Download from {} failed. " + "Retry limit reached".format(url)) + + logger.info("Downloading {} from {}".format(fname, url)) + + try: + req = requests.get(url, stream=True) + except Exception as e: # requests.exceptions.ConnectionError + logger.info( + "Downloading {} from {} failed {} times with exception {}". 
+def _download(url, path, md5sum=None):
+    """
+    Download from url, save to path.
+
+    url (str): download url
+    path (str): download to given path
+    """
+    if not osp.exists(path):
+        os.makedirs(path)
+
+    fname = osp.split(url)[-1]
+    fullname = osp.join(path, fname)
+    retry_cnt = 0
+
+    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
+        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
+            retry_cnt += 1
+        else:
+            raise RuntimeError("Download from {} failed. "
+                               "Retry limit reached".format(url))
+
+        logger.info("Downloading {} from {}".format(fname, url))
+
+        try:
+            req = requests.get(url, stream=True)
+        except Exception as e:  # requests.exceptions.ConnectionError
+            logger.info("Downloading {} from {} failed {} times with "
+                        "exception {}".format(fname, url, retry_cnt + 1,
+                                              str(e)))
+            time.sleep(1)
+            continue
+
+        if req.status_code != 200:
+            raise RuntimeError("Downloading from {} failed with code "
+                               "{}!".format(url, req.status_code))
+
+        # To protect against interrupted downloads, write to tmp_fullname
+        # first and move it to fullname once the download has finished.
+        tmp_fullname = fullname + "_tmp"
+        total_size = req.headers.get('content-length')
+        with open(tmp_fullname, 'wb') as f:
+            if total_size:
+                with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
+                    for chunk in req.iter_content(chunk_size=1024):
+                        f.write(chunk)
+                        pbar.update(1)
+            else:
+                for chunk in req.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+        shutil.move(tmp_fullname, fullname)
+
+    return fullname
+
+
+def _md5check(fullname, md5sum=None):
+    if md5sum is None:
+        return True
+
+    logger.info("File {} md5 checking...".format(fullname))
+    md5 = hashlib.md5()
+    with open(fullname, 'rb') as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            md5.update(chunk)
+    calc_md5sum = md5.hexdigest()
+
+    if calc_md5sum != md5sum:
+        logger.info("File {} md5 check failed, {}(calc) != "
+                    "{}(base)".format(fullname, calc_md5sum, md5sum))
+        return False
+    return True
+
+
+def _decompress(fname):
+    """
+    Decompress zip and tar files.
+    """
+    logger.info("Decompressing {}...".format(fname))
+
+    # To protect against interrupted decompression, decompress into an
+    # fpath_tmp directory first; on success, move the files to fpath,
+    # delete fpath_tmp and remove the downloaded archive.
+
+    if tarfile.is_tarfile(fname):
+        uncompressed_path = _uncompress_file_tar(fname)
+    elif zipfile.is_zipfile(fname):
+        uncompressed_path = _uncompress_file_zip(fname)
+    else:
+        raise TypeError("Unsupported compress file type {}".format(fname))
+
+    return uncompressed_path
+
+
+def _uncompress_file_zip(filepath):
+    files = zipfile.ZipFile(filepath, 'r')
+    file_list = files.namelist()
+
+    file_dir = os.path.dirname(filepath)
+
+    if _is_a_single_file(file_list):
+        rootpath = file_list[0]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+
+        for item in file_list:
+            files.extract(item, file_dir)
+
+    elif _is_a_single_dir(file_list):
+        rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+
+        for item in file_list:
+            files.extract(item, file_dir)
+
+    else:
+        rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+        if not os.path.exists(uncompressed_path):
+            os.makedirs(uncompressed_path)
+        for item in file_list:
+            files.extract(item, os.path.join(file_dir, rootpath))
+
+    files.close()
+
+    return uncompressed_path
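The extraction branches above (and in the tar variant that follows) hinge on the archive layout. A tiny sketch of the two helper predicates, defined a few functions below, applied to made-up member lists:

    # Made-up archive member lists illustrating the three handled layouts.
    print(_is_a_single_file(["weights.pdparams"]))          # True: one member, no path separator
    print(_is_a_single_dir(["data/a.jpg", "data/b.jpg"]))   # True: one shared root directory
    print(_is_a_single_dir(["a.jpg", "b/c.jpg"]))           # False: mixed roots, so everything is
                                                            # extracted into a folder named after the archive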
+def _uncompress_file_tar(filepath, mode="r:*"):
+    files = tarfile.open(filepath, mode)
+    file_list = files.getnames()
+
+    file_dir = os.path.dirname(filepath)
+
+    if _is_a_single_file(file_list):
+        rootpath = file_list[0]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+        for item in file_list:
+            files.extract(item, file_dir)
+    elif _is_a_single_dir(file_list):
+        rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+        for item in file_list:
+            files.extract(item, file_dir)
+    else:
+        rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
+        uncompressed_path = os.path.join(file_dir, rootpath)
+        if not os.path.exists(uncompressed_path):
+            os.makedirs(uncompressed_path)
+
+        for item in file_list:
+            files.extract(item, os.path.join(file_dir, rootpath))
+
+    files.close()
+
+    return uncompressed_path
+
+
+def _is_a_single_file(file_list):
+    # str.find returns -1 when the separator is absent, so the original
+    # "< -1" comparison could never be true; "< 0" is the intended check.
+    if len(file_list) == 1 and file_list[0].find(os.sep) < 0:
+        return True
+    return False
+
+
+def _is_a_single_dir(file_list):
+    new_file_list = []
+    for file_path in file_list:
+        if '/' in file_path:
+            file_path = file_path.replace('/', os.sep)
+        elif '\\' in file_path:
+            file_path = file_path.replace('\\', os.sep)
+        new_file_list.append(file_path)
+
+    file_name = new_file_list[0].split(os.sep)[0]
+    for i in range(1, len(new_file_list)):
+        if file_name != new_file_list[i].split(os.sep)[0]:
+            return False
+    return True
diff --git a/src/PaddleClas/ppcls/utils/ema.py b/src/PaddleClas/ppcls/utils/ema.py
new file mode 100644
index 0000000..b54cdb1
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/ema.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import numpy as np
+
+
+class ExponentialMovingAverage():
+    """
+    Exponential Moving Average
+    Code was heavily based on https://github.com/Wanger-SJTU/SegToolbox.Pytorch/blob/master/lib/utils/ema.py
+    """
+
+    def __init__(self, model, decay, thres_steps=True):
+        self._model = model
+        self._decay = decay
+        self._thres_steps = thres_steps
+        self._shadow = {}
+        self._backup = {}
+
+    def register(self):
+        # snapshot every trainable parameter as the initial shadow value
+        self._update_step = 0
+        for name, param in self._model.named_parameters():
+            if param.stop_gradient is False:
+                self._shadow[name] = param.numpy().copy()
+
+    def update(self):
+        # optional warm-up: decay = min(decay, (1 + step) / (10 + step))
+        decay = min(self._decay, (1 + self._update_step) / (
+            10 + self._update_step)) if self._thres_steps else self._decay
+        for name, param in self._model.named_parameters():
+            if param.stop_gradient is False:
+                assert name in self._shadow
+                new_val = np.array(param.numpy().copy())
+                old_val = np.array(self._shadow[name])
+                # standard EMA update: shadow = decay * shadow + (1 - decay) * param
+                new_average = decay * old_val + (1 - decay) * new_val
+                self._shadow[name] = new_average
+        self._update_step += 1
+        return decay
+
+    def apply(self):
+        # back up the raw weights, then load the averaged (shadow) weights
+        for name, param in self._model.named_parameters():
+            if param.stop_gradient is False:
+                assert name in self._shadow
+                self._backup[name] = np.array(param.numpy().copy())
+                param.set_value(np.array(self._shadow[name]))
+
+    def restore(self):
+        # put the raw weights back after evaluation
+        for name, param in self._model.named_parameters():
+            if param.stop_gradient is False:
+                assert name in self._backup
+                param.set_value(self._backup[name])
+        self._backup = {}
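A minimal usage sketch for the EMA helper above. The model, data loader, train step, and decay value are placeholders, not names from this repository:

    model = SomeModel()                         # placeholder: any paddle.nn.Layer
    ema = ExponentialMovingAverage(model, decay=0.9999)
    ema.register()                              # snapshot the initial parameters

    for batch in loader:                        # placeholder data loader
        loss = train_step(model, batch)         # placeholder optimizer step
        ema.update()                            # fold the new weights into the shadow copy

    ema.apply()                                 # swap in the averaged weights...
    evaluate(model)                             # ...evaluate with them...
    ema.restore()                               # ...then swap the raw weights back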
diff --git a/src/PaddleClas/ppcls/utils/feature_maps_visualization/fm_vis.py b/src/PaddleClas/ppcls/utils/feature_maps_visualization/fm_vis.py
new file mode 100644
index 0000000..a5368b1
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/feature_maps_visualization/fm_vis.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+# put this folder and the repo root on sys.path before importing the
+# local modules (utils, resnet) below
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../../..')))
+
+import argparse
+import cv2
+import numpy as np
+
+import paddle
+from paddle.distributed import ParallelEnv
+
+import utils
+from resnet import ResNet50
+from ppcls.utils.save_load import load_dygraph_pretrain
+
+
+def parse_args():
+    def str2bool(v):
+        return v.lower() in ("true", "t", "1")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--image_file", required=True, type=str)
+    parser.add_argument("-c", "--channel_num", type=int)
+    parser.add_argument("-p", "--pretrained_model", type=str)
+    parser.add_argument("--show", type=str2bool, default=False)
+    parser.add_argument("--interpolation", type=int, default=1)
+    parser.add_argument("--save_path", type=str, default=None)
+    parser.add_argument("--use_gpu", type=str2bool, default=True)
+
+    return parser.parse_args()
+
+
+def create_operators(interpolation=1):
+    size = 224
+    img_mean = [0.485, 0.456, 0.406]
+    img_std = [0.229, 0.224, 0.225]
+    img_scale = 1.0 / 255.0
+
+    resize_op = utils.ResizeImage(
+        resize_short=256, interpolation=interpolation)
+    crop_op = utils.CropImage(size=(size, size))
+    normalize_op = utils.NormalizeImage(
+        scale=img_scale, mean=img_mean, std=img_std)
+    totensor_op = utils.ToTensor()
+
+    return [resize_op, crop_op, normalize_op, totensor_op]
+
+
+def preprocess(data, ops):
+    for op in ops:
+        data = op(data)
+    return data
+
+
+def main():
+    args = parse_args()
+    operators = create_operators(args.interpolation)
+    # assign the place
+    place = 'gpu:{}'.format(ParallelEnv().dev_id) if args.use_gpu else 'cpu'
+    place = paddle.set_device(place)
+
+    net = ResNet50()
+    load_dygraph_pretrain(net, args.pretrained_model)
+
+    img = cv2.imread(args.image_file, cv2.IMREAD_COLOR)
+    data = preprocess(img, operators)
+    data = np.expand_dims(data, axis=0)
+    data = paddle.to_tensor(data)
+    net.eval()
+    _, fm = net(data)
+    assert args.channel_num >= 0 and args.channel_num <= fm.shape[
+        1], "channel_num is out of range: should be in {} but got {}".format(
+            [0, fm.shape[1]], args.channel_num)
+
+    fm = (np.squeeze(fm[0][args.channel_num].numpy()) * 255).astype(np.uint8)
+    fm = cv2.resize(fm, (img.shape[1], img.shape[0]))
+    if args.save_path is not None:
+        print("the feature map is saved at: {}".format(args.save_path))
+        cv2.imwrite(args.save_path, fm)
+
+
+if __name__ == "__main__":
+    main()
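To make the data flow concrete, a standalone sketch of what preprocess() does to one image. The image path is a placeholder; the ops come from the sibling utils.py shown later in this diff:

    import cv2
    import utils  # the feature_maps_visualization/utils.py module below

    img = cv2.imread("demo.jpg", cv2.IMREAD_COLOR)   # placeholder path; shape (H, W, 3), BGR
    img = utils.ResizeImage(resize_short=256)(img)   # shorter side scaled to 256
    img = utils.CropImage(size=(224, 224))(img)      # center crop -> (224, 224, 3)
    img = utils.NormalizeImage(scale=1.0 / 255.0,
                               mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])(img)
    img = utils.ToTensor()(img)                      # HWC -> CHW, shape (3, 224, 224)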
diff --git a/src/PaddleClas/ppcls/utils/feature_maps_visualization/resnet.py b/src/PaddleClas/ppcls/utils/feature_maps_visualization/resnet.py
new file mode 100644
index 0000000..b758814
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/feature_maps_visualization/resnet.py
@@ -0,0 +1,535 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+from paddle.nn import Conv2D, BatchNorm, Linear
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import Uniform
+import math
+
+from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
+from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
+
+MODEL_URLS = {
+    "ResNet18":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_pretrained.pdparams",
+    "ResNet18_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_vd_pretrained.pdparams",
+    "ResNet34":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_pretrained.pdparams",
+    "ResNet34_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_pretrained.pdparams",
+    "ResNet50":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_pretrained.pdparams",
+    "ResNet50_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_pretrained.pdparams",
+    "ResNet101":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_pretrained.pdparams",
+    "ResNet101_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_pretrained.pdparams",
+    "ResNet152":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_pretrained.pdparams",
+    "ResNet152_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_vd_pretrained.pdparams",
+    "ResNet200_vd":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet200_vd_pretrained.pdparams",
+}
+
+__all__ = list(MODEL_URLS.keys())
+'''
+ResNet config: dict.
+    key: depth of ResNet.
+    values: config dict of the specific model.
+        keys:
+            block_type: which residual block to use; "BasicBlock" and
+                "BottleneckBlock" are the options.
+            block_depth: the number of blocks in each of the four stages.
+            num_channels: the number of channels entering the next stage.
+''' +NET_CONFIG = { + "18": { + "block_type": "BasicBlock", + "block_depth": [2, 2, 2, 2], + "num_channels": [64, 64, 128, 256] + }, + "34": { + "block_type": "BasicBlock", + "block_depth": [3, 4, 6, 3], + "num_channels": [64, 64, 128, 256] + }, + "50": { + "block_type": "BottleneckBlock", + "block_depth": [3, 4, 6, 3], + "num_channels": [64, 256, 512, 1024] + }, + "101": { + "block_type": "BottleneckBlock", + "block_depth": [3, 4, 23, 3], + "num_channels": [64, 256, 512, 1024] + }, + "152": { + "block_type": "BottleneckBlock", + "block_depth": [3, 8, 36, 3], + "num_channels": [64, 256, 512, 1024] + }, + "200": { + "block_type": "BottleneckBlock", + "block_depth": [3, 12, 48, 3], + "num_channels": [64, 256, 512, 1024] + }, +} + + +class ConvBNLayer(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + is_vd_mode=False, + act=None, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + self.is_vd_mode = is_vd_mode + self.act = act + self.avg_pool = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=False, + data_format=data_format) + self.bn = BatchNorm( + num_filters, + param_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=ParamAttr(learning_rate=lr_mult), + data_layout=data_format) + self.relu = nn.ReLU() + + def forward(self, x): + if self.is_vd_mode: + x = self.avg_pool(x) + x = self.conv(x) + x = self.bn(x) + if self.act: + x = self.relu(x) + return x + + +class BottleneckBlock(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + lr_mult=lr_mult, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride if if_first else 1, + is_vd_mode=False if if_first else True, + lr_mult=lr_mult, + data_format=data_format) + self.relu = nn.ReLU() + self.shortcut = shortcut + + def forward(self, x): + identity = x + x = self.conv0(x) + x = self.conv1(x) + x = self.conv2(x) + + if self.shortcut: + short = identity + else: + short = self.short(identity) + x = paddle.add(x=x, y=short) + x = self.relu(x) + return x + + +class BasicBlock(TheseusLayer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + lr_mult=1.0, + data_format="NCHW"): + super().__init__() + + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + lr_mult=lr_mult, + data_format=data_format) + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + lr_mult=lr_mult, + data_format=data_format) + if not shortcut: + self.short = ConvBNLayer( + 
num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride if if_first else 1, + is_vd_mode=False if if_first else True, + lr_mult=lr_mult, + data_format=data_format) + self.shortcut = shortcut + self.relu = nn.ReLU() + + def forward(self, x): + identity = x + x = self.conv0(x) + x = self.conv1(x) + if self.shortcut: + short = identity + else: + short = self.short(identity) + x = paddle.add(x=x, y=short) + x = self.relu(x) + return x + + +class ResNet(TheseusLayer): + """ + ResNet + Args: + config: dict. config of ResNet. + version: str="vb". Different version of ResNet, version vd can perform better. + class_num: int=1000. The number of classes. + lr_mult_list: list. Control the learning rate of different stages. + Returns: + model: nn.Layer. Specific ResNet model depends on args. + """ + + def __init__(self, + config, + version="vb", + class_num=1000, + lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], + data_format="NCHW", + input_image_channel=3, + return_patterns=None): + super().__init__() + + self.cfg = config + self.lr_mult_list = lr_mult_list + self.is_vd_mode = version == "vd" + self.class_num = class_num + self.num_filters = [64, 128, 256, 512] + self.block_depth = self.cfg["block_depth"] + self.block_type = self.cfg["block_type"] + self.num_channels = self.cfg["num_channels"] + self.channels_mult = 1 if self.num_channels[-1] == 256 else 4 + + assert isinstance(self.lr_mult_list, ( + list, tuple + )), "lr_mult_list should be in (list, tuple) but got {}".format( + type(self.lr_mult_list)) + assert len(self.lr_mult_list + ) == 5, "lr_mult_list length should be 5 but got {}".format( + len(self.lr_mult_list)) + + self.stem_cfg = { + #num_channels, num_filters, filter_size, stride + "vb": [[input_image_channel, 64, 7, 2]], + "vd": + [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] + } + + self.stem = nn.Sequential(* [ + ConvBNLayer( + num_channels=in_c, + num_filters=out_c, + filter_size=k, + stride=s, + act="relu", + lr_mult=self.lr_mult_list[0], + data_format=data_format) + for in_c, out_c, k, s in self.stem_cfg[version] + ]) + + self.max_pool = MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + block_list = [] + for block_idx in range(len(self.block_depth)): + shortcut = False + for i in range(self.block_depth[block_idx]): + block_list.append(globals()[self.block_type]( + num_channels=self.num_channels[block_idx] if i == 0 else + self.num_filters[block_idx] * self.channels_mult, + num_filters=self.num_filters[block_idx], + stride=2 if i == 0 and block_idx != 0 else 1, + shortcut=shortcut, + if_first=block_idx == i == 0 if version == "vd" else True, + lr_mult=self.lr_mult_list[block_idx + 1], + data_format=data_format)) + shortcut = True + self.blocks = nn.Sequential(*block_list) + + self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format) + self.flatten = nn.Flatten() + self.avg_pool_channels = self.num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0) + self.fc = Linear( + self.avg_pool_channels, + self.class_num, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv))) + + self.data_format = data_format + if return_patterns is not None: + self.update_res(return_patterns) + self.register_forward_post_hook(self._return_dict_hook) + + def forward(self, x): + with paddle.static.amp.fp16_guard(): + if self.data_format == "NHWC": + x = paddle.transpose(x, [0, 2, 3, 1]) + x.stop_gradient = True + x = self.stem(x) + fm = x + x = self.max_pool(x) + x = self.blocks(x) + x = self.avg_pool(x) + x = 
self.flatten(x) + x = self.fc(x) + return x, fm + + +def _load_pretrained(pretrained, model, model_url, use_ssld): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def ResNet18(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet18 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet18` model depends on args. + """ + model = ResNet(config=NET_CONFIG["18"], version="vb", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet18"], use_ssld) + return model + + +def ResNet18_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet18_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet18_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["18"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet18_vd"], use_ssld) + return model + + +def ResNet34(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet34 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet34` model depends on args. + """ + model = ResNet(config=NET_CONFIG["34"], version="vb", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet34"], use_ssld) + return model + + +def ResNet34_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet34_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet34_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["34"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet34_vd"], use_ssld) + return model + + +def ResNet50(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet50 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet50` model depends on args. + """ + model = ResNet(config=NET_CONFIG["50"], version="vb", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld) + return model + + +def ResNet50_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet50_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. 
+ Returns: + model: nn.Layer. Specific `ResNet50_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["50"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vd"], use_ssld) + return model + + +def ResNet101(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet101 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet101` model depends on args. + """ + model = ResNet(config=NET_CONFIG["101"], version="vb", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet101"], use_ssld) + return model + + +def ResNet101_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet101_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet101_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["101"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet101_vd"], use_ssld) + return model + + +def ResNet152(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet152 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet152` model depends on args. + """ + model = ResNet(config=NET_CONFIG["152"], version="vb", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet152"], use_ssld) + return model + + +def ResNet152_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet152_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet152_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["152"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet152_vd"], use_ssld) + return model + + +def ResNet200_vd(pretrained=False, use_ssld=False, **kwargs): + """ + ResNet200_vd + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ResNet200_vd` model depends on args. + """ + model = ResNet(config=NET_CONFIG["200"], version="vd", **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["ResNet200_vd"], use_ssld) + return model diff --git a/src/PaddleClas/ppcls/utils/feature_maps_visualization/utils.py b/src/PaddleClas/ppcls/utils/feature_maps_visualization/utils.py new file mode 100644 index 0000000..7c70149 --- /dev/null +++ b/src/PaddleClas/ppcls/utils/feature_maps_visualization/utils.py @@ -0,0 +1,85 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import numpy as np + + +class DecodeImage(object): + def __init__(self, to_rgb=True): + self.to_rgb = to_rgb + + def __call__(self, img): + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( + img.shape) + img = img[:, :, ::-1] + + return img + + +class ResizeImage(object): + def __init__(self, resize_short=None, interpolation=1): + self.resize_short = resize_short + self.interpolation = interpolation + + def __call__(self, img): + img_h, img_w = img.shape[:2] + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + return cv2.resize(img, (w, h), interpolation=self.interpolation) + + +class CropImage(object): + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class NormalizeImage(object): + def __init__(self, scale=None, mean=None, std=None): + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + return (img.astype('float32') * self.scale - self.mean) / self.std + + +class ToTensor(object): + def __init__(self): + pass + + def __call__(self, img): + img = img.transpose((2, 0, 1)) + return img diff --git a/src/PaddleClas/ppcls/utils/gallery2fc.py b/src/PaddleClas/ppcls/utils/gallery2fc.py new file mode 100644 index 0000000..67b0852 --- /dev/null +++ b/src/PaddleClas/ppcls/utils/gallery2fc.py @@ -0,0 +1,119 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
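Before the implementation that follows, the core trick of gallery2fc.py in a few lines: with L2-normalized query features and L2-normalized gallery embeddings stored as the weight of a bias-free FC layer, the FC output is exactly the cosine similarity to every gallery image, so retrieval reduces to an argmax over logits. A toy numpy sketch (random data, not from the repo):

    import numpy as np

    gallery = np.random.randn(5, 8).astype("float32")          # 5 gallery images, 8-dim embeddings
    gallery /= np.linalg.norm(gallery, axis=1, keepdims=True)  # normalize rows

    query = np.random.randn(8).astype("float32")
    query /= np.linalg.norm(query)

    scores = gallery @ query       # == FC(query) with weight = gallery, no bias
    print(int(scores.argmax()))    # index of the nearest gallery image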
+
+import os
+import paddle
+import cv2
+
+from ppcls.arch import build_model
+from ppcls.utils.config import parse_config, parse_args
+from ppcls.utils.save_load import load_dygraph_pretrain
+from ppcls.utils.logger import init_logger
+from ppcls.data import create_operators
+from ppcls.arch.slim import quantize_model
+
+
+class GalleryLayer(paddle.nn.Layer):
+    def __init__(self, configs):
+        super().__init__()
+        self.configs = configs
+        embedding_size = self.configs["Arch"]["Head"]["embedding_size"]
+        self.batch_size = self.configs["IndexProcess"]["batch_size"]
+        self.image_shape = self.configs["Global"]["image_shape"].copy()
+        self.image_shape.insert(0, self.batch_size)
+
+        image_root = self.configs["IndexProcess"]["image_root"]
+        data_file = self.configs["IndexProcess"]["data_file"]
+        delimiter = self.configs["IndexProcess"]["delimiter"]
+        self.gallery_images = []
+        gallery_docs = []
+        gallery_labels = []
+
+        with open(data_file, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+            for ori_line in lines:
+                line = ori_line.strip().split(delimiter)
+                text_num = len(line)
+                assert text_num >= 2, f"line({ori_line}) must be split into at least 2 parts, but got {text_num}"
+                image_file = os.path.join(image_root, line[0])
+
+                self.gallery_images.append(image_file)
+                gallery_docs.append(ori_line.strip())
+                gallery_labels.append(line[1].strip())
+        self.gallery_layer = paddle.nn.Linear(
+            embedding_size, len(self.gallery_images), bias_attr=False)
+        self.gallery_layer.skip_quant = True
+        output_label_str = ""
+        for i, label_i in enumerate(gallery_labels):
+            output_label_str += "{} {}\n".format(i, label_i)
+        output_path = configs["Global"]["save_inference_dir"] + "_label.txt"
+        with open(output_path, "w") as f:
+            f.write(output_label_str)
+
+    def forward(self, x, label=None):
+        # cosine similarity of the normalized feature against every
+        # gallery embedding stored as the FC weight
+        x = paddle.nn.functional.normalize(x)
+        x = self.gallery_layer(x)
+        return x
+
+    def build_gallery_layer(self, feature_extractor):
+        transform_configs = self.configs["IndexProcess"]["transform_ops"]
+        preprocess_ops = create_operators(transform_configs)
+        embedding_size = self.configs["Arch"]["Head"]["embedding_size"]
+        batch_index = 0
+        input_tensor = paddle.zeros(self.image_shape)
+        gallery_feature = paddle.zeros(
+            (len(self.gallery_images), embedding_size))
+        for i, image_path in enumerate(self.gallery_images):
+            image = cv2.imread(image_path)[:, :, ::-1]
+            for op in preprocess_ops:
+                image = op(image)
+            input_tensor[batch_index] = image
+            batch_index += 1
+            if batch_index == self.batch_size or i == len(self.gallery_images) - 1:
+                batch_feature = feature_extractor(input_tensor)["features"]
+                for j in range(batch_index):
+                    feature = batch_feature[j]
+                    norm_feature = paddle.nn.functional.normalize(
+                        feature, axis=0)
+                    gallery_feature[i - batch_index + j + 1] = norm_feature
+                # reset the buffer index; without this, the input_tensor
+                # write above overflows on the second batch
+                batch_index = 0
+        self.gallery_layer.set_state_dict({"_layer.weight": gallery_feature.T})
+
+
+def export_fuse_model(configs):
+    slim_config = configs["Slim"].copy()
+    configs["Slim"] = None
+    fuse_model = build_model(configs)
+    fuse_model.head = GalleryLayer(configs)
+    configs["Slim"] = slim_config
+    quantize_model(configs, fuse_model)
+    load_dygraph_pretrain(fuse_model, configs["Global"]["pretrained_model"])
+    fuse_model.eval()
+    fuse_model.head.build_gallery_layer(fuse_model)
+    save_path = configs["Global"]["save_inference_dir"]
+    fuse_model.quanter.save_quantized_model(
+        fuse_model,
+        save_path,
+        input_spec=[
+            paddle.static.InputSpec(
+                shape=[None] + configs["Global"]["image_shape"],
+                dtype='float32')
+        ])
+
+
+def main():
+    args = parse_args()
+    configs = 
parse_config(args.config) + init_logger(name='gallery2fc') + export_fuse_model(configs) + + +if __name__ == '__main__': + main() diff --git a/src/PaddleClas/ppcls/utils/imagenet1k_label_list.txt b/src/PaddleClas/ppcls/utils/imagenet1k_label_list.txt new file mode 100644 index 0000000..376e180 --- /dev/null +++ b/src/PaddleClas/ppcls/utils/imagenet1k_label_list.txt @@ -0,0 +1,1000 @@ +0 tench, Tinca tinca +1 goldfish, Carassius auratus +2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +3 tiger shark, Galeocerdo cuvieri +4 hammerhead, hammerhead shark +5 electric ray, crampfish, numbfish, torpedo +6 stingray +7 cock +8 hen +9 ostrich, Struthio camelus +10 brambling, Fringilla montifringilla +11 goldfinch, Carduelis carduelis +12 house finch, linnet, Carpodacus mexicanus +13 junco, snowbird +14 indigo bunting, indigo finch, indigo bird, Passerina cyanea +15 robin, American robin, Turdus migratorius +16 bulbul +17 jay +18 magpie +19 chickadee +20 water ouzel, dipper +21 kite +22 bald eagle, American eagle, Haliaeetus leucocephalus +23 vulture +24 great grey owl, great gray owl, Strix nebulosa +25 European fire salamander, Salamandra salamandra +26 common newt, Triturus vulgaris +27 eft +28 spotted salamander, Ambystoma maculatum +29 axolotl, mud puppy, Ambystoma mexicanum +30 bullfrog, Rana catesbeiana +31 tree frog, tree-frog +32 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +33 loggerhead, loggerhead turtle, Caretta caretta +34 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +35 mud turtle +36 terrapin +37 box turtle, box tortoise +38 banded gecko +39 common iguana, iguana, Iguana iguana +40 American chameleon, anole, Anolis carolinensis +41 whiptail, whiptail lizard +42 agama +43 frilled lizard, Chlamydosaurus kingi +44 alligator lizard +45 Gila monster, Heloderma suspectum +46 green lizard, Lacerta viridis +47 African chameleon, Chamaeleo chamaeleon +48 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +49 African crocodile, Nile crocodile, Crocodylus niloticus +50 American alligator, Alligator mississipiensis +51 triceratops +52 thunder snake, worm snake, Carphophis amoenus +53 ringneck snake, ring-necked snake, ring snake +54 hognose snake, puff adder, sand viper +55 green snake, grass snake +56 king snake, kingsnake +57 garter snake, grass snake +58 water snake +59 vine snake +60 night snake, Hypsiglena torquata +61 boa constrictor, Constrictor constrictor +62 rock python, rock snake, Python sebae +63 Indian cobra, Naja naja +64 green mamba +65 sea snake +66 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +67 diamondback, diamondback rattlesnake, Crotalus adamanteus +68 sidewinder, horned rattlesnake, Crotalus cerastes +69 trilobite +70 harvestman, daddy longlegs, Phalangium opilio +71 scorpion +72 black and gold garden spider, Argiope aurantia +73 barn spider, Araneus cavaticus +74 garden spider, Aranea diademata +75 black widow, Latrodectus mactans +76 tarantula +77 wolf spider, hunting spider +78 tick +79 centipede +80 black grouse +81 ptarmigan +82 ruffed grouse, partridge, Bonasa umbellus +83 prairie chicken, prairie grouse, prairie fowl +84 peacock +85 quail +86 partridge +87 African grey, African gray, Psittacus erithacus +88 macaw +89 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +90 lorikeet +91 coucal +92 bee eater +93 hornbill +94 hummingbird +95 jacamar +96 toucan +97 drake +98 red-breasted merganser, Mergus serrator +99 goose +100 black 
swan, Cygnus atratus +101 tusker +102 echidna, spiny anteater, anteater +103 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +104 wallaby, brush kangaroo +105 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +106 wombat +107 jellyfish +108 sea anemone, anemone +109 brain coral +110 flatworm, platyhelminth +111 nematode, nematode worm, roundworm +112 conch +113 snail +114 slug +115 sea slug, nudibranch +116 chiton, coat-of-mail shell, sea cradle, polyplacophore +117 chambered nautilus, pearly nautilus, nautilus +118 Dungeness crab, Cancer magister +119 rock crab, Cancer irroratus +120 fiddler crab +121 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +122 American lobster, Northern lobster, Maine lobster, Homarus americanus +123 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +124 crayfish, crawfish, crawdad, crawdaddy +125 hermit crab +126 isopod +127 white stork, Ciconia ciconia +128 black stork, Ciconia nigra +129 spoonbill +130 flamingo +131 little blue heron, Egretta caerulea +132 American egret, great white heron, Egretta albus +133 bittern +134 crane +135 limpkin, Aramus pictus +136 European gallinule, Porphyrio porphyrio +137 American coot, marsh hen, mud hen, water hen, Fulica americana +138 bustard +139 ruddy turnstone, Arenaria interpres +140 red-backed sandpiper, dunlin, Erolia alpina +141 redshank, Tringa totanus +142 dowitcher +143 oystercatcher, oyster catcher +144 pelican +145 king penguin, Aptenodytes patagonica +146 albatross, mollymawk +147 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +148 killer whale, killer, orca, grampus, sea wolf, Orcinus orca +149 dugong, Dugong dugon +150 sea lion +151 Chihuahua +152 Japanese spaniel +153 Maltese dog, Maltese terrier, Maltese +154 Pekinese, Pekingese, Peke +155 Shih-Tzu +156 Blenheim spaniel +157 papillon +158 toy terrier +159 Rhodesian ridgeback +160 Afghan hound, Afghan +161 basset, basset hound +162 beagle +163 bloodhound, sleuthhound +164 bluetick +165 black-and-tan coonhound +166 Walker hound, Walker foxhound +167 English foxhound +168 redbone +169 borzoi, Russian wolfhound +170 Irish wolfhound +171 Italian greyhound +172 whippet +173 Ibizan hound, Ibizan Podenco +174 Norwegian elkhound, elkhound +175 otterhound, otter hound +176 Saluki, gazelle hound +177 Scottish deerhound, deerhound +178 Weimaraner +179 Staffordshire bullterrier, Staffordshire bull terrier +180 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +181 Bedlington terrier +182 Border terrier +183 Kerry blue terrier +184 Irish terrier +185 Norfolk terrier +186 Norwich terrier +187 Yorkshire terrier +188 wire-haired fox terrier +189 Lakeland terrier +190 Sealyham terrier, Sealyham +191 Airedale, Airedale terrier +192 cairn, cairn terrier +193 Australian terrier +194 Dandie Dinmont, Dandie Dinmont terrier +195 Boston bull, Boston terrier +196 miniature schnauzer +197 giant schnauzer +198 standard schnauzer +199 Scotch terrier, Scottish terrier, Scottie +200 Tibetan terrier, chrysanthemum dog +201 silky terrier, Sydney silky +202 soft-coated wheaten terrier +203 West Highland white terrier +204 Lhasa, Lhasa apso +205 flat-coated retriever +206 curly-coated retriever +207 golden retriever +208 Labrador retriever +209 Chesapeake Bay retriever +210 German short-haired pointer +211 vizsla, Hungarian pointer +212 English setter +213 Irish setter, red setter +214 
Gordon setter +215 Brittany spaniel +216 clumber, clumber spaniel +217 English springer, English springer spaniel +218 Welsh springer spaniel +219 cocker spaniel, English cocker spaniel, cocker +220 Sussex spaniel +221 Irish water spaniel +222 kuvasz +223 schipperke +224 groenendael +225 malinois +226 briard +227 kelpie +228 komondor +229 Old English sheepdog, bobtail +230 Shetland sheepdog, Shetland sheep dog, Shetland +231 collie +232 Border collie +233 Bouvier des Flandres, Bouviers des Flandres +234 Rottweiler +235 German shepherd, German shepherd dog, German police dog, alsatian +236 Doberman, Doberman pinscher +237 miniature pinscher +238 Greater Swiss Mountain dog +239 Bernese mountain dog +240 Appenzeller +241 EntleBucher +242 boxer +243 bull mastiff +244 Tibetan mastiff +245 French bulldog +246 Great Dane +247 Saint Bernard, St Bernard +248 Eskimo dog, husky +249 malamute, malemute, Alaskan malamute +250 Siberian husky +251 dalmatian, coach dog, carriage dog +252 affenpinscher, monkey pinscher, monkey dog +253 basenji +254 pug, pug-dog +255 Leonberg +256 Newfoundland, Newfoundland dog +257 Great Pyrenees +258 Samoyed, Samoyede +259 Pomeranian +260 chow, chow chow +261 keeshond +262 Brabancon griffon +263 Pembroke, Pembroke Welsh corgi +264 Cardigan, Cardigan Welsh corgi +265 toy poodle +266 miniature poodle +267 standard poodle +268 Mexican hairless +269 timber wolf, grey wolf, gray wolf, Canis lupus +270 white wolf, Arctic wolf, Canis lupus tundrarum +271 red wolf, maned wolf, Canis rufus, Canis niger +272 coyote, prairie wolf, brush wolf, Canis latrans +273 dingo, warrigal, warragal, Canis dingo +274 dhole, Cuon alpinus +275 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +276 hyena, hyaena +277 red fox, Vulpes vulpes +278 kit fox, Vulpes macrotis +279 Arctic fox, white fox, Alopex lagopus +280 grey fox, gray fox, Urocyon cinereoargenteus +281 tabby, tabby cat +282 tiger cat +283 Persian cat +284 Siamese cat, Siamese +285 Egyptian cat +286 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +287 lynx, catamount +288 leopard, Panthera pardus +289 snow leopard, ounce, Panthera uncia +290 jaguar, panther, Panthera onca, Felis onca +291 lion, king of beasts, Panthera leo +292 tiger, Panthera tigris +293 cheetah, chetah, Acinonyx jubatus +294 brown bear, bruin, Ursus arctos +295 American black bear, black bear, Ursus americanus, Euarctos americanus +296 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +297 sloth bear, Melursus ursinus, Ursus ursinus +298 mongoose +299 meerkat, mierkat +300 tiger beetle +301 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +302 ground beetle, carabid beetle +303 long-horned beetle, longicorn, longicorn beetle +304 leaf beetle, chrysomelid +305 dung beetle +306 rhinoceros beetle +307 weevil +308 fly +309 bee +310 ant, emmet, pismire +311 grasshopper, hopper +312 cricket +313 walking stick, walkingstick, stick insect +314 cockroach, roach +315 mantis, mantid +316 cicada, cicala +317 leafhopper +318 lacewing, lacewing fly +319 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +320 damselfly +321 admiral +322 ringlet, ringlet butterfly +323 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +324 cabbage butterfly +325 sulphur butterfly, sulfur butterfly +326 lycaenid, lycaenid butterfly +327 starfish, sea star +328 sea urchin +329 sea cucumber, holothurian +330 wood rabbit, cottontail, cottontail rabbit +331 hare 
+332 Angora, Angora rabbit +333 hamster +334 porcupine, hedgehog +335 fox squirrel, eastern fox squirrel, Sciurus niger +336 marmot +337 beaver +338 guinea pig, Cavia cobaya +339 sorrel +340 zebra +341 hog, pig, grunter, squealer, Sus scrofa +342 wild boar, boar, Sus scrofa +343 warthog +344 hippopotamus, hippo, river horse, Hippopotamus amphibius +345 ox +346 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +347 bison +348 ram, tup +349 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +350 ibex, Capra ibex +351 hartebeest +352 impala, Aepyceros melampus +353 gazelle +354 Arabian camel, dromedary, Camelus dromedarius +355 llama +356 weasel +357 mink +358 polecat, fitch, foulmart, foumart, Mustela putorius +359 black-footed ferret, ferret, Mustela nigripes +360 otter +361 skunk, polecat, wood pussy +362 badger +363 armadillo +364 three-toed sloth, ai, Bradypus tridactylus +365 orangutan, orang, orangutang, Pongo pygmaeus +366 gorilla, Gorilla gorilla +367 chimpanzee, chimp, Pan troglodytes +368 gibbon, Hylobates lar +369 siamang, Hylobates syndactylus, Symphalangus syndactylus +370 guenon, guenon monkey +371 patas, hussar monkey, Erythrocebus patas +372 baboon +373 macaque +374 langur +375 colobus, colobus monkey +376 proboscis monkey, Nasalis larvatus +377 marmoset +378 capuchin, ringtail, Cebus capucinus +379 howler monkey, howler +380 titi, titi monkey +381 spider monkey, Ateles geoffroyi +382 squirrel monkey, Saimiri sciureus +383 Madagascar cat, ring-tailed lemur, Lemur catta +384 indri, indris, Indri indri, Indri brevicaudatus +385 Indian elephant, Elephas maximus +386 African elephant, Loxodonta africana +387 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +388 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +389 barracouta, snoek +390 eel +391 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +392 rock beauty, Holocanthus tricolor +393 anemone fish +394 sturgeon +395 gar, garfish, garpike, billfish, Lepisosteus osseus +396 lionfish +397 puffer, pufferfish, blowfish, globefish +398 abacus +399 abaya +400 academic gown, academic robe, judge's robe +401 accordion, piano accordion, squeeze box +402 acoustic guitar +403 aircraft carrier, carrier, flattop, attack aircraft carrier +404 airliner +405 airship, dirigible +406 altar +407 ambulance +408 amphibian, amphibious vehicle +409 analog clock +410 apiary, bee house +411 apron +412 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +413 assault rifle, assault gun +414 backpack, back pack, knapsack, packsack, rucksack, haversack +415 bakery, bakeshop, bakehouse +416 balance beam, beam +417 balloon +418 ballpoint, ballpoint pen, ballpen, Biro +419 Band Aid +420 banjo +421 bannister, banister, balustrade, balusters, handrail +422 barbell +423 barber chair +424 barbershop +425 barn +426 barometer +427 barrel, cask +428 barrow, garden cart, lawn cart, wheelbarrow +429 baseball +430 basketball +431 bassinet +432 bassoon +433 bathing cap, swimming cap +434 bath towel +435 bathtub, bathing tub, bath, tub +436 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +437 beacon, lighthouse, beacon light, pharos +438 beaker +439 bearskin, busby, shako +440 beer bottle +441 beer glass +442 bell cote, bell cot +443 bib +444 bicycle-built-for-two, tandem bicycle, tandem +445 bikini, two-piece +446 binder, ring-binder +447 binoculars, field glasses, opera 
glasses +448 birdhouse +449 boathouse +450 bobsled, bobsleigh, bob +451 bolo tie, bolo, bola tie, bola +452 bonnet, poke bonnet +453 bookcase +454 bookshop, bookstore, bookstall +455 bottlecap +456 bow +457 bow tie, bow-tie, bowtie +458 brass, memorial tablet, plaque +459 brassiere, bra, bandeau +460 breakwater, groin, groyne, mole, bulwark, seawall, jetty +461 breastplate, aegis, egis +462 broom +463 bucket, pail +464 buckle +465 bulletproof vest +466 bullet train, bullet +467 butcher shop, meat market +468 cab, hack, taxi, taxicab +469 caldron, cauldron +470 candle, taper, wax light +471 cannon +472 canoe +473 can opener, tin opener +474 cardigan +475 car mirror +476 carousel, carrousel, merry-go-round, roundabout, whirligig +477 carpenter's kit, tool kit +478 carton +479 car wheel +480 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +481 cassette +482 cassette player +483 castle +484 catamaran +485 CD player +486 cello, violoncello +487 cellular telephone, cellular phone, cellphone, cell, mobile phone +488 chain +489 chainlink fence +490 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +491 chain saw, chainsaw +492 chest +493 chiffonier, commode +494 chime, bell, gong +495 china cabinet, china closet +496 Christmas stocking +497 church, church building +498 cinema, movie theater, movie theatre, movie house, picture palace +499 cleaver, meat cleaver, chopper +500 cliff dwelling +501 cloak +502 clog, geta, patten, sabot +503 cocktail shaker +504 coffee mug +505 coffeepot +506 coil, spiral, volute, whorl, helix +507 combination lock +508 computer keyboard, keypad +509 confectionery, confectionary, candy store +510 container ship, containership, container vessel +511 convertible +512 corkscrew, bottle screw +513 cornet, horn, trumpet, trump +514 cowboy boot +515 cowboy hat, ten-gallon hat +516 cradle +517 crane +518 crash helmet +519 crate +520 crib, cot +521 Crock Pot +522 croquet ball +523 crutch +524 cuirass +525 dam, dike, dyke +526 desk +527 desktop computer +528 dial telephone, dial phone +529 diaper, nappy, napkin +530 digital clock +531 digital watch +532 dining table, board +533 dishrag, dishcloth +534 dishwasher, dish washer, dishwashing machine +535 disk brake, disc brake +536 dock, dockage, docking facility +537 dogsled, dog sled, dog sleigh +538 dome +539 doormat, welcome mat +540 drilling platform, offshore rig +541 drum, membranophone, tympan +542 drumstick +543 dumbbell +544 Dutch oven +545 electric fan, blower +546 electric guitar +547 electric locomotive +548 entertainment center +549 envelope +550 espresso maker +551 face powder +552 feather boa, boa +553 file, file cabinet, filing cabinet +554 fireboat +555 fire engine, fire truck +556 fire screen, fireguard +557 flagpole, flagstaff +558 flute, transverse flute +559 folding chair +560 football helmet +561 forklift +562 fountain +563 fountain pen +564 four-poster +565 freight car +566 French horn, horn +567 frying pan, frypan, skillet +568 fur coat +569 garbage truck, dustcart +570 gasmask, respirator, gas helmet +571 gas pump, gasoline pump, petrol pump, island dispenser +572 goblet +573 go-kart +574 golf ball +575 golfcart, golf cart +576 gondola +577 gong, tam-tam +578 gown +579 grand piano, grand +580 greenhouse, nursery, glasshouse +581 grille, radiator grille +582 grocery store, grocery, food market, market +583 guillotine +584 hair slide +585 hair spray +586 half track +587 hammer +588 hamper +589 hand blower, 
blow dryer, blow drier, hair dryer, hair drier +590 hand-held computer, hand-held microcomputer +591 handkerchief, hankie, hanky, hankey +592 hard disc, hard disk, fixed disk +593 harmonica, mouth organ, harp, mouth harp +594 harp +595 harvester, reaper +596 hatchet +597 holster +598 home theater, home theatre +599 honeycomb +600 hook, claw +601 hoopskirt, crinoline +602 horizontal bar, high bar +603 horse cart, horse-cart +604 hourglass +605 iPod +606 iron, smoothing iron +607 jack-o'-lantern +608 jean, blue jean, denim +609 jeep, landrover +610 jersey, T-shirt, tee shirt +611 jigsaw puzzle +612 jinrikisha, ricksha, rickshaw +613 joystick +614 kimono +615 knee pad +616 knot +617 lab coat, laboratory coat +618 ladle +619 lampshade, lamp shade +620 laptop, laptop computer +621 lawn mower, mower +622 lens cap, lens cover +623 letter opener, paper knife, paperknife +624 library +625 lifeboat +626 lighter, light, igniter, ignitor +627 limousine, limo +628 liner, ocean liner +629 lipstick, lip rouge +630 Loafer +631 lotion +632 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +633 loupe, jeweler's loupe +634 lumbermill, sawmill +635 magnetic compass +636 mailbag, postbag +637 mailbox, letter box +638 maillot +639 maillot, tank suit +640 manhole cover +641 maraca +642 marimba, xylophone +643 mask +644 matchstick +645 maypole +646 maze, labyrinth +647 measuring cup +648 medicine chest, medicine cabinet +649 megalith, megalithic structure +650 microphone, mike +651 microwave, microwave oven +652 military uniform +653 milk can +654 minibus +655 miniskirt, mini +656 minivan +657 missile +658 mitten +659 mixing bowl +660 mobile home, manufactured home +661 Model T +662 modem +663 monastery +664 monitor +665 moped +666 mortar +667 mortarboard +668 mosque +669 mosquito net +670 motor scooter, scooter +671 mountain bike, all-terrain bike, off-roader +672 mountain tent +673 mouse, computer mouse +674 mousetrap +675 moving van +676 muzzle +677 nail +678 neck brace +679 necklace +680 nipple +681 notebook, notebook computer +682 obelisk +683 oboe, hautboy, hautbois +684 ocarina, sweet potato +685 odometer, hodometer, mileometer, milometer +686 oil filter +687 organ, pipe organ +688 oscilloscope, scope, cathode-ray oscilloscope, CRO +689 overskirt +690 oxcart +691 oxygen mask +692 packet +693 paddle, boat paddle +694 paddlewheel, paddle wheel +695 padlock +696 paintbrush +697 pajama, pyjama, pj's, jammies +698 palace +699 panpipe, pandean pipe, syrinx +700 paper towel +701 parachute, chute +702 parallel bars, bars +703 park bench +704 parking meter +705 passenger car, coach, carriage +706 patio, terrace +707 pay-phone, pay-station +708 pedestal, plinth, footstall +709 pencil box, pencil case +710 pencil sharpener +711 perfume, essence +712 Petri dish +713 photocopier +714 pick, plectrum, plectron +715 pickelhaube +716 picket fence, paling +717 pickup, pickup truck +718 pier +719 piggy bank, penny bank +720 pill bottle +721 pillow +722 ping-pong ball +723 pinwheel +724 pirate, pirate ship +725 pitcher, ewer +726 plane, carpenter's plane, woodworking plane +727 planetarium +728 plastic bag +729 plate rack +730 plow, plough +731 plunger, plumber's helper +732 Polaroid camera, Polaroid Land camera +733 pole +734 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +735 poncho +736 pool table, billiard table, snooker table +737 pop bottle, soda bottle +738 pot, flowerpot +739 potter's wheel +740 power drill +741 prayer rug, prayer mat +742 printer +743 prison, prison house 
+744 projectile, missile +745 projector +746 puck, hockey puck +747 punching bag, punch bag, punching ball, punchball +748 purse +749 quill, quill pen +750 quilt, comforter, comfort, puff +751 racer, race car, racing car +752 racket, racquet +753 radiator +754 radio, wireless +755 radio telescope, radio reflector +756 rain barrel +757 recreational vehicle, RV, R.V. +758 reel +759 reflex camera +760 refrigerator, icebox +761 remote control, remote +762 restaurant, eating house, eating place, eatery +763 revolver, six-gun, six-shooter +764 rifle +765 rocking chair, rocker +766 rotisserie +767 rubber eraser, rubber, pencil eraser +768 rugby ball +769 rule, ruler +770 running shoe +771 safe +772 safety pin +773 saltshaker, salt shaker +774 sandal +775 sarong +776 sax, saxophone +777 scabbard +778 scale, weighing machine +779 school bus +780 schooner +781 scoreboard +782 screen, CRT screen +783 screw +784 screwdriver +785 seat belt, seatbelt +786 sewing machine +787 shield, buckler +788 shoe shop, shoe-shop, shoe store +789 shoji +790 shopping basket +791 shopping cart +792 shovel +793 shower cap +794 shower curtain +795 ski +796 ski mask +797 sleeping bag +798 slide rule, slipstick +799 sliding door +800 slot, one-armed bandit +801 snorkel +802 snowmobile +803 snowplow, snowplough +804 soap dispenser +805 soccer ball +806 sock +807 solar dish, solar collector, solar furnace +808 sombrero +809 soup bowl +810 space bar +811 space heater +812 space shuttle +813 spatula +814 speedboat +815 spider web, spider's web +816 spindle +817 sports car, sport car +818 spotlight, spot +819 stage +820 steam locomotive +821 steel arch bridge +822 steel drum +823 stethoscope +824 stole +825 stone wall +826 stopwatch, stop watch +827 stove +828 strainer +829 streetcar, tram, tramcar, trolley, trolley car +830 stretcher +831 studio couch, day bed +832 stupa, tope +833 submarine, pigboat, sub, U-boat +834 suit, suit of clothes +835 sundial +836 sunglass +837 sunglasses, dark glasses, shades +838 sunscreen, sunblock, sun blocker +839 suspension bridge +840 swab, swob, mop +841 sweatshirt +842 swimming trunks, bathing trunks +843 swing +844 switch, electric switch, electrical switch +845 syringe +846 table lamp +847 tank, army tank, armored combat vehicle, armoured combat vehicle +848 tape player +849 teapot +850 teddy, teddy bear +851 television, television system +852 tennis ball +853 thatch, thatched roof +854 theater curtain, theatre curtain +855 thimble +856 thresher, thrasher, threshing machine +857 throne +858 tile roof +859 toaster +860 tobacco shop, tobacconist shop, tobacconist +861 toilet seat +862 torch +863 totem pole +864 tow truck, tow car, wrecker +865 toyshop +866 tractor +867 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +868 tray +869 trench coat +870 tricycle, trike, velocipede +871 trimaran +872 tripod +873 triumphal arch +874 trolleybus, trolley coach, trackless trolley +875 trombone +876 tub, vat +877 turnstile +878 typewriter keyboard +879 umbrella +880 unicycle, monocycle +881 upright, upright piano +882 vacuum, vacuum cleaner +883 vase +884 vault +885 velvet +886 vending machine +887 vestment +888 viaduct +889 violin, fiddle +890 volleyball +891 waffle iron +892 wall clock +893 wallet, billfold, notecase, pocketbook +894 wardrobe, closet, press +895 warplane, military plane +896 washbasin, handbasin, washbowl, lavabo, wash-hand basin +897 washer, automatic washer, washing machine +898 water bottle +899 water jug +900 water tower +901 whiskey jug +902 whistle 
+903 wig +904 window screen +905 window shade +906 Windsor tie +907 wine bottle +908 wing +909 wok +910 wooden spoon +911 wool, woolen, woollen +912 worm fence, snake fence, snake-rail fence, Virginia fence +913 wreck +914 yawl +915 yurt +916 web site, website, internet site, site +917 comic book +918 crossword puzzle, crossword +919 street sign +920 traffic light, traffic signal, stoplight +921 book jacket, dust cover, dust jacket, dust wrapper +922 menu +923 plate +924 guacamole +925 consomme +926 hot pot, hotpot +927 trifle +928 ice cream, icecream +929 ice lolly, lolly, lollipop, popsicle +930 French loaf +931 bagel, beigel +932 pretzel +933 cheeseburger +934 hotdog, hot dog, red hot +935 mashed potato +936 head cabbage +937 broccoli +938 cauliflower +939 zucchini, courgette +940 spaghetti squash +941 acorn squash +942 butternut squash +943 cucumber, cuke +944 artichoke, globe artichoke +945 bell pepper +946 cardoon +947 mushroom +948 Granny Smith +949 strawberry +950 orange +951 lemon +952 fig +953 pineapple, ananas +954 banana +955 jackfruit, jak, jack +956 custard apple +957 pomegranate +958 hay +959 carbonara +960 chocolate sauce, chocolate syrup +961 dough +962 meat loaf, meatloaf +963 pizza, pizza pie +964 potpie +965 burrito +966 red wine +967 espresso +968 cup +969 eggnog +970 alp +971 bubble +972 cliff, drop, drop-off +973 coral reef +974 geyser +975 lakeside, lakeshore +976 promontory, headland, head, foreland +977 sandbar, sand bar +978 seashore, coast, seacoast, sea-coast +979 valley, vale +980 volcano +981 ballplayer, baseball player +982 groom, bridegroom +983 scuba diver +984 rapeseed +985 daisy +986 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +987 corn +988 acorn +989 hip, rose hip, rosehip +990 buckeye, horse chestnut, conker +991 coral fungus +992 agaric +993 gyromitra +994 stinkhorn, carrion fungus +995 earthstar +996 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +997 bolete +998 ear, spike, capitulum +999 toilet tissue, toilet paper, bathroom tissue diff --git a/src/PaddleClas/ppcls/utils/logger.py b/src/PaddleClas/ppcls/utils/logger.py new file mode 100644 index 0000000..d4faaa9 --- /dev/null +++ b/src/PaddleClas/ppcls/utils/logger.py @@ -0,0 +1,137 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +import logging +import datetime +import paddle.distributed as dist + +_logger = None + + +def init_logger(name='root', log_file=None, log_level=logging.INFO): + """Initialize and get a logger by name. + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified a FileHandler will also be added. + Args: + name (str): Logger name. + log_file (str | None): The log filename. 
If specified, a FileHandler
+            will be added to the logger.
+        log_level (int): The logger level. Note that only the process of
+            rank 0 is affected, and other processes will set the level to
+            "Error" and thus be silent most of the time.
+    Returns:
+        logging.Logger: The expected logger.
+    """
+    global _logger
+    assert _logger is None, "logger should not be initialized twice or more."
+    _logger = logging.getLogger(name)
+
+    formatter = logging.Formatter(
+        '[%(asctime)s] %(name)s %(levelname)s: %(message)s',
+        datefmt="%Y/%m/%d %H:%M:%S")
+
+    stream_handler = logging.StreamHandler(stream=sys.stdout)
+    stream_handler.setFormatter(formatter)
+    _logger.addHandler(stream_handler)
+    if log_file is not None and dist.get_rank() == 0:
+        log_file_folder = os.path.split(log_file)[0]
+        os.makedirs(log_file_folder, exist_ok=True)
+        file_handler = logging.FileHandler(log_file, 'a')
+        file_handler.setFormatter(formatter)
+        _logger.addHandler(file_handler)
+    if dist.get_rank() == 0:
+        _logger.setLevel(log_level)
+    else:
+        _logger.setLevel(logging.ERROR)
+
+
+def log_at_trainer0(log):
+    """
+    Logs are printed multiple times when the Fleet API is used.
+    Only emit the log on trainer 0 and ignore the others.
+    """
+
+    def wrapper(fmt, *args):
+        if dist.get_rank() == 0:
+            log(fmt, *args)
+
+    return wrapper
+
+
+@log_at_trainer0
+def info(fmt, *args):
+    _logger.info(fmt, *args)
+
+
+@log_at_trainer0
+def debug(fmt, *args):
+    _logger.debug(fmt, *args)
+
+
+@log_at_trainer0
+def warning(fmt, *args):
+    _logger.warning(fmt, *args)
+
+
+@log_at_trainer0
+def error(fmt, *args):
+    _logger.error(fmt, *args)
+
+
+def scaler(name, value, step, writer):
+    """
+    Draw a scalar curve with VisualDL.
+    Usage: install VisualDL: pip3 install visualdl==2.0.0b4
+    and then:
+    visualdl --logdir ./scalar --host 0.0.0.0 --port 8830
+    to preview the loss curve in real time.
+    """
+    if writer is None:
+        return
+    writer.add_scalar(tag=name, step=step, value=value)
+
+
+def advertise():
+    """
+    Show the advertising message like the following:
+
+    ===========================================================
+    ==      PaddleClas is powered by PaddlePaddle !          ==
+    ===========================================================
+    ==                                                       ==
+    ==   For more info please go to the following website.   ==
+    ==                                                       ==
+    ==      https://github.com/PaddlePaddle/PaddleClas       ==
+    ===========================================================
+
+    """
+    copyright = "PaddleClas is powered by PaddlePaddle !"
+    ad = "For more info please go to the following website."
+    website = "https://github.com/PaddlePaddle/PaddleClas"
+    AD_LEN = 6 + len(max([copyright, ad, website], key=len))
+
+    info("\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n".format(
+        "=" * (AD_LEN + 4),
+        "=={}==".format(copyright.center(AD_LEN)),
+        "=" * (AD_LEN + 4),
+        "=={}==".format(' ' * AD_LEN),
+        "=={}==".format(ad.center(AD_LEN)),
+        "=={}==".format(' ' * AD_LEN),
+        "=={}==".format(website.center(AD_LEN)),
+        "=" * (AD_LEN + 4), ))
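A minimal usage sketch of the logger module above (editorial illustration, not part of the diff; the log path is arbitrary, and `init_logger` may only be called once per process):

from ppcls.utils import logger

logger.init_logger(name='ppcls', log_file='./output/train.log')
logger.info("epoch %d, loss %.4f", 1, 0.35)  # emitted only on rank 0
logger.advertise()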
diff --git a/src/PaddleClas/ppcls/utils/metrics.py b/src/PaddleClas/ppcls/utils/metrics.py
new file mode 100644
index 0000000..b0db68a
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/metrics.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from sklearn.metrics import hamming_loss
+from sklearn.metrics import accuracy_score as accuracy_metric
+from sklearn.metrics import multilabel_confusion_matrix
+from sklearn.metrics import precision_recall_fscore_support
+from sklearn.metrics import average_precision_score
+from sklearn.preprocessing import binarize
+
+import numpy as np
+
+__all__ = ["multi_hot_encode", "hamming_distance", "accuracy_score", "precision_recall_fscore", "mean_average_precision"]
+
+
+def multi_hot_encode(logits, threshold=0.5):
+    """
+    Encode logits into multi-hot vectors, element-wise, for multilabel classification
+    """
+
+    return binarize(logits, threshold=threshold)
+
+
+def hamming_distance(output, target):
+    """
+    Soft, label-based metric for multilabel classification
+    Returns:
+        The smaller the return value is, the better the model is.
+    """
+
+    return hamming_loss(target, output)
+
+
+def accuracy_score(output, target, base="sample"):
+    """
+    Hard metric for multilabel classification
+    Args:
+        output:
+        target:
+        base: ["sample", "label"], default="sample"
+            if "sample", return the metric score based on samples;
+            if "label", return the metric score based on labels.
+    Returns:
+        accuracy:
+    """
+
+    assert base in ["sample", "label"], 'must be one of ["sample", "label"]'
+
+    if base == "sample":
+        accuracy = accuracy_metric(target, output)
+    elif base == "label":
+        mcm = multilabel_confusion_matrix(target, output)
+        tns = mcm[:, 0, 0]
+        fns = mcm[:, 1, 0]
+        tps = mcm[:, 1, 1]
+        fps = mcm[:, 0, 1]
+
+        accuracy = (sum(tps) + sum(tns)) / (sum(tps) + sum(tns) + sum(fns) + sum(fps))
+
+    return accuracy
+
+
+def precision_recall_fscore(output, target):
+    """
+    Label-based metrics for multilabel classification
+    Returns:
+        precisions:
+        recalls:
+        fscores:
+    """
+
+    precisions, recalls, fscores, _ = precision_recall_fscore_support(target, output)
+
+    return precisions, recalls, fscores
+
+
+def mean_average_precision(logits, target):
+    """
+    Calculate the mean average precision over all labels
+    Args:
+        logits: continuous prediction scores from the network (before thresholding)
+        target: ground truth, 0 or 1
+    """
+    if not (isinstance(logits, np.ndarray) and isinstance(target, np.ndarray)):
+        raise TypeError("logits and target should be np.ndarray.")
+
+    aps = []
+    for i in range(target.shape[1]):
+        ap = average_precision_score(target[:, i], logits[:, i])
+        aps.append(ap)
+
+    return np.mean(aps)
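A quick sanity check of these metrics on toy data (a sketch, assuming the module is importable as ppcls.utils.metrics and scikit-learn is installed):

import numpy as np
from ppcls.utils import metrics

scores = np.array([[0.9, 0.2], [0.4, 0.8]])  # two samples, two labels
target = np.array([[1, 0], [0, 1]])
preds = metrics.multi_hot_encode(scores, threshold=0.5)
print(metrics.hamming_distance(preds, target))              # 0.0, lower is better
print(metrics.accuracy_score(preds, target, base="label"))  # 1.0
print(metrics.mean_average_precision(scores, target))       # 1.0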
diff --git a/src/PaddleClas/ppcls/utils/misc.py b/src/PaddleClas/ppcls/utils/misc.py
new file mode 100644
index 0000000..08ab7b6
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/misc.py
@@ -0,0 +1,63 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['AverageMeter']
+
+
+class AverageMeter(object):
+    """
+    Computes and stores the average and current value.
+    Based on https://github.com/pytorch/examples/blob/master/imagenet/main.py
+    """
+
+    def __init__(self, name='', fmt='f', postfix="", need_avg=True):
+        self.name = name
+        self.fmt = fmt
+        self.postfix = postfix
+        self.need_avg = need_avg
+        self.reset()
+
+    def reset(self):
+        """ reset """
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        """ update """
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+    @property
+    def total(self):
+        return '{self.name}_sum: {self.sum:{self.fmt}}{self.postfix}'.format(
+            self=self)
+
+    @property
+    def total_minute(self):
+        return '{self.name} {s:{self.fmt}}{self.postfix} min'.format(
+            s=self.sum / 60, self=self)
+
+    @property
+    def mean(self):
+        return '{self.name}: {self.avg:{self.fmt}}{self.postfix}'.format(
+            self=self) if self.need_avg else ''
+
+    @property
+    def value(self):
+        return '{self.name}: {self.val:{self.fmt}}{self.postfix}'.format(
+            self=self)
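AverageMeter is typically updated once per batch and read for display; a small sketch with toy loss values (the batch size of 32 is illustrative):

from ppcls.utils.misc import AverageMeter

loss_meter = AverageMeter('loss', fmt='.4f')
for batch_loss in [0.9, 0.7, 0.5]:   # per-batch losses
    loss_meter.update(batch_loss, n=32)
print(loss_meter.mean)   # "loss: 0.7000" (running average)
print(loss_meter.value)  # "loss: 0.5000" (most recent batch)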
diff --git a/src/PaddleClas/ppcls/utils/model_zoo.py b/src/PaddleClas/ppcls/utils/model_zoo.py
new file mode 100644
index 0000000..fc527f6
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/model_zoo.py
@@ -0,0 +1,213 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import requests
+import shutil
+import tarfile
+import tqdm
+import zipfile
+
+from ppcls.arch import similar_architectures
+from ppcls.utils import logger
+
+__all__ = ['get']
+
+DOWNLOAD_RETRY_LIMIT = 3
+
+
+class UrlError(Exception):
+    """ UrlError
+    """
+
+    def __init__(self, url='', code=''):
+        message = "Downloading from {} failed with code {}!".format(url, code)
+        super(UrlError, self).__init__(message)
+
+
+class ModelNameError(Exception):
+    """ ModelNameError
+    """
+
+    def __init__(self, message=''):
+        super(ModelNameError, self).__init__(message)
+
+
+class RetryError(Exception):
+    """ RetryError
+    """
+
+    def __init__(self, url='', times=''):
+        message = "Download from {} failed. Retry({}) limit reached".format(
+            url, times)
+        super(RetryError, self).__init__(message)
+
+
+def _get_url(architecture, postfix="pdparams"):
+    prefix = "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/"
+    fname = architecture + "_pretrained." + postfix
+    return prefix + fname
+
+
+def _move_and_merge_tree(src, dst):
+    """
+    Move the src directory to dst; if dst already exists,
+    merge src into dst
+    """
+    if not os.path.exists(dst):
+        shutil.move(src, dst)
+    elif os.path.isfile(src):
+        shutil.move(src, dst)
+    else:
+        for fp in os.listdir(src):
+            src_fp = os.path.join(src, fp)
+            dst_fp = os.path.join(dst, fp)
+            if os.path.isdir(src_fp):
+                if os.path.isdir(dst_fp):
+                    _move_and_merge_tree(src_fp, dst_fp)
+                else:
+                    shutil.move(src_fp, dst_fp)
+            elif os.path.isfile(src_fp) and \
+                    not os.path.isfile(dst_fp):
+                shutil.move(src_fp, dst_fp)
+
+
+def _download(url, path):
+    """
+    Download from url, save to path.
+    url (str): download url
+    path (str): download to given path
+    """
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+    fname = os.path.split(url)[-1]
+    fullname = os.path.join(path, fname)
+    retry_cnt = 0
+
+    while not os.path.exists(fullname):
+        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
+            retry_cnt += 1
+        else:
+            raise RetryError(url, DOWNLOAD_RETRY_LIMIT)
+
+        logger.info("Downloading {} from {}".format(fname, url))
+
+        req = requests.get(url, stream=True)
+        if req.status_code != 200:
+            raise UrlError(url, req.status_code)
+
+        # To guard against an interrupted download, write to
+        # tmp_fullname first, then move tmp_fullname to fullname
+        # once the download has finished
+        tmp_fullname = fullname + "_tmp"
+        total_size = req.headers.get('content-length')
+        with open(tmp_fullname, 'wb') as f:
+            if total_size:
+                for chunk in tqdm.tqdm(
+                        req.iter_content(chunk_size=1024),
+                        total=(int(total_size) + 1023) // 1024,
+                        unit='KB'):
+                    f.write(chunk)
+            else:
+                for chunk in req.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+        shutil.move(tmp_fullname, fullname)
+
+    return fullname
+
+
+def _decompress(fname):
+    """
+    Decompress zip and tar files
+    """
+    logger.info("Decompressing {}...".format(fname))
+
+    # To guard against interrupted decompression, decompress
+    # into the fpath_tmp directory first; if decompression
+    # succeeds, move the files to fpath, delete fpath_tmp
+    # and remove the downloaded archive.
+    fpath = os.path.split(fname)[0]
+    fpath_tmp = os.path.join(fpath, 'tmp')
+    if os.path.isdir(fpath_tmp):
+        shutil.rmtree(fpath_tmp)
+    os.makedirs(fpath_tmp)
+
+    if fname.find('tar') >= 0:
+        with tarfile.open(fname) as tf:
+            tf.extractall(path=fpath_tmp)
+    elif fname.find('zip') >= 0:
+        with zipfile.ZipFile(fname) as zf:
+            zf.extractall(path=fpath_tmp)
+    else:
+        raise TypeError("Unsupported compress file type {}".format(fname))
+
+    fs = os.listdir(fpath_tmp)
+    assert len(
+        fs
+    ) == 1, "There should just be 1 pretrained path in an archive file but got {}.".format(
+        len(fs))
+
+    f = fs[0]
+    src_dir = os.path.join(fpath_tmp, f)
+    dst_dir = os.path.join(fpath, f)
+    _move_and_merge_tree(src_dir, dst_dir)
+
+    shutil.rmtree(fpath_tmp)
+    os.remove(fname)
+
+    return f
+
+
+def _get_pretrained():
+    with open('./ppcls/utils/pretrained.list') as flist:
+        pretrained = [line.strip() for line in flist]
+    return pretrained
+
+
+def _check_pretrained_name(architecture):
+    assert isinstance(architecture, str), \
+        ("the type of architecture({}) should be str".format(architecture))
+    pretrained = _get_pretrained()
+    similar_names = similar_architectures(architecture, pretrained)
+    model_list = ', '.join(similar_names)
+    err = "{} does not exist! Maybe you want: [{}]" \
+        "".format(architecture, model_list)
+    if architecture not in similar_names:
+        raise ModelNameError(err)
+
+
+def list_models():
+    pretrained = _get_pretrained()
+    msg = "All available pretrained models are as follows: {}".format(
+        pretrained)
+    logger.info(msg)
+    return
+
+
+def get(architecture, path, decompress=False, postfix="pdparams"):
+    """
+    Get the pretrained model.
+    """
+    _check_pretrained_name(architecture)
+    url = _get_url(architecture, postfix=postfix)
+    fname = _download(url, path)
+    if postfix == "tar" and decompress:
+        _decompress(fname)
+    logger.info("Download of {} finished".format(fname))
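A usage sketch for the downloader (network access is required, and since pretrained.list is opened via a relative path, this must be run from the PaddleClas root; the target directory is illustrative):

from ppcls.utils import model_zoo

model_zoo.list_models()  # logs all names from pretrained.list
# Downloads ResNet50_vd_pretrained.pdparams into ./pretrained/
model_zoo.get('ResNet50_vd', path='./pretrained')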
diff --git a/src/PaddleClas/ppcls/utils/pretrained.list b/src/PaddleClas/ppcls/utils/pretrained.list
new file mode 100644
index 0000000..36d70f5
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/pretrained.list
@@ -0,0 +1,121 @@
+ResNet18 +ResNet34 +ResNet50 +ResNet101 +ResNet152 +ResNet50_vc +ResNet18_vd +ResNet34_vd +ResNet50_vd +ResNet50_vd_v2 +ResNet101_vd +ResNet152_vd +ResNet200_vd +ResNet50_vd_ssld +ResNet50_vd_ssld_v2 +Fix_ResNet50_vd_ssld_v2 +ResNet101_vd_ssld +MobileNetV3_large_x0_35 +MobileNetV3_large_x0_5 +MobileNetV3_large_x0_75 +MobileNetV3_large_x1_0 +MobileNetV3_large_x1_25 +MobileNetV3_small_x0_35 +MobileNetV3_small_x0_5 +MobileNetV3_small_x0_75 +MobileNetV3_small_x1_0 +MobileNetV3_small_x1_25 +MobileNetV3_large_x1_0_ssld +MobileNetV3_large_x1_0_ssld_int8 +MobileNetV3_small_x1_0_ssld +MobileNetV2_x0_25 +MobileNetV2_x0_5 +MobileNetV2_x0_75 +MobileNetV2 +MobileNetV2_x1_5 +MobileNetV2_x2_0 +MobileNetV2_ssld +MobileNetV1_x0_25 +MobileNetV1_x0_5 +MobileNetV1_x0_75 +MobileNetV1 +MobileNetV1_ssld +ShuffleNetV2_x0_25 +ShuffleNetV2_x0_33 +ShuffleNetV2_x0_5 +ShuffleNetV2 +ShuffleNetV2_x1_5 +ShuffleNetV2_x2_0 +ShuffleNetV2_swish +ResNeXt50_32x4d +ResNeXt50_64x4d +ResNeXt101_32x4d +ResNeXt101_64x4d +ResNeXt152_32x4d +ResNeXt152_64x4d +ResNeXt50_vd_32x4d +ResNeXt50_vd_64x4d +ResNeXt101_vd_32x4d +ResNeXt101_vd_64x4d +ResNeXt152_vd_32x4d +ResNeXt152_vd_64x4d +SE_ResNet18_vd +SE_ResNet34_vd +SE_ResNet50_vd +SE_ResNeXt50_32x4d +SE_ResNeXt101_32x4d +SE_ResNeXt50_vd_32x4d +SENet154_vd +Res2Net50_26w_4s +Res2Net50_vd_26w_4s +Res2Net50_14w_8s +Res2Net101_vd_26w_4s +Res2Net200_vd_26w_4s +GoogLeNet +InceptionV4 +Xception41 +Xception41_deeplab +Xception65 +Xception65_deeplab +Xception71 +HRNet_W18_C +HRNet_W30_C +HRNet_W32_C +HRNet_W40_C +HRNet_W44_C +HRNet_W48_C +HRNet_W64_C +DPN68 +DPN92 +DPN98 +DPN107 +DPN131 +DenseNet121 +DenseNet161 +DenseNet169 +DenseNet201 +DenseNet264 +EfficientNetB0_small +EfficientNetB0 +EfficientNetB1 +EfficientNetB2 +EfficientNetB3 +EfficientNetB4 +EfficientNetB5 +EfficientNetB6 +EfficientNetB7 +ResNeXt101_32x8d_wsl +ResNeXt101_32x16d_wsl +ResNeXt101_32x32d_wsl +ResNeXt101_32x48d_wsl +Fix_ResNeXt101_32x48d_wsl +AlexNet +SqueezeNet1_0 +SqueezeNet1_1 +VGG11 +VGG13 +VGG16 +VGG19 +DarkNet53_ImageNet1k +ResNet50_ACNet_deploy +CSPResNet50_leaky
diff --git a/src/PaddleClas/ppcls/utils/profiler.py b/src/PaddleClas/ppcls/utils/profiler.py
new file mode 100644
index 0000000..7cf945a
--- /dev/null
+++ b/src/PaddleClas/ppcls/utils/profiler.py
@@ -0,0 +1,111 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+
+# A global variable to record the number of times the profiler functions
+# have been called. It is used to specify the tracing range of training steps.
+_profiler_step_id = 0
+
+# A global variable to avoid re-parsing the options string every time.
+_profiler_options = None
+
+
+class ProfilerOptions(object):
+    '''
+    Use a string to initialize a ProfilerOptions.
+    The string should be in the format: "key1=value1;key2=value2;key3=value3".
+    For example:
+      "profile_path=model.profile"
+      "batch_range=[50, 60]; profile_path=model.profile"
+      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+
+    ProfilerOptions supports the following key-value pairs:
+      batch_range      - an integer list, e.g. [100, 110].
+      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
+      sorted_key       - a string, the optional values are 'calls', 'total',
+                         'max', 'min' or 'ave'.
+      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
+                         'AllOpDetail'.
+      profile_path     - a string, the path to save the serialized profile data,
+                         which can be used to generate a timeline.
+      exit_on_finished - a boolean.
+    '''

+    def __init__(self, options_str):
+        assert isinstance(options_str, str)
+
+        self._options = {
+            'batch_range': [10, 20],
+            'state': 'All',
+            'sorted_key': 'total',
+            'tracer_option': 'Default',
+            'profile_path': '/tmp/profile',
+            'exit_on_finished': True
+        }
+        self._parse_from_string(options_str)
+
+    def _parse_from_string(self, options_str):
+        for kv in options_str.replace(' ', '').split(';'):
+            key, value = kv.split('=')
+            if key == 'batch_range':
+                value_list = value.replace('[', '').replace(']', '').split(',')
+                value_list = list(map(int, value_list))
+                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
+                        1] > value_list[0]:
+                    self._options[key] = value_list
+            elif key == 'exit_on_finished':
+                self._options[key] = value.lower() in ("yes", "true", "t", "1")
+            elif key in [
+                    'state', 'sorted_key', 'tracer_option', 'profile_path'
+            ]:
+                self._options[key] = value
+
+    def __getitem__(self, name):
+        if self._options.get(name, None) is None:
+            raise ValueError(
+                "ProfilerOptions does not have an option named %s." % name)
+        return self._options[name]
+
+
+def add_profiler_step(options_str=None):
+    '''
+    Enable operator-level timing using PaddlePaddle's profiler.
+    The profiler uses an independent variable to count the profiler steps.
+    One call of this function is treated as a profiler step.
+
+    Args:
+      options_str - a string used to initialize the ProfilerOptions.
+                    Default is None, meaning the profiler is disabled.
+    '''
+    if options_str is None:
+        return
+
+    global _profiler_step_id
+    global _profiler_options
+
+    if _profiler_options is None:
+        _profiler_options = ProfilerOptions(options_str)
+
+    if _profiler_step_id == _profiler_options['batch_range'][0]:
+        paddle.utils.profiler.start_profiler(
+            _profiler_options['state'], _profiler_options['tracer_option'])
+    elif _profiler_step_id == _profiler_options['batch_range'][1]:
+        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
+                                            _profiler_options['profile_path'])
+        if _profiler_options['exit_on_finished']:
+            sys.exit(0)
+
+    _profiler_step_id += 1
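add_profiler_step is meant to be called once per training batch; a runnable sketch under the assumption that paddle is installed (the range loop stands in for a real data loader, and exit_on_finished=false keeps the process alive after the profile is written):

from ppcls.utils import profiler

opts = "batch_range=[10, 20]; profile_path=/tmp/profile; exit_on_finished=false"
for batch_id in range(30):            # stand-in for iterating a real loader
    profiler.add_profiler_step(opts)  # starts at batch 10, stops and dumps at batch 20
    # ... forward / backward / optimizer step on the real batch ...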
{}".format( + pretrained_model)) + + +def init_model(config, net, optimizer=None): + """ + load model from checkpoint or pretrained_model + """ + checkpoints = config.get('checkpoints') + if checkpoints and optimizer is not None: + assert os.path.exists(checkpoints + ".pdparams"), \ + "Given dir {}.pdparams not exist.".format(checkpoints) + assert os.path.exists(checkpoints + ".pdopt"), \ + "Given dir {}.pdopt not exist.".format(checkpoints) + para_dict = paddle.load(checkpoints + ".pdparams") + opti_dict = paddle.load(checkpoints + ".pdopt") + metric_dict = paddle.load(checkpoints + ".pdstates") + net.set_dict(para_dict) + optimizer.set_state_dict(opti_dict) + logger.info("Finish load checkpoints from {}".format(checkpoints)) + return metric_dict + + pretrained_model = config.get('pretrained_model') + use_distillation = config.get('use_distillation', False) + if pretrained_model: + if use_distillation: + load_distillation_model(net, pretrained_model) + else: # common load + load_dygraph_pretrain(net, path=pretrained_model) + logger.info( + logger.coloring("Finish load pretrained model from {}".format( + pretrained_model), "HEADER")) + + +def save_model(net, + optimizer, + metric_info, + model_path, + model_name="", + prefix='ppcls'): + """ + save model to the target path + """ + if paddle.distributed.get_rank() != 0: + return + model_path = os.path.join(model_path, model_name) + _mkdir_if_not_exist(model_path) + model_path = os.path.join(model_path, prefix) + + paddle.save(net.state_dict(), model_path + ".pdparams") + paddle.save(optimizer.state_dict(), model_path + ".pdopt") + paddle.save(metric_info, model_path + ".pdstates") + logger.info("Already save model in {}".format(model_path))