@ -0,0 +1,497 @@
[2022/04/18 12:43:50] root INFO: 
===========================================================
==        PaddleClas is powered by PaddlePaddle !       ==
===========================================================
==                                                       ==
==   For more info please go to the following website.   ==
==                                                       ==
==       https://github.com/PaddlePaddle/PaddleClas      ==
===========================================================

[2022/04/18 12:43:50] root INFO: Arch : 
[2022/04/18 12:43:50] root INFO: class_num : 100
[2022/04/18 12:43:50] root INFO: name : ResNet50_vd
[2022/04/18 12:43:50] root INFO: DataLoader : 
[2022/04/18 12:43:50] root INFO: Eval : 
[2022/04/18 12:43:50] root INFO: dataset : 
[2022/04/18 12:43:50] root INFO: cls_label_path : ./dataset/CIFAR100/test_list.txt
[2022/04/18 12:43:50] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:43:50] root INFO: name : ImageNetDataset
[2022/04/18 12:43:50] root INFO: transform_ops : 
[2022/04/18 12:43:50] root INFO: DecodeImage : 
[2022/04/18 12:43:50] root INFO: channel_first : False
[2022/04/18 12:43:50] root INFO: to_rgb : True
[2022/04/18 12:43:50] root INFO: ResizeImage : 
[2022/04/18 12:43:50] root INFO: resize_short : 36
[2022/04/18 12:43:50] root INFO: CropImage : 
[2022/04/18 12:43:50] root INFO: size : 32
[2022/04/18 12:43:50] root INFO: NormalizeImage : 
[2022/04/18 12:43:50] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:43:50] root INFO: order : 
[2022/04/18 12:43:50] root INFO: scale : 1.0/255.0
[2022/04/18 12:43:50] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:43:50] root INFO: loader : 
[2022/04/18 12:43:50] root INFO: num_workers : 4
[2022/04/18 12:43:50] root INFO: use_shared_memory : True
[2022/04/18 12:43:50] root INFO: sampler : 
[2022/04/18 12:43:50] root INFO: batch_size : 64
[2022/04/18 12:43:50] root INFO: drop_last : False
[2022/04/18 12:43:50] root INFO: name : DistributedBatchSampler
[2022/04/18 12:43:50] root INFO: shuffle : False
[2022/04/18 12:43:50] root INFO: Train : 
[2022/04/18 12:43:50] root INFO: dataset : 
[2022/04/18 12:43:50] root INFO: cls_label_path : ./dataset/CIFAR100/train_list.txt
[2022/04/18 12:43:50] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:43:50] root INFO: name : ImageNetDataset
[2022/04/18 12:43:50] root INFO: transform_ops : 
[2022/04/18 12:43:50] root INFO: DecodeImage : 
[2022/04/18 12:43:50] root INFO: channel_first : False
[2022/04/18 12:43:50] root INFO: to_rgb : True
[2022/04/18 12:43:50] root INFO: RandCropImage : 
[2022/04/18 12:43:50] root INFO: size : 32
[2022/04/18 12:43:50] root INFO: RandFlipImage : 
[2022/04/18 12:43:50] root INFO: flip_code : 1
[2022/04/18 12:43:50] root INFO: NormalizeImage : 
[2022/04/18 12:43:50] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:43:50] root INFO: order : 
[2022/04/18 12:43:50] root INFO: scale : 1.0/255.0
[2022/04/18 12:43:50] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:43:50] root INFO: loader : 
[2022/04/18 12:43:50] root INFO: num_workers : 4
[2022/04/18 12:43:50] root INFO: use_shared_memory : True
[2022/04/18 12:43:50] root INFO: sampler : 
[2022/04/18 12:43:50] root INFO: batch_size : 64
[2022/04/18 12:43:50] root INFO: drop_last : False
[2022/04/18 12:43:50] root INFO: name : DistributedBatchSampler
[2022/04/18 12:43:50] root INFO: shuffle : True
[2022/04/18 12:43:50] root INFO: Global : 
[2022/04/18 12:43:50] root INFO: checkpoints : None
[2022/04/18 12:43:50] root INFO: device : gpu
[2022/04/18 12:43:50] root INFO: epochs : 100
[2022/04/18 12:43:50] root INFO: eval_during_train : True
[2022/04/18 12:43:50] root INFO: eval_interval : 1
[2022/04/18 12:43:50] root INFO: image_shape : [3, 32, 32]
[2022/04/18 12:43:50] root INFO: output_dir : output_CIFAR
[2022/04/18 12:43:50] root INFO: pretrained_model : None
[2022/04/18 12:43:50] root INFO: print_batch_step : 10
[2022/04/18 12:43:50] root INFO: save_inference_dir : ./inference
[2022/04/18 12:43:50] root INFO: save_interval : 1
[2022/04/18 12:43:50] root INFO: use_visualdl : False
[2022/04/18 12:43:50] root INFO: Infer : 
[2022/04/18 12:43:50] root INFO: PostProcess : 
[2022/04/18 12:43:50] root INFO: name : Topk
[2022/04/18 12:43:50] root INFO: topk : 5
[2022/04/18 12:43:50] root INFO: batch_size : 10
[2022/04/18 12:43:50] root INFO: infer_imgs : docs/images/inference_deployment/whl_demo.jpg
[2022/04/18 12:43:50] root INFO: transforms : 
[2022/04/18 12:43:50] root INFO: DecodeImage : 
[2022/04/18 12:43:50] root INFO: channel_first : False
[2022/04/18 12:43:50] root INFO: to_rgb : True
[2022/04/18 12:43:50] root INFO: ResizeImage : 
[2022/04/18 12:43:50] root INFO: resize_short : 36
[2022/04/18 12:43:50] root INFO: CropImage : 
[2022/04/18 12:43:50] root INFO: size : 32
[2022/04/18 12:43:50] root INFO: NormalizeImage : 
[2022/04/18 12:43:50] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:43:50] root INFO: order : 
[2022/04/18 12:43:50] root INFO: scale : 1.0/255.0
[2022/04/18 12:43:50] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:43:50] root INFO: ToCHWImage : None
[2022/04/18 12:43:50] root INFO: Loss : 
[2022/04/18 12:43:50] root INFO: Eval : 
[2022/04/18 12:43:50] root INFO: CELoss : 
[2022/04/18 12:43:50] root INFO: weight : 1.0
[2022/04/18 12:43:50] root INFO: Train : 
[2022/04/18 12:43:50] root INFO: CELoss : 
[2022/04/18 12:43:50] root INFO: weight : 1.0
[2022/04/18 12:43:50] root INFO: Metric : 
[2022/04/18 12:43:50] root INFO: Eval : 
[2022/04/18 12:43:50] root INFO: TopkAcc : 
[2022/04/18 12:43:50] root INFO: topk : [1, 5]
[2022/04/18 12:43:50] root INFO: Train : 
[2022/04/18 12:43:50] root INFO: TopkAcc : 
[2022/04/18 12:43:50] root INFO: topk : [1, 5]
[2022/04/18 12:43:50] root INFO: Optimizer : 
[2022/04/18 12:43:50] root INFO: lr : 
[2022/04/18 12:43:50] root INFO: learning_rate : 0.04
[2022/04/18 12:43:50] root INFO: name : Cosine
[2022/04/18 12:43:50] root INFO: momentum : 0.9
[2022/04/18 12:43:50] root INFO: name : Momentum
[2022/04/18 12:43:50] root INFO: regularizer : 
[2022/04/18 12:43:50] root INFO: coeff : 0.0001
[2022/04/18 12:43:50] root INFO: name : L2
[2022/04/18 12:43:50] root INFO: profiler_options : None
[2022/04/18 12:47:26] root INFO: 
===========================================================
==        PaddleClas is powered by PaddlePaddle !       ==
===========================================================
==                                                       ==
==   For more info please go to the following website.   ==
==                                                       ==
==       https://github.com/PaddlePaddle/PaddleClas      ==
===========================================================

[2022/04/18 12:47:26] root INFO: Arch : 
[2022/04/18 12:47:26] root INFO: class_num : 100
[2022/04/18 12:47:26] root INFO: name : ResNet50_vd
[2022/04/18 12:47:26] root INFO: DataLoader : 
[2022/04/18 12:47:26] root INFO: Eval : 
[2022/04/18 12:47:26] root INFO: dataset : 
[2022/04/18 12:47:26] root INFO: cls_label_path : ./dataset/CIFAR100/test_list.txt
[2022/04/18 12:47:26] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:47:26] root INFO: name : ImageNetDataset
[2022/04/18 12:47:26] root INFO: transform_ops : 
[2022/04/18 12:47:26] root INFO: DecodeImage : 
[2022/04/18 12:47:26] root INFO: channel_first : False
[2022/04/18 12:47:26] root INFO: to_rgb : True
[2022/04/18 12:47:26] root INFO: ResizeImage : 
[2022/04/18 12:47:26] root INFO: resize_short : 36
[2022/04/18 12:47:26] root INFO: CropImage : 
[2022/04/18 12:47:26] root INFO: size : 32
[2022/04/18 12:47:26] root INFO: NormalizeImage : 
[2022/04/18 12:47:26] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:47:26] root INFO: order : 
[2022/04/18 12:47:26] root INFO: scale : 1.0/255.0
[2022/04/18 12:47:26] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:47:26] root INFO: loader : 
[2022/04/18 12:47:26] root INFO: num_workers : 4
[2022/04/18 12:47:26] root INFO: use_shared_memory : True
[2022/04/18 12:47:26] root INFO: sampler : 
[2022/04/18 12:47:26] root INFO: batch_size : 64
[2022/04/18 12:47:26] root INFO: drop_last : False
[2022/04/18 12:47:26] root INFO: name : DistributedBatchSampler
[2022/04/18 12:47:26] root INFO: shuffle : False
[2022/04/18 12:47:26] root INFO: Train : 
[2022/04/18 12:47:26] root INFO: dataset : 
[2022/04/18 12:47:26] root INFO: cls_label_path : ./dataset/CIFAR100/train_list.txt
[2022/04/18 12:47:26] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:47:26] root INFO: name : ImageNetDataset
[2022/04/18 12:47:26] root INFO: transform_ops : 
[2022/04/18 12:47:26] root INFO: DecodeImage : 
[2022/04/18 12:47:26] root INFO: channel_first : False
[2022/04/18 12:47:26] root INFO: to_rgb : True
[2022/04/18 12:47:26] root INFO: RandCropImage : 
[2022/04/18 12:47:26] root INFO: size : 32
[2022/04/18 12:47:26] root INFO: RandFlipImage : 
[2022/04/18 12:47:26] root INFO: flip_code : 1
[2022/04/18 12:47:26] root INFO: NormalizeImage : 
[2022/04/18 12:47:26] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:47:26] root INFO: order : 
[2022/04/18 12:47:26] root INFO: scale : 1.0/255.0
[2022/04/18 12:47:26] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:47:26] root INFO: loader : 
[2022/04/18 12:47:26] root INFO: num_workers : 4
[2022/04/18 12:47:26] root INFO: use_shared_memory : True
[2022/04/18 12:47:26] root INFO: sampler : 
[2022/04/18 12:47:26] root INFO: batch_size : 64
[2022/04/18 12:47:26] root INFO: drop_last : False
[2022/04/18 12:47:26] root INFO: name : DistributedBatchSampler
[2022/04/18 12:47:26] root INFO: shuffle : True
[2022/04/18 12:47:26] root INFO: Global : 
[2022/04/18 12:47:26] root INFO: checkpoints : None
[2022/04/18 12:47:26] root INFO: device : gpu
[2022/04/18 12:47:26] root INFO: epochs : 100
[2022/04/18 12:47:26] root INFO: eval_during_train : True
[2022/04/18 12:47:26] root INFO: eval_interval : 1
[2022/04/18 12:47:26] root INFO: image_shape : [3, 32, 32]
[2022/04/18 12:47:26] root INFO: output_dir : output_CIFAR
[2022/04/18 12:47:26] root INFO: pretrained_model : None
[2022/04/18 12:47:26] root INFO: print_batch_step : 10
[2022/04/18 12:47:26] root INFO: save_inference_dir : ./inference
[2022/04/18 12:47:26] root INFO: save_interval : 1
[2022/04/18 12:47:26] root INFO: use_gpu : False
[2022/04/18 12:47:26] root INFO: use_visualdl : False
[2022/04/18 12:47:26] root INFO: Infer : 
[2022/04/18 12:47:26] root INFO: PostProcess : 
[2022/04/18 12:47:26] root INFO: name : Topk
[2022/04/18 12:47:26] root INFO: topk : 5
[2022/04/18 12:47:26] root INFO: batch_size : 10
[2022/04/18 12:47:26] root INFO: infer_imgs : docs/images/inference_deployment/whl_demo.jpg
[2022/04/18 12:47:26] root INFO: transforms : 
[2022/04/18 12:47:26] root INFO: DecodeImage : 
[2022/04/18 12:47:26] root INFO: channel_first : False
[2022/04/18 12:47:26] root INFO: to_rgb : True
[2022/04/18 12:47:26] root INFO: ResizeImage : 
[2022/04/18 12:47:26] root INFO: resize_short : 36
[2022/04/18 12:47:26] root INFO: CropImage : 
[2022/04/18 12:47:26] root INFO: size : 32
[2022/04/18 12:47:26] root INFO: NormalizeImage : 
[2022/04/18 12:47:26] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:47:26] root INFO: order : 
[2022/04/18 12:47:26] root INFO: scale : 1.0/255.0
[2022/04/18 12:47:26] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:47:26] root INFO: ToCHWImage : None
[2022/04/18 12:47:26] root INFO: Loss : 
[2022/04/18 12:47:26] root INFO: Eval : 
[2022/04/18 12:47:26] root INFO: CELoss : 
[2022/04/18 12:47:26] root INFO: weight : 1.0
[2022/04/18 12:47:26] root INFO: Train : 
[2022/04/18 12:47:26] root INFO: CELoss : 
[2022/04/18 12:47:26] root INFO: weight : 1.0
[2022/04/18 12:47:26] root INFO: Metric : 
[2022/04/18 12:47:26] root INFO: Eval : 
[2022/04/18 12:47:26] root INFO: TopkAcc : 
[2022/04/18 12:47:26] root INFO: topk : [1, 5]
[2022/04/18 12:47:26] root INFO: Train : 
[2022/04/18 12:47:26] root INFO: TopkAcc : 
[2022/04/18 12:47:26] root INFO: topk : [1, 5]
[2022/04/18 12:47:26] root INFO: Optimizer : 
[2022/04/18 12:47:26] root INFO: lr : 
[2022/04/18 12:47:26] root INFO: learning_rate : 0.04
[2022/04/18 12:47:26] root INFO: name : Cosine
[2022/04/18 12:47:26] root INFO: momentum : 0.9
[2022/04/18 12:47:26] root INFO: name : Momentum
[2022/04/18 12:47:26] root INFO: regularizer : 
[2022/04/18 12:47:26] root INFO: coeff : 0.0001
[2022/04/18 12:47:26] root INFO: name : L2
[2022/04/18 12:47:26] root INFO: profiler_options : None
[2022/04/18 12:49:17] root INFO: 
===========================================================
==        PaddleClas is powered by PaddlePaddle !       ==
===========================================================
==                                                       ==
==   For more info please go to the following website.   ==
==                                                       ==
==       https://github.com/PaddlePaddle/PaddleClas      ==
===========================================================

[2022/04/18 12:49:17] root INFO: Arch : 
[2022/04/18 12:49:17] root INFO: class_num : 100
[2022/04/18 12:49:17] root INFO: name : ResNet50_vd
[2022/04/18 12:49:17] root INFO: DataLoader : 
[2022/04/18 12:49:17] root INFO: Eval : 
[2022/04/18 12:49:17] root INFO: dataset : 
[2022/04/18 12:49:17] root INFO: cls_label_path : ./dataset/CIFAR100/test_list.txt
[2022/04/18 12:49:17] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:49:17] root INFO: name : ImageNetDataset
[2022/04/18 12:49:17] root INFO: transform_ops : 
[2022/04/18 12:49:17] root INFO: DecodeImage : 
[2022/04/18 12:49:17] root INFO: channel_first : False
[2022/04/18 12:49:17] root INFO: to_rgb : True
[2022/04/18 12:49:17] root INFO: ResizeImage : 
[2022/04/18 12:49:17] root INFO: resize_short : 36
[2022/04/18 12:49:17] root INFO: CropImage : 
[2022/04/18 12:49:17] root INFO: size : 32
[2022/04/18 12:49:17] root INFO: NormalizeImage : 
[2022/04/18 12:49:17] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:49:17] root INFO: order : 
[2022/04/18 12:49:17] root INFO: scale : 1.0/255.0
[2022/04/18 12:49:17] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:49:17] root INFO: loader : 
[2022/04/18 12:49:17] root INFO: num_workers : 4
[2022/04/18 12:49:17] root INFO: use_shared_memory : True
[2022/04/18 12:49:17] root INFO: sampler : 
[2022/04/18 12:49:17] root INFO: batch_size : 64
[2022/04/18 12:49:17] root INFO: drop_last : False
[2022/04/18 12:49:17] root INFO: name : DistributedBatchSampler
[2022/04/18 12:49:17] root INFO: shuffle : False
[2022/04/18 12:49:17] root INFO: Train : 
[2022/04/18 12:49:17] root INFO: dataset : 
[2022/04/18 12:49:17] root INFO: cls_label_path : ./dataset/CIFAR100/train_list.txt
[2022/04/18 12:49:17] root INFO: image_root : ./dataset/CIFAR100/
[2022/04/18 12:49:17] root INFO: name : ImageNetDataset
[2022/04/18 12:49:17] root INFO: transform_ops : 
[2022/04/18 12:49:17] root INFO: DecodeImage : 
[2022/04/18 12:49:17] root INFO: channel_first : False
[2022/04/18 12:49:17] root INFO: to_rgb : True
[2022/04/18 12:49:17] root INFO: RandCropImage : 
[2022/04/18 12:49:17] root INFO: size : 32
[2022/04/18 12:49:17] root INFO: RandFlipImage : 
[2022/04/18 12:49:17] root INFO: flip_code : 1
[2022/04/18 12:49:17] root INFO: NormalizeImage : 
[2022/04/18 12:49:17] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:49:17] root INFO: order : 
[2022/04/18 12:49:17] root INFO: scale : 1.0/255.0
[2022/04/18 12:49:17] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:49:17] root INFO: loader : 
[2022/04/18 12:49:17] root INFO: num_workers : 4
[2022/04/18 12:49:17] root INFO: use_shared_memory : True
[2022/04/18 12:49:18] root INFO: sampler : 
[2022/04/18 12:49:18] root INFO: batch_size : 64
[2022/04/18 12:49:18] root INFO: drop_last : False
[2022/04/18 12:49:18] root INFO: name : DistributedBatchSampler
[2022/04/18 12:49:18] root INFO: shuffle : True
[2022/04/18 12:49:18] root INFO: Global : 
[2022/04/18 12:49:18] root INFO: checkpoints : None
[2022/04/18 12:49:18] root INFO: device : cpu
[2022/04/18 12:49:18] root INFO: epochs : 100
[2022/04/18 12:49:18] root INFO: eval_during_train : True
[2022/04/18 12:49:18] root INFO: eval_interval : 1
[2022/04/18 12:49:18] root INFO: image_shape : [3, 32, 32]
[2022/04/18 12:49:18] root INFO: output_dir : output_CIFAR
[2022/04/18 12:49:18] root INFO: pretrained_model : None
[2022/04/18 12:49:18] root INFO: print_batch_step : 10
[2022/04/18 12:49:18] root INFO: save_inference_dir : ./inference
[2022/04/18 12:49:18] root INFO: save_interval : 1
[2022/04/18 12:49:18] root INFO: use_visualdl : False
[2022/04/18 12:49:18] root INFO: Infer : 
[2022/04/18 12:49:18] root INFO: PostProcess : 
[2022/04/18 12:49:18] root INFO: name : Topk
[2022/04/18 12:49:18] root INFO: topk : 5
[2022/04/18 12:49:18] root INFO: batch_size : 10
[2022/04/18 12:49:18] root INFO: infer_imgs : docs/images/inference_deployment/whl_demo.jpg
[2022/04/18 12:49:18] root INFO: transforms : 
[2022/04/18 12:49:18] root INFO: DecodeImage : 
[2022/04/18 12:49:18] root INFO: channel_first : False
[2022/04/18 12:49:18] root INFO: to_rgb : True
[2022/04/18 12:49:18] root INFO: ResizeImage : 
[2022/04/18 12:49:18] root INFO: resize_short : 36
[2022/04/18 12:49:18] root INFO: CropImage : 
[2022/04/18 12:49:18] root INFO: size : 32
[2022/04/18 12:49:18] root INFO: NormalizeImage : 
[2022/04/18 12:49:18] root INFO: mean : [0.485, 0.456, 0.406]
[2022/04/18 12:49:18] root INFO: order : 
[2022/04/18 12:49:18] root INFO: scale : 1.0/255.0
[2022/04/18 12:49:18] root INFO: std : [0.229, 0.224, 0.225]
[2022/04/18 12:49:18] root INFO: ToCHWImage : None
[2022/04/18 12:49:18] root INFO: Loss : 
[2022/04/18 12:49:18] root INFO: Eval : 
[2022/04/18 12:49:18] root INFO: CELoss : 
[2022/04/18 12:49:18] root INFO: weight : 1.0
[2022/04/18 12:49:18] root INFO: Train : 
[2022/04/18 12:49:18] root INFO: CELoss : 
[2022/04/18 12:49:18] root INFO: weight : 1.0
[2022/04/18 12:49:18] root INFO: Metric : 
[2022/04/18 12:49:18] root INFO: Eval : 
[2022/04/18 12:49:18] root INFO: TopkAcc : 
[2022/04/18 12:49:18] root INFO: topk : [1, 5]
[2022/04/18 12:49:18] root INFO: Train : 
[2022/04/18 12:49:18] root INFO: TopkAcc : 
[2022/04/18 12:49:18] root INFO: topk : [1, 5]
[2022/04/18 12:49:18] root INFO: Optimizer : 
[2022/04/18 12:49:18] root INFO: lr : 
[2022/04/18 12:49:18] root INFO: learning_rate : 0.04
[2022/04/18 12:49:18] root INFO: name : Cosine
[2022/04/18 12:49:18] root INFO: momentum : 0.9
[2022/04/18 12:49:18] root INFO: name : Momentum
[2022/04/18 12:49:18] root INFO: regularizer : 
[2022/04/18 12:49:18] root INFO: coeff : 0.0001
[2022/04/18 12:49:18] root INFO: name : L2
[2022/04/18 12:49:18] root INFO: profiler_options : None
[2022/04/18 12:49:18] root INFO: train with paddle 2.2.2 and device CPUPlace
[2022/04/18 12:49:26] root WARNING: The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is 1 in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train.
[2022/04/18 12:49:28] root INFO: [Train][Epoch 1/100][Iter: 0/782]lr: 0.04000, top1: 0.00000, top5: 0.00000, CELoss: 5.06183, loss: 5.06183, batch_cost: 2.00989s, reader_cost: 0.05953, ips: 31.84257 images/sec, eta: 1 day, 19:39:33
[2022/04/18 12:49:45] root INFO: [Train][Epoch 1/100][Iter: 10/782]lr: 0.04000, top1: 0.00568, top5: 0.04261, CELoss: 8.25212, loss: 8.25212, batch_cost: 1.73383s, reader_cost: 0.00000, ips: 36.91248 images/sec, eta: 1 day, 13:39:28
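
Note on the warning above: the reference config assumes 4 GPUs with a per-card batch size of 64, while this run used a single card. A minimal sketch of the linear learning-rate scaling that warning implies is given below; the helper function is illustrative only and is not part of PaddleClas.

```python
# Illustrative only: linear LR scaling for the single-card run logged above.
# Reference values come from the config dump (4 cards assumed upstream,
# batch_size 64 per card, learning_rate 0.04).
def scale_lr(base_lr, base_cards, cards, base_batch_per_card=64, batch_per_card=64):
    """Scale the learning rate in proportion to the global batch size."""
    return base_lr * (cards * batch_per_card) / (base_cards * base_batch_per_card)

print(scale_lr(0.04, base_cards=4, cards=1))  # 0.01
```

In a PaddleClas-style run such a value would typically be passed as a config override (for example `-o Optimizer.lr.learning_rate=0.01`), assuming the usual `-o` override mechanism of the training script.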
@ -0,0 +1,20 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import optimizer

from .arch import *
from .optimizer import *
from .data import *
from .utils import *
Binary file not shown.
@ -0,0 +1,134 @@
#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

import copy
import importlib

import paddle.nn as nn
from paddle.jit import to_static
from paddle.static import InputSpec

from . import backbone, gears
from .backbone import *
from .gears import build_gear
from .utils import *
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
from ppcls.utils import logger
from ppcls.utils.save_load import load_dygraph_pretrain
from ppcls.arch.slim import prune_model, quantize_model

__all__ = ["build_model", "RecModel", "DistillationModel"]


def build_model(config):
    arch_config = copy.deepcopy(config["Arch"])
    model_type = arch_config.pop("name")
    mod = importlib.import_module(__name__)
    arch = getattr(mod, model_type)(**arch_config)
    if isinstance(arch, TheseusLayer):
        prune_model(config, arch)
        quantize_model(config, arch)
    return arch


def apply_to_static(config, model):
    support_to_static = config['Global'].get('to_static', False)

    if support_to_static:
        specs = None
        if 'image_shape' in config['Global']:
            specs = [InputSpec([None] + config['Global']['image_shape'])]
        model = to_static(model, input_spec=specs)
        logger.info("Successfully to apply @to_static with specs: {}".format(
            specs))
    return model


class RecModel(TheseusLayer):
    def __init__(self, **config):
        super().__init__()
        backbone_config = config["Backbone"]
        backbone_name = backbone_config.pop("name")
        self.backbone = eval(backbone_name)(**backbone_config)
        if "BackboneStopLayer" in config:
            backbone_stop_layer = config["BackboneStopLayer"]["name"]
            self.backbone.stop_after(backbone_stop_layer)

        if "Neck" in config:
            self.neck = build_gear(config["Neck"])
        else:
            self.neck = None

        if "Head" in config:
            self.head = build_gear(config["Head"])
        else:
            self.head = None

    def forward(self, x, label=None):
        out = dict()
        x = self.backbone(x)
        out["backbone"] = x
        if self.neck is not None:
            x = self.neck(x)
            out["neck"] = x
        out["features"] = x
        if self.head is not None:
            y = self.head(x, label)
            out["logits"] = y
        return out


class DistillationModel(nn.Layer):
    def __init__(self,
                 models=None,
                 pretrained_list=None,
                 freeze_params_list=None,
                 **kargs):
        super().__init__()
        assert isinstance(models, list)
        self.model_list = []
        self.model_name_list = []
        if pretrained_list is not None:
            assert len(pretrained_list) == len(models)

        if freeze_params_list is None:
            freeze_params_list = [False] * len(models)
        assert len(freeze_params_list) == len(models)
        for idx, model_config in enumerate(models):
            assert len(model_config) == 1
            key = list(model_config.keys())[0]
            model_config = model_config[key]
            model_name = model_config.pop("name")
            model = eval(model_name)(**model_config)

            if freeze_params_list[idx]:
                for param in model.parameters():
                    param.trainable = False
            self.model_list.append(self.add_sublayer(key, model))
            self.model_name_list.append(key)

        if pretrained_list is not None:
            for idx, pretrained in enumerate(pretrained_list):
                if pretrained is not None:
                    load_dygraph_pretrain(
                        self.model_name_list[idx], path=pretrained)

    def forward(self, x, label=None):
        result_dict = dict()
        for idx, model_name in enumerate(self.model_name_list):
            if label is None:
                result_dict[model_name] = self.model_list[idx](x)
            else:
                result_dict[model_name] = self.model_list[idx](x, label)
        return result_dict
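
A rough usage sketch of the build_model() and apply_to_static() helpers above, using the Arch and Global values from the training log earlier in this diff. It assumes a working ppcls installation and that ResNet50_vd accepts a class_num argument, as the config dump suggests.

```python
import paddle
from ppcls.arch import build_model, apply_to_static

config = {
    "Arch": {"name": "ResNet50_vd", "class_num": 100},          # mirrors the log's Arch section
    "Global": {"to_static": False, "image_shape": [3, 32, 32]},
}

model = build_model(config)             # resolves "ResNet50_vd" inside ppcls.arch and instantiates it
model = apply_to_static(config, model)  # returns the model unchanged because Global.to_static is False

logits = model(paddle.rand([1, 3, 32, 32]))
print(logits.shape)  # expected: [1, 100]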
Binary file not shown.
Binary file not shown.
@ -0,0 +1,83 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import inspect

from ppcls.arch.backbone.legendary_models.mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
from ppcls.arch.backbone.legendary_models.mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
from ppcls.arch.backbone.legendary_models.resnet import ResNet18, ResNet18_vd, ResNet34, ResNet34_vd, ResNet50, ResNet50_vd, ResNet101, ResNet101_vd, ResNet152, ResNet152_vd, ResNet200_vd
from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
from ppcls.arch.backbone.legendary_models.pp_lcnet import PPLCNet_x0_25, PPLCNet_x0_35, PPLCNet_x0_5, PPLCNet_x0_75, PPLCNet_x1_0, PPLCNet_x1_5, PPLCNet_x2_0, PPLCNet_x2_5
from ppcls.arch.backbone.legendary_models.esnet import ESNet_x0_25, ESNet_x0_5, ESNet_x0_75, ESNet_x1_0

from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt50_vd_32x4d, SENet154_vd
from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
from ppcls.arch.backbone.model_zoo.densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264
from ppcls.arch.backbone.model_zoo.efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, EfficientNetB0_small
from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeSt50, ResNeSt101
from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet
from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
from ppcls.arch.backbone.model_zoo.alexnet import AlexNet
from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4
from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab
from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl
from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
from ppcls.arch.backbone.model_zoo.regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
from ppcls.arch.backbone.model_zoo.vision_transformer import ViT_small_patch16_224, ViT_base_patch16_224, ViT_base_patch16_384, ViT_base_patch32_384, ViT_large_patch16_224, ViT_large_patch16_384, ViT_large_patch32_384
from ppcls.arch.backbone.model_zoo.distilled_vision_transformer import DeiT_tiny_patch16_224, DeiT_small_patch16_224, DeiT_base_patch16_224, DeiT_tiny_distilled_patch16_224, DeiT_small_distilled_patch16_224, DeiT_base_distilled_patch16_224, DeiT_base_patch16_384, DeiT_base_distilled_patch16_384
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large
from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5
from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3g4
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh


# help whl get all the models' api (class type) and components' api (func type)
def get_apis():
    current_func = sys._getframe().f_code.co_name
    current_module = sys.modules[__name__]
    api = []
    for _, obj in inspect.getmembers(current_module,
                                     inspect.isclass) + inspect.getmembers(
                                         current_module, inspect.isfunction):
        api.append(obj.__name__)
    api.remove(current_func)
    return api


__all__ = get_apis()
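
The get_apis() helper above makes every model constructor imported into this module discoverable through __all__. A small sketch of how that registry can be used follows; the name-lookup pattern shown is an assumption about typical usage, not a documented API.

```python
from ppcls.arch import backbone

print("ResNet50_vd" in backbone.__all__)   # True: collected by get_apis()

# Resolve a constructor from its name string, the same way build_model() does.
ResNet50_vd = getattr(backbone, "ResNet50_vd")
model = ResNet50_vd(class_num=100)
```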
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,301 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple, List, Dict, Union, Callable, Any

from paddle import nn
from ppcls.utils import logger


class Identity(nn.Layer):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, inputs):
        return inputs


class TheseusLayer(nn.Layer):
    def __init__(self, *args, **kwargs):
        super(TheseusLayer, self).__init__()
        self.res_dict = {}
        self.res_name = self.full_name()
        self.pruner = None
        self.quanter = None

    def _return_dict_hook(self, layer, input, output):
        res_dict = {"output": output}
        # 'list' is needed to avoid error raised by popping self.res_dict
        for res_key in list(self.res_dict):
            # clear the res_dict because the forward process may change according to input
            res_dict[res_key] = self.res_dict.pop(res_key)
        return res_dict

    def init_res(self,
                 stages_pattern,
                 return_patterns=None,
                 return_stages=None):
        if return_patterns and return_stages:
            msg = "The 'return_stages' would be ignored when 'return_patterns' is set."
            logger.warning(msg)
            return_stages = None

        if return_stages is True:
            return_patterns = stages_pattern
        # return_stages is int or bool
        if type(return_stages) is int:
            return_stages = [return_stages]
        if isinstance(return_stages, list):
            if max(return_stages) > len(stages_pattern) or min(
                    return_stages) < 0:
                msg = f"The 'return_stages' is set incorrectly. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}."
                logger.warning(msg)
                return_stages = [
                    val for val in return_stages
                    if val >= 0 and val < len(stages_pattern)
                ]
            return_patterns = [stages_pattern[i] for i in return_stages]

        if return_patterns:
            self.update_res(return_patterns)

    def replace_sub(self, *args, **kwargs) -> None:
        msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead."
        logger.error(DeprecationWarning(msg))
        raise DeprecationWarning(msg)

    def upgrade_sublayer(self,
                         layer_name_pattern: Union[str, List[str]],
                         handle_func: Callable[[nn.Layer, str], nn.Layer]
                         ) -> List[str]:
        """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'.

        Args:
            layer_name_pattern (Union[str, List[str]]): The name of the layer(s) to be modified by 'handle_func'.
            handle_func (Callable[[nn.Layer, str], nn.Layer]): The function that modifies the target layer specified by 'layer_name_pattern'. Its formal params are the layer (nn.Layer) and the pattern (str) that is (a member of) layer_name_pattern (when layer_name_pattern is a list). Its return value is the processed layer.

        Returns:
            List[str]: The pattern(s) in 'layer_name_pattern' that were matched and handled successfully.

        Examples:

            from paddle import nn
            import paddleclas

            def rep_func(layer: nn.Layer, pattern: str):
                new_layer = nn.Conv2D(
                    in_channels=layer._in_channels,
                    out_channels=layer._out_channels,
                    kernel_size=5,
                    padding=2
                )
                return new_layer

            net = paddleclas.MobileNetV1()
            res = net.upgrade_sublayer(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func)
            print(res)
            # ["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"]
        """

        if not isinstance(layer_name_pattern, list):
            layer_name_pattern = [layer_name_pattern]

        hit_layer_pattern_list = []
        for pattern in layer_name_pattern:
            # parse pattern to find target layer and its parent
            layer_list = parse_pattern_str(pattern=pattern, parent_layer=self)
            if not layer_list:
                continue
            sub_layer_parent = layer_list[-2]["layer"] if len(
                layer_list) > 1 else self

            sub_layer = layer_list[-1]["layer"]
            sub_layer_name = layer_list[-1]["name"]
            sub_layer_index = layer_list[-1]["index"]

            new_sub_layer = handle_func(sub_layer, pattern)

            if sub_layer_index:
                getattr(sub_layer_parent,
                        sub_layer_name)[sub_layer_index] = new_sub_layer
            else:
                setattr(sub_layer_parent, sub_layer_name, new_sub_layer)

            hit_layer_pattern_list.append(pattern)
        return hit_layer_pattern_list

    def stop_after(self, stop_layer_name: str) -> bool:
        """stop forward and backward after 'stop_layer_name'.

        Args:
            stop_layer_name (str): The name of the layer after which forward and backward are stopped.

        Returns:
            bool: 'True' if successful, 'False' otherwise.
        """

        layer_list = parse_pattern_str(stop_layer_name, self)
        if not layer_list:
            return False

        parent_layer = self
        for layer_dict in layer_list:
            name, index = layer_dict["name"], layer_dict["index"]
            if not set_identity(parent_layer, name, index):
                msg = f"Failed to set the layers after stop_layer_name('{stop_layer_name}') to Identity. The offending layer's name is '{name}'."
                logger.warning(msg)
                return False
            parent_layer = layer_dict["layer"]

        return True

    def update_res(
            self,
            return_patterns: Union[str, List[str]]) -> List[str]:
        """update the result(s) to be returned.

        Args:
            return_patterns (Union[str, List[str]]): The name(s) of the layer(s) whose output should be returned.

        Returns:
            List[str]: The pattern(s) that have been set successfully.
        """

        # clear res_dict that could have been set
        self.res_dict = {}

        class Handler(object):
            def __init__(self, res_dict):
                # res_dict is a reference
                self.res_dict = res_dict

            def __call__(self, layer, pattern):
                layer.res_dict = self.res_dict
                layer.res_name = pattern
                if hasattr(layer, "hook_remove_helper"):
                    layer.hook_remove_helper.remove()
                layer.hook_remove_helper = layer.register_forward_post_hook(
                    save_sub_res_hook)
                return layer

        handle_func = Handler(self.res_dict)

        hit_layer_pattern_list = self.upgrade_sublayer(
            return_patterns, handle_func=handle_func)

        if hasattr(self, "hook_remove_helper"):
            self.hook_remove_helper.remove()
        self.hook_remove_helper = self.register_forward_post_hook(
            self._return_dict_hook)

        return hit_layer_pattern_list


def save_sub_res_hook(layer, input, output):
    layer.res_dict[layer.res_name] = output


def set_identity(parent_layer: nn.Layer,
                 layer_name: str,
                 layer_index: str=None) -> bool:
    """set the layer specified by layer_name and layer_index to Identity.

    Args:
        parent_layer (nn.Layer): The parent layer of the target layer specified by layer_name and layer_index.
        layer_name (str): The name of the target layer to be set to Identity.
        layer_index (str, optional): The index of the target layer within parent_layer. Defaults to None.

    Returns:
        bool: True if successful, False otherwise.
    """

    stop_after = False
    for sub_layer_name in parent_layer._sub_layers:
        if stop_after:
            parent_layer._sub_layers[sub_layer_name] = Identity()
            continue
        if sub_layer_name == layer_name:
            stop_after = True

    if layer_index and stop_after:
        stop_after = False
        for sub_layer_index in parent_layer._sub_layers[
                layer_name]._sub_layers:
            if stop_after:
                parent_layer._sub_layers[layer_name][
                    sub_layer_index] = Identity()
                continue
            if layer_index == sub_layer_index:
                stop_after = True

    return stop_after


def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[
        None, List[Dict[str, Union[nn.Layer, str, None]]]]:
    """parse the string type pattern.

    Args:
        pattern (str): The pattern that describes the layer.
        parent_layer (nn.Layer): The root layer relative to the pattern.

    Returns:
        Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if parsing failed. If successful, the members are the layers parsed in order:
            [
                {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if it exists},
                {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if it exists},
                ...
            ]
    """

    pattern_list = pattern.split(".")
    if not pattern_list:
        msg = f"The pattern('{pattern}') is illegal. Please check and retry."
        logger.warning(msg)
        return None

    layer_list = []
    while len(pattern_list) > 0:
        if '[' in pattern_list[0]:
            target_layer_name = pattern_list[0].split('[')[0]
            target_layer_index = pattern_list[0].split('[')[1].split(']')[0]
        else:
            target_layer_name = pattern_list[0]
            target_layer_index = None

        target_layer = getattr(parent_layer, target_layer_name, None)

        if target_layer is None:
            msg = f"No layer named '{target_layer_name}' was found for the pattern('{pattern}')."
            logger.warning(msg)
            return None

        if target_layer_index and target_layer:
            if int(target_layer_index) < 0 or int(target_layer_index) >= len(
                    target_layer):
                msg = f"No layer was found at index('{target_layer_index}') for the pattern('{pattern}'). The index should be >= 0 and < {len(target_layer)}."
                logger.warning(msg)
                return None

            target_layer = target_layer[target_layer_index]

        layer_list.append({
            "layer": target_layer,
            "name": target_layer_name,
            "index": target_layer_index
        })

        pattern_list = pattern_list[1:]
        parent_layer = target_layer
    return layer_list
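
A rough sketch of how the TheseusLayer hooks above are typically driven. It assumes that ResNet50_vd subclasses TheseusLayer and exposes a `blocks` sub-layer container, as the legendary_models imports and the stage patterns elsewhere in this diff suggest.

```python
import paddle
from ppcls.arch.backbone.legendary_models.resnet import ResNet50_vd

net = ResNet50_vd(class_num=100)

# Ask for an intermediate result in addition to the final output.
net.update_res(return_patterns=["blocks[2]"])
res = net(paddle.rand([1, 3, 32, 32]))
print(sorted(res.keys()))  # expected: ['blocks[2]', 'output']

# Replace everything after a named sub-layer with Identity.
print(net.stop_after(stop_layer_name="blocks[2]"))  # True when the pattern is found
```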
@ -0,0 +1,6 @@
from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd
from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W64_C
from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1
from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
from .inception_v3 import InceptionV3
from .vgg import VGG11, VGG13, VGG16, VGG19
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,369 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import math
|
||||
import paddle
|
||||
from paddle import ParamAttr, reshape, transpose, concat, split
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
from paddle.regularizer import L2Decay
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ESNet_x0_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_25_pretrained.pdparams",
|
||||
"ESNet_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_5_pretrained.pdparams",
|
||||
"ESNet_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x0_75_pretrained.pdparams",
|
||||
"ESNet_x1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ESNet_x1_0_pretrained.pdparams",
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def channel_shuffle(x, groups):
|
||||
batch_size, num_channels, height, width = x.shape[0:4]
|
||||
channels_per_group = num_channels // groups
|
||||
x = reshape(
|
||||
x=x, shape=[batch_size, groups, channels_per_group, height, width])
|
||||
x = transpose(x=x, perm=[0, 2, 1, 3, 4])
|
||||
x = reshape(x=x, shape=[batch_size, num_channels, height, width])
|
||||
return x
|
||||
|
||||
|
||||
def make_divisible(v, divisor=8, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
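# make_divisible rounds a scaled channel count to a multiple of `divisor`
# (8 by default) and bumps the result up one step if rounding would lose more
# than ~10% of the original value. Worked examples with the default divisor:
#   make_divisible(116 * 0.25)  # 29 -> 32  (rounded to the nearest multiple of 8)
#   make_divisible(116 * 0.5)   # 58 -> 56
#   make_divisible(10)          # 8 would fall below 0.9 * 10, so the result is 16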
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
if_act=True):
|
||||
super().__init__()
|
||||
self.conv = Conv2D(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=(kernel_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
|
||||
self.bn = BatchNorm(
|
||||
out_channels,
|
||||
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
|
||||
self.if_act = if_act
|
||||
self.hardswish = nn.Hardswish()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
if self.if_act:
|
||||
x = self.hardswish(x)
|
||||
return x
|
||||
|
||||
|
||||
class SEModule(TheseusLayer):
|
||||
def __init__(self, channel, reduction=4):
|
||||
super().__init__()
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self.conv1 = Conv2D(
|
||||
in_channels=channel,
|
||||
out_channels=channel // reduction,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.relu = nn.ReLU()
|
||||
self.conv2 = Conv2D(
|
||||
in_channels=channel // reduction,
|
||||
out_channels=channel,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.hardsigmoid = nn.Hardsigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.avg_pool(x)
|
||||
x = self.conv1(x)
|
||||
x = self.relu(x)
|
||||
x = self.conv2(x)
|
||||
x = self.hardsigmoid(x)
|
||||
x = paddle.multiply(x=identity, y=x)
|
||||
return x
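# Shape trace for SEModule(channel=C) on an input of shape [N, C, H, W]:
#   avg_pool     -> [N, C, 1, 1]
#   conv1 + relu -> [N, C // reduction, 1, 1]
#   conv2        -> [N, C, 1, 1]
# hardsigmoid turns these into per-channel gates in [0, 1], and paddle.multiply
# rescales the identity tensor with them, so the output keeps the shape [N, C, H, W].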
|
||||
|
||||
|
||||
class ESBlock1(TheseusLayer):
|
||||
def __init__(self, in_channels, out_channels):
|
||||
super().__init__()
|
||||
self.pw_1_1 = ConvBNLayer(
|
||||
in_channels=in_channels // 2,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=1,
|
||||
stride=1)
|
||||
self.dw_1 = ConvBNLayer(
|
||||
in_channels=out_channels // 2,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
groups=out_channels // 2,
|
||||
if_act=False)
|
||||
self.se = SEModule(out_channels)
|
||||
|
||||
self.pw_1_2 = ConvBNLayer(
|
||||
in_channels=out_channels,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=1,
|
||||
stride=1)
|
||||
|
||||
def forward(self, x):
|
||||
x1, x2 = split(
|
||||
x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1)
|
||||
x2 = self.pw_1_1(x2)
|
||||
x3 = self.dw_1(x2)
|
||||
x3 = concat([x2, x3], axis=1)
|
||||
x3 = self.se(x3)
|
||||
x3 = self.pw_1_2(x3)
|
||||
x = concat([x1, x3], axis=1)
|
||||
return channel_shuffle(x, 2)
|
||||
|
||||
|
||||
class ESBlock2(TheseusLayer):
|
||||
def __init__(self, in_channels, out_channels):
|
||||
super().__init__()
|
||||
|
||||
# branch1
|
||||
self.dw_1 = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=in_channels,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
groups=in_channels,
|
||||
if_act=False)
|
||||
self.pw_1 = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=1,
|
||||
stride=1)
|
||||
# branch2
|
||||
self.pw_2_1 = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=1)
|
||||
self.dw_2 = ConvBNLayer(
|
||||
in_channels=out_channels // 2,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
groups=out_channels // 2,
|
||||
if_act=False)
|
||||
self.se = SEModule(out_channels // 2)
|
||||
self.pw_2_2 = ConvBNLayer(
|
||||
in_channels=out_channels // 2,
|
||||
out_channels=out_channels // 2,
|
||||
kernel_size=1)
|
||||
self.concat_dw = ConvBNLayer(
|
||||
in_channels=out_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=3,
|
||||
groups=out_channels)
|
||||
self.concat_pw = ConvBNLayer(
|
||||
in_channels=out_channels, out_channels=out_channels, kernel_size=1)
|
||||
|
||||
def forward(self, x):
|
||||
x1 = self.dw_1(x)
|
||||
x1 = self.pw_1(x1)
|
||||
x2 = self.pw_2_1(x)
|
||||
x2 = self.dw_2(x2)
|
||||
x2 = self.se(x2)
|
||||
x2 = self.pw_2_2(x2)
|
||||
x = concat([x1, x2], axis=1)
|
||||
x = self.concat_dw(x)
|
||||
x = self.concat_pw(x)
|
||||
return x
|
||||
|
||||
|
||||
class ESNet(TheseusLayer):
|
||||
def __init__(self,
|
||||
stages_pattern,
|
||||
class_num=1000,
|
||||
scale=1.0,
|
||||
dropout_prob=0.2,
|
||||
class_expand=1280,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
self.scale = scale
|
||||
self.class_num = class_num
|
||||
self.class_expand = class_expand
|
||||
stage_repeats = [3, 7, 3]
|
||||
stage_out_channels = [
|
||||
-1, 24, make_divisible(116 * scale), make_divisible(232 * scale),
|
||||
make_divisible(464 * scale), 1024
|
||||
]
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
in_channels=3,
|
||||
out_channels=stage_out_channels[1],
|
||||
kernel_size=3,
|
||||
stride=2)
|
||||
self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
block_list = []
|
||||
for stage_id, num_repeat in enumerate(stage_repeats):
|
||||
for i in range(num_repeat):
|
||||
if i == 0:
|
||||
block = ESBlock2(
|
||||
in_channels=stage_out_channels[stage_id + 1],
|
||||
out_channels=stage_out_channels[stage_id + 2])
|
||||
else:
|
||||
block = ESBlock1(
|
||||
in_channels=stage_out_channels[stage_id + 2],
|
||||
out_channels=stage_out_channels[stage_id + 2])
|
||||
block_list.append(block)
|
||||
self.blocks = nn.Sequential(*block_list)
|
||||
|
||||
self.conv2 = ConvBNLayer(
|
||||
in_channels=stage_out_channels[-2],
|
||||
out_channels=stage_out_channels[-1],
|
||||
kernel_size=1)
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.last_conv = Conv2D(
|
||||
in_channels=stage_out_channels[-1],
|
||||
out_channels=self.class_expand,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
bias_attr=False)
|
||||
self.hardswish = nn.Hardswish()
|
||||
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
|
||||
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
|
||||
self.fc = Linear(self.class_expand, self.class_num)
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.max_pool(x)
|
||||
x = self.blocks(x)
|
||||
x = self.conv2(x)
|
||||
x = self.avg_pool(x)
|
||||
x = self.last_conv(x)
|
||||
x = self.hardswish(x)
|
||||
x = self.dropout(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
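# The `pretrained` argument handled above (and forwarded by the factory functions
# below) takes three forms; an illustrative sketch, where the local path is hypothetical:
#   _load_pretrained(False, model, url, use_ssld)   # keep random initialization
#   _load_pretrained(True, model, url, use_ssld)    # download weights from MODEL_URLS
#   _load_pretrained("./ESNet_x0_25_pretrained.pdparams", model, url, use_ssld)  # local weights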
|
||||
|
||||
|
||||
def ESNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ESNet_x0_25
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ESNet_x0_25` model depends on args.
|
||||
"""
|
||||
model = ESNet(
|
||||
scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_25"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ESNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ESNet_x0_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ESNet_x0_5` model depends on args.
|
||||
"""
|
||||
model = ESNet(
|
||||
scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_5"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ESNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ESNet_x0_75
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ESNet_x0_75` model depends on args.
|
||||
"""
|
||||
model = ESNet(
|
||||
scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x0_75"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ESNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ESNet_x1_0
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ESNet_x1_0` model depends on args.
|
||||
"""
|
||||
model = ESNet(
|
||||
scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ESNet_x1_0"], use_ssld)
|
||||
return model
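if __name__ == "__main__":
    # Minimal smoke-test sketch: builds the smallest ESNet variant without
    # pretrained weights and checks the output shape. Assumes PaddlePaddle is
    # installed and the ppcls package is importable.
    model = ESNet_x0_25(pretrained=False, class_num=1000)
    model.eval()
    x = paddle.rand([1, 3, 224, 224])
    y = model(x)
    print(y.shape)  # expected: [1, 1000]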
|
||||
@ -0,0 +1,794 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import paddle
|
||||
from paddle import nn
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.functional import upsample
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer, Identity
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"HRNet_W18_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W18_C_pretrained.pdparams",
|
||||
"HRNet_W30_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W30_C_pretrained.pdparams",
|
||||
"HRNet_W32_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W32_C_pretrained.pdparams",
|
||||
"HRNet_W40_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W40_C_pretrained.pdparams",
|
||||
"HRNet_W44_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W44_C_pretrained.pdparams",
|
||||
"HRNet_W48_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W48_C_pretrained.pdparams",
|
||||
"HRNet_W64_C":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W64_C_pretrained.pdparams"
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {"HRNet": ["st4"]}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def _create_act(act):
|
||||
if act == "hardswish":
|
||||
return nn.Hardswish()
|
||||
elif act == "relu":
|
||||
return nn.ReLU()
|
||||
elif act is None:
|
||||
return Identity()
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"The activation function is not supported: {}".format(act))
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act="relu"):
|
||||
super().__init__()
|
||||
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
bias_attr=False)
|
||||
self.bn = nn.BatchNorm(num_filters, act=None)
|
||||
self.act = _create_act(act)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.act(x)
|
||||
return x
|
||||
|
||||
|
||||
class BottleneckBlock(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
has_se,
|
||||
stride=1,
|
||||
downsample=False):
|
||||
super().__init__()
|
||||
|
||||
self.has_se = has_se
|
||||
self.downsample = downsample
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act="relu")
|
||||
self.conv3 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
act=None)
|
||||
|
||||
if self.downsample:
|
||||
self.conv_down = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
act=None)
|
||||
|
||||
if self.has_se:
|
||||
self.se = SELayer(
|
||||
num_channels=num_filters * 4,
|
||||
num_filters=num_filters * 4,
|
||||
reduction_ratio=16)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
residual = x
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.conv3(x)
|
||||
if self.downsample:
|
||||
residual = self.conv_down(residual)
|
||||
if self.has_se:
|
||||
x = self.se(x)
|
||||
x = paddle.add(x=residual, y=x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
|
||||
def __init__(self, num_channels, num_filters, has_se=False):
|
||||
super().__init__()
|
||||
|
||||
self.has_se = has_se
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act="relu")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act=None)
|
||||
|
||||
if self.has_se:
|
||||
self.se = SELayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
reduction_ratio=16)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
|
||||
if self.has_se:
|
||||
x = self.se(x)
|
||||
|
||||
x = paddle.add(x=residual, y=x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class SELayer(TheseusLayer):
|
||||
def __init__(self, num_channels, num_filters, reduction_ratio):
|
||||
super().__init__()
|
||||
|
||||
self.avg_pool = nn.AdaptiveAvgPool2D(1)
|
||||
|
||||
self._num_channels = num_channels
|
||||
|
||||
med_ch = int(num_channels / reduction_ratio)
|
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0)
|
||||
self.fc_squeeze = nn.Linear(
|
||||
num_channels,
|
||||
med_ch,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
|
||||
self.relu = nn.ReLU()
|
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0)
|
||||
self.fc_excitation = nn.Linear(
|
||||
med_ch,
|
||||
num_filters,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
|
||||
self.sigmoid = nn.Sigmoid()
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
residual = x
|
||||
x = self.avg_pool(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
x = self.fc_squeeze(x)
|
||||
x = self.relu(x)
|
||||
x = self.fc_excitation(x)
|
||||
x = self.sigmoid(x)
|
||||
x = paddle.unsqueeze(x, axis=[2, 3])
|
||||
x = residual * x
|
||||
return x
|
||||
|
||||
|
||||
class Stage(TheseusLayer):
|
||||
def __init__(self, num_modules, num_filters, has_se=False):
|
||||
super().__init__()
|
||||
|
||||
self._num_modules = num_modules
|
||||
|
||||
self.stage_func_list = nn.LayerList()
|
||||
for i in range(num_modules):
|
||||
self.stage_func_list.append(
|
||||
HighResolutionModule(
|
||||
num_filters=num_filters, has_se=has_se))
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
x = x
|
||||
for idx in range(self._num_modules):
|
||||
x = self.stage_func_list[idx](x)
|
||||
return x
|
||||
|
||||
|
||||
class HighResolutionModule(TheseusLayer):
|
||||
def __init__(self, num_filters, has_se=False):
|
||||
super().__init__()
|
||||
|
||||
self.basic_block_list = nn.LayerList()
|
||||
|
||||
for i in range(len(num_filters)):
|
||||
self.basic_block_list.append(
|
||||
nn.Sequential(* [
|
||||
BasicBlock(
|
||||
num_channels=num_filters[i],
|
||||
num_filters=num_filters[i],
|
||||
has_se=has_se) for j in range(4)
|
||||
]))
|
||||
|
||||
self.fuse_func = FuseLayers(
|
||||
in_channels=num_filters, out_channels=num_filters)
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
out = []
|
||||
for idx, xi in enumerate(x):
|
||||
basic_block_list = self.basic_block_list[idx]
|
||||
for basic_block_func in basic_block_list:
|
||||
xi = basic_block_func(xi)
|
||||
out.append(xi)
|
||||
out = self.fuse_func(out)
|
||||
return out
|
||||
|
||||
|
||||
class FuseLayers(TheseusLayer):
|
||||
def __init__(self, in_channels, out_channels):
|
||||
super().__init__()
|
||||
|
||||
self._actual_ch = len(in_channels)
|
||||
self._in_channels = in_channels
|
||||
|
||||
self.residual_func_list = nn.LayerList()
|
||||
self.relu = nn.ReLU()
|
||||
for i in range(len(in_channels)):
|
||||
for j in range(len(in_channels)):
|
||||
if j > i:
|
||||
self.residual_func_list.append(
|
||||
ConvBNLayer(
|
||||
num_channels=in_channels[j],
|
||||
num_filters=out_channels[i],
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
act=None))
|
||||
elif j < i:
|
||||
pre_num_filters = in_channels[j]
|
||||
for k in range(i - j):
|
||||
if k == i - j - 1:
|
||||
self.residual_func_list.append(
|
||||
ConvBNLayer(
|
||||
num_channels=pre_num_filters,
|
||||
num_filters=out_channels[i],
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act=None))
|
||||
pre_num_filters = out_channels[i]
|
||||
else:
|
||||
self.residual_func_list.append(
|
||||
ConvBNLayer(
|
||||
num_channels=pre_num_filters,
|
||||
num_filters=out_channels[j],
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu"))
|
||||
pre_num_filters = out_channels[j]
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
out = []
|
||||
residual_func_idx = 0
|
||||
for i in range(len(self._in_channels)):
|
||||
residual = x[i]
|
||||
for j in range(len(self._in_channels)):
|
||||
if j > i:
|
||||
xj = self.residual_func_list[residual_func_idx](x[j])
|
||||
residual_func_idx += 1
|
||||
|
||||
xj = upsample(xj, scale_factor=2**(j - i), mode="nearest")
|
||||
residual = paddle.add(x=residual, y=xj)
|
||||
elif j < i:
|
||||
xj = x[j]
|
||||
for k in range(i - j):
|
||||
xj = self.residual_func_list[residual_func_idx](xj)
|
||||
residual_func_idx += 1
|
||||
|
||||
residual = paddle.add(x=residual, y=xj)
|
||||
|
||||
residual = self.relu(residual)
|
||||
out.append(residual)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class LastClsOut(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channel_list,
|
||||
has_se,
|
||||
num_filters_list=[32, 64, 128, 256]):
|
||||
super().__init__()
|
||||
|
||||
self.func_list = nn.LayerList()
|
||||
for idx in range(len(num_channel_list)):
|
||||
self.func_list.append(
|
||||
BottleneckBlock(
|
||||
num_channels=num_channel_list[idx],
|
||||
num_filters=num_filters_list[idx],
|
||||
has_se=has_se,
|
||||
downsample=True))
|
||||
|
||||
def forward(self, x, res_dict=None):
|
||||
out = []
|
||||
for idx, xi in enumerate(x):
|
||||
xi = self.func_list[idx](xi)
|
||||
out.append(xi)
|
||||
return out
|
||||
|
||||
|
||||
class HRNet(TheseusLayer):
|
||||
"""
|
||||
HRNet
|
||||
Args:
|
||||
width: int=18. Base channel number of HRNet.
|
||||
has_se: bool=False. If 'True', add se module to HRNet.
|
||||
class_num: int=1000. Output num of last fc layer.
|
||||
Returns:
|
||||
model: nn.Layer. Specific HRNet model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
stages_pattern,
|
||||
width=18,
|
||||
has_se=False,
|
||||
class_num=1000,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
|
||||
self.width = width
|
||||
self.has_se = has_se
|
||||
self._class_num = class_num
|
||||
|
||||
channels_2 = [self.width, self.width * 2]
|
||||
channels_3 = [self.width, self.width * 2, self.width * 4]
|
||||
channels_4 = [
|
||||
self.width, self.width * 2, self.width * 4, self.width * 8
|
||||
]
|
||||
|
||||
self.conv_layer1_1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
|
||||
self.conv_layer1_2 = ConvBNLayer(
|
||||
num_channels=64,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
|
||||
self.layer1 = nn.Sequential(* [
|
||||
BottleneckBlock(
|
||||
num_channels=64 if i == 0 else 256,
|
||||
num_filters=64,
|
||||
has_se=has_se,
|
||||
stride=1,
|
||||
downsample=True if i == 0 else False) for i in range(4)
|
||||
])
|
||||
|
||||
self.conv_tr1_1 = ConvBNLayer(
|
||||
num_channels=256, num_filters=width, filter_size=3)
|
||||
self.conv_tr1_2 = ConvBNLayer(
|
||||
num_channels=256, num_filters=width * 2, filter_size=3, stride=2)
|
||||
|
||||
self.st2 = Stage(
|
||||
num_modules=1, num_filters=channels_2, has_se=self.has_se)
|
||||
|
||||
self.conv_tr2 = ConvBNLayer(
|
||||
num_channels=width * 2,
|
||||
num_filters=width * 4,
|
||||
filter_size=3,
|
||||
stride=2)
|
||||
self.st3 = Stage(
|
||||
num_modules=4, num_filters=channels_3, has_se=self.has_se)
|
||||
|
||||
self.conv_tr3 = ConvBNLayer(
|
||||
num_channels=width * 4,
|
||||
num_filters=width * 8,
|
||||
filter_size=3,
|
||||
stride=2)
|
||||
|
||||
self.st4 = Stage(
|
||||
num_modules=3, num_filters=channels_4, has_se=self.has_se)
|
||||
|
||||
# classification
|
||||
num_filters_list = [32, 64, 128, 256]
|
||||
self.last_cls = LastClsOut(
|
||||
num_channel_list=channels_4,
|
||||
has_se=self.has_se,
|
||||
num_filters_list=num_filters_list)
|
||||
|
||||
last_num_filters = [256, 512, 1024]
|
||||
self.cls_head_conv_list = nn.LayerList()
|
||||
for idx in range(3):
|
||||
self.cls_head_conv_list.append(
|
||||
ConvBNLayer(
|
||||
num_channels=num_filters_list[idx] * 4,
|
||||
num_filters=last_num_filters[idx],
|
||||
filter_size=3,
|
||||
stride=2))
|
||||
|
||||
self.conv_last = ConvBNLayer(
|
||||
num_channels=1024, num_filters=2048, filter_size=1, stride=1)
|
||||
|
||||
self.avg_pool = nn.AdaptiveAvgPool2D(1)
|
||||
|
||||
stdv = 1.0 / math.sqrt(2048 * 1.0)
|
||||
|
||||
self.fc = nn.Linear(
|
||||
2048,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv_layer1_1(x)
|
||||
x = self.conv_layer1_2(x)
|
||||
|
||||
x = self.layer1(x)
|
||||
|
||||
tr1_1 = self.conv_tr1_1(x)
|
||||
tr1_2 = self.conv_tr1_2(x)
|
||||
x = self.st2([tr1_1, tr1_2])
|
||||
|
||||
tr2 = self.conv_tr2(x[-1])
|
||||
x.append(tr2)
|
||||
x = self.st3(x)
|
||||
|
||||
tr3 = self.conv_tr3(x[-1])
|
||||
x.append(tr3)
|
||||
x = self.st4(x)
|
||||
|
||||
x = self.last_cls(x)
|
||||
|
||||
y = x[0]
|
||||
for idx in range(3):
|
||||
y = paddle.add(x[idx + 1], self.cls_head_conv_list[idx](y))
|
||||
|
||||
y = self.conv_last(y)
|
||||
y = self.avg_pool(y)
|
||||
y = paddle.reshape(y, shape=[-1, y.shape[1]])
|
||||
y = self.fc(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W18_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W18_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=18, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W30_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W30_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=30, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W32_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W32_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=32, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W40_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W40_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=40, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W44_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W44_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=44, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W48_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W48_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=48, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W60_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W60_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=60, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W60_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
HRNet_W64_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `HRNet_W64_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=64, stages_pattern=MODEL_STAGES_PATTERN["HRNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W18_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W18_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W18_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=18,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W18_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W30_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W30_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W30_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=30,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W30_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W32_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W32_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W32_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=32,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W32_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W40_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W40_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W40_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=40,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W40_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W44_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W44_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W44_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=44,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W44_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W48_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W48_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W48_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=48,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W48_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W60_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W60_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W60_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=60,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W60_C"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
SE_HRNet_W64_C
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `SE_HRNet_W64_C` model depends on args.
|
||||
"""
|
||||
model = HRNet(
|
||||
width=64,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["HRNet"],
|
||||
has_se=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld)
|
||||
return model
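if __name__ == "__main__":
    # Minimal smoke-test sketch for the HRNet backbone: builds HRNet_W18_C
    # without pretrained weights and runs one forward pass. Assumes PaddlePaddle
    # is installed and the ppcls package is importable.
    model = HRNet_W18_C(pretrained=False)
    model.eval()
    x = paddle.rand([1, 3, 224, 224])
    y = model(x)
    print(y.shape)  # expected: [1, 1000]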
|
||||
@ -0,0 +1,557 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import math
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"InceptionV3":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/InceptionV3_pretrained.pdparams"
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"InceptionV3": [
|
||||
"inception_block_list[2]", "inception_block_list[3]",
|
||||
"inception_block_list[7]", "inception_block_list[8]",
|
||||
"inception_block_list[10]"
|
||||
]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
'''
InceptionV3 config: dict.
    key: name of the inception block type used in InceptionV3.
    values: channel settings for the blocks of that type (input channel numbers,
        plus pool_features for inception_a and channels_7x7 for inception_c).
'''
|
||||
NET_CONFIG = {
|
||||
"inception_a": [[192, 256, 288], [32, 64, 64]],
|
||||
"inception_b": [288],
|
||||
"inception_c": [[768, 768, 768, 768], [128, 160, 160, 192]],
|
||||
"inception_d": [768],
|
||||
"inception_e": [1280, 2048]
|
||||
}
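# How Inception_V3.__init__ (below) expands NET_CONFIG into inception_block_list,
# written out as the constructor calls it produces (a reading of the code):
#   InceptionA(192, 32), InceptionA(256, 64), InceptionA(288, 64)
#   InceptionB(288)
#   InceptionC(768, 128), InceptionC(768, 160), InceptionC(768, 160), InceptionC(768, 192)
#   InceptionD(768)
#   InceptionE(1280), InceptionE(2048)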
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
act="relu"):
|
||||
super().__init__()
|
||||
self.act = act
|
||||
self.conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups,
|
||||
bias_attr=False)
|
||||
self.bn = BatchNorm(num_filters)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
if self.act:
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class InceptionStem(TheseusLayer):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv_1a_3x3 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
self.conv_2a_3x3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act="relu")
|
||||
self.conv_2b_3x3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
act="relu")
|
||||
|
||||
self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
|
||||
self.conv_3b_1x1 = ConvBNLayer(
|
||||
num_channels=64, num_filters=80, filter_size=1, act="relu")
|
||||
self.conv_4a_3x3 = ConvBNLayer(
|
||||
num_channels=80, num_filters=192, filter_size=3, act="relu")
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv_1a_3x3(x)
|
||||
x = self.conv_2a_3x3(x)
|
||||
x = self.conv_2b_3x3(x)
|
||||
x = self.max_pool(x)
|
||||
x = self.conv_3b_1x1(x)
|
||||
x = self.conv_4a_3x3(x)
|
||||
x = self.max_pool(x)
|
||||
return x
|
||||
|
||||
|
||||
class InceptionA(TheseusLayer):
|
||||
def __init__(self, num_channels, pool_features):
|
||||
super().__init__()
|
||||
self.branch1x1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=64,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch5x5_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=48,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch5x5_2 = ConvBNLayer(
|
||||
num_channels=48,
|
||||
num_filters=64,
|
||||
filter_size=5,
|
||||
padding=2,
|
||||
act="relu")
|
||||
|
||||
self.branch3x3dbl_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=64,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_2 = ConvBNLayer(
|
||||
num_channels=64,
|
||||
num_filters=96,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_3 = ConvBNLayer(
|
||||
num_channels=96,
|
||||
num_filters=96,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
act="relu")
|
||||
self.branch_pool = AvgPool2D(
|
||||
kernel_size=3, stride=1, padding=1, exclusive=False)
|
||||
self.branch_pool_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=pool_features,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
|
||||
def forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
branch5x5 = self.branch5x5_1(x)
|
||||
branch5x5 = self.branch5x5_2(branch5x5)
|
||||
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
||||
|
||||
branch_pool = self.branch_pool(x)
|
||||
branch_pool = self.branch_pool_conv(branch_pool)
|
||||
x = paddle.concat(
|
||||
[branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)
|
||||
return x
|
||||
|
||||
|
||||
class InceptionB(TheseusLayer):
|
||||
def __init__(self, num_channels):
|
||||
super().__init__()
|
||||
self.branch3x3 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=384,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
self.branch3x3dbl_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=64,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_2 = ConvBNLayer(
|
||||
num_channels=64,
|
||||
num_filters=96,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_3 = ConvBNLayer(
|
||||
num_channels=96,
|
||||
num_filters=96,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
|
||||
|
||||
def forward(self, x):
|
||||
branch3x3 = self.branch3x3(x)
|
||||
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
||||
|
||||
branch_pool = self.branch_pool(x)
|
||||
|
||||
x = paddle.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class InceptionC(TheseusLayer):
|
||||
def __init__(self, num_channels, channels_7x7):
|
||||
super().__init__()
|
||||
self.branch1x1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=192,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
|
||||
self.branch7x7_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
act="relu")
|
||||
self.branch7x7_2 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=(1, 7),
|
||||
stride=1,
|
||||
padding=(0, 3),
|
||||
act="relu")
|
||||
self.branch7x7_3 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=192,
|
||||
filter_size=(7, 1),
|
||||
stride=1,
|
||||
padding=(3, 0),
|
||||
act="relu")
|
||||
|
||||
self.branch7x7dbl_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch7x7dbl_2 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=(7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu")
|
||||
self.branch7x7dbl_3 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=(1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu")
|
||||
self.branch7x7dbl_4 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=channels_7x7,
|
||||
filter_size=(7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu")
|
||||
self.branch7x7dbl_5 = ConvBNLayer(
|
||||
num_channels=channels_7x7,
|
||||
num_filters=192,
|
||||
filter_size=(1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu")
|
||||
|
||||
self.branch_pool = AvgPool2D(
|
||||
kernel_size=3, stride=1, padding=1, exclusive=False)
|
||||
self.branch_pool_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=192,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
|
||||
def forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
|
||||
branch7x7 = self.branch7x7_1(x)
|
||||
branch7x7 = self.branch7x7_2(branch7x7)
|
||||
branch7x7 = self.branch7x7_3(branch7x7)
|
||||
|
||||
branch7x7dbl = self.branch7x7dbl_1(x)
|
||||
branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
|
||||
|
||||
branch_pool = self.branch_pool(x)
|
||||
branch_pool = self.branch_pool_conv(branch_pool)
|
||||
|
||||
x = paddle.concat(
|
||||
[branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class InceptionD(TheseusLayer):
|
||||
def __init__(self, num_channels):
|
||||
super().__init__()
|
||||
self.branch3x3_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=192,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3_2 = ConvBNLayer(
|
||||
num_channels=192,
|
||||
num_filters=320,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
self.branch7x7x3_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=192,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch7x7x3_2 = ConvBNLayer(
|
||||
num_channels=192,
|
||||
num_filters=192,
|
||||
filter_size=(1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu")
|
||||
self.branch7x7x3_3 = ConvBNLayer(
|
||||
num_channels=192,
|
||||
num_filters=192,
|
||||
filter_size=(7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu")
|
||||
self.branch7x7x3_4 = ConvBNLayer(
|
||||
num_channels=192,
|
||||
num_filters=192,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu")
|
||||
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
|
||||
|
||||
def forward(self, x):
|
||||
branch3x3 = self.branch3x3_1(x)
|
||||
branch3x3 = self.branch3x3_2(branch3x3)
|
||||
|
||||
branch7x7x3 = self.branch7x7x3_1(x)
|
||||
branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
|
||||
branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
|
||||
branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
|
||||
|
||||
branch_pool = self.branch_pool(x)
|
||||
|
||||
x = paddle.concat([branch3x3, branch7x7x3, branch_pool], axis=1)
|
||||
return x
|
||||
|
||||
|
||||
class InceptionE(TheseusLayer):
|
||||
def __init__(self, num_channels):
|
||||
super().__init__()
|
||||
self.branch1x1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=320,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=384,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3_2a = ConvBNLayer(
|
||||
num_channels=384,
|
||||
num_filters=384,
|
||||
filter_size=(1, 3),
|
||||
padding=(0, 1),
|
||||
act="relu")
|
||||
self.branch3x3_2b = ConvBNLayer(
|
||||
num_channels=384,
|
||||
num_filters=384,
|
||||
filter_size=(3, 1),
|
||||
padding=(1, 0),
|
||||
act="relu")
|
||||
|
||||
self.branch3x3dbl_1 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=448,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_2 = ConvBNLayer(
|
||||
num_channels=448,
|
||||
num_filters=384,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
act="relu")
|
||||
self.branch3x3dbl_3a = ConvBNLayer(
|
||||
num_channels=384,
|
||||
num_filters=384,
|
||||
filter_size=(1, 3),
|
||||
padding=(0, 1),
|
||||
act="relu")
|
||||
self.branch3x3dbl_3b = ConvBNLayer(
|
||||
num_channels=384,
|
||||
num_filters=384,
|
||||
filter_size=(3, 1),
|
||||
padding=(1, 0),
|
||||
act="relu")
|
||||
self.branch_pool = AvgPool2D(
|
||||
kernel_size=3, stride=1, padding=1, exclusive=False)
|
||||
self.branch_pool_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=192,
|
||||
filter_size=1,
|
||||
act="relu")
|
||||
|
||||
def forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
|
||||
branch3x3 = self.branch3x3_1(x)
|
||||
branch3x3 = [
|
||||
self.branch3x3_2a(branch3x3),
|
||||
self.branch3x3_2b(branch3x3),
|
||||
]
|
||||
branch3x3 = paddle.concat(branch3x3, axis=1)
|
||||
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = [
|
||||
self.branch3x3dbl_3a(branch3x3dbl),
|
||||
self.branch3x3dbl_3b(branch3x3dbl),
|
||||
]
|
||||
branch3x3dbl = paddle.concat(branch3x3dbl, axis=1)
|
||||
|
||||
branch_pool = self.branch_pool(x)
|
||||
branch_pool = self.branch_pool_conv(branch_pool)
|
||||
|
||||
x = paddle.concat(
|
||||
[branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)
|
||||
return x
|
||||
|
||||
|
||||
class Inception_V3(TheseusLayer):
|
||||
"""
|
||||
Inception_V3
|
||||
Args:
|
||||
config: dict. config of Inception_V3.
|
||||
class_num: int=1000. The number of classes.
|
||||
|
||||
Returns:
|
||||
model: nn.Layer. Specific Inception_V3 model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
config,
|
||||
stages_pattern,
|
||||
class_num=1000,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
|
||||
self.inception_a_list = config["inception_a"]
|
||||
self.inception_c_list = config["inception_c"]
|
||||
self.inception_b_list = config["inception_b"]
|
||||
self.inception_d_list = config["inception_d"]
|
||||
self.inception_e_list = config["inception_e"]
|
||||
|
||||
self.inception_stem = InceptionStem()
|
||||
|
||||
self.inception_block_list = nn.LayerList()
|
||||
for i in range(len(self.inception_a_list[0])):
|
||||
inception_a = InceptionA(self.inception_a_list[0][i],
|
||||
self.inception_a_list[1][i])
|
||||
self.inception_block_list.append(inception_a)
|
||||
|
||||
for i in range(len(self.inception_b_list)):
|
||||
inception_b = InceptionB(self.inception_b_list[i])
|
||||
self.inception_block_list.append(inception_b)
|
||||
|
||||
for i in range(len(self.inception_c_list[0])):
|
||||
inception_c = InceptionC(self.inception_c_list[0][i],
|
||||
self.inception_c_list[1][i])
|
||||
self.inception_block_list.append(inception_c)
|
||||
|
||||
for i in range(len(self.inception_d_list)):
|
||||
inception_d = InceptionD(self.inception_d_list[i])
|
||||
self.inception_block_list.append(inception_d)
|
||||
|
||||
for i in range(len(self.inception_e_list)):
|
||||
inception_e = InceptionE(self.inception_e_list[i])
|
||||
self.inception_block_list.append(inception_e)
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self.dropout = Dropout(p=0.2, mode="downscale_in_infer")
|
||||
stdv = 1.0 / math.sqrt(2048 * 1.0)
|
||||
self.fc = Linear(
|
||||
2048,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr())
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.inception_stem(x)
|
||||
for inception_block in self.inception_block_list:
|
||||
x = inception_block(x)
|
||||
x = self.avg_pool(x)
|
||||
x = paddle.reshape(x, shape=[-1, 2048])
|
||||
x = self.dropout(x)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def InceptionV3(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
InceptionV3
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `InceptionV3` model
|
||||
"""
|
||||
model = Inception_V3(
|
||||
NET_CONFIG,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["InceptionV3"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld)
|
||||
return model
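if __name__ == "__main__":
    # Minimal smoke-test sketch: builds InceptionV3 without pretrained weights and
    # runs one forward pass on a 299x299 input (the canonical InceptionV3 size).
    # Assumes PaddlePaddle is installed and the ppcls package is importable.
    model = InceptionV3(pretrained=False)
    model.eval()
    x = paddle.rand([1, 3, 299, 299])
    y = model(x)
    print(y.shape)  # expected: [1, 1000]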
|
||||
@ -0,0 +1,257 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, ReLU, Flatten
|
||||
from paddle.nn import AdaptiveAvgPool2D
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"MobileNetV1_x0_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_25_pretrained.pdparams",
|
||||
"MobileNetV1_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_5_pretrained.pdparams",
|
||||
"MobileNetV1_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_x0_75_pretrained.pdparams",
|
||||
"MobileNetV1":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV1_pretrained.pdparams"
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"MobileNetV1": ["blocks[0]", "blocks[2]", "blocks[4]", "blocks[10]"]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
filter_size,
|
||||
num_filters,
|
||||
stride,
|
||||
padding,
|
||||
num_groups=1):
|
||||
super().__init__()
|
||||
|
||||
self.conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=num_groups,
|
||||
weight_attr=ParamAttr(initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
self.bn = BatchNorm(num_filters)
|
||||
self.relu = ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class DepthwiseSeparable(TheseusLayer):
|
||||
def __init__(self, num_channels, num_filters1, num_filters2, num_groups,
|
||||
stride, scale):
|
||||
super().__init__()
|
||||
|
||||
self.depthwise_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=int(num_filters1 * scale),
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
num_groups=int(num_groups * scale))
|
||||
|
||||
self.pointwise_conv = ConvBNLayer(
|
||||
num_channels=int(num_filters1 * scale),
|
||||
filter_size=1,
|
||||
num_filters=int(num_filters2 * scale),
|
||||
stride=1,
|
||||
padding=0)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.depthwise_conv(x)
|
||||
x = self.pointwise_conv(x)
|
||||
return x
|
||||
|
||||
|
||||
class MobileNet(TheseusLayer):
|
||||
"""
|
||||
MobileNet
|
||||
Args:
|
||||
scale: float=1.0. The coefficient that controls the size of network parameters.
|
||||
class_num: int=1000. The number of classes.
|
||||
Returns:
|
||||
model: nn.Layer. Specific MobileNet model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
stages_pattern,
|
||||
scale=1.0,
|
||||
class_num=1000,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
self.scale = scale
|
||||
|
||||
self.conv = ConvBNLayer(
|
||||
num_channels=3,
|
||||
filter_size=3,
|
||||
num_filters=int(32 * scale),
|
||||
stride=2,
|
||||
padding=1)
|
||||
|
||||
#num_channels, num_filters1, num_filters2, num_groups, stride
|
||||
self.cfg = [[int(32 * scale), 32, 64, 32, 1],
|
||||
[int(64 * scale), 64, 128, 64, 2],
|
||||
[int(128 * scale), 128, 128, 128, 1],
|
||||
[int(128 * scale), 128, 256, 128, 2],
|
||||
[int(256 * scale), 256, 256, 256, 1],
|
||||
[int(256 * scale), 256, 512, 256, 2],
|
||||
[int(512 * scale), 512, 512, 512, 1],
|
||||
[int(512 * scale), 512, 512, 512, 1],
|
||||
[int(512 * scale), 512, 512, 512, 1],
|
||||
[int(512 * scale), 512, 512, 512, 1],
|
||||
[int(512 * scale), 512, 512, 512, 1],
|
||||
[int(512 * scale), 512, 1024, 512, 2],
|
||||
[int(1024 * scale), 1024, 1024, 1024, 1]]
|
||||
|
||||
self.blocks = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=params[0],
|
||||
num_filters1=params[1],
|
||||
num_filters2=params[2],
|
||||
num_groups=params[3],
|
||||
stride=params[4],
|
||||
scale=scale) for params in self.cfg
|
||||
])
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self.flatten = Flatten(start_axis=1, stop_axis=-1)
|
||||
|
||||
self.fc = Linear(
|
||||
int(1024 * scale),
|
||||
class_num,
|
||||
weight_attr=ParamAttr(initializer=KaimingNormal()))
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.blocks(x)
|
||||
x = self.avg_pool(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
return x
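# --- Added usage sketch (assumption, not original code) ---
# Exercising the backbone directly: with a reduced width multiplier the stem
# and blocks shrink accordingly, while the classifier still maps the pooled
# features to `class_num` logits.
def _demo_mobilenet_forward():
    import paddle

    net = MobileNet(
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
        scale=0.5,
        class_num=10)
    net.eval()
    logits = net(paddle.rand([2, 3, 224, 224]))  # NCHW dummy batch
    print(logits.shape)  # expected: [2, 10]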
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV1_x0_25
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV1_x0_25` model depends on args.
|
||||
"""
|
||||
model = MobileNet(
|
||||
scale=0.25,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV1_x0_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV1_x0_5` model depends on args.
|
||||
"""
|
||||
model = MobileNet(
|
||||
scale=0.5,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV1_x0_75
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV1_x0_75` model depends on args.
|
||||
"""
|
||||
model = MobileNet(
|
||||
scale=0.75,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV1(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV1
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV1` model depends on args.
|
||||
"""
|
||||
model = MobileNet(
|
||||
scale=1.0,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV1"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld)
|
||||
return model
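# --- Added usage sketch (assumption, not original code) ---
# The factory functions above are the intended entry points; extra keyword
# arguments such as `class_num` are forwarded to MobileNet through **kwargs.
def _demo_mobilenetv1_factory():
    import paddle

    model = MobileNetV1(pretrained=False, class_num=100)
    model.eval()
    out = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 100]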
|
||||
# ===================== New file: MobileNetV3 backbone =====================
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"MobileNetV3_small_x0_35":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_35_pretrained.pdparams",
|
||||
"MobileNetV3_small_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_5_pretrained.pdparams",
|
||||
"MobileNetV3_small_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x0_75_pretrained.pdparams",
|
||||
"MobileNetV3_small_x1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_0_pretrained.pdparams",
|
||||
"MobileNetV3_small_x1_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_small_x1_25_pretrained.pdparams",
|
||||
"MobileNetV3_large_x0_35":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_35_pretrained.pdparams",
|
||||
"MobileNetV3_large_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_5_pretrained.pdparams",
|
||||
"MobileNetV3_large_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x0_75_pretrained.pdparams",
|
||||
"MobileNetV3_large_x1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_0_pretrained.pdparams",
|
||||
"MobileNetV3_large_x1_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/MobileNetV3_large_x1_25_pretrained.pdparams",
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"MobileNetV3_small":
|
||||
["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
|
||||
"MobileNetV3_large":
|
||||
["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
# "large", "small" is just for MobinetV3_large, MobileNetV3_small respectively.
|
||||
# The type of "large" or "small" config is a list. Each element(list) represents a depthwise block, which is composed of k, exp, se, act, s.
|
||||
# k: kernel_size
|
||||
# exp: middle channel number in depthwise block
|
||||
# c: output channel number in depthwise block
|
||||
# se: whether to use SE block
|
||||
# act: which activation to use
|
||||
# s: stride in depthwise block
|
||||
NET_CONFIG = {
|
||||
"large": [
|
||||
# k, exp, c, se, act, s
|
||||
[3, 16, 16, False, "relu", 1],
|
||||
[3, 64, 24, False, "relu", 2],
|
||||
[3, 72, 24, False, "relu", 1],
|
||||
[5, 72, 40, True, "relu", 2],
|
||||
[5, 120, 40, True, "relu", 1],
|
||||
[5, 120, 40, True, "relu", 1],
|
||||
[3, 240, 80, False, "hardswish", 2],
|
||||
[3, 200, 80, False, "hardswish", 1],
|
||||
[3, 184, 80, False, "hardswish", 1],
|
||||
[3, 184, 80, False, "hardswish", 1],
|
||||
[3, 480, 112, True, "hardswish", 1],
|
||||
[3, 672, 112, True, "hardswish", 1],
|
||||
[5, 672, 160, True, "hardswish", 2],
|
||||
[5, 960, 160, True, "hardswish", 1],
|
||||
[5, 960, 160, True, "hardswish", 1],
|
||||
],
|
||||
"small": [
|
||||
# k, exp, c, se, act, s
|
||||
[3, 16, 16, True, "relu", 2],
|
||||
[3, 72, 24, False, "relu", 2],
|
||||
[3, 88, 24, False, "relu", 1],
|
||||
[5, 96, 40, True, "hardswish", 2],
|
||||
[5, 240, 40, True, "hardswish", 1],
|
||||
[5, 240, 40, True, "hardswish", 1],
|
||||
[5, 120, 48, True, "hardswish", 1],
|
||||
[5, 144, 48, True, "hardswish", 1],
|
||||
[5, 288, 96, True, "hardswish", 2],
|
||||
[5, 576, 96, True, "hardswish", 1],
|
||||
[5, 576, 96, True, "hardswish", 1],
|
||||
]
|
||||
}
|
||||
# first conv output channel number in MobileNetV3
|
||||
STEM_CONV_NUMBER = 16
|
||||
# last second conv output channel for "small"
|
||||
LAST_SECOND_CONV_SMALL = 576
|
||||
# last second conv output channel for "large"
|
||||
LAST_SECOND_CONV_LARGE = 960
|
||||
# last conv output channel number for "large" and "small"
|
||||
LAST_CONV = 1280
|
||||
|
||||
|
||||
def _make_divisible(v, divisor=8, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
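# --- Added illustration (not part of the original file) ---
# Scaled channel widths are snapped to multiples of 8 (the default divisor) so
# the resulting tensors stay friendly to vectorized kernels; the rounding never
# drops below 90% of the requested width.
def _demo_make_divisible():
    assert _make_divisible(16 * 0.35) == 8     # 5.6  -> 8
    assert _make_divisible(96 * 0.35) == 32    # 33.6 -> 32
    assert _make_divisible(576 * 1.25) == 720  # already a multiple of 8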
|
||||
|
||||
|
||||
def _create_act(act):
|
||||
if act == "hardswish":
|
||||
return nn.Hardswish()
|
||||
elif act == "relu":
|
||||
return nn.ReLU()
|
||||
elif act is None:
|
||||
return None
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"The activation function is not supported: {}".format(act))
|
||||
|
||||
|
||||
class MobileNetV3(TheseusLayer):
|
||||
"""
|
||||
MobileNetV3
|
||||
Args:
|
||||
config: list. MobileNetV3 depthwise blocks config.
|
||||
scale: float=1.0. The coefficient that controls the size of network parameters.
|
||||
class_num: int=1000. The number of classes.
|
||||
inplanes: int=16. The output channel number of the first convolution layer.
class_squeeze: int=960. The output channel number of the penultimate convolution layer.
class_expand: int=1280. The output channel number of the last convolution layer.
|
||||
dropout_prob: float=0.2. Probability of setting units to zero.
|
||||
Returns:
|
||||
model: nn.Layer. Specific MobileNetV3 model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
config,
|
||||
stages_pattern,
|
||||
scale=1.0,
|
||||
class_num=1000,
|
||||
inplanes=STEM_CONV_NUMBER,
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
class_expand=LAST_CONV,
|
||||
dropout_prob=0.2,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
|
||||
self.cfg = config
|
||||
self.scale = scale
|
||||
self.inplanes = inplanes
|
||||
self.class_squeeze = class_squeeze
|
||||
self.class_expand = class_expand
|
||||
self.class_num = class_num
|
||||
|
||||
self.conv = ConvBNLayer(
|
||||
in_c=3,
|
||||
out_c=_make_divisible(self.inplanes * self.scale),
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
num_groups=1,
|
||||
if_act=True,
|
||||
act="hardswish")
|
||||
|
||||
self.blocks = nn.Sequential(* [
|
||||
ResidualUnit(
|
||||
in_c=_make_divisible(self.inplanes * self.scale if i == 0 else
|
||||
self.cfg[i - 1][2] * self.scale),
|
||||
mid_c=_make_divisible(self.scale * exp),
|
||||
out_c=_make_divisible(self.scale * c),
|
||||
filter_size=k,
|
||||
stride=s,
|
||||
use_se=se,
|
||||
act=act) for i, (k, exp, c, se, act, s) in enumerate(self.cfg)
|
||||
])
|
||||
|
||||
self.last_second_conv = ConvBNLayer(
|
||||
in_c=_make_divisible(self.cfg[-1][2] * self.scale),
|
||||
out_c=_make_divisible(self.scale * self.class_squeeze),
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
num_groups=1,
|
||||
if_act=True,
|
||||
act="hardswish")
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.last_conv = Conv2D(
|
||||
in_channels=_make_divisible(self.scale * self.class_squeeze),
|
||||
out_channels=self.class_expand,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
bias_attr=False)
|
||||
|
||||
self.hardswish = nn.Hardswish()
|
||||
if dropout_prob is not None:
|
||||
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
|
||||
else:
|
||||
self.dropout = None
|
||||
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
|
||||
|
||||
self.fc = Linear(self.class_expand, class_num)
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.blocks(x)
|
||||
x = self.last_second_conv(x)
|
||||
x = self.avg_pool(x)
|
||||
x = self.last_conv(x)
|
||||
x = self.hardswish(x)
|
||||
if self.dropout is not None:
|
||||
x = self.dropout(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
in_c,
|
||||
out_c,
|
||||
filter_size,
|
||||
stride,
|
||||
padding,
|
||||
num_groups=1,
|
||||
if_act=True,
|
||||
act=None):
|
||||
super().__init__()
|
||||
|
||||
self.conv = Conv2D(
|
||||
in_channels=in_c,
|
||||
out_channels=out_c,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=num_groups,
|
||||
bias_attr=False)
|
||||
self.bn = BatchNorm(
|
||||
num_channels=out_c,
|
||||
act=None,
|
||||
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
|
||||
self.if_act = if_act
|
||||
self.act = _create_act(act)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
if self.if_act:
|
||||
x = self.act(x)
|
||||
return x
|
||||
|
||||
|
||||
class ResidualUnit(TheseusLayer):
|
||||
def __init__(self,
|
||||
in_c,
|
||||
mid_c,
|
||||
out_c,
|
||||
filter_size,
|
||||
stride,
|
||||
use_se,
|
||||
act=None):
|
||||
super().__init__()
|
||||
self.if_shortcut = stride == 1 and in_c == out_c
|
||||
self.if_se = use_se
|
||||
|
||||
self.expand_conv = ConvBNLayer(
|
||||
in_c=in_c,
|
||||
out_c=mid_c,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
if_act=True,
|
||||
act=act)
|
||||
self.bottleneck_conv = ConvBNLayer(
|
||||
in_c=mid_c,
|
||||
out_c=mid_c,
|
||||
filter_size=filter_size,
|
||||
stride=stride,
|
||||
padding=int((filter_size - 1) // 2),
|
||||
num_groups=mid_c,
|
||||
if_act=True,
|
||||
act=act)
|
||||
if self.if_se:
|
||||
self.mid_se = SEModule(mid_c)
|
||||
self.linear_conv = ConvBNLayer(
|
||||
in_c=mid_c,
|
||||
out_c=out_c,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
if_act=False,
|
||||
act=None)
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.expand_conv(x)
|
||||
x = self.bottleneck_conv(x)
|
||||
if self.if_se:
|
||||
x = self.mid_se(x)
|
||||
x = self.linear_conv(x)
|
||||
if self.if_shortcut:
|
||||
x = paddle.add(identity, x)
|
||||
return x
|
||||
|
||||
|
||||
# nn.Hardsigmoid does not expose the "slope" and "offset" arguments of nn.functional.hardsigmoid, so a thin wrapper layer is defined here.
|
||||
class Hardsigmoid(TheseusLayer):
|
||||
def __init__(self, slope=0.2, offset=0.5):
|
||||
super().__init__()
|
||||
self.slope = slope
|
||||
self.offset = offset
|
||||
|
||||
def forward(self, x):
|
||||
return nn.functional.hardsigmoid(
|
||||
x, slope=self.slope, offset=self.offset)
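# --- Added sanity check (not part of the original file) ---
# With slope=0.2 and offset=0.5 this layer computes clip(0.2 * x + 0.5, 0, 1),
# the hard-sigmoid variant used by the SE blocks below.
def _demo_hardsigmoid():
    x = paddle.to_tensor([-4.0, -1.0, 0.0, 1.0, 4.0])
    out = Hardsigmoid(slope=0.2, offset=0.5)(x)
    ref = paddle.clip(0.2 * x + 0.5, min=0.0, max=1.0)
    assert bool(paddle.allclose(out, ref))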
|
||||
|
||||
|
||||
class SEModule(TheseusLayer):
|
||||
def __init__(self, channel, reduction=4):
|
||||
super().__init__()
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self.conv1 = Conv2D(
|
||||
in_channels=channel,
|
||||
out_channels=channel // reduction,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.relu = nn.ReLU()
|
||||
self.conv2 = Conv2D(
|
||||
in_channels=channel // reduction,
|
||||
out_channels=channel,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.avg_pool(x)
|
||||
x = self.conv1(x)
|
||||
x = self.relu(x)
|
||||
x = self.conv2(x)
|
||||
x = self.hardsigmoid(x)
|
||||
return paddle.multiply(x=identity, y=x)
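# --- Added usage sketch (assumption, not original code) ---
# Wiring the "small" configuration into MobileNetV3 directly; the squeeze width
# must be passed explicitly because the constructor defaults to the "large"
# value (LAST_SECOND_CONV_LARGE).
def _demo_mobilenetv3_small_forward():
    net = MobileNetV3(
        config=NET_CONFIG["small"],
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        scale=1.0,
        class_num=10,
        class_squeeze=LAST_SECOND_CONV_SMALL)
    net.eval()
    logits = net(paddle.rand([2, 3, 224, 224]))
    print(logits.shape)  # expected: [2, 10]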
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_small_x0_35
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_small_x0_35` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["small"],
|
||||
scale=0.35,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_SMALL,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_small_x0_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_small_x0_5` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["small"],
|
||||
scale=0.5,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_SMALL,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_small_x0_75
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_small_x0_75` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["small"],
|
||||
scale=0.75,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_SMALL,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_small_x1_0
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_small_x1_0` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["small"],
|
||||
scale=1.0,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_SMALL,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_small_x1_25
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_small_x1_25` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["small"],
|
||||
scale=1.25,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_SMALL,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_large_x0_35
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_large_x0_35` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["large"],
|
||||
scale=0.35,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_large_x0_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_large_x0_5` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["large"],
|
||||
scale=0.5,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_large_x0_75
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_large_x0_75` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["large"],
|
||||
scale=0.75,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_large_x1_0
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_large_x1_0` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["large"],
|
||||
scale=1.0,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"],
|
||||
use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MobileNetV3_large_x1_25
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `MobileNetV3_large_x1_25` model depends on args.
|
||||
"""
|
||||
model = MobileNetV3(
|
||||
config=NET_CONFIG["large"],
|
||||
scale=1.25,
|
||||
stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
|
||||
class_squeeze=LAST_SECOND_CONV_LARGE,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"],
|
||||
use_ssld)
|
||||
return model
|
||||
# ===================== New file: PPLCNet backbone =====================
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"PPLCNet_x0_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams",
|
||||
"PPLCNet_x0_35":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams",
|
||||
"PPLCNet_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams",
|
||||
"PPLCNet_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams",
|
||||
"PPLCNet_x1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams",
|
||||
"PPLCNet_x1_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams",
|
||||
"PPLCNet_x2_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams",
|
||||
"PPLCNet_x2_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams"
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
# Each element (a list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se.
|
||||
# k: kernel_size
|
||||
# in_c: input channel number in depthwise block
|
||||
# out_c: output channel number in depthwise block
|
||||
# s: stride in depthwise block
|
||||
# use_se: whether to use SE block
|
||||
|
||||
NET_CONFIG = {
|
||||
"blocks2":
|
||||
#k, in_c, out_c, s, use_se
|
||||
[[3, 16, 32, 1, False]],
|
||||
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
|
||||
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
|
||||
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
|
||||
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
|
||||
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
|
||||
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
|
||||
}
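# --- Added note (not part of the original file) ---
# Every stage whose first depthwise block has stride 2 halves the spatial
# resolution; together with the stride-2 stem convolution the overall stride is
# 32, so a 224x224 input reaches the global average pool as a 7x7 feature map.
def _demo_pplcnet_total_stride():
    stride = 2  # stem convolution (conv1)
    for stage in ("blocks2", "blocks3", "blocks4", "blocks5", "blocks6"):
        for _k, _in_c, _out_c, s, _se in NET_CONFIG[stage]:
            stride *= s
    print(stride, 224 // stride)  # expected: 32 7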
|
||||
|
||||
|
||||
def make_divisible(v, divisor=8, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
filter_size,
|
||||
num_filters,
|
||||
stride,
|
||||
num_groups=1):
|
||||
super().__init__()
|
||||
|
||||
self.conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=num_groups,
|
||||
weight_attr=ParamAttr(initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
|
||||
self.bn = BatchNorm(
|
||||
num_filters,
|
||||
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
|
||||
self.hardswish = nn.Hardswish()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.hardswish(x)
|
||||
return x
|
||||
|
||||
|
||||
class DepthwiseSeparable(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
dw_size=3,
|
||||
use_se=False):
|
||||
super().__init__()
|
||||
self.use_se = use_se
|
||||
self.dw_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_channels,
|
||||
filter_size=dw_size,
|
||||
stride=stride,
|
||||
num_groups=num_channels)
|
||||
if use_se:
|
||||
self.se = SEModule(num_channels)
|
||||
self.pw_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
filter_size=1,
|
||||
num_filters=num_filters,
|
||||
stride=1)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.dw_conv(x)
|
||||
if self.use_se:
|
||||
x = self.se(x)
|
||||
x = self.pw_conv(x)
|
||||
return x
|
||||
|
||||
|
||||
class SEModule(TheseusLayer):
|
||||
def __init__(self, channel, reduction=4):
|
||||
super().__init__()
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self.conv1 = Conv2D(
|
||||
in_channels=channel,
|
||||
out_channels=channel // reduction,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.relu = nn.ReLU()
|
||||
self.conv2 = Conv2D(
|
||||
in_channels=channel // reduction,
|
||||
out_channels=channel,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
self.hardsigmoid = nn.Hardsigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.avg_pool(x)
|
||||
x = self.conv1(x)
|
||||
x = self.relu(x)
|
||||
x = self.conv2(x)
|
||||
x = self.hardsigmoid(x)
|
||||
x = paddle.multiply(x=identity, y=x)
|
||||
return x
|
||||
|
||||
|
||||
class PPLCNet(TheseusLayer):
|
||||
def __init__(self,
|
||||
stages_pattern,
|
||||
scale=1.0,
|
||||
class_num=1000,
|
||||
dropout_prob=0.2,
|
||||
class_expand=1280,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
self.scale = scale
|
||||
self.class_expand = class_expand
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
filter_size=3,
|
||||
num_filters=make_divisible(16 * scale),
|
||||
stride=2)
|
||||
|
||||
self.blocks2 = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
|
||||
])
|
||||
|
||||
self.blocks3 = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
|
||||
])
|
||||
|
||||
self.blocks4 = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
|
||||
])
|
||||
|
||||
self.blocks5 = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
|
||||
])
|
||||
|
||||
self.blocks6 = nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=make_divisible(in_c * scale),
|
||||
num_filters=make_divisible(out_c * scale),
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
|
||||
])
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.last_conv = Conv2D(
|
||||
in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
|
||||
out_channels=self.class_expand,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
bias_attr=False)
|
||||
|
||||
self.hardswish = nn.Hardswish()
|
||||
self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
|
||||
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
|
||||
|
||||
self.fc = Linear(self.class_expand, class_num)
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
|
||||
x = self.blocks2(x)
|
||||
x = self.blocks3(x)
|
||||
x = self.blocks4(x)
|
||||
x = self.blocks5(x)
|
||||
x = self.blocks6(x)
|
||||
|
||||
x = self.avg_pool(x)
|
||||
x = self.last_conv(x)
|
||||
x = self.hardswish(x)
|
||||
x = self.dropout(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
return x
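# --- Added usage sketch (assumption, not original code) ---
# A direct forward pass through the backbone: regardless of the width
# multiplier, the 1x1 `last_conv` expands the pooled features to
# `class_expand` channels before the final Linear head.
def _demo_pplcnet_forward():
    net = PPLCNet(
        stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"],
        scale=0.5,
        class_num=10)
    net.eval()
    logits = net(paddle.rand([2, 3, 224, 224]))
    print(logits.shape)  # expected: [2, 10]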
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def PPLCNet_x0_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x0_25
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x0_25` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_25"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x0_35(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x0_35
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x0_35` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=0.35, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_35"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x0_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x0_5` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_5"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x0_75
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x0_75` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x0_75"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x1_0
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x1_0` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_0"], use_ssld)
|
||||
return model
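# --- Added usage note (not part of the original file; the path below is a
# made-up placeholder, not a file shipped with the repository) ---
# `pretrained` accepts False, True (download the released weights), or a string
# pointing at a local .pdparams file, e.g.:
#     model = PPLCNet_x1_0(
#         pretrained="output/PPLCNet_x1_0/best_model.pdparams", class_num=100)
# Any other type raises the RuntimeError defined in _load_pretrained above.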
|
||||
|
||||
|
||||
def PPLCNet_x1_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x1_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x1_5` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=1.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x1_5"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x2_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x2_0
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x2_0` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=2.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_0"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PPLCNet_x2_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
PPLCNet_x2_5
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `PPLCNet_x2_5` model depends on args.
|
||||
"""
|
||||
model = PPLCNet(
|
||||
scale=2.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["PPLCNet_x2_5"], use_ssld)
|
||||
return model
|
||||
# ===================== New file: ResNet backbone =====================
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
import math
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNet18":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_pretrained.pdparams",
|
||||
"ResNet18_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet18_vd_pretrained.pdparams",
|
||||
"ResNet34":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_pretrained.pdparams",
|
||||
"ResNet34_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_pretrained.pdparams",
|
||||
"ResNet50":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_pretrained.pdparams",
|
||||
"ResNet50_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_pretrained.pdparams",
|
||||
"ResNet101":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_pretrained.pdparams",
|
||||
"ResNet101_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_pretrained.pdparams",
|
||||
"ResNet152":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_pretrained.pdparams",
|
||||
"ResNet152_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet152_vd_pretrained.pdparams",
|
||||
"ResNet200_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet200_vd_pretrained.pdparams",
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"ResNet18": ["blocks[1]", "blocks[3]", "blocks[5]", "blocks[7]"],
|
||||
"ResNet34": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
|
||||
"ResNet50": ["blocks[2]", "blocks[6]", "blocks[12]", "blocks[15]"],
|
||||
"ResNet101": ["blocks[2]", "blocks[6]", "blocks[29]", "blocks[32]"],
|
||||
"ResNet152": ["blocks[2]", "blocks[10]", "blocks[46]", "blocks[49]"],
|
||||
"ResNet200": ["blocks[2]", "blocks[14]", "blocks[62]", "blocks[65]"]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
'''
|
||||
ResNet config: dict.
|
||||
key: depth of ResNet.
|
||||
values: the config dict of the corresponding model.
keys:
block_type: The residual block used at this depth, either BasicBlock or BottleneckBlock.
block_depth: The number of blocks in each of the four stages.
num_channels: The number of input channels of each stage.
|
||||
'''
|
||||
NET_CONFIG = {
|
||||
"18": {
|
||||
"block_type": "BasicBlock",
|
||||
"block_depth": [2, 2, 2, 2],
|
||||
"num_channels": [64, 64, 128, 256]
|
||||
},
|
||||
"34": {
|
||||
"block_type": "BasicBlock",
|
||||
"block_depth": [3, 4, 6, 3],
|
||||
"num_channels": [64, 64, 128, 256]
|
||||
},
|
||||
"50": {
|
||||
"block_type": "BottleneckBlock",
|
||||
"block_depth": [3, 4, 6, 3],
|
||||
"num_channels": [64, 256, 512, 1024]
|
||||
},
|
||||
"101": {
|
||||
"block_type": "BottleneckBlock",
|
||||
"block_depth": [3, 4, 23, 3],
|
||||
"num_channels": [64, 256, 512, 1024]
|
||||
},
|
||||
"152": {
|
||||
"block_type": "BottleneckBlock",
|
||||
"block_depth": [3, 8, 36, 3],
|
||||
"num_channels": [64, 256, 512, 1024]
|
||||
},
|
||||
"200": {
|
||||
"block_type": "BottleneckBlock",
|
||||
"block_depth": [3, 12, 48, 3],
|
||||
"num_channels": [64, 256, 512, 1024]
|
||||
},
|
||||
}
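# --- Added cross-check (not part of the original file) ---
# The indices in MODEL_STAGES_PATTERN point at the last block of each stage, so
# they follow directly from "block_depth": for ResNet50, [3, 4, 6, 3] gives
# cumulative end indices 2, 6, 12 and 15, matching the entries above.
def _demo_stage_end_indices(depth_key="50"):
    ends, idx = [], -1
    for depth in NET_CONFIG[depth_key]["block_depth"]:
        idx += depth
        ends.append(idx)
    print(ends)  # expected for "50": [2, 6, 12, 15]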
|
||||
|
||||
|
||||
class ConvBNLayer(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
is_vd_mode=False,
|
||||
act=None,
|
||||
lr_mult=1.0,
|
||||
data_format="NCHW"):
|
||||
super().__init__()
|
||||
self.is_vd_mode = is_vd_mode
|
||||
self.act = act
|
||||
self.avg_pool = AvgPool2D(
|
||||
kernel_size=2, stride=2, padding=0, ceil_mode=True)
|
||||
self.conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(learning_rate=lr_mult),
|
||||
bias_attr=False,
|
||||
data_format=data_format)
|
||||
self.bn = BatchNorm(
|
||||
num_filters,
|
||||
param_attr=ParamAttr(learning_rate=lr_mult),
|
||||
bias_attr=ParamAttr(learning_rate=lr_mult),
|
||||
data_layout=data_format)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
if self.is_vd_mode:
|
||||
x = self.avg_pool(x)
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
if self.act:
|
||||
x = self.relu(x)
|
||||
return x
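# Added note (not part of the original file): when is_vd_mode is True, this
# layer realizes the ResNet-D style shortcut: a 2x2 average pool performs the
# spatial downsampling and the following 1x1 convolution keeps stride 1, so no
# activations are simply skipped the way a strided 1x1 convolution would skip
# them.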
|
||||
|
||||
|
||||
class BottleneckBlock(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
lr_mult=1.0,
|
||||
data_format="NCHW"):
|
||||
super().__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act="relu",
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act="relu",
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
stride=stride if if_first else 1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
self.relu = nn.ReLU()
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.conv0(x)
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
|
||||
if self.shortcut:
|
||||
short = identity
|
||||
else:
|
||||
short = self.short(identity)
|
||||
x = paddle.add(x=x, y=short)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class BasicBlock(TheseusLayer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
lr_mult=1.0,
|
||||
data_format="NCHW"):
|
||||
super().__init__()
|
||||
|
||||
self.stride = stride
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act="relu",
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
act=None,
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
stride=stride if if_first else 1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
lr_mult=lr_mult,
|
||||
data_format=data_format)
|
||||
self.shortcut = shortcut
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
x = self.conv0(x)
|
||||
x = self.conv1(x)
|
||||
if self.shortcut:
|
||||
short = identity
|
||||
else:
|
||||
short = self.short(identity)
|
||||
x = paddle.add(x=x, y=short)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class ResNet(TheseusLayer):
|
||||
"""
|
||||
ResNet
|
||||
Args:
|
||||
config: dict. config of ResNet.
|
||||
version: str="vb". Different version of ResNet, version vd can perform better.
|
||||
class_num: int=1000. The number of classes.
|
||||
lr_mult_list: list. Control the learning rate of different stages.
|
||||
Returns:
|
||||
model: nn.Layer. Specific ResNet model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
config,
|
||||
stages_pattern,
|
||||
version="vb",
|
||||
class_num=1000,
|
||||
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
|
||||
data_format="NCHW",
|
||||
input_image_channel=3,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
|
||||
self.cfg = config
|
||||
self.lr_mult_list = lr_mult_list
|
||||
self.is_vd_mode = version == "vd"
|
||||
self.class_num = class_num
|
||||
self.num_filters = [64, 128, 256, 512]
|
||||
self.block_depth = self.cfg["block_depth"]
|
||||
self.block_type = self.cfg["block_type"]
|
||||
self.num_channels = self.cfg["num_channels"]
|
||||
self.channels_mult = 1 if self.num_channels[-1] == 256 else 4
|
||||
|
||||
assert isinstance(self.lr_mult_list, (
|
||||
list, tuple
|
||||
)), "lr_mult_list should be in (list, tuple) but got {}".format(
|
||||
type(self.lr_mult_list))
|
||||
assert len(self.lr_mult_list
|
||||
) == 5, "lr_mult_list length should be 5 but got {}".format(
|
||||
len(self.lr_mult_list))
|
||||
|
||||
self.stem_cfg = {
|
||||
#num_channels, num_filters, filter_size, stride
|
||||
"vb": [[input_image_channel, 64, 7, 2]],
|
||||
"vd":
|
||||
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
|
||||
}
|
||||
|
||||
self.stem = nn.Sequential(* [
|
||||
ConvBNLayer(
|
||||
num_channels=in_c,
|
||||
num_filters=out_c,
|
||||
filter_size=k,
|
||||
stride=s,
|
||||
act="relu",
|
||||
lr_mult=self.lr_mult_list[0],
|
||||
data_format=data_format)
|
||||
for in_c, out_c, k, s in self.stem_cfg[version]
|
||||
])
|
||||
|
||||
self.max_pool = MaxPool2D(
|
||||
kernel_size=3, stride=2, padding=1, data_format=data_format)
|
||||
block_list = []
|
||||
for block_idx in range(len(self.block_depth)):
|
||||
shortcut = False
|
||||
for i in range(self.block_depth[block_idx]):
|
||||
block_list.append(globals()[self.block_type](
|
||||
num_channels=self.num_channels[block_idx] if i == 0 else
|
||||
self.num_filters[block_idx] * self.channels_mult,
|
||||
num_filters=self.num_filters[block_idx],
|
||||
stride=2 if i == 0 and block_idx != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
if_first=block_idx == i == 0 if version == "vd" else True,
|
||||
lr_mult=self.lr_mult_list[block_idx + 1],
|
||||
data_format=data_format))
|
||||
shortcut = True
|
||||
self.blocks = nn.Sequential(*block_list)
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1, data_format=data_format)
|
||||
self.flatten = nn.Flatten()
|
||||
self.avg_pool_channels = self.num_channels[-1] * 2
|
||||
stdv = 1.0 / math.sqrt(self.avg_pool_channels * 1.0)
|
||||
self.fc = Linear(
|
||||
self.avg_pool_channels,
|
||||
self.class_num,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
|
||||
|
||||
self.data_format = data_format
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, x):
|
||||
with paddle.static.amp.fp16_guard():
|
||||
if self.data_format == "NHWC":
|
||||
x = paddle.transpose(x, [0, 2, 3, 1])
|
||||
x.stop_gradient = True
|
||||
x = self.stem(x)
|
||||
x = self.max_pool(x)
|
||||
x = self.blocks(x)
|
||||
x = self.avg_pool(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
return x
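# --- Added usage sketch (assumption, not original code) ---
# Building a ResNet50-vd backbone straight from NET_CONFIG and running a dummy
# batch; the head is a single Linear layer, so the output is
# [batch_size, class_num].
def _demo_resnet50_vd_forward():
    net = ResNet(
        config=NET_CONFIG["50"],
        stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
        version="vd",
        class_num=10)
    net.eval()
    logits = net(paddle.rand([2, 3, 224, 224]))
    print(logits.shape)  # expected: [2, 10]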
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ResNet18(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet18
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet18` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["18"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
|
||||
version="vb",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet18"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet18_vd
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet18_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["18"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet18"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet18_vd"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet34(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet34
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet34` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["34"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
|
||||
version="vb",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet34"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet34_vd
|
||||
Args:
|
||||
pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
|
||||
If str, means the path of the pretrained model.
|
||||
use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet34_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["34"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet34"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet34_vd"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet50(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet50
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet50` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["50"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
|
||||
version="vb",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet50"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet50_vd
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet50_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["50"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet50"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet50_vd"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet101(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet101
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet101` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["101"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
|
||||
version="vb",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet101"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet101_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet101_vd
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet101_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["101"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet101"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet101_vd"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet152(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet152
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet152` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["152"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
|
||||
version="vb",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet152"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet152_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet152_vd
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet152_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["152"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet152"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet152_vd"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNet200_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
ResNet200_vd
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `ResNet200_vd` model depends on args.
|
||||
"""
|
||||
model = ResNet(
|
||||
config=NET_CONFIG["200"],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["ResNet200"],
|
||||
version="vd",
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["ResNet200_vd"], use_ssld)
|
||||
return model
|
||||
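# --- Illustrative usage sketch (not part of the original file; assumes it is run from a
# PaddleClas checkout so that `ppcls` and `paddle` are importable) ---
if __name__ == "__main__":
    import paddle

    # Build a 100-class ResNet50_vd from scratch; pass pretrained=True to download the
    # released weights instead, or a local .pdparams path to load your own checkpoint.
    model = ResNet50_vd(pretrained=False, class_num=100)
    logits = model(paddle.randn([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 100]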
@ -0,0 +1,259 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import MaxPool2D
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import TheseusLayer
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"VGG11":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG11_pretrained.pdparams",
|
||||
"VGG13":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG13_pretrained.pdparams",
|
||||
"VGG16":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams",
|
||||
"VGG19":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams",
|
||||
}
|
||||
|
||||
MODEL_STAGES_PATTERN = {
|
||||
"VGG": [
|
||||
"conv_block_1", "conv_block_2", "conv_block_3", "conv_block_4",
|
||||
"conv_block_5"
|
||||
]
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
# VGG config
|
||||
# key: VGG network depth
|
||||
# value: conv num in different blocks
|
||||
NET_CONFIG = {
|
||||
11: [1, 1, 2, 2, 2],
|
||||
13: [2, 2, 2, 2, 2],
|
||||
16: [2, 2, 3, 3, 3],
|
||||
19: [2, 2, 4, 4, 4]
|
||||
}
|
||||
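# For example, NET_CONFIG[16] = [2, 2, 3, 3, 3] gives 2 + 2 + 3 + 3 + 3 = 13 conv layers;
# together with the three fully-connected layers defined in VGGNet below, that is the 16
# weighted layers of VGG16 (likewise 11 = 8 conv + 3 FC, 13 = 10 + 3, 19 = 16 + 3).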
|
||||
|
||||
class ConvBlock(TheseusLayer):
|
||||
def __init__(self, input_channels, output_channels, groups):
|
||||
super().__init__()
|
||||
|
||||
self.groups = groups
|
||||
self.conv1 = Conv2D(
|
||||
in_channels=input_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias_attr=False)
|
||||
if groups == 2 or groups == 3 or groups == 4:
|
||||
self.conv2 = Conv2D(
|
||||
in_channels=output_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias_attr=False)
|
||||
if groups == 3 or groups == 4:
|
||||
self.conv3 = Conv2D(
|
||||
in_channels=output_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias_attr=False)
|
||||
if groups == 4:
|
||||
self.conv4 = Conv2D(
|
||||
in_channels=output_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias_attr=False)
|
||||
|
||||
self.max_pool = MaxPool2D(kernel_size=2, stride=2, padding=0)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.conv1(inputs)
|
||||
x = self.relu(x)
|
||||
if self.groups == 2 or self.groups == 3 or self.groups == 4:
|
||||
x = self.conv2(x)
|
||||
x = self.relu(x)
|
||||
if self.groups == 3 or self.groups == 4:
|
||||
x = self.conv3(x)
|
||||
x = self.relu(x)
|
||||
if self.groups == 4:
|
||||
x = self.conv4(x)
|
||||
x = self.relu(x)
|
||||
x = self.max_pool(x)
|
||||
return x
|
||||
|
||||
|
||||
class VGGNet(TheseusLayer):
|
||||
"""
|
||||
VGGNet
|
||||
Args:
|
||||
config: list. VGGNet config.
|
||||
        stop_grad_layers: int=0. The parameters of the first `stop_grad_layers` conv blocks are frozen, i.e. `param.trainable` is set to False.
|
||||
class_num: int=1000. The number of classes.
|
||||
Returns:
|
||||
model: nn.Layer. Specific VGG model depends on args.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
config,
|
||||
stages_pattern,
|
||||
stop_grad_layers=0,
|
||||
class_num=1000,
|
||||
return_patterns=None,
|
||||
return_stages=None):
|
||||
super().__init__()
|
||||
|
||||
self.stop_grad_layers = stop_grad_layers
|
||||
|
||||
self.conv_block_1 = ConvBlock(3, 64, config[0])
|
||||
self.conv_block_2 = ConvBlock(64, 128, config[1])
|
||||
self.conv_block_3 = ConvBlock(128, 256, config[2])
|
||||
self.conv_block_4 = ConvBlock(256, 512, config[3])
|
||||
self.conv_block_5 = ConvBlock(512, 512, config[4])
|
||||
|
||||
self.relu = nn.ReLU()
|
||||
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
|
||||
|
||||
for idx, block in enumerate([
|
||||
self.conv_block_1, self.conv_block_2, self.conv_block_3,
|
||||
self.conv_block_4, self.conv_block_5
|
||||
]):
|
||||
if self.stop_grad_layers >= idx + 1:
|
||||
for param in block.parameters():
|
||||
param.trainable = False
|
||||
|
||||
self.drop = Dropout(p=0.5, mode="downscale_in_infer")
|
||||
self.fc1 = Linear(7 * 7 * 512, 4096)
|
||||
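        # The 7 * 7 * 512 input size of fc1 assumes a 224x224 input image: each of the five
        # conv blocks ends with a stride-2 max pool, so 224 / 2**5 = 7.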
self.fc2 = Linear(4096, 4096)
|
||||
self.fc3 = Linear(4096, class_num)
|
||||
|
||||
super().init_res(
|
||||
stages_pattern,
|
||||
return_patterns=return_patterns,
|
||||
return_stages=return_stages)
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.conv_block_1(inputs)
|
||||
x = self.conv_block_2(x)
|
||||
x = self.conv_block_3(x)
|
||||
x = self.conv_block_4(x)
|
||||
x = self.conv_block_5(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc1(x)
|
||||
x = self.relu(x)
|
||||
x = self.drop(x)
|
||||
x = self.fc2(x)
|
||||
x = self.relu(x)
|
||||
x = self.drop(x)
|
||||
x = self.fc3(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def VGG11(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
VGG11
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `VGG11` model depends on args.
|
||||
"""
|
||||
model = VGGNet(
|
||||
config=NET_CONFIG[11],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["VGG11"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def VGG13(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
VGG13
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `VGG13` model depends on args.
|
||||
"""
|
||||
model = VGGNet(
|
||||
config=NET_CONFIG[13],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["VGG13"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def VGG16(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
VGG16
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `VGG16` model depends on args.
|
||||
"""
|
||||
model = VGGNet(
|
||||
config=NET_CONFIG[16],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["VGG16"], use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def VGG19(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
VGG19
|
||||
Args:
|
||||
        pretrained: bool or str, default False. If True, load the released pretrained parameters;
            if a str, it is treated as the local path of a pretrained model; if False, no weights are loaded.
        use_ssld: bool, default False. Whether to use the SSLD distillation-pretrained weights when pretrained is True.
|
||||
Returns:
|
||||
model: nn.Layer. Specific `VGG19` model depends on args.
|
||||
"""
|
||||
model = VGGNet(
|
||||
config=NET_CONFIG[19],
|
||||
stages_pattern=MODEL_STAGES_PATTERN["VGG"],
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["VGG19"], use_ssld)
|
||||
return model
|
||||
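# --- Illustrative shape check (a sketch, not part of the original file) ---
if __name__ == "__main__":
    import paddle

    vgg = VGG16(pretrained=False)  # 1000-class head by default
    out = vgg(paddle.randn([2, 3, 224, 224]))  # fc1 expects 7*7*512, i.e. 224x224 inputs
    print(out.shape)  # expected: [2, 1000]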
Binary file not shown.
@ -0,0 +1,168 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout, ReLU
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"AlexNet":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvPoolLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size,
|
||||
stride,
|
||||
padding,
|
||||
stdv,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvPoolLayer, self).__init__()
|
||||
|
||||
self.relu = ReLU() if act == "relu" else None
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=input_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(
|
||||
name=name + "_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name=name + "_offset", initializer=Uniform(-stdv, stdv)))
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv(inputs)
|
||||
if self.relu is not None:
|
||||
x = self.relu(x)
|
||||
x = self._pool(x)
|
||||
return x
|
||||
|
||||
|
||||
class AlexNetDY(nn.Layer):
|
||||
def __init__(self, class_num=1000):
|
||||
super(AlexNetDY, self).__init__()
|
||||
|
||||
stdv = 1.0 / math.sqrt(3 * 11 * 11)
|
||||
self._conv1 = ConvPoolLayer(
|
||||
3, 64, 11, 4, 2, stdv, act="relu", name="conv1")
|
||||
stdv = 1.0 / math.sqrt(64 * 5 * 5)
|
||||
self._conv2 = ConvPoolLayer(
|
||||
64, 192, 5, 1, 2, stdv, act="relu", name="conv2")
|
||||
stdv = 1.0 / math.sqrt(192 * 3 * 3)
|
||||
self._conv3 = Conv2D(
|
||||
192,
|
||||
384,
|
||||
3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(
|
||||
name="conv3_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name="conv3_offset", initializer=Uniform(-stdv, stdv)))
|
||||
stdv = 1.0 / math.sqrt(384 * 3 * 3)
|
||||
self._conv4 = Conv2D(
|
||||
384,
|
||||
256,
|
||||
3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(
|
||||
name="conv4_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name="conv4_offset", initializer=Uniform(-stdv, stdv)))
|
||||
stdv = 1.0 / math.sqrt(256 * 3 * 3)
|
||||
self._conv5 = ConvPoolLayer(
|
||||
256, 256, 3, 1, 1, stdv, act="relu", name="conv5")
|
||||
stdv = 1.0 / math.sqrt(256 * 6 * 6)
|
||||
|
||||
self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
|
||||
self._fc6 = Linear(
|
||||
in_features=256 * 6 * 6,
|
||||
out_features=4096,
|
||||
weight_attr=ParamAttr(
|
||||
name="fc6_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name="fc6_offset", initializer=Uniform(-stdv, stdv)))
|
||||
|
||||
self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
|
||||
self._fc7 = Linear(
|
||||
in_features=4096,
|
||||
out_features=4096,
|
||||
weight_attr=ParamAttr(
|
||||
name="fc7_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name="fc7_offset", initializer=Uniform(-stdv, stdv)))
|
||||
self._fc8 = Linear(
|
||||
in_features=4096,
|
||||
out_features=class_num,
|
||||
weight_attr=ParamAttr(
|
||||
name="fc8_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
name="fc8_offset", initializer=Uniform(-stdv, stdv)))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv1(inputs)
|
||||
x = self._conv2(x)
|
||||
x = self._conv3(x)
|
||||
x = F.relu(x)
|
||||
x = self._conv4(x)
|
||||
x = F.relu(x)
|
||||
x = self._conv5(x)
|
||||
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
|
||||
x = self._drop1(x)
|
||||
x = self._fc6(x)
|
||||
x = F.relu(x)
|
||||
x = self._drop2(x)
|
||||
x = self._fc7(x)
|
||||
x = F.relu(x)
|
||||
x = self._fc8(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def AlexNet(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = AlexNetDY(**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
|
||||
return model
|
||||
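# --- Illustrative sketch (not part of the original file; the 256*6*6 fc6 size implies 224x224 inputs) ---
if __name__ == "__main__":
    import paddle

    net = AlexNet(pretrained=False, class_num=10)
    out = net(paddle.randn([1, 3, 224, 224]))
    print(out.shape)  # expected: [1, 10]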
@ -0,0 +1,376 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was heavily based on https://github.com/rwightman/pytorch-image-models
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"CSPDarkNet53":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams"
|
||||
}
|
||||
|
||||
MODEL_CFGS = {
|
||||
"CSPDarkNet53": dict(
|
||||
stem=dict(
|
||||
out_chs=32, kernel_size=3, stride=1, pool=''),
|
||||
stage=dict(
|
||||
out_chs=(64, 128, 256, 512, 1024),
|
||||
depth=(1, 2, 8, 8, 4),
|
||||
stride=(2, ) * 5,
|
||||
exp_ratio=(2., ) + (1., ) * 4,
|
||||
bottle_ratio=(0.5, ) + (1.0, ) * 4,
|
||||
block_ratio=(1., ) + (0.5, ) * 4,
|
||||
down_growth=True, ))
|
||||
}
|
||||
|
||||
__all__ = ['CSPDarkNet53']  # model_registry will add each entrypoint fn to this
|
||||
|
||||
|
||||
class ConvBnAct(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=None,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
act_layer=nn.LeakyReLU,
|
||||
norm_layer=nn.BatchNorm2D):
|
||||
super().__init__()
|
||||
if padding is None:
|
||||
padding = (kernel_size - 1) // 2
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels=input_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(),
|
||||
bias_attr=False)
|
||||
|
||||
self.bn = norm_layer(num_features=output_channels)
|
||||
self.act = act_layer()
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.conv(inputs)
|
||||
x = self.bn(x)
|
||||
if self.act is not None:
|
||||
x = self.act(x)
|
||||
return x
|
||||
|
||||
|
||||
def create_stem(in_chans=3,
|
||||
out_chs=32,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
pool='',
|
||||
act_layer=None,
|
||||
norm_layer=None):
|
||||
stem = nn.Sequential()
|
||||
if not isinstance(out_chs, (tuple, list)):
|
||||
out_chs = [out_chs]
|
||||
assert len(out_chs)
|
||||
in_c = in_chans
|
||||
for i, out_c in enumerate(out_chs):
|
||||
conv_name = f'conv{i + 1}'
|
||||
stem.add_sublayer(
|
||||
conv_name,
|
||||
ConvBnAct(
|
||||
in_c,
|
||||
out_c,
|
||||
kernel_size,
|
||||
stride=stride if i == 0 else 1,
|
||||
act_layer=act_layer,
|
||||
norm_layer=norm_layer))
|
||||
in_c = out_c
|
||||
last_conv = conv_name
|
||||
if pool:
|
||||
stem.add_sublayer(
|
||||
'pool', nn.MaxPool2D(
|
||||
kernel_size=3, stride=2, padding=1))
|
||||
return stem, dict(
|
||||
num_chs=in_c, reduction=stride, module='.'.join(['stem', last_conv]))
|
||||
|
||||
|
||||
class DarkBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
in_chs,
|
||||
out_chs,
|
||||
dilation=1,
|
||||
bottle_ratio=0.5,
|
||||
groups=1,
|
||||
act_layer=nn.ReLU,
|
||||
norm_layer=nn.BatchNorm2D,
|
||||
attn_layer=None,
|
||||
drop_block=None):
|
||||
super(DarkBlock, self).__init__()
|
||||
mid_chs = int(round(out_chs * bottle_ratio))
|
||||
ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer)
|
||||
self.conv1 = ConvBnAct(in_chs, mid_chs, kernel_size=1, **ckwargs)
|
||||
self.conv2 = ConvBnAct(
|
||||
mid_chs,
|
||||
out_chs,
|
||||
kernel_size=3,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
**ckwargs)
|
||||
|
||||
def forward(self, x):
|
||||
shortcut = x
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
x = x + shortcut
|
||||
return x
|
||||
|
||||
|
||||
class CrossStage(nn.Layer):
|
||||
def __init__(self,
|
||||
in_chs,
|
||||
out_chs,
|
||||
stride,
|
||||
dilation,
|
||||
depth,
|
||||
block_ratio=1.,
|
||||
bottle_ratio=1.,
|
||||
exp_ratio=1.,
|
||||
groups=1,
|
||||
first_dilation=None,
|
||||
down_growth=False,
|
||||
cross_linear=False,
|
||||
block_dpr=None,
|
||||
block_fn=DarkBlock,
|
||||
**block_kwargs):
|
||||
super(CrossStage, self).__init__()
|
||||
first_dilation = first_dilation or dilation
|
||||
down_chs = out_chs if down_growth else in_chs
|
||||
exp_chs = int(round(out_chs * exp_ratio))
|
||||
block_out_chs = int(round(out_chs * block_ratio))
|
||||
conv_kwargs = dict(
|
||||
act_layer=block_kwargs.get('act_layer'),
|
||||
norm_layer=block_kwargs.get('norm_layer'))
|
||||
|
||||
if stride != 1 or first_dilation != dilation:
|
||||
self.conv_down = ConvBnAct(
|
||||
in_chs,
|
||||
down_chs,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
dilation=first_dilation,
|
||||
groups=groups,
|
||||
**conv_kwargs)
|
||||
prev_chs = down_chs
|
||||
else:
|
||||
self.conv_down = None
|
||||
prev_chs = in_chs
|
||||
|
||||
self.conv_exp = ConvBnAct(
|
||||
prev_chs, exp_chs, kernel_size=1, **conv_kwargs)
|
||||
prev_chs = exp_chs // 2 # output of conv_exp is always split in two
|
||||
|
||||
self.blocks = nn.Sequential()
|
||||
for i in range(depth):
|
||||
self.blocks.add_sublayer(
|
||||
str(i),
|
||||
block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
|
||||
groups, **block_kwargs))
|
||||
prev_chs = block_out_chs
|
||||
|
||||
# transition convs
|
||||
self.conv_transition_b = ConvBnAct(
|
||||
prev_chs, exp_chs // 2, kernel_size=1, **conv_kwargs)
|
||||
self.conv_transition = ConvBnAct(
|
||||
exp_chs, out_chs, kernel_size=1, **conv_kwargs)
|
||||
|
||||
def forward(self, x):
|
||||
if self.conv_down is not None:
|
||||
x = self.conv_down(x)
|
||||
x = self.conv_exp(x)
|
||||
split = x.shape[1] // 2
|
||||
xs, xb = x[:, :split], x[:, split:]
|
||||
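        # Cross Stage Partial design: half of the expanded channels (xs) bypass the dense
        # blocks entirely, while the other half (xb) goes through them; the two paths are
        # concatenated again by conv_transition below.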
xb = self.blocks(xb)
|
||||
xb = self.conv_transition_b(xb)
|
||||
out = self.conv_transition(paddle.concat([xs, xb], axis=1))
|
||||
return out
|
||||
|
||||
|
||||
class DarkStage(nn.Layer):
|
||||
def __init__(self,
|
||||
in_chs,
|
||||
out_chs,
|
||||
stride,
|
||||
dilation,
|
||||
depth,
|
||||
block_ratio=1.,
|
||||
bottle_ratio=1.,
|
||||
groups=1,
|
||||
first_dilation=None,
|
||||
block_fn=DarkBlock,
|
||||
block_dpr=None,
|
||||
**block_kwargs):
|
||||
super().__init__()
|
||||
first_dilation = first_dilation or dilation
|
||||
|
||||
self.conv_down = ConvBnAct(
|
||||
in_chs,
|
||||
out_chs,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
dilation=first_dilation,
|
||||
groups=groups,
|
||||
act_layer=block_kwargs.get('act_layer'),
|
||||
norm_layer=block_kwargs.get('norm_layer'))
|
||||
|
||||
prev_chs = out_chs
|
||||
block_out_chs = int(round(out_chs * block_ratio))
|
||||
self.blocks = nn.Sequential()
|
||||
for i in range(depth):
|
||||
self.blocks.add_sublayer(
|
||||
str(i),
|
||||
block_fn(prev_chs, block_out_chs, dilation, bottle_ratio,
|
||||
groups, **block_kwargs))
|
||||
prev_chs = block_out_chs
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv_down(x)
|
||||
x = self.blocks(x)
|
||||
return x
|
||||
|
||||
|
||||
def _cfg_to_stage_args(cfg, curr_stride=2, output_stride=32):
|
||||
# get per stage args for stage and containing blocks, calculate strides to meet target output_stride
|
||||
num_stages = len(cfg['depth'])
|
||||
if 'groups' not in cfg:
|
||||
cfg['groups'] = (1, ) * num_stages
|
||||
if 'down_growth' in cfg and not isinstance(cfg['down_growth'],
|
||||
(list, tuple)):
|
||||
cfg['down_growth'] = (cfg['down_growth'], ) * num_stages
|
||||
stage_strides = []
|
||||
stage_dilations = []
|
||||
stage_first_dilations = []
|
||||
dilation = 1
|
||||
for cfg_stride in cfg['stride']:
|
||||
stage_first_dilations.append(dilation)
|
||||
if curr_stride >= output_stride:
|
||||
dilation *= cfg_stride
|
||||
stride = 1
|
||||
else:
|
||||
stride = cfg_stride
|
||||
curr_stride *= stride
|
||||
stage_strides.append(stride)
|
||||
stage_dilations.append(dilation)
|
||||
cfg['stride'] = stage_strides
|
||||
cfg['dilation'] = stage_dilations
|
||||
cfg['first_dilation'] = stage_first_dilations
|
||||
stage_args = [
|
||||
dict(zip(cfg.keys(), values)) for values in zip(*cfg.values())
|
||||
]
|
||||
return stage_args
|
||||
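# Roughly, _cfg_to_stage_args turns the per-stage config dict of tuples into one dict per
# stage (the values below are a made-up toy config, not the CSPDarkNet53 one):
#   _cfg_to_stage_args(dict(out_chs=(64, 128), depth=(1, 2), stride=(2, 2)))
#   -> [dict(out_chs=64,  depth=1, stride=2, groups=1, dilation=1, first_dilation=1),
#       dict(out_chs=128, depth=2, stride=2, groups=1, dilation=1, first_dilation=1)]
# (strides are only converted into dilations once curr_stride reaches output_stride).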
|
||||
|
||||
class CSPNet(nn.Layer):
|
||||
def __init__(self,
|
||||
cfg,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
output_stride=32,
|
||||
global_pool='avg',
|
||||
drop_rate=0.,
|
||||
act_layer=nn.LeakyReLU,
|
||||
norm_layer=nn.BatchNorm2D,
|
||||
zero_init_last_bn=True,
|
||||
stage_fn=CrossStage,
|
||||
block_fn=DarkBlock):
|
||||
super().__init__()
|
||||
self.class_num = class_num
|
||||
self.drop_rate = drop_rate
|
||||
assert output_stride in (8, 16, 32)
|
||||
layer_args = dict(act_layer=act_layer, norm_layer=norm_layer)
|
||||
|
||||
# Construct the stem
|
||||
self.stem, stem_feat_info = create_stem(in_chans, **cfg['stem'],
|
||||
**layer_args)
|
||||
self.feature_info = [stem_feat_info]
|
||||
prev_chs = stem_feat_info['num_chs']
|
||||
curr_stride = stem_feat_info[
|
||||
'reduction'] # reduction does not include pool
|
||||
if cfg['stem']['pool']:
|
||||
curr_stride *= 2
|
||||
|
||||
# Construct the stages
|
||||
per_stage_args = _cfg_to_stage_args(
|
||||
cfg['stage'], curr_stride=curr_stride, output_stride=output_stride)
|
||||
self.stages = nn.LayerList()
|
||||
for i, sa in enumerate(per_stage_args):
|
||||
self.stages.add_sublayer(
|
||||
str(i),
|
||||
stage_fn(
|
||||
prev_chs, **sa, **layer_args, block_fn=block_fn))
|
||||
prev_chs = sa['out_chs']
|
||||
curr_stride *= sa['stride']
|
||||
self.feature_info += [
|
||||
dict(
|
||||
num_chs=prev_chs,
|
||||
reduction=curr_stride,
|
||||
module=f'stages.{i}')
|
||||
]
|
||||
|
||||
# Construct the head
|
||||
self.num_features = prev_chs
|
||||
|
||||
self.pool = nn.AdaptiveAvgPool2D(1)
|
||||
self.flatten = nn.Flatten(1)
|
||||
self.fc = nn.Linear(
|
||||
prev_chs,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(),
|
||||
bias_attr=ParamAttr())
|
||||
|
||||
def forward(self, x):
|
||||
x = self.stem(x)
|
||||
for stage in self.stages:
|
||||
x = stage(x)
|
||||
x = self.pool(x)
|
||||
x = self.flatten(x)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def CSPDarkNet53(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = CSPNet(MODEL_CFGS["CSPDarkNet53"], block_fn=DarkBlock, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["CSPDarkNet53"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,197 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"DarkNet53":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size,
|
||||
stride,
|
||||
padding,
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=input_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
weight_attr=ParamAttr(name=name + ".conv.weights"),
|
||||
bias_attr=False)
|
||||
|
||||
bn_name = name + ".bn"
|
||||
self._bn = BatchNorm(
|
||||
num_channels=output_channels,
|
||||
act="relu",
|
||||
param_attr=ParamAttr(name=bn_name + ".scale"),
|
||||
bias_attr=ParamAttr(name=bn_name + ".offset"),
|
||||
moving_mean_name=bn_name + ".mean",
|
||||
moving_variance_name=bn_name + ".var")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv(inputs)
|
||||
x = self._bn(x)
|
||||
return x
|
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
|
||||
def __init__(self, input_channels, output_channels, name=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
|
||||
self._conv1 = ConvBNLayer(
|
||||
input_channels, output_channels, 1, 1, 0, name=name + ".0")
|
||||
self._conv2 = ConvBNLayer(
|
||||
output_channels, output_channels * 2, 3, 1, 1, name=name + ".1")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv1(inputs)
|
||||
x = self._conv2(x)
|
||||
return paddle.add(x=inputs, y=x)
|
||||
|
||||
|
||||
class DarkNet(nn.Layer):
|
||||
def __init__(self, class_num=1000):
|
||||
super(DarkNet, self).__init__()
|
||||
|
||||
self.stages = [1, 2, 8, 8, 4]
|
||||
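        # [1, 2, 8, 8, 4] residual blocks per stage; 2 stem convs + 46 block convs
        # (23 blocks x 2) + 4 downsample convs + 1 fully-connected layer give the 53
        # weighted layers of DarkNet53.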
self._conv1 = ConvBNLayer(3, 32, 3, 1, 1, name="yolo_input")
|
||||
self._conv2 = ConvBNLayer(
|
||||
32, 64, 3, 2, 1, name="yolo_input.downsample")
|
||||
|
||||
self._basic_block_01 = BasicBlock(64, 32, name="stage.0.0")
|
||||
self._downsample_0 = ConvBNLayer(
|
||||
64, 128, 3, 2, 1, name="stage.0.downsample")
|
||||
|
||||
self._basic_block_11 = BasicBlock(128, 64, name="stage.1.0")
|
||||
self._basic_block_12 = BasicBlock(128, 64, name="stage.1.1")
|
||||
self._downsample_1 = ConvBNLayer(
|
||||
128, 256, 3, 2, 1, name="stage.1.downsample")
|
||||
|
||||
self._basic_block_21 = BasicBlock(256, 128, name="stage.2.0")
|
||||
self._basic_block_22 = BasicBlock(256, 128, name="stage.2.1")
|
||||
self._basic_block_23 = BasicBlock(256, 128, name="stage.2.2")
|
||||
self._basic_block_24 = BasicBlock(256, 128, name="stage.2.3")
|
||||
self._basic_block_25 = BasicBlock(256, 128, name="stage.2.4")
|
||||
self._basic_block_26 = BasicBlock(256, 128, name="stage.2.5")
|
||||
self._basic_block_27 = BasicBlock(256, 128, name="stage.2.6")
|
||||
self._basic_block_28 = BasicBlock(256, 128, name="stage.2.7")
|
||||
self._downsample_2 = ConvBNLayer(
|
||||
256, 512, 3, 2, 1, name="stage.2.downsample")
|
||||
|
||||
self._basic_block_31 = BasicBlock(512, 256, name="stage.3.0")
|
||||
self._basic_block_32 = BasicBlock(512, 256, name="stage.3.1")
|
||||
self._basic_block_33 = BasicBlock(512, 256, name="stage.3.2")
|
||||
self._basic_block_34 = BasicBlock(512, 256, name="stage.3.3")
|
||||
self._basic_block_35 = BasicBlock(512, 256, name="stage.3.4")
|
||||
self._basic_block_36 = BasicBlock(512, 256, name="stage.3.5")
|
||||
self._basic_block_37 = BasicBlock(512, 256, name="stage.3.6")
|
||||
self._basic_block_38 = BasicBlock(512, 256, name="stage.3.7")
|
||||
self._downsample_3 = ConvBNLayer(
|
||||
512, 1024, 3, 2, 1, name="stage.3.downsample")
|
||||
|
||||
self._basic_block_41 = BasicBlock(1024, 512, name="stage.4.0")
|
||||
self._basic_block_42 = BasicBlock(1024, 512, name="stage.4.1")
|
||||
self._basic_block_43 = BasicBlock(1024, 512, name="stage.4.2")
|
||||
self._basic_block_44 = BasicBlock(1024, 512, name="stage.4.3")
|
||||
|
||||
self._pool = AdaptiveAvgPool2D(1)
|
||||
|
||||
stdv = 1.0 / math.sqrt(1024.0)
|
||||
self._out = Linear(
|
||||
1024,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
name="fc_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv1(inputs)
|
||||
x = self._conv2(x)
|
||||
|
||||
x = self._basic_block_01(x)
|
||||
x = self._downsample_0(x)
|
||||
|
||||
x = self._basic_block_11(x)
|
||||
x = self._basic_block_12(x)
|
||||
x = self._downsample_1(x)
|
||||
|
||||
x = self._basic_block_21(x)
|
||||
x = self._basic_block_22(x)
|
||||
x = self._basic_block_23(x)
|
||||
x = self._basic_block_24(x)
|
||||
x = self._basic_block_25(x)
|
||||
x = self._basic_block_26(x)
|
||||
x = self._basic_block_27(x)
|
||||
x = self._basic_block_28(x)
|
||||
x = self._downsample_2(x)
|
||||
|
||||
x = self._basic_block_31(x)
|
||||
x = self._basic_block_32(x)
|
||||
x = self._basic_block_33(x)
|
||||
x = self._basic_block_34(x)
|
||||
x = self._basic_block_35(x)
|
||||
x = self._basic_block_36(x)
|
||||
x = self._basic_block_37(x)
|
||||
x = self._basic_block_38(x)
|
||||
x = self._downsample_3(x)
|
||||
|
||||
x = self._basic_block_41(x)
|
||||
x = self._basic_block_42(x)
|
||||
x = self._basic_block_43(x)
|
||||
x = self._basic_block_44(x)
|
||||
|
||||
x = self._pool(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
x = self._out(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DarkNet(**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,344 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"DenseNet121":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
|
||||
"DenseNet161":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
|
||||
"DenseNet169":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
|
||||
"DenseNet201":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
|
||||
"DenseNet264":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class BNACConvLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
pad=0,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=None):
|
||||
super(BNACConvLayer, self).__init__()
|
||||
|
||||
self._batch_norm = BatchNorm(
|
||||
num_channels,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=name + '_bn_scale'),
|
||||
bias_attr=ParamAttr(name + '_bn_offset'),
|
||||
moving_mean_name=name + '_bn_mean',
|
||||
moving_variance_name=name + '_bn_variance')
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=pad,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
|
||||
def forward(self, input):
|
||||
y = self._batch_norm(input)
|
||||
y = self._conv(y)
|
||||
return y
|
||||
|
||||
|
||||
class DenseLayer(nn.Layer):
|
||||
def __init__(self, num_channels, growth_rate, bn_size, dropout, name=None):
|
||||
super(DenseLayer, self).__init__()
|
||||
self.dropout = dropout
|
||||
|
||||
self.bn_ac_func1 = BNACConvLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=bn_size * growth_rate,
|
||||
filter_size=1,
|
||||
pad=0,
|
||||
stride=1,
|
||||
name=name + "_x1")
|
||||
|
||||
self.bn_ac_func2 = BNACConvLayer(
|
||||
num_channels=bn_size * growth_rate,
|
||||
num_filters=growth_rate,
|
||||
filter_size=3,
|
||||
pad=1,
|
||||
stride=1,
|
||||
name=name + "_x2")
|
||||
|
||||
if dropout:
|
||||
self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer")
|
||||
|
||||
def forward(self, input):
|
||||
conv = self.bn_ac_func1(input)
|
||||
conv = self.bn_ac_func2(conv)
|
||||
if self.dropout:
|
||||
conv = self.dropout_func(conv)
|
||||
conv = paddle.concat([input, conv], axis=1)
|
||||
return conv
|
||||
|
||||
|
||||
class DenseBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_layers,
|
||||
bn_size,
|
||||
growth_rate,
|
||||
dropout,
|
||||
name=None):
|
||||
super(DenseBlock, self).__init__()
|
||||
self.dropout = dropout
|
||||
|
||||
self.dense_layer_func = []
|
||||
|
||||
pre_channel = num_channels
|
||||
for layer in range(num_layers):
|
||||
self.dense_layer_func.append(
|
||||
self.add_sublayer(
|
||||
"{}_{}".format(name, layer + 1),
|
||||
DenseLayer(
|
||||
num_channels=pre_channel,
|
||||
growth_rate=growth_rate,
|
||||
bn_size=bn_size,
|
||||
dropout=dropout,
|
||||
name=name + '_' + str(layer + 1))))
|
||||
pre_channel = pre_channel + growth_rate
|
||||
|
||||
def forward(self, input):
|
||||
conv = input
|
||||
for func in self.dense_layer_func:
|
||||
conv = func(conv)
|
||||
return conv
|
||||
|
||||
|
||||
class TransitionLayer(nn.Layer):
|
||||
def __init__(self, num_channels, num_output_features, name=None):
|
||||
super(TransitionLayer, self).__init__()
|
||||
|
||||
self.conv_ac_func = BNACConvLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_output_features,
|
||||
filter_size=1,
|
||||
pad=0,
|
||||
stride=1,
|
||||
name=name)
|
||||
|
||||
self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0)
|
||||
|
||||
def forward(self, input):
|
||||
y = self.conv_ac_func(input)
|
||||
y = self.pool2d_avg(y)
|
||||
return y
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
pad=0,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=pad,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=name + '_bn_scale'),
|
||||
bias_attr=ParamAttr(name + '_bn_offset'),
|
||||
moving_mean_name=name + '_bn_mean',
|
||||
moving_variance_name=name + '_bn_variance')
|
||||
|
||||
def forward(self, input):
|
||||
y = self._conv(input)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class DenseNet(nn.Layer):
|
||||
def __init__(self, layers=60, bn_size=4, dropout=0, class_num=1000):
|
||||
super(DenseNet, self).__init__()
|
||||
|
||||
supported_layers = [121, 161, 169, 201, 264]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
densenet_spec = {
|
||||
121: (64, 32, [6, 12, 24, 16]),
|
||||
161: (96, 48, [6, 12, 36, 24]),
|
||||
169: (64, 32, [6, 12, 32, 32]),
|
||||
201: (64, 32, [6, 12, 48, 32]),
|
||||
264: (64, 32, [6, 12, 64, 48])
|
||||
}
|
||||
num_init_features, growth_rate, block_config = densenet_spec[layers]
|
||||
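        # Each densenet_spec entry is (num_init_features, growth_rate, block_config).
        # For layers=121 the channel count evolves 64 -> 256 -> 128 -> 512 -> 256 -> 1024
        # -> 512 -> 1024 (each dense block adds num_layers * growth_rate channels and each
        # transition layer halves them), so the final classifier sees 1024 features.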
|
||||
self.conv1_func = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=num_init_features,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
pad=3,
|
||||
act='relu',
|
||||
name="conv1")
|
||||
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_config = block_config
|
||||
|
||||
self.dense_block_func_list = []
|
||||
self.transition_func_list = []
|
||||
pre_num_channels = num_init_features
|
||||
num_features = num_init_features
|
||||
for i, num_layers in enumerate(block_config):
|
||||
self.dense_block_func_list.append(
|
||||
self.add_sublayer(
|
||||
"db_conv_{}".format(i + 2),
|
||||
DenseBlock(
|
||||
num_channels=pre_num_channels,
|
||||
num_layers=num_layers,
|
||||
bn_size=bn_size,
|
||||
growth_rate=growth_rate,
|
||||
dropout=dropout,
|
||||
name='conv' + str(i + 2))))
|
||||
|
||||
num_features = num_features + num_layers * growth_rate
|
||||
pre_num_channels = num_features
|
||||
|
||||
if i != len(block_config) - 1:
|
||||
self.transition_func_list.append(
|
||||
self.add_sublayer(
|
||||
"tr_conv{}_blk".format(i + 2),
|
||||
TransitionLayer(
|
||||
num_channels=pre_num_channels,
|
||||
num_output_features=num_features // 2,
|
||||
name='conv' + str(i + 2) + "_blk")))
|
||||
pre_num_channels = num_features // 2
|
||||
num_features = num_features // 2
|
||||
|
||||
self.batch_norm = BatchNorm(
|
||||
num_features,
|
||||
act="relu",
|
||||
param_attr=ParamAttr(name='conv5_blk_bn_scale'),
|
||||
bias_attr=ParamAttr(name='conv5_blk_bn_offset'),
|
||||
moving_mean_name='conv5_blk_bn_mean',
|
||||
moving_variance_name='conv5_blk_bn_variance')
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
stdv = 1.0 / math.sqrt(num_features * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
num_features,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, input):
|
||||
conv = self.conv1_func(input)
|
||||
conv = self.pool2d_max(conv)
|
||||
|
||||
for i, num_layers in enumerate(self.block_config):
|
||||
conv = self.dense_block_func_list[i](conv)
|
||||
if i != len(self.block_config) - 1:
|
||||
conv = self.transition_func_list[i](conv)
|
||||
|
||||
conv = self.batch_norm(conv)
|
||||
y = self.pool2d_avg(conv)
|
||||
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DenseNet(layers=121, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DenseNet(layers=161, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DenseNet(layers=169, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DenseNet(layers=201, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DenseNet(layers=264, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,272 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was heavily based on https://github.com/facebookresearch/deit
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"DeiT_tiny_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
|
||||
"DeiT_small_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
|
||||
"DeiT_base_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
|
||||
"DeiT_tiny_distilled_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
|
||||
"DeiT_small_distilled_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
|
||||
"DeiT_base_distilled_patch16_224":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
|
||||
"DeiT_base_patch16_384":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
|
||||
"DeiT_base_distilled_patch16_384":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class DistilledVisionTransformer(VisionTransformer):
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
class_num=1000,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=False,
|
||||
norm_layer='nn.LayerNorm',
|
||||
epsilon=1e-5,
|
||||
**kwargs):
|
||||
super().__init__(
|
||||
img_size=img_size,
|
||||
patch_size=patch_size,
|
||||
class_num=class_num,
|
||||
embed_dim=embed_dim,
|
||||
depth=depth,
|
||||
num_heads=num_heads,
|
||||
mlp_ratio=mlp_ratio,
|
||||
qkv_bias=qkv_bias,
|
||||
norm_layer=norm_layer,
|
||||
epsilon=epsilon,
|
||||
**kwargs)
|
||||
self.pos_embed = self.create_parameter(
|
||||
shape=(1, self.patch_embed.num_patches + 2, self.embed_dim),
|
||||
default_initializer=zeros_)
|
||||
self.add_parameter("pos_embed", self.pos_embed)
|
||||
|
||||
self.dist_token = self.create_parameter(
|
||||
shape=(1, 1, self.embed_dim), default_initializer=zeros_)
|
||||
self.add_parameter("cls_token", self.cls_token)
|
||||
|
||||
self.head_dist = nn.Linear(
|
||||
self.embed_dim,
|
||||
self.class_num) if self.class_num > 0 else Identity()
|
||||
|
||||
trunc_normal_(self.dist_token)
|
||||
trunc_normal_(self.pos_embed)
|
||||
self.head_dist.apply(self._init_weights)
|
||||
|
||||
def forward_features(self, x):
|
||||
B = paddle.shape(x)[0]
|
||||
x = self.patch_embed(x)
|
||||
|
||||
cls_tokens = self.cls_token.expand((B, -1, -1))
|
||||
dist_token = self.dist_token.expand((B, -1, -1))
|
||||
x = paddle.concat((cls_tokens, dist_token, x), axis=1)
|
||||
|
||||
x = x + self.pos_embed
|
||||
x = self.pos_drop(x)
|
||||
|
||||
for blk in self.blocks:
|
||||
x = blk(x)
|
||||
|
||||
x = self.norm(x)
|
||||
return x[:, 0], x[:, 1]
|
||||
|
||||
def forward(self, x):
|
||||
x, x_dist = self.forward_features(x)
|
||||
x = self.head(x)
|
||||
x_dist = self.head_dist(x_dist)
|
||||
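        # At inference, following the DeiT recipe, the predictions of the classification
        # head and the distillation head are simply averaged.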
return (x + x_dist) / 2
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=192,
|
||||
depth=12,
|
||||
num_heads=3,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_tiny_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=384,
|
||||
depth=12,
|
||||
num_heads=6,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_small_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_base_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False,
|
||||
**kwargs):
|
||||
model = DistilledVisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=192,
|
||||
depth=12,
|
||||
num_heads=3,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_tiny_distilled_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_small_distilled_patch16_224(pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = DistilledVisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=384,
|
||||
depth=12,
|
||||
num_heads=6,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_small_distilled_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False,
|
||||
**kwargs):
|
||||
model = DistilledVisionTransformer(
|
||||
patch_size=16,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_base_distilled_patch16_224"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = VisionTransformer(
|
||||
img_size=384,
|
||||
patch_size=16,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_base_patch16_384"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False,
|
||||
**kwargs):
|
||||
model = DistilledVisionTransformer(
|
||||
img_size=384,
|
||||
patch_size=16,
|
||||
embed_dim=768,
|
||||
depth=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
epsilon=1e-6,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["DeiT_base_distilled_patch16_384"],
|
||||
use_ssld=use_ssld)
|
||||
return model
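
A quick end-to-end sanity check for the distilled variants, as a sketch (it assumes paddle is installed and that `class_num` is forwarded to the underlying VisionTransformer, as in the builders above):

    import paddle

    model = DeiT_base_distilled_patch16_384(pretrained=False, class_num=1000)
    model.eval()
    x = paddle.randn([1, 3, 384, 384])
    logits = model(x)  # the two heads are averaged, so the shape is [1, 1000]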
|
||||
@ -0,0 +1,528 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/ucbdrive/dla
|
||||
|
||||
import math
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from paddle.nn.initializer import Normal, Constant
|
||||
|
||||
from ppcls.arch.backbone.base.theseus_layer import Identity
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"DLA34":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
|
||||
"DLA46_c":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams",
|
||||
"DLA46x_c":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams",
|
||||
"DLA60":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams",
|
||||
"DLA60x":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams",
|
||||
"DLA60x_c":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams",
|
||||
"DLA102":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams",
|
||||
"DLA102x":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams",
|
||||
"DLA102x2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams",
|
||||
"DLA169":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
zeros_ = Constant(value=0.)
|
||||
ones_ = Constant(value=1.)
|
||||
|
||||
|
||||
class DlaBasic(nn.Layer):
|
||||
def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
|
||||
super(DlaBasic, self).__init__()
|
||||
self.conv1 = nn.Conv2D(
|
||||
inplanes,
|
||||
planes,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=dilation,
|
||||
bias_attr=False,
|
||||
dilation=dilation)
|
||||
self.bn1 = nn.BatchNorm2D(planes)
|
||||
self.relu = nn.ReLU()
|
||||
self.conv2 = nn.Conv2D(
|
||||
planes,
|
||||
planes,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=dilation,
|
||||
bias_attr=False,
|
||||
dilation=dilation)
|
||||
self.bn2 = nn.BatchNorm2D(planes)
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x, residual=None):
|
||||
if residual is None:
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class DlaBottleneck(nn.Layer):
|
||||
expansion = 2
|
||||
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
outplanes,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
cardinality=1,
|
||||
base_width=64):
|
||||
super(DlaBottleneck, self).__init__()
|
||||
self.stride = stride
|
||||
mid_planes = int(
|
||||
math.floor(outplanes * (base_width / 64)) * cardinality)
|
||||
mid_planes = mid_planes // self.expansion
|
||||
|
||||
self.conv1 = nn.Conv2D(
|
||||
inplanes, mid_planes, kernel_size=1, bias_attr=False)
|
||||
self.bn1 = nn.BatchNorm2D(mid_planes)
|
||||
self.conv2 = nn.Conv2D(
|
||||
mid_planes,
|
||||
mid_planes,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=dilation,
|
||||
bias_attr=False,
|
||||
dilation=dilation,
|
||||
groups=cardinality)
|
||||
self.bn2 = nn.BatchNorm2D(mid_planes)
|
||||
self.conv3 = nn.Conv2D(
|
||||
mid_planes, outplanes, kernel_size=1, bias_attr=False)
|
||||
self.bn3 = nn.BatchNorm2D(outplanes)
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x, residual=None):
|
||||
if residual is None:
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class DlaRoot(nn.Layer):
|
||||
def __init__(self, in_channels, out_channels, kernel_size, residual):
|
||||
super(DlaRoot, self).__init__()
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels,
|
||||
out_channels,
|
||||
1,
|
||||
stride=1,
|
||||
bias_attr=False,
|
||||
padding=(kernel_size - 1) // 2)
|
||||
self.bn = nn.BatchNorm2D(out_channels)
|
||||
self.relu = nn.ReLU()
|
||||
self.residual = residual
|
||||
|
||||
def forward(self, *x):
|
||||
children = x
|
||||
x = self.conv(paddle.concat(x, 1))
|
||||
x = self.bn(x)
|
||||
if self.residual:
|
||||
x += children[0]
|
||||
x = self.relu(x)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class DlaTree(nn.Layer):
|
||||
def __init__(self,
|
||||
levels,
|
||||
block,
|
||||
in_channels,
|
||||
out_channels,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
cardinality=1,
|
||||
base_width=64,
|
||||
level_root=False,
|
||||
root_dim=0,
|
||||
root_kernel_size=1,
|
||||
root_residual=False):
|
||||
super(DlaTree, self).__init__()
|
||||
if root_dim == 0:
|
||||
root_dim = 2 * out_channels
|
||||
if level_root:
|
||||
root_dim += in_channels
|
||||
|
||||
self.downsample = nn.MaxPool2D(
|
||||
stride, stride=stride) if stride > 1 else Identity()
|
||||
self.project = Identity()
|
||||
cargs = dict(
|
||||
dilation=dilation, cardinality=cardinality, base_width=base_width)
|
||||
|
||||
if levels == 1:
|
||||
self.tree1 = block(in_channels, out_channels, stride, **cargs)
|
||||
self.tree2 = block(out_channels, out_channels, 1, **cargs)
|
||||
if in_channels != out_channels:
|
||||
self.project = nn.Sequential(
|
||||
nn.Conv2D(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
bias_attr=False),
|
||||
nn.BatchNorm2D(out_channels))
|
||||
else:
|
||||
cargs.update(
|
||||
dict(
|
||||
root_kernel_size=root_kernel_size,
|
||||
root_residual=root_residual))
|
||||
self.tree1 = DlaTree(
|
||||
levels - 1,
|
||||
block,
|
||||
in_channels,
|
||||
out_channels,
|
||||
stride,
|
||||
root_dim=0,
|
||||
**cargs)
|
||||
self.tree2 = DlaTree(
|
||||
levels - 1,
|
||||
block,
|
||||
out_channels,
|
||||
out_channels,
|
||||
root_dim=root_dim + out_channels,
|
||||
**cargs)
|
||||
|
||||
if levels == 1:
|
||||
self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
|
||||
root_residual)
|
||||
|
||||
self.level_root = level_root
|
||||
self.root_dim = root_dim
|
||||
self.levels = levels
|
||||
|
||||
def forward(self, x, residual=None, children=None):
|
||||
children = [] if children is None else children
|
||||
bottom = self.downsample(x)
|
||||
residual = self.project(bottom)
|
||||
|
||||
if self.level_root:
|
||||
children.append(bottom)
|
||||
x1 = self.tree1(x, residual)
|
||||
|
||||
if self.levels == 1:
|
||||
x2 = self.tree2(x1)
|
||||
x = self.root(x2, x1, *children)
|
||||
else:
|
||||
children.append(x1)
|
||||
x = self.tree2(x1, children=children)
|
||||
return x
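
For intuition: a `DlaTree` with `levels == 1` is just two blocks whose outputs are merged by `DlaRoot`; with `levels > 1` it recurses into two sub-trees and only the innermost trees own a root. A small sketch using the classes defined in this file (shapes assume the default `cardinality` and `base_width`):

    import paddle

    tree = DlaTree(2, DlaBasic, 64, 128, stride=2)
    y = tree(paddle.randn([1, 64, 56, 56]))  # downsampled and widened to [1, 128, 28, 28]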
|
||||
|
||||
|
||||
class DLA(nn.Layer):
|
||||
def __init__(self,
|
||||
levels,
|
||||
channels,
|
||||
in_chans=3,
|
||||
cardinality=1,
|
||||
base_width=64,
|
||||
block=DlaBottleneck,
|
||||
residual_root=False,
|
||||
drop_rate=0.0,
|
||||
class_num=1000,
|
||||
with_pool=True):
|
||||
super(DLA, self).__init__()
|
||||
self.channels = channels
|
||||
self.class_num = class_num
|
||||
self.with_pool = with_pool
|
||||
self.cardinality = cardinality
|
||||
self.base_width = base_width
|
||||
self.drop_rate = drop_rate
|
||||
|
||||
self.base_layer = nn.Sequential(
|
||||
nn.Conv2D(
|
||||
in_chans,
|
||||
channels[0],
|
||||
kernel_size=7,
|
||||
stride=1,
|
||||
padding=3,
|
||||
bias_attr=False),
|
||||
nn.BatchNorm2D(channels[0]),
|
||||
nn.ReLU())
|
||||
|
||||
self.level0 = self._make_conv_level(channels[0], channels[0],
|
||||
levels[0])
|
||||
self.level1 = self._make_conv_level(
|
||||
channels[0], channels[1], levels[1], stride=2)
|
||||
|
||||
cargs = dict(
|
||||
cardinality=cardinality,
|
||||
base_width=base_width,
|
||||
root_residual=residual_root)
|
||||
|
||||
self.level2 = DlaTree(
|
||||
levels[2],
|
||||
block,
|
||||
channels[1],
|
||||
channels[2],
|
||||
2,
|
||||
level_root=False,
|
||||
**cargs)
|
||||
self.level3 = DlaTree(
|
||||
levels[3],
|
||||
block,
|
||||
channels[2],
|
||||
channels[3],
|
||||
2,
|
||||
level_root=True,
|
||||
**cargs)
|
||||
self.level4 = DlaTree(
|
||||
levels[4],
|
||||
block,
|
||||
channels[3],
|
||||
channels[4],
|
||||
2,
|
||||
level_root=True,
|
||||
**cargs)
|
||||
self.level5 = DlaTree(
|
||||
levels[5],
|
||||
block,
|
||||
channels[4],
|
||||
channels[5],
|
||||
2,
|
||||
level_root=True,
|
||||
**cargs)
|
||||
|
||||
self.feature_info = [
|
||||
# rare to have a meaningful stride 1 level
|
||||
dict(
|
||||
num_chs=channels[0], reduction=1, module='level0'),
|
||||
dict(
|
||||
num_chs=channels[1], reduction=2, module='level1'),
|
||||
dict(
|
||||
num_chs=channels[2], reduction=4, module='level2'),
|
||||
dict(
|
||||
num_chs=channels[3], reduction=8, module='level3'),
|
||||
dict(
|
||||
num_chs=channels[4], reduction=16, module='level4'),
|
||||
dict(
|
||||
num_chs=channels[5], reduction=32, module='level5'),
|
||||
]
|
||||
|
||||
self.num_features = channels[-1]
|
||||
|
||||
if with_pool:
|
||||
self.global_pool = nn.AdaptiveAvgPool2D(1)
|
||||
|
||||
if class_num > 0:
|
||||
self.fc = nn.Conv2D(self.num_features, class_num, 1)
|
||||
|
||||
for m in self.sublayers():
|
||||
if isinstance(m, nn.Conv2D):
|
||||
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
|
||||
normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
|
||||
normal_(m.weight)
|
||||
elif isinstance(m, nn.BatchNorm2D):
|
||||
ones_(m.weight)
|
||||
zeros_(m.bias)
|
||||
|
||||
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
|
||||
modules = []
|
||||
for i in range(convs):
|
||||
modules.extend([
|
||||
nn.Conv2D(
|
||||
inplanes,
|
||||
planes,
|
||||
kernel_size=3,
|
||||
stride=stride if i == 0 else 1,
|
||||
padding=dilation,
|
||||
bias_attr=False,
|
||||
dilation=dilation), nn.BatchNorm2D(planes), nn.ReLU()
|
||||
])
|
||||
inplanes = planes
|
||||
return nn.Sequential(*modules)
|
||||
|
||||
def forward_features(self, x):
|
||||
x = self.base_layer(x)
|
||||
|
||||
x = self.level0(x)
|
||||
x = self.level1(x)
|
||||
x = self.level2(x)
|
||||
x = self.level3(x)
|
||||
x = self.level4(x)
|
||||
x = self.level5(x)
|
||||
|
||||
return x
|
||||
|
||||
def forward(self, x):
|
||||
x = self.forward_features(x)
|
||||
|
||||
if self.with_pool:
|
||||
x = self.global_pool(x)
|
||||
|
||||
if self.drop_rate > 0.:
|
||||
x = F.dropout(x, p=self.drop_rate, training=self.training)
|
||||
|
||||
if self.class_num > 0:
|
||||
x = self.fc(x)
|
||||
x = x.flatten(1)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def DLA34(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 2, 1),
|
||||
channels=(16, 32, 64, 128, 256, 512),
|
||||
block=DlaBasic,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
|
||||
return model
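
A minimal usage sketch (weights stay random with `pretrained=False`; the classifier is a 1x1 convolution followed by a flatten, so the output is already [N, class_num]):

    import paddle

    model = DLA34(pretrained=False, class_num=1000)
    model.eval()
    out = model(paddle.randn([1, 3, 224, 224]))  # -> [1, 1000]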
|
||||
|
||||
|
||||
def DLA46_c(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 2, 1),
|
||||
channels=(16, 32, 64, 64, 128, 256),
|
||||
block=DlaBottleneck,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA46x_c(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 2, 1),
|
||||
channels=(16, 32, 64, 64, 128, 256),
|
||||
block=DlaBottleneck,
|
||||
cardinality=32,
|
||||
base_width=4,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA60(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 3, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA60x(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 3, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
cardinality=32,
|
||||
base_width=4,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA60x_c(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 2, 3, 1),
|
||||
channels=(16, 32, 64, 64, 128, 256),
|
||||
block=DlaBottleneck,
|
||||
cardinality=32,
|
||||
base_width=4,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA102(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 3, 4, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
residual_root=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA102x(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 3, 4, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
cardinality=32,
|
||||
base_width=4,
|
||||
residual_root=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA102x2(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 1, 3, 4, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
cardinality=64,
|
||||
base_width=4,
|
||||
residual_root=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
|
||||
return model
|
||||
|
||||
|
||||
def DLA169(pretrained=False, **kwargs):
|
||||
model = DLA(levels=(1, 1, 2, 3, 5, 1),
|
||||
channels=(16, 32, 128, 256, 512, 1024),
|
||||
block=DlaBottleneck,
|
||||
residual_root=True,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
|
||||
return model
|
||||
@ -0,0 +1,451 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"DPN68":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
|
||||
"DPN92":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
|
||||
"DPN98":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
|
||||
"DPN107":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
|
||||
"DPN131":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
pad=0,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=pad,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=name + '_bn_scale'),
|
||||
bias_attr=ParamAttr(name + '_bn_offset'),
|
||||
moving_mean_name=name + '_bn_mean',
|
||||
moving_variance_name=name + '_bn_variance')
|
||||
|
||||
def forward(self, input):
|
||||
y = self._conv(input)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BNACConvLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
pad=0,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=None):
|
||||
super(BNACConvLayer, self).__init__()
|
||||
self.num_channels = num_channels
|
||||
|
||||
self._batch_norm = BatchNorm(
|
||||
num_channels,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=name + '_bn_scale'),
|
||||
bias_attr=ParamAttr(name + '_bn_offset'),
|
||||
moving_mean_name=name + '_bn_mean',
|
||||
moving_variance_name=name + '_bn_variance')
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=pad,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
|
||||
def forward(self, input):
|
||||
y = self._batch_norm(input)
|
||||
y = self._conv(y)
|
||||
return y
|
||||
|
||||
|
||||
class DualPathFactory(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_1x1_a,
|
||||
num_3x3_b,
|
||||
num_1x1_c,
|
||||
inc,
|
||||
G,
|
||||
_type='normal',
|
||||
name=None):
|
||||
super(DualPathFactory, self).__init__()
|
||||
|
||||
self.num_1x1_c = num_1x1_c
|
||||
self.inc = inc
|
||||
self.name = name
|
||||
|
||||
kw = 3
|
||||
kh = 3
|
||||
pw = (kw - 1) // 2
|
||||
ph = (kh - 1) // 2
|
||||
|
||||
# type
|
||||
if _type == 'proj':
|
||||
key_stride = 1
|
||||
self.has_proj = True
|
||||
elif _type == 'down':
|
||||
key_stride = 2
|
||||
self.has_proj = True
|
||||
elif _type == 'normal':
|
||||
key_stride = 1
|
||||
self.has_proj = False
|
||||
else:
|
||||
print("not implemented now!!!")
|
||||
sys.exit(1)
|
||||
|
||||
data_in_ch = sum(num_channels) if isinstance(num_channels,
|
||||
list) else num_channels
|
||||
|
||||
if self.has_proj:
|
||||
self.c1x1_w_func = BNACConvLayer(
|
||||
num_channels=data_in_ch,
|
||||
num_filters=num_1x1_c + 2 * inc,
|
||||
filter_size=(1, 1),
|
||||
pad=(0, 0),
|
||||
stride=(key_stride, key_stride),
|
||||
name=name + "_match")
|
||||
|
||||
self.c1x1_a_func = BNACConvLayer(
|
||||
num_channels=data_in_ch,
|
||||
num_filters=num_1x1_a,
|
||||
filter_size=(1, 1),
|
||||
pad=(0, 0),
|
||||
name=name + "_conv1")
|
||||
|
||||
self.c3x3_b_func = BNACConvLayer(
|
||||
num_channels=num_1x1_a,
|
||||
num_filters=num_3x3_b,
|
||||
filter_size=(kw, kh),
|
||||
pad=(pw, ph),
|
||||
stride=(key_stride, key_stride),
|
||||
groups=G,
|
||||
name=name + "_conv2")
|
||||
|
||||
self.c1x1_c_func = BNACConvLayer(
|
||||
num_channels=num_3x3_b,
|
||||
num_filters=num_1x1_c + inc,
|
||||
filter_size=(1, 1),
|
||||
pad=(0, 0),
|
||||
name=name + "_conv3")
|
||||
|
||||
def forward(self, input):
|
||||
# PROJ
|
||||
if isinstance(input, list):
|
||||
data_in = paddle.concat([input[0], input[1]], axis=1)
|
||||
else:
|
||||
data_in = input
|
||||
|
||||
if self.has_proj:
|
||||
c1x1_w = self.c1x1_w_func(data_in)
|
||||
data_o1, data_o2 = paddle.split(
|
||||
c1x1_w, num_or_sections=[self.num_1x1_c, 2 * self.inc], axis=1)
|
||||
else:
|
||||
data_o1 = input[0]
|
||||
data_o2 = input[1]
|
||||
|
||||
c1x1_a = self.c1x1_a_func(data_in)
|
||||
c3x3_b = self.c3x3_b_func(c1x1_a)
|
||||
c1x1_c = self.c1x1_c_func(c3x3_b)
|
||||
|
||||
c1x1_c1, c1x1_c2 = paddle.split(
|
||||
c1x1_c, num_or_sections=[self.num_1x1_c, self.inc], axis=1)
|
||||
|
||||
# OUTPUTS
|
||||
summ = paddle.add(x=data_o1, y=c1x1_c1)
|
||||
dense = paddle.concat([data_o2, c1x1_c2], axis=1)
|
||||
# tensor, channels
|
||||
return [summ, dense]
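
Keeping the two paths in a list lets the next block treat them differently: `summ` is the ResNet-style residual path with a fixed `num_1x1_c` channels, while `dense` is the DenseNet-style path that grows by `inc` channels per block. A shape sketch for a 'normal' block (sizes are illustrative):

    # input : [summ (N, num_1x1_c, H, W), dense (N, D, H, W)]
    # output: [summ (N, num_1x1_c, H, W), dense (N, D + inc, H, W)]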
|
||||
|
||||
|
||||
class DPN(nn.Layer):
|
||||
def __init__(self, layers=68, class_num=1000):
|
||||
super(DPN, self).__init__()
|
||||
|
||||
self._class_num = class_num
|
||||
|
||||
args = self.get_net_args(layers)
|
||||
bws = args['bw']
|
||||
inc_sec = args['inc_sec']
|
||||
rs = args['r']
|
||||
k_r = args['k_r']
|
||||
k_sec = args['k_sec']
|
||||
G = args['G']
|
||||
init_num_filter = args['init_num_filter']
|
||||
init_filter_size = args['init_filter_size']
|
||||
init_padding = args['init_padding']
|
||||
|
||||
self.k_sec = k_sec
|
||||
|
||||
self.conv1_x_1_func = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=init_num_filter,
|
||||
filter_size=init_filter_size,
|
||||
stride=2,
|
||||
pad=init_padding,
|
||||
act='relu',
|
||||
name="conv1")
|
||||
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
num_channel_dpn = init_num_filter
|
||||
|
||||
self.dpn_func_list = []
|
||||
#conv2 - conv5
|
||||
match_list, num = [], 0
|
||||
for gc in range(4):
|
||||
bw = bws[gc]
|
||||
inc = inc_sec[gc]
|
||||
R = (k_r * bw) // rs[gc]
|
||||
if gc == 0:
|
||||
_type1 = 'proj'
|
||||
_type2 = 'normal'
|
||||
match = 1
|
||||
else:
|
||||
_type1 = 'down'
|
||||
_type2 = 'normal'
|
||||
match = match + k_sec[gc - 1]
|
||||
match_list.append(match)
|
||||
self.dpn_func_list.append(
|
||||
self.add_sublayer(
|
||||
"dpn{}".format(match),
|
||||
DualPathFactory(
|
||||
num_channels=num_channel_dpn,
|
||||
num_1x1_a=R,
|
||||
num_3x3_b=R,
|
||||
num_1x1_c=bw,
|
||||
inc=inc,
|
||||
G=G,
|
||||
_type=_type1,
|
||||
name="dpn" + str(match))))
|
||||
num_channel_dpn = [bw, 3 * inc]
|
||||
|
||||
for i_ly in range(2, k_sec[gc] + 1):
|
||||
num += 1
|
||||
if num in match_list:
|
||||
num += 1
|
||||
self.dpn_func_list.append(
|
||||
self.add_sublayer(
|
||||
"dpn{}".format(num),
|
||||
DualPathFactory(
|
||||
num_channels=num_channel_dpn,
|
||||
num_1x1_a=R,
|
||||
num_3x3_b=R,
|
||||
num_1x1_c=bw,
|
||||
inc=inc,
|
||||
G=G,
|
||||
_type=_type2,
|
||||
name="dpn" + str(num))))
|
||||
|
||||
num_channel_dpn = [
|
||||
num_channel_dpn[0], num_channel_dpn[1] + inc
|
||||
]
|
||||
|
||||
out_channel = sum(num_channel_dpn)
|
||||
|
||||
self.conv5_x_x_bn = BatchNorm(
|
||||
num_channels=sum(num_channel_dpn),
|
||||
act="relu",
|
||||
param_attr=ParamAttr(name='final_concat_bn_scale'),
|
||||
bias_attr=ParamAttr('final_concat_bn_offset'),
|
||||
moving_mean_name='final_concat_bn_mean',
|
||||
moving_variance_name='final_concat_bn_variance')
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
stdv = 0.01
|
||||
|
||||
self.out = Linear(
|
||||
out_channel,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, input):
|
||||
conv1_x_1 = self.conv1_x_1_func(input)
|
||||
convX_x_x = self.pool2d_max(conv1_x_1)
|
||||
|
||||
dpn_idx = 0
|
||||
for gc in range(4):
|
||||
convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
|
||||
dpn_idx += 1
|
||||
for i_ly in range(2, self.k_sec[gc] + 1):
|
||||
convX_x_x = self.dpn_func_list[dpn_idx](convX_x_x)
|
||||
dpn_idx += 1
|
||||
|
||||
conv5_x_x = paddle.concat(convX_x_x, axis=1)
|
||||
conv5_x_x = self.conv5_x_x_bn(conv5_x_x)
|
||||
|
||||
y = self.pool2d_avg(conv5_x_x)
|
||||
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
def get_net_args(self, layers):
|
||||
if layers == 68:
|
||||
k_r = 128
|
||||
G = 32
|
||||
k_sec = [3, 4, 12, 3]
|
||||
inc_sec = [16, 32, 32, 64]
|
||||
bw = [64, 128, 256, 512]
|
||||
r = [64, 64, 64, 64]
|
||||
init_num_filter = 10
|
||||
init_filter_size = 3
|
||||
init_padding = 1
|
||||
elif layers == 92:
|
||||
k_r = 96
|
||||
G = 32
|
||||
k_sec = [3, 4, 20, 3]
|
||||
inc_sec = [16, 32, 24, 128]
|
||||
bw = [256, 512, 1024, 2048]
|
||||
r = [256, 256, 256, 256]
|
||||
init_num_filter = 64
|
||||
init_filter_size = 7
|
||||
init_padding = 3
|
||||
elif layers == 98:
|
||||
k_r = 160
|
||||
G = 40
|
||||
k_sec = [3, 6, 20, 3]
|
||||
inc_sec = [16, 32, 32, 128]
|
||||
bw = [256, 512, 1024, 2048]
|
||||
r = [256, 256, 256, 256]
|
||||
init_num_filter = 96
|
||||
init_filter_size = 7
|
||||
init_padding = 3
|
||||
elif layers == 107:
|
||||
k_r = 200
|
||||
G = 50
|
||||
k_sec = [4, 8, 20, 3]
|
||||
inc_sec = [20, 64, 64, 128]
|
||||
bw = [256, 512, 1024, 2048]
|
||||
r = [256, 256, 256, 256]
|
||||
init_num_filter = 128
|
||||
init_filter_size = 7
|
||||
init_padding = 3
|
||||
elif layers == 131:
|
||||
k_r = 160
|
||||
G = 40
|
||||
k_sec = [4, 8, 28, 3]
|
||||
inc_sec = [16, 32, 32, 128]
|
||||
bw = [256, 512, 1024, 2048]
|
||||
r = [256, 256, 256, 256]
|
||||
init_num_filter = 128
|
||||
init_filter_size = 7
|
||||
init_padding = 3
|
||||
else:
|
||||
raise NotImplementedError
|
||||
net_arg = {
|
||||
'k_r': k_r,
|
||||
'G': G,
|
||||
'k_sec': k_sec,
|
||||
'inc_sec': inc_sec,
|
||||
'bw': bw,
|
||||
'r': r
|
||||
}
|
||||
net_arg['init_num_filter'] = init_num_filter
|
||||
net_arg['init_filter_size'] = init_filter_size
|
||||
net_arg['init_padding'] = init_padding
|
||||
|
||||
return net_arg
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def DPN68(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DPN(layers=68, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DPN68"])
|
||||
return model
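
A quick sanity check, as a sketch (`class_num` is the head size declared in `DPN.__init__`):

    import paddle

    model = DPN68(pretrained=False, class_num=1000)
    model.eval()
    out = model(paddle.randn([1, 3, 224, 224]))  # -> [1, 1000]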
|
||||
|
||||
|
||||
def DPN92(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DPN(layers=92, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DPN92"])
|
||||
return model
|
||||
|
||||
|
||||
def DPN98(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DPN(layers=98, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DPN98"])
|
||||
return model
|
||||
|
||||
|
||||
def DPN107(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DPN(layers=107, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DPN107"])
|
||||
return model
|
||||
|
||||
|
||||
def DPN131(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = DPN(layers=131, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["DPN131"])
|
||||
return model
|
||||
@ -0,0 +1,976 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
import math
|
||||
import collections
|
||||
import re
|
||||
import copy
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"EfficientNetB0_small":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
|
||||
"EfficientNetB0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
|
||||
"EfficientNetB1":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
|
||||
"EfficientNetB2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
|
||||
"EfficientNetB3":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
|
||||
"EfficientNetB4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
|
||||
"EfficientNetB5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
|
||||
"EfficientNetB6":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
|
||||
"EfficientNetB7":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
GlobalParams = collections.namedtuple('GlobalParams', [
|
||||
'batch_norm_momentum',
|
||||
'batch_norm_epsilon',
|
||||
'dropout_rate',
|
||||
'num_classes',
|
||||
'width_coefficient',
|
||||
'depth_coefficient',
|
||||
'depth_divisor',
|
||||
'min_depth',
|
||||
'drop_connect_rate',
|
||||
])
|
||||
|
||||
BlockArgs = collections.namedtuple('BlockArgs', [
|
||||
'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
|
||||
'expand_ratio', 'id_skip', 'stride', 'se_ratio'
|
||||
])
|
||||
|
||||
GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields)
|
||||
BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields)
|
||||
|
||||
|
||||
def efficientnet_params(model_name):
|
||||
""" Map EfficientNet model name to parameter coefficients. """
|
||||
params_dict = {
|
||||
# Coefficients: width,depth,resolution,dropout
|
||||
'efficientnet-b0': (1.0, 1.0, 224, 0.2),
|
||||
'efficientnet-b1': (1.0, 1.1, 240, 0.2),
|
||||
'efficientnet-b2': (1.1, 1.2, 260, 0.3),
|
||||
'efficientnet-b3': (1.2, 1.4, 300, 0.3),
|
||||
'efficientnet-b4': (1.4, 1.8, 380, 0.4),
|
||||
'efficientnet-b5': (1.6, 2.2, 456, 0.4),
|
||||
'efficientnet-b6': (1.8, 2.6, 528, 0.5),
|
||||
'efficientnet-b7': (2.0, 3.1, 600, 0.5),
|
||||
}
|
||||
return params_dict[model_name]
|
||||
|
||||
|
||||
def efficientnet(width_coefficient=None,
|
||||
depth_coefficient=None,
|
||||
dropout_rate=0.2,
|
||||
drop_connect_rate=0.2):
|
||||
""" Get block arguments according to parameter and coefficients. """
|
||||
blocks_args = [
|
||||
'r1_k3_s11_e1_i32_o16_se0.25',
|
||||
'r2_k3_s22_e6_i16_o24_se0.25',
|
||||
'r2_k5_s22_e6_i24_o40_se0.25',
|
||||
'r3_k3_s22_e6_i40_o80_se0.25',
|
||||
'r3_k5_s11_e6_i80_o112_se0.25',
|
||||
'r4_k5_s22_e6_i112_o192_se0.25',
|
||||
'r1_k3_s11_e6_i192_o320_se0.25',
|
||||
]
|
||||
blocks_args = BlockDecoder.decode(blocks_args)
|
||||
|
||||
global_params = GlobalParams(
|
||||
batch_norm_momentum=0.99,
|
||||
batch_norm_epsilon=1e-3,
|
||||
dropout_rate=dropout_rate,
|
||||
drop_connect_rate=drop_connect_rate,
|
||||
num_classes=1000,
|
||||
width_coefficient=width_coefficient,
|
||||
depth_coefficient=depth_coefficient,
|
||||
depth_divisor=8,
|
||||
min_depth=None)
|
||||
|
||||
return blocks_args, global_params
|
||||
|
||||
|
||||
def get_model_params(model_name, override_params):
|
||||
""" Get the block args and global params for a given model """
|
||||
if model_name.startswith('efficientnet'):
|
||||
w, d, _, p = efficientnet_params(model_name)
|
||||
blocks_args, global_params = efficientnet(
|
||||
width_coefficient=w, depth_coefficient=d, dropout_rate=p)
|
||||
else:
|
||||
raise NotImplementedError('model name is not pre-defined: %s' %
|
||||
model_name)
|
||||
if override_params:
|
||||
global_params = global_params._replace(**override_params)
|
||||
return blocks_args, global_params
|
||||
|
||||
|
||||
def round_filters(filters, global_params):
|
||||
""" Calculate and round number of filters based on depth multiplier. """
|
||||
multiplier = global_params.width_coefficient
|
||||
if not multiplier:
|
||||
return filters
|
||||
divisor = global_params.depth_divisor
|
||||
min_depth = global_params.min_depth
|
||||
filters *= multiplier
|
||||
min_depth = min_depth or divisor
|
||||
new_filters = max(min_depth,
|
||||
int(filters + divisor / 2) // divisor * divisor)
|
||||
if new_filters < 0.9 * filters: # prevent rounding by more than 10%
|
||||
new_filters += divisor
|
||||
return int(new_filters)
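
A worked example with the B4 width coefficient (1.4, divisor 8): 32 filters scale to 44.8, which snaps to the nearest multiple of 8, giving 48; the 10% guard does not trigger because 48 >= 0.9 * 44.8.

    gp = GlobalParams(width_coefficient=1.4, depth_divisor=8, min_depth=None)
    assert round_filters(32, gp) == 48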
|
||||
|
||||
|
||||
def round_repeats(repeats, global_params):
|
||||
""" Round number of filters based on depth multiplier. """
|
||||
multiplier = global_params.depth_coefficient
|
||||
if not multiplier:
|
||||
return repeats
|
||||
return int(math.ceil(multiplier * repeats))
|
||||
|
||||
|
||||
class BlockDecoder(object):
|
||||
"""
|
||||
Block Decoder, straight from the official TensorFlow repository.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _decode_block_string(block_string):
|
||||
""" Gets a block through a string notation of arguments. """
|
||||
assert isinstance(block_string, str)
|
||||
|
||||
ops = block_string.split('_')
|
||||
options = {}
|
||||
for op in ops:
|
||||
splits = re.split(r'(\d.*)', op)
|
||||
if len(splits) >= 2:
|
||||
key, value = splits[:2]
|
||||
options[key] = value
|
||||
|
||||
# Check stride
|
||||
cond_1 = ('s' in options and len(options['s']) == 1)
|
||||
cond_2 = ((len(options['s']) == 2) and
|
||||
(options['s'][0] == options['s'][1]))
|
||||
assert (cond_1 or cond_2)
|
||||
|
||||
return BlockArgs(
|
||||
kernel_size=int(options['k']),
|
||||
num_repeat=int(options['r']),
|
||||
input_filters=int(options['i']),
|
||||
output_filters=int(options['o']),
|
||||
expand_ratio=int(options['e']),
|
||||
id_skip=('noskip' not in block_string),
|
||||
se_ratio=float(options['se']) if 'se' in options else None,
|
||||
stride=[int(options['s'][0])])
|
||||
|
||||
@staticmethod
|
||||
def _encode_block_string(block):
|
||||
"""Encodes a block to a string."""
|
||||
args = [
|
||||
'r%d' % block.num_repeat, 'k%d' % block.kernel_size, 's%d%d' %
|
||||
(block.strides[0], block.strides[1]), 'e%s' % block.expand_ratio,
|
||||
'i%d' % block.input_filters, 'o%d' % block.output_filters
|
||||
]
|
||||
if 0 < block.se_ratio <= 1:
|
||||
args.append('se%s' % block.se_ratio)
|
||||
if block.id_skip is False:
|
||||
args.append('noskip')
|
||||
return '_'.join(args)
|
||||
|
||||
@staticmethod
|
||||
def decode(string_list):
|
||||
"""
|
||||
Decode a list of string notations to specify blocks in the network.
|
||||
|
||||
string_list: list of strings, each string is a notation of block
|
||||
return
|
||||
list of BlockArgs namedtuples of block args
|
||||
"""
|
||||
assert isinstance(string_list, list)
|
||||
blocks_args = []
|
||||
for block_string in string_list:
|
||||
blocks_args.append(BlockDecoder._decode_block_string(block_string))
|
||||
return blocks_args
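
For example, the stage string 'r2_k5_s22_e6_i24_o40_se0.25' from `efficientnet()` above decodes to two repeats of a 5x5, stride-2, expand-6 block going from 24 to 40 channels with an SE ratio of 0.25:

    args = BlockDecoder.decode(['r2_k5_s22_e6_i24_o40_se0.25'])[0]
    # BlockArgs(kernel_size=5, num_repeat=2, input_filters=24, output_filters=40,
    #           expand_ratio=6, id_skip=True, stride=[2], se_ratio=0.25)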
|
||||
|
||||
@staticmethod
|
||||
def encode(blocks_args):
|
||||
"""
|
||||
Encodes a list of BlockArgs to a list of strings.
|
||||
|
||||
:param blocks_args: a list of BlockArgs namedtuples of block args
|
||||
:return: a list of strings, each string is a notation of block
|
||||
"""
|
||||
block_strings = []
|
||||
for block in blocks_args:
|
||||
block_strings.append(BlockDecoder._encode_block_string(block))
|
||||
return block_strings
|
||||
|
||||
|
||||
def initial_type(name, use_bias=False):
|
||||
param_attr = ParamAttr(name=name + "_weights")
|
||||
if use_bias:
|
||||
bias_attr = ParamAttr(name=name + "_offset")
|
||||
else:
|
||||
bias_attr = False
|
||||
return param_attr, bias_attr
|
||||
|
||||
|
||||
def init_batch_norm_layer(name="batch_norm"):
|
||||
param_attr = ParamAttr(name=name + "_scale")
|
||||
bias_attr = ParamAttr(name=name + "_offset")
|
||||
return param_attr, bias_attr
|
||||
|
||||
|
||||
def init_fc_layer(name="fc"):
|
||||
param_attr = ParamAttr(name=name + "_weights")
|
||||
bias_attr = ParamAttr(name=name + "_offset")
|
||||
return param_attr, bias_attr
|
||||
|
||||
|
||||
def cal_padding(img_size, stride, filter_size, dilation=1):
|
||||
"""Calculate padding size."""
|
||||
if img_size % stride == 0:
|
||||
out_size = max(filter_size - stride, 0)
|
||||
else:
|
||||
out_size = max(filter_size - (img_size % stride), 0)
|
||||
return out_size // 2, out_size - out_size // 2
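
This reproduces TensorFlow-style 'SAME' padding. For a 224 input, stride 2 and a 3x3 kernel it returns (0, 1), i.e. one extra pixel on the bottom/right; `Conv2ds` below handles such asymmetric cases by over-padding and then cropping.

    assert cal_padding(224, stride=2, filter_size=3) == (0, 1)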
|
||||
|
||||
|
||||
inp_shape = {
|
||||
"b0_small": [224, 112, 112, 56, 28, 14, 14, 7],
|
||||
"b0": [224, 112, 112, 56, 28, 14, 14, 7],
|
||||
"b1": [240, 120, 120, 60, 30, 15, 15, 8],
|
||||
"b2": [260, 130, 130, 65, 33, 17, 17, 9],
|
||||
"b3": [300, 150, 150, 75, 38, 19, 19, 10],
|
||||
"b4": [380, 190, 190, 95, 48, 24, 24, 12],
|
||||
"b5": [456, 228, 228, 114, 57, 29, 29, 15],
|
||||
"b6": [528, 264, 264, 132, 66, 33, 33, 17],
|
||||
"b7": [600, 300, 300, 150, 75, 38, 38, 19]
|
||||
}
|
||||
|
||||
|
||||
def _drop_connect(inputs, prob, is_test):
|
||||
if is_test:
|
||||
output = inputs
|
||||
else:
|
||||
keep_prob = 1.0 - prob
|
||||
inputs_shape = paddle.shape(inputs)
|
||||
random_tensor = keep_prob + paddle.rand(
|
||||
shape=[inputs_shape[0], 1, 1, 1])
|
||||
binary_tensor = paddle.floor(random_tensor)
|
||||
output = paddle.multiply(inputs, binary_tensor) / keep_prob
|
||||
return output
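
`_drop_connect` is per-sample stochastic depth: each example in the batch survives with probability `1 - prob`, and surviving examples are scaled by `1 / keep_prob` so the expected value is unchanged. A sketch:

    import paddle

    x = paddle.ones([8, 16, 7, 7])
    y = _drop_connect(x, prob=0.2, is_test=False)
    # roughly 20% of the 8 samples become all-zero, the rest equal 1 / 0.8 = 1.25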
|
||||
|
||||
|
||||
class Conv2ds(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=None,
|
||||
name="conv2d",
|
||||
act=None,
|
||||
use_bias=False,
|
||||
padding_type=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(Conv2ds, self).__init__()
|
||||
assert act in [None, "swish", "sigmoid"]
|
||||
self.act = act
|
||||
|
||||
param_attr, bias_attr = initial_type(name=name, use_bias=use_bias)
|
||||
|
||||
def get_padding(filter_size, stride=1, dilation=1):
|
||||
padding = ((stride - 1) + dilation * (filter_size - 1)) // 2
|
||||
return padding
|
||||
|
||||
        inps = 1 if model_name is None and cur_stage is None else inp_shape[
            model_name][cur_stage]
|
||||
self.need_crop = False
|
||||
if padding_type == "SAME":
|
||||
top_padding, bottom_padding = cal_padding(inps, stride,
|
||||
filter_size)
|
||||
left_padding, right_padding = cal_padding(inps, stride,
|
||||
filter_size)
|
||||
height_padding = bottom_padding
|
||||
width_padding = right_padding
|
||||
if top_padding != bottom_padding or left_padding != right_padding:
|
||||
height_padding = top_padding + stride
|
||||
width_padding = left_padding + stride
|
||||
self.need_crop = True
|
||||
padding = [height_padding, width_padding]
|
||||
elif padding_type == "VALID":
|
||||
height_padding = 0
|
||||
width_padding = 0
|
||||
padding = [height_padding, width_padding]
|
||||
elif padding_type == "DYNAMIC":
|
||||
padding = get_padding(filter_size, stride)
|
||||
else:
|
||||
padding = padding_type
|
||||
|
||||
groups = 1 if groups is None else groups
|
||||
self._conv = Conv2D(
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size,
|
||||
groups=groups,
|
||||
stride=stride,
|
||||
# act=act,
|
||||
padding=padding,
|
||||
weight_attr=param_attr,
|
||||
bias_attr=bias_attr)
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv(inputs)
|
||||
if self.act == "swish":
|
||||
x = F.swish(x)
|
||||
elif self.act == "sigmoid":
|
||||
x = F.sigmoid(x)
|
||||
|
||||
if self.need_crop:
|
||||
x = x[:, :, 1:, 1:]
|
||||
return x
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
filter_size,
|
||||
output_channels,
|
||||
stride=1,
|
||||
num_groups=1,
|
||||
padding_type="SAME",
|
||||
conv_act=None,
|
||||
bn_act="swish",
|
||||
use_bn=True,
|
||||
use_bias=False,
|
||||
name=None,
|
||||
conv_name=None,
|
||||
bn_name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2ds(
|
||||
input_channels=input_channels,
|
||||
output_channels=output_channels,
|
||||
filter_size=filter_size,
|
||||
stride=stride,
|
||||
groups=num_groups,
|
||||
act=conv_act,
|
||||
padding_type=padding_type,
|
||||
name=conv_name,
|
||||
use_bias=use_bias,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
self.use_bn = use_bn
|
||||
if use_bn is True:
|
||||
bn_name = name + bn_name
|
||||
param_attr, bias_attr = init_batch_norm_layer(bn_name)
|
||||
|
||||
self._bn = BatchNorm(
|
||||
num_channels=output_channels,
|
||||
act=bn_act,
|
||||
momentum=0.99,
|
||||
epsilon=0.001,
|
||||
moving_mean_name=bn_name + "_mean",
|
||||
moving_variance_name=bn_name + "_variance",
|
||||
param_attr=param_attr,
|
||||
bias_attr=bias_attr)
|
||||
|
||||
def forward(self, inputs):
|
||||
if self.use_bn:
|
||||
x = self._conv(inputs)
|
||||
x = self._bn(x)
|
||||
return x
|
||||
else:
|
||||
return self._conv(inputs)
|
||||
|
||||
|
||||
class ExpandConvNorm(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
block_args,
|
||||
padding_type,
|
||||
name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(ExpandConvNorm, self).__init__()
|
||||
|
||||
self.oup = block_args.input_filters * block_args.expand_ratio
|
||||
self.expand_ratio = block_args.expand_ratio
|
||||
|
||||
if self.expand_ratio != 1:
|
||||
self._conv = ConvBNLayer(
|
||||
input_channels,
|
||||
1,
|
||||
self.oup,
|
||||
bn_act=None,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
conv_name=name + "_expand_conv",
|
||||
bn_name="_bn0",
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
def forward(self, inputs):
|
||||
if self.expand_ratio != 1:
|
||||
return self._conv(inputs)
|
||||
else:
|
||||
return inputs
|
||||
|
||||
|
||||
class DepthwiseConvNorm(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
block_args,
|
||||
padding_type,
|
||||
name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(DepthwiseConvNorm, self).__init__()
|
||||
|
||||
self.k = block_args.kernel_size
|
||||
self.s = block_args.stride
|
||||
if isinstance(self.s, list) or isinstance(self.s, tuple):
|
||||
self.s = self.s[0]
|
||||
oup = block_args.input_filters * block_args.expand_ratio
|
||||
|
||||
self._conv = ConvBNLayer(
|
||||
input_channels,
|
||||
self.k,
|
||||
oup,
|
||||
self.s,
|
||||
num_groups=input_channels,
|
||||
bn_act=None,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
conv_name=name + "_depthwise_conv",
|
||||
bn_name="_bn1",
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
def forward(self, inputs):
|
||||
return self._conv(inputs)
|
||||
|
||||
|
||||
class ProjectConvNorm(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
block_args,
|
||||
padding_type,
|
||||
name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(ProjectConvNorm, self).__init__()
|
||||
|
||||
final_oup = block_args.output_filters
|
||||
|
||||
self._conv = ConvBNLayer(
|
||||
input_channels,
|
||||
1,
|
||||
final_oup,
|
||||
bn_act=None,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
conv_name=name + "_project_conv",
|
||||
bn_name="_bn2",
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
def forward(self, inputs):
|
||||
return self._conv(inputs)
|
||||
|
||||
|
||||
class SEBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
num_squeezed_channels,
|
||||
oup,
|
||||
padding_type,
|
||||
name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(SEBlock, self).__init__()
|
||||
|
||||
self._pool = AdaptiveAvgPool2D(1)
|
||||
self._conv1 = Conv2ds(
|
||||
input_channels,
|
||||
num_squeezed_channels,
|
||||
1,
|
||||
use_bias=True,
|
||||
padding_type=padding_type,
|
||||
act="swish",
|
||||
name=name + "_se_reduce")
|
||||
|
||||
self._conv2 = Conv2ds(
|
||||
num_squeezed_channels,
|
||||
oup,
|
||||
1,
|
||||
act="sigmoid",
|
||||
use_bias=True,
|
||||
padding_type=padding_type,
|
||||
name=name + "_se_expand")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._pool(inputs)
|
||||
x = self._conv1(x)
|
||||
x = self._conv2(x)
|
||||
out = paddle.multiply(inputs, x)
|
||||
return out
|
||||
|
||||
|
||||
class MbConvBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
block_args,
|
||||
padding_type,
|
||||
use_se,
|
||||
name=None,
|
||||
drop_connect_rate=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(MbConvBlock, self).__init__()
|
||||
|
||||
oup = block_args.input_filters * block_args.expand_ratio
|
||||
self.block_args = block_args
|
||||
self.has_se = use_se and (block_args.se_ratio is not None) and (
|
||||
0 < block_args.se_ratio <= 1)
|
||||
self.id_skip = block_args.id_skip
|
||||
self.expand_ratio = block_args.expand_ratio
|
||||
self.drop_connect_rate = drop_connect_rate
|
||||
|
||||
if self.expand_ratio != 1:
|
||||
self._ecn = ExpandConvNorm(
|
||||
input_channels,
|
||||
block_args,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
self._dcn = DepthwiseConvNorm(
|
||||
input_channels * block_args.expand_ratio,
|
||||
block_args,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
if self.has_se:
|
||||
num_squeezed_channels = max(
|
||||
1, int(block_args.input_filters * block_args.se_ratio))
|
||||
self._se = SEBlock(
|
||||
input_channels * block_args.expand_ratio,
|
||||
num_squeezed_channels,
|
||||
oup,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
self._pcn = ProjectConvNorm(
|
||||
input_channels * block_args.expand_ratio,
|
||||
block_args,
|
||||
padding_type=padding_type,
|
||||
name=name,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
def forward(self, inputs):
|
||||
x = inputs
|
||||
if self.expand_ratio != 1:
|
||||
x = self._ecn(x)
|
||||
x = F.swish(x)
|
||||
|
||||
x = self._dcn(x)
|
||||
x = F.swish(x)
|
||||
if self.has_se:
|
||||
x = self._se(x)
|
||||
x = self._pcn(x)
|
||||
|
||||
if self.id_skip and \
|
||||
self.block_args.stride == 1 and \
|
||||
self.block_args.input_filters == self.block_args.output_filters:
|
||||
if self.drop_connect_rate:
|
||||
x = _drop_connect(x, self.drop_connect_rate, not self.training)
|
||||
x = paddle.add(x, inputs)
|
||||
return x
|
||||
|
||||
|
||||
class ConvStemNorm(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
padding_type,
|
||||
_global_params,
|
||||
name=None,
|
||||
model_name=None,
|
||||
cur_stage=None):
|
||||
super(ConvStemNorm, self).__init__()
|
||||
|
||||
output_channels = round_filters(32, _global_params)
|
||||
self._conv = ConvBNLayer(
|
||||
input_channels,
|
||||
filter_size=3,
|
||||
output_channels=output_channels,
|
||||
stride=2,
|
||||
bn_act=None,
|
||||
padding_type=padding_type,
|
||||
name="",
|
||||
conv_name="_conv_stem",
|
||||
bn_name="_bn0",
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage)
|
||||
|
||||
def forward(self, inputs):
|
||||
return self._conv(inputs)
|
||||
|
||||
|
||||
class ExtractFeatures(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
_block_args,
|
||||
_global_params,
|
||||
padding_type,
|
||||
use_se,
|
||||
model_name=None):
|
||||
super(ExtractFeatures, self).__init__()
|
||||
|
||||
self._global_params = _global_params
|
||||
|
||||
self._conv_stem = ConvStemNorm(
|
||||
input_channels,
|
||||
padding_type=padding_type,
|
||||
_global_params=_global_params,
|
||||
model_name=model_name,
|
||||
cur_stage=0)
|
||||
|
||||
self.block_args_copy = copy.deepcopy(_block_args)
|
||||
idx = 0
|
||||
block_size = 0
|
||||
for block_arg in self.block_args_copy:
|
||||
block_arg = block_arg._replace(
|
||||
input_filters=round_filters(block_arg.input_filters,
|
||||
_global_params),
|
||||
output_filters=round_filters(block_arg.output_filters,
|
||||
_global_params),
|
||||
num_repeat=round_repeats(block_arg.num_repeat, _global_params))
|
||||
block_size += 1
|
||||
for _ in range(block_arg.num_repeat - 1):
|
||||
block_size += 1
|
||||
|
||||
self.conv_seq = []
|
||||
cur_stage = 1
|
||||
for block_args in _block_args:
|
||||
block_args = block_args._replace(
|
||||
input_filters=round_filters(block_args.input_filters,
|
||||
_global_params),
|
||||
output_filters=round_filters(block_args.output_filters,
|
||||
_global_params),
|
||||
num_repeat=round_repeats(block_args.num_repeat,
|
||||
_global_params))
|
||||
|
||||
drop_connect_rate = self._global_params.drop_connect_rate
|
||||
if drop_connect_rate:
|
||||
drop_connect_rate *= float(idx) / block_size
|
||||
|
||||
_mc_block = self.add_sublayer(
|
||||
"_blocks." + str(idx) + ".",
|
||||
MbConvBlock(
|
||||
block_args.input_filters,
|
||||
block_args=block_args,
|
||||
padding_type=padding_type,
|
||||
use_se=use_se,
|
||||
name="_blocks." + str(idx) + ".",
|
||||
drop_connect_rate=drop_connect_rate,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage))
|
||||
self.conv_seq.append(_mc_block)
|
||||
idx += 1
|
||||
if block_args.num_repeat > 1:
|
||||
block_args = block_args._replace(
|
||||
input_filters=block_args.output_filters, stride=1)
|
||||
for _ in range(block_args.num_repeat - 1):
|
||||
drop_connect_rate = self._global_params.drop_connect_rate
|
||||
if drop_connect_rate:
|
||||
drop_connect_rate *= float(idx) / block_size
|
||||
_mc_block = self.add_sublayer(
|
||||
"block." + str(idx) + ".",
|
||||
MbConvBlock(
|
||||
block_args.input_filters,
|
||||
block_args,
|
||||
padding_type=padding_type,
|
||||
use_se=use_se,
|
||||
name="_blocks." + str(idx) + ".",
|
||||
drop_connect_rate=drop_connect_rate,
|
||||
model_name=model_name,
|
||||
cur_stage=cur_stage))
|
||||
self.conv_seq.append(_mc_block)
|
||||
idx += 1
|
||||
cur_stage += 1
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv_stem(inputs)
|
||||
x = F.swish(x)
|
||||
for _mc_block in self.conv_seq:
|
||||
x = _mc_block(x)
|
||||
return x
|
||||
|
||||
|
||||
class EfficientNet(nn.Layer):
|
||||
def __init__(self,
|
||||
name="b0",
|
||||
padding_type="SAME",
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
class_num=1000):
|
||||
super(EfficientNet, self).__init__()
|
||||
|
||||
model_name = 'efficientnet-' + name
|
||||
self.name = name
|
||||
self._block_args, self._global_params = get_model_params(
|
||||
model_name, override_params)
|
||||
self.padding_type = padding_type
|
||||
self.use_se = use_se
|
||||
|
||||
self._ef = ExtractFeatures(
|
||||
3,
|
||||
self._block_args,
|
||||
self._global_params,
|
||||
self.padding_type,
|
||||
self.use_se,
|
||||
model_name=self.name)
|
||||
|
||||
output_channels = round_filters(1280, self._global_params)
|
||||
if name == "b0_small" or name == "b0" or name == "b1":
|
||||
oup = 320
|
||||
elif name == "b2":
|
||||
oup = 352
|
||||
elif name == "b3":
|
||||
oup = 384
|
||||
elif name == "b4":
|
||||
oup = 448
|
||||
elif name == "b5":
|
||||
oup = 512
|
||||
elif name == "b6":
|
||||
oup = 576
|
||||
elif name == "b7":
|
||||
oup = 640
|
||||
self._conv = ConvBNLayer(
|
||||
oup,
|
||||
1,
|
||||
output_channels,
|
||||
bn_act="swish",
|
||||
padding_type=self.padding_type,
|
||||
name="",
|
||||
conv_name="_conv_head",
|
||||
bn_name="_bn1",
|
||||
model_name=self.name,
|
||||
cur_stage=7)
|
||||
self._pool = AdaptiveAvgPool2D(1)
|
||||
|
||||
if self._global_params.dropout_rate:
|
||||
self._drop = Dropout(
|
||||
p=self._global_params.dropout_rate, mode="upscale_in_train")
|
||||
|
||||
param_attr, bias_attr = init_fc_layer("_fc")
|
||||
self._fc = Linear(
|
||||
output_channels,
|
||||
class_num,
|
||||
weight_attr=param_attr,
|
||||
bias_attr=bias_attr)
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._ef(inputs)
|
||||
x = self._conv(x)
|
||||
x = self._pool(x)
|
||||
if self._global_params.dropout_rate:
|
||||
x = self._drop(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
x = self._fc(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def EfficientNetB0_small(padding_type='DYNAMIC',
|
||||
override_params=None,
|
||||
use_se=False,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b0',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB0(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b0',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"])
|
||||
return model
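
A minimal sketch (the 'SAME' padding path assumes the canonical 224x224 B0 input resolution recorded in `inp_shape`):

    import paddle

    model = EfficientNetB0(pretrained=False, class_num=1000)
    model.eval()
    out = model(paddle.randn([1, 3, 224, 224]))  # -> [1, 1000]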
|
||||
|
||||
|
||||
def EfficientNetB1(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b1',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB2(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b2',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB3(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b3',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB4(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b4',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB5(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b5',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB6(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b6',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"])
|
||||
return model
|
||||
|
||||
|
||||
def EfficientNetB7(padding_type='SAME',
|
||||
override_params=None,
|
||||
use_se=True,
|
||||
pretrained=False,
|
||||
use_ssld=False,
|
||||
**kwargs):
|
||||
model = EfficientNet(
|
||||
name='b7',
|
||||
padding_type=padding_type,
|
||||
override_params=override_params,
|
||||
use_se=use_se,
|
||||
**kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
|
||||
return model
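# Usage sketch: a minimal forward pass through one of the builders above.
# Illustrative only; it assumes `paddle` is imported at module level (as the
# forward() code above already requires) and that 224x224 is an appropriate
# input size for the b0 variant.
if __name__ == "__main__":
    net = EfficientNetB0(pretrained=False, class_num=1000)
    net.eval()
    logits = net(paddle.rand([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 1000]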
|
||||
@ -0,0 +1,363 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
|
||||
|
||||
import math
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle.nn.initializer import Uniform, KaimingNormal
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"GhostNet_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
|
||||
"GhostNet_x1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
|
||||
"GhostNet_x1_3":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
self._conv = Conv2D(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=(kernel_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=KaimingNormal(), name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
bn_name = name + "_bn"
|
||||
|
||||
self._batch_norm = BatchNorm(
|
||||
num_channels=out_channels,
|
||||
act=act,
|
||||
param_attr=ParamAttr(
|
||||
name=bn_name + "_scale", regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(
|
||||
name=bn_name + "_offset", regularizer=L2Decay(0.0)),
|
||||
moving_mean_name=bn_name + "_mean",
|
||||
moving_variance_name=bn_name + "_variance")
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class SEBlock(nn.Layer):
|
||||
def __init__(self, num_channels, reduction_ratio=4, name=None):
|
||||
super(SEBlock, self).__init__()
|
||||
self.pool2d_gap = AdaptiveAvgPool2D(1)
|
||||
self._num_channels = num_channels
|
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0)
|
||||
med_ch = num_channels // reduction_ratio
|
||||
self.squeeze = Linear(
|
||||
num_channels,
|
||||
med_ch,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_1_weights"),
|
||||
bias_attr=ParamAttr(name=name + "_1_offset"))
|
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0)
|
||||
self.excitation = Linear(
|
||||
med_ch,
|
||||
num_channels,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_2_weights"),
|
||||
bias_attr=ParamAttr(name=name + "_2_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
pool = self.pool2d_gap(inputs)
|
||||
pool = paddle.squeeze(pool, axis=[2, 3])
|
||||
squeeze = self.squeeze(pool)
|
||||
squeeze = F.relu(squeeze)
|
||||
excitation = self.excitation(squeeze)
|
||||
excitation = paddle.clip(x=excitation, min=0, max=1)
|
||||
excitation = paddle.unsqueeze(excitation, axis=[2, 3])
|
||||
out = paddle.multiply(inputs, excitation)
|
||||
return out
|
||||
|
||||
|
||||
class GhostModule(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
output_channels,
|
||||
kernel_size=1,
|
||||
ratio=2,
|
||||
dw_size=3,
|
||||
stride=1,
|
||||
relu=True,
|
||||
name=None):
|
||||
super(GhostModule, self).__init__()
|
||||
init_channels = int(math.ceil(output_channels / ratio))
|
||||
new_channels = int(init_channels * (ratio - 1))
|
||||
self.primary_conv = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=init_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
groups=1,
|
||||
act="relu" if relu else None,
|
||||
name=name + "_primary_conv")
|
||||
self.cheap_operation = ConvBNLayer(
|
||||
in_channels=init_channels,
|
||||
out_channels=new_channels,
|
||||
kernel_size=dw_size,
|
||||
stride=1,
|
||||
groups=init_channels,
|
||||
act="relu" if relu else None,
|
||||
name=name + "_cheap_operation")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.primary_conv(inputs)
|
||||
y = self.cheap_operation(x)
|
||||
out = paddle.concat([x, y], axis=1)
|
||||
return out
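        # Note (illustrative): init_channels + new_channels equals
        # ceil(output_channels / ratio) * ratio, so the concatenation above
        # matches output_channels whenever output_channels is divisible by
        # ratio; the depthwise "cheap_operation" branch produces the extra
        # feature maps at a fraction of the cost of the primary convolution.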
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
hidden_dim,
|
||||
output_channels,
|
||||
kernel_size,
|
||||
stride,
|
||||
use_se,
|
||||
name=None):
|
||||
super(GhostBottleneck, self).__init__()
|
||||
self._stride = stride
|
||||
self._use_se = use_se
|
||||
self._num_channels = in_channels
|
||||
self._output_channels = output_channels
|
||||
self.ghost_module_1 = GhostModule(
|
||||
in_channels=in_channels,
|
||||
output_channels=hidden_dim,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
relu=True,
|
||||
name=name + "_ghost_module_1")
|
||||
if stride == 2:
|
||||
self.depthwise_conv = ConvBNLayer(
|
||||
in_channels=hidden_dim,
|
||||
out_channels=hidden_dim,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
groups=hidden_dim,
|
||||
act=None,
|
||||
name=name +
|
||||
"_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
|
||||
)
|
||||
if use_se:
|
||||
self.se_block = SEBlock(num_channels=hidden_dim, name=name + "_se")
|
||||
self.ghost_module_2 = GhostModule(
|
||||
in_channels=hidden_dim,
|
||||
output_channels=output_channels,
|
||||
kernel_size=1,
|
||||
relu=False,
|
||||
name=name + "_ghost_module_2")
|
||||
if stride != 1 or in_channels != output_channels:
|
||||
self.shortcut_depthwise = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=in_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
groups=in_channels,
|
||||
act=None,
|
||||
name=name +
|
||||
"_shortcut_depthwise_depthwise" # looks strange due to an old typo, will be fixed later.
|
||||
)
|
||||
self.shortcut_conv = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=name + "_shortcut_conv")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.ghost_module_1(inputs)
|
||||
if self._stride == 2:
|
||||
x = self.depthwise_conv(x)
|
||||
if self._use_se:
|
||||
x = self.se_block(x)
|
||||
x = self.ghost_module_2(x)
|
||||
if self._stride == 1 and self._num_channels == self._output_channels:
|
||||
shortcut = inputs
|
||||
else:
|
||||
shortcut = self.shortcut_depthwise(inputs)
|
||||
shortcut = self.shortcut_conv(shortcut)
|
||||
return paddle.add(x=x, y=shortcut)
|
||||
|
||||
|
||||
class GhostNet(nn.Layer):
|
||||
def __init__(self, scale, class_num=1000):
|
||||
super(GhostNet, self).__init__()
|
||||
self.cfgs = [
|
||||
# k, t, c, SE, s
|
||||
[3, 16, 16, 0, 1],
|
||||
[3, 48, 24, 0, 2],
|
||||
[3, 72, 24, 0, 1],
|
||||
[5, 72, 40, 1, 2],
|
||||
[5, 120, 40, 1, 1],
|
||||
[3, 240, 80, 0, 2],
|
||||
[3, 200, 80, 0, 1],
|
||||
[3, 184, 80, 0, 1],
|
||||
[3, 184, 80, 0, 1],
|
||||
[3, 480, 112, 1, 1],
|
||||
[3, 672, 112, 1, 1],
|
||||
[5, 672, 160, 1, 2],
|
||||
[5, 960, 160, 0, 1],
|
||||
[5, 960, 160, 1, 1],
|
||||
[5, 960, 160, 0, 1],
|
||||
[5, 960, 160, 1, 1]
|
||||
]
|
||||
self.scale = scale
|
||||
output_channels = int(self._make_divisible(16 * self.scale, 4))
|
||||
self.conv1 = ConvBNLayer(
|
||||
in_channels=3,
|
||||
out_channels=output_channels,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name="conv1")
|
||||
# build inverted residual blocks
|
||||
idx = 0
|
||||
self.ghost_bottleneck_list = []
|
||||
for k, exp_size, c, use_se, s in self.cfgs:
|
||||
in_channels = output_channels
|
||||
output_channels = int(self._make_divisible(c * self.scale, 4))
|
||||
hidden_dim = int(self._make_divisible(exp_size * self.scale, 4))
|
||||
ghost_bottleneck = self.add_sublayer(
|
||||
name="_ghostbottleneck_" + str(idx),
|
||||
sublayer=GhostBottleneck(
|
||||
in_channels=in_channels,
|
||||
hidden_dim=hidden_dim,
|
||||
output_channels=output_channels,
|
||||
kernel_size=k,
|
||||
stride=s,
|
||||
use_se=use_se,
|
||||
name="_ghostbottleneck_" + str(idx)))
|
||||
self.ghost_bottleneck_list.append(ghost_bottleneck)
|
||||
idx += 1
|
||||
# build last several layers
|
||||
in_channels = output_channels
|
||||
output_channels = int(self._make_divisible(exp_size * self.scale, 4))
|
||||
self.conv_last = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name="conv_last")
|
||||
self.pool2d_gap = AdaptiveAvgPool2D(1)
|
||||
in_channels = output_channels
|
||||
self._fc0_output_channels = 1280
|
||||
self.fc_0 = ConvBNLayer(
|
||||
in_channels=in_channels,
|
||||
out_channels=self._fc0_output_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
act="relu",
|
||||
name="fc_0")
|
||||
self.dropout = nn.Dropout(p=0.2)
|
||||
stdv = 1.0 / math.sqrt(self._fc0_output_channels * 1.0)
|
||||
self.fc_1 = Linear(
|
||||
self._fc0_output_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
name="fc_1_weights", initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(name="fc_1_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self.conv1(inputs)
|
||||
for ghost_bottleneck in self.ghost_bottleneck_list:
|
||||
x = ghost_bottleneck(x)
|
||||
x = self.conv_last(x)
|
||||
x = self.pool2d_gap(x)
|
||||
x = self.fc_0(x)
|
||||
x = self.dropout(x)
|
||||
x = paddle.reshape(x, shape=[-1, self._fc0_output_channels])
|
||||
x = self.fc_1(x)
|
||||
return x
|
||||
|
||||
def _make_divisible(self, v, divisor, min_value=None):
|
||||
"""
|
||||
This function is taken from the original tf repo.
|
||||
It ensures that all layers have a channel number that is divisible by the given divisor (8 in the original repo).
|
||||
It can be seen here:
|
||||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
|
||||
"""
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
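        # Worked example (illustrative): _make_divisible(16 * 0.5, 4) gives
        # max(4, int(8 + 2) // 4 * 4) = 8, and since 8 >= 0.9 * 8 no correction
        # is applied, so the GhostNet_x0_5 stem uses 8 output channels.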
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = GhostNet(scale=0.5, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = GhostNet(scale=1.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = GhostNet(scale=1.3, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
|
||||
return model
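# Usage sketch: a minimal forward pass through one of the GhostNet builders
# above. Illustrative only; it assumes `paddle` is imported at module level
# and that a 224x224 input is appropriate.
if __name__ == "__main__":
    net = GhostNet_x1_0(pretrained=False, class_num=1000)
    net.eval()
    logits = net(paddle.rand([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 1000]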
|
||||
@ -0,0 +1,229 @@
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"GoogLeNet":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def xavier(channels, filter_size, name):
|
||||
stdv = (3.0 / (filter_size**2 * channels))**0.5
|
||||
param_attr = ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_weights")
|
||||
return param_attr
|
||||
|
||||
|
||||
class ConvLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
return y
|
||||
|
||||
|
||||
class Inception(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter1,
|
||||
filter3R,
|
||||
filter3,
|
||||
filter5R,
|
||||
filter5,
|
||||
proj,
|
||||
name=None):
|
||||
super(Inception, self).__init__()
|
||||
|
||||
self._conv1 = ConvLayer(
|
||||
input_channels, filter1, 1, name="inception_" + name + "_1x1")
|
||||
self._conv3r = ConvLayer(
|
||||
input_channels,
|
||||
filter3R,
|
||||
1,
|
||||
name="inception_" + name + "_3x3_reduce")
|
||||
self._conv3 = ConvLayer(
|
||||
filter3R, filter3, 3, name="inception_" + name + "_3x3")
|
||||
self._conv5r = ConvLayer(
|
||||
input_channels,
|
||||
filter5R,
|
||||
1,
|
||||
name="inception_" + name + "_5x5_reduce")
|
||||
self._conv5 = ConvLayer(
|
||||
filter5R, filter5, 5, name="inception_" + name + "_5x5")
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=1, padding=1)
|
||||
|
||||
self._convprj = ConvLayer(
|
||||
input_channels, proj, 1, name="inception_" + name + "_3x3_proj")
|
||||
|
||||
def forward(self, inputs):
|
||||
conv1 = self._conv1(inputs)
|
||||
|
||||
conv3r = self._conv3r(inputs)
|
||||
conv3 = self._conv3(conv3r)
|
||||
|
||||
conv5r = self._conv5r(inputs)
|
||||
conv5 = self._conv5(conv5r)
|
||||
|
||||
pool = self._pool(inputs)
|
||||
convprj = self._convprj(pool)
|
||||
|
||||
cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1)
|
||||
cat = F.relu(cat)
|
||||
return cat
|
||||
|
||||
|
||||
class GoogLeNetDY(nn.Layer):
|
||||
def __init__(self, class_num=1000):
|
||||
super(GoogLeNetDY, self).__init__()
|
||||
self._conv = ConvLayer(3, 64, 7, 2, name="conv1")
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2)
|
||||
self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1")
|
||||
self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3")
|
||||
|
||||
self._ince3a = Inception(
|
||||
192, 192, 64, 96, 128, 16, 32, 32, name="ince3a")
|
||||
self._ince3b = Inception(
|
||||
256, 256, 128, 128, 192, 32, 96, 64, name="ince3b")
|
||||
|
||||
self._ince4a = Inception(
|
||||
480, 480, 192, 96, 208, 16, 48, 64, name="ince4a")
|
||||
self._ince4b = Inception(
|
||||
512, 512, 160, 112, 224, 24, 64, 64, name="ince4b")
|
||||
self._ince4c = Inception(
|
||||
512, 512, 128, 128, 256, 24, 64, 64, name="ince4c")
|
||||
self._ince4d = Inception(
|
||||
512, 512, 112, 144, 288, 32, 64, 64, name="ince4d")
|
||||
self._ince4e = Inception(
|
||||
528, 528, 256, 160, 320, 32, 128, 128, name="ince4e")
|
||||
|
||||
self._ince5a = Inception(
|
||||
832, 832, 256, 160, 320, 32, 128, 128, name="ince5a")
|
||||
self._ince5b = Inception(
|
||||
832, 832, 384, 192, 384, 48, 128, 128, name="ince5b")
|
||||
|
||||
self._pool_5 = AdaptiveAvgPool2D(1)
|
||||
|
||||
self._drop = Dropout(p=0.4, mode="downscale_in_infer")
|
||||
self._fc_out = Linear(
|
||||
1024,
|
||||
class_num,
|
||||
weight_attr=xavier(1024, 1, "out"),
|
||||
bias_attr=ParamAttr(name="out_offset"))
|
||||
self._pool_o1 = AvgPool2D(kernel_size=5, stride=3)
|
||||
self._conv_o1 = ConvLayer(512, 128, 1, name="conv_o1")
|
||||
self._fc_o1 = Linear(
|
||||
1152,
|
||||
1024,
|
||||
weight_attr=xavier(2048, 1, "fc_o1"),
|
||||
bias_attr=ParamAttr(name="fc_o1_offset"))
|
||||
self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
|
||||
self._out1 = Linear(
|
||||
1024,
|
||||
class_num,
|
||||
weight_attr=xavier(1024, 1, "out1"),
|
||||
bias_attr=ParamAttr(name="out1_offset"))
|
||||
self._pool_o2 = AvgPool2D(kernel_size=5, stride=3)
|
||||
self._conv_o2 = ConvLayer(528, 128, 1, name="conv_o2")
|
||||
self._fc_o2 = Linear(
|
||||
1152,
|
||||
1024,
|
||||
weight_attr=xavier(2048, 1, "fc_o2"),
|
||||
bias_attr=ParamAttr(name="fc_o2_offset"))
|
||||
self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
|
||||
self._out2 = Linear(
|
||||
1024,
|
||||
class_num,
|
||||
weight_attr=xavier(1024, 1, "out2"),
|
||||
bias_attr=ParamAttr(name="out2_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv(inputs)
|
||||
x = self._pool(x)
|
||||
x = self._conv_1(x)
|
||||
x = self._conv_2(x)
|
||||
x = self._pool(x)
|
||||
|
||||
x = self._ince3a(x)
|
||||
x = self._ince3b(x)
|
||||
x = self._pool(x)
|
||||
|
||||
ince4a = self._ince4a(x)
|
||||
x = self._ince4b(ince4a)
|
||||
x = self._ince4c(x)
|
||||
ince4d = self._ince4d(x)
|
||||
x = self._ince4e(ince4d)
|
||||
x = self._pool(x)
|
||||
|
||||
x = self._ince5a(x)
|
||||
ince5b = self._ince5b(x)
|
||||
|
||||
x = self._pool_5(ince5b)
|
||||
x = self._drop(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
out = self._fc_out(x)
|
||||
|
||||
x = self._pool_o1(ince4a)
|
||||
x = self._conv_o1(x)
|
||||
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
|
||||
x = self._fc_o1(x)
|
||||
x = F.relu(x)
|
||||
x = self._drop_o1(x)
|
||||
out1 = self._out1(x)
|
||||
|
||||
x = self._pool_o2(ince4d)
|
||||
x = self._conv_o2(x)
|
||||
x = paddle.flatten(x, start_axis=1, stop_axis=-1)
|
||||
x = self._fc_o2(x)
|
||||
x = self._drop_o2(x)
|
||||
out2 = self._out2(x)
|
||||
return [out, out1, out2]
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = GoogLeNetDY(**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,693 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/Meituan-AutoML/Twins
|
||||
|
||||
from functools import partial
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.regularizer import L2Decay
|
||||
|
||||
from .vision_transformer import trunc_normal_, normal_, zeros_, ones_, to_2tuple, DropPath, Identity, Mlp
|
||||
from .vision_transformer import Block as ViTBlock
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"pcpvt_small":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams",
|
||||
"pcpvt_base":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams",
|
||||
"pcpvt_large":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams",
|
||||
"alt_gvt_small":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams",
|
||||
"alt_gvt_base":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams",
|
||||
"alt_gvt_large":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class GroupAttention(nn.Layer):
|
||||
"""LSA: self attention within a group.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads=8,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.,
|
||||
ws=1):
|
||||
super().__init__()
|
||||
if ws == 1:
|
||||
raise Exception("ws {ws} should not be 1")
|
||||
if dim % num_heads != 0:
|
||||
raise Exception(
    f"dim {dim} should be divided by num_heads {num_heads}.")
|
||||
|
||||
self.dim = dim
|
||||
self.num_heads = num_heads
|
||||
head_dim = dim // num_heads
|
||||
self.scale = qk_scale or head_dim**-0.5
|
||||
|
||||
self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
|
||||
self.attn_drop = nn.Dropout(attn_drop)
|
||||
self.proj = nn.Linear(dim, dim)
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
self.ws = ws
|
||||
|
||||
def forward(self, x, H, W):
|
||||
B, N, C = x.shape
|
||||
h_group, w_group = H // self.ws, W // self.ws
|
||||
total_groups = h_group * w_group
|
||||
x = x.reshape([B, h_group, self.ws, w_group, self.ws, C]).transpose(
|
||||
[0, 1, 3, 2, 4, 5])
|
||||
qkv = self.qkv(x).reshape([
|
||||
B, total_groups, self.ws**2, 3, self.num_heads, C // self.num_heads
|
||||
]).transpose([3, 0, 1, 4, 2, 5])
|
||||
q, k, v = qkv[0], qkv[1], qkv[2]
|
||||
attn = paddle.matmul(q, k.transpose([0, 1, 2, 4, 3])) * self.scale
|
||||
|
||||
attn = nn.Softmax(axis=-1)(attn)
|
||||
attn = self.attn_drop(attn)
|
||||
attn = paddle.matmul(attn, v).transpose([0, 1, 3, 2, 4]).reshape(
|
||||
[B, h_group, w_group, self.ws, self.ws, C])
|
||||
|
||||
x = attn.transpose([0, 1, 3, 2, 4, 5]).reshape([B, N, C])
|
||||
x = self.proj(x)
|
||||
x = self.proj_drop(x)
|
||||
return x
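        # Shape walk-through (illustrative): with H = W = 56, ws = 7 and C = 64,
        # the N = 3136 tokens are regrouped into 8 * 8 = 64 windows of 49 tokens
        # each and attention is computed independently per window, so the cost
        # grows with N * ws**2 instead of N**2.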
|
||||
|
||||
|
||||
class Attention(nn.Layer):
|
||||
"""GSA: using a key to summarize the information for a group to be efficient.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads=8,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.,
|
||||
sr_ratio=1):
|
||||
super().__init__()
|
||||
assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
|
||||
|
||||
self.dim = dim
|
||||
self.num_heads = num_heads
|
||||
head_dim = dim // num_heads
|
||||
self.scale = qk_scale or head_dim**-0.5
|
||||
|
||||
self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
|
||||
self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
|
||||
self.attn_drop = nn.Dropout(attn_drop)
|
||||
self.proj = nn.Linear(dim, dim)
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
|
||||
self.sr_ratio = sr_ratio
|
||||
if sr_ratio > 1:
|
||||
self.sr = nn.Conv2D(
|
||||
dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
|
||||
self.norm = nn.LayerNorm(dim)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
B, N, C = x.shape
|
||||
q = self.q(x).reshape(
|
||||
[B, N, self.num_heads, C // self.num_heads]).transpose(
|
||||
[0, 2, 1, 3])
|
||||
|
||||
if self.sr_ratio > 1:
|
||||
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
|
||||
tmp_n = H * W // self.sr_ratio**2
|
||||
x_ = self.sr(x_).reshape([B, C, tmp_n]).transpose([0, 2, 1])
|
||||
x_ = self.norm(x_)
|
||||
kv = self.kv(x_).reshape(
|
||||
[B, tmp_n, 2, self.num_heads, C // self.num_heads]).transpose(
|
||||
[2, 0, 3, 1, 4])
|
||||
else:
|
||||
kv = self.kv(x).reshape(
|
||||
[B, N, 2, self.num_heads, C // self.num_heads]).transpose(
|
||||
[2, 0, 3, 1, 4])
|
||||
k, v = kv[0], kv[1]
|
||||
|
||||
attn = paddle.matmul(q, k.transpose([0, 1, 3, 2])) * self.scale
|
||||
attn = nn.Softmax(axis=-1)(attn)
|
||||
attn = self.attn_drop(attn)
|
||||
|
||||
x = paddle.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, C])
|
||||
x = self.proj(x)
|
||||
x = self.proj_drop(x)
|
||||
return x
|
||||
|
||||
|
||||
class Block(nn.Layer):
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm,
|
||||
sr_ratio=1):
|
||||
super().__init__()
|
||||
self.norm1 = norm_layer(dim)
|
||||
self.attn = Attention(
|
||||
dim,
|
||||
num_heads=num_heads,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
attn_drop=attn_drop,
|
||||
proj_drop=drop,
|
||||
sr_ratio=sr_ratio)
|
||||
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
|
||||
self.norm2 = norm_layer(dim)
|
||||
mlp_hidden_dim = int(dim * mlp_ratio)
|
||||
self.mlp = Mlp(in_features=dim,
|
||||
hidden_features=mlp_hidden_dim,
|
||||
act_layer=act_layer,
|
||||
drop=drop)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x
|
||||
|
||||
|
||||
class SBlock(ViTBlock):
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm,
|
||||
sr_ratio=1):
|
||||
super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
|
||||
attn_drop, drop_path, act_layer, norm_layer)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
class GroupBlock(ViTBlock):
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm,
|
||||
sr_ratio=1,
|
||||
ws=1):
|
||||
super().__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop,
|
||||
attn_drop, drop_path, act_layer, norm_layer)
|
||||
del self.attn
|
||||
if ws == 1:
|
||||
self.attn = Attention(dim, num_heads, qkv_bias, qk_scale,
|
||||
attn_drop, drop, sr_ratio)
|
||||
else:
|
||||
self.attn = GroupAttention(dim, num_heads, qkv_bias, qk_scale,
|
||||
attn_drop, drop, ws)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
||||
return x
|
||||
|
||||
|
||||
class PatchEmbed(nn.Layer):
|
||||
""" Image to Patch Embedding.
|
||||
"""
|
||||
|
||||
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
|
||||
super().__init__()
|
||||
if img_size % patch_size != 0:
|
||||
raise Exception(
|
||||
f"img_size {img_size} should be divided by patch_size {patch_size}."
|
||||
)
|
||||
|
||||
img_size = to_2tuple(img_size)
|
||||
patch_size = to_2tuple(patch_size)
|
||||
|
||||
self.img_size = img_size
|
||||
self.patch_size = patch_size
|
||||
self.H, self.W = img_size[0] // patch_size[0], img_size[
|
||||
1] // patch_size[1]
|
||||
self.num_patches = self.H * self.W
|
||||
self.proj = nn.Conv2D(
|
||||
in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
|
||||
self.norm = nn.LayerNorm(embed_dim)
|
||||
|
||||
def forward(self, x):
|
||||
B, C, H, W = x.shape
|
||||
x = self.proj(x).flatten(2).transpose([0, 2, 1])
|
||||
x = self.norm(x)
|
||||
H, W = H // self.patch_size[0], W // self.patch_size[1]
|
||||
return x, (H, W)
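        # Example (illustrative): a 224x224 RGB image with patch_size=4 yields
        # H = W = 56, i.e. a token sequence of shape [B, 3136, embed_dim] after
        # the projection and LayerNorm above.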
|
||||
|
||||
|
||||
# borrow from PVT https://github.com/whai362/PVT.git
|
||||
class PyramidVisionTransformer(nn.Layer):
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dims=[64, 128, 256, 512],
|
||||
num_heads=[1, 2, 4, 8],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
block_cls=Block):
|
||||
super().__init__()
|
||||
self.class_num = class_num
|
||||
self.depths = depths
|
||||
|
||||
# patch_embed
|
||||
self.patch_embeds = nn.LayerList()
|
||||
self.pos_embeds = nn.ParameterList()
|
||||
self.pos_drops = nn.LayerList()
|
||||
self.blocks = nn.LayerList()
|
||||
|
||||
for i in range(len(depths)):
|
||||
if i == 0:
|
||||
self.patch_embeds.append(
|
||||
PatchEmbed(img_size, patch_size, in_chans, embed_dims[i]))
|
||||
else:
|
||||
self.patch_embeds.append(
|
||||
PatchEmbed(img_size // patch_size // 2**(i - 1), 2,
|
||||
embed_dims[i - 1], embed_dims[i]))
|
||||
patch_num = self.patch_embeds[i].num_patches + 1 if i == len(
|
||||
embed_dims) - 1 else self.patch_embeds[i].num_patches
|
||||
self.pos_embeds.append(
|
||||
self.create_parameter(
|
||||
shape=[1, patch_num, embed_dims[i]],
|
||||
default_initializer=zeros_))
|
||||
self.pos_drops.append(nn.Dropout(p=drop_rate))
|
||||
|
||||
dpr = [
|
||||
x.numpy()[0]
|
||||
for x in paddle.linspace(0, drop_path_rate, sum(depths))
|
||||
] # stochastic depth decay rule
|
||||
|
||||
cur = 0
|
||||
for k in range(len(depths)):
|
||||
_block = nn.LayerList([
|
||||
block_cls(
|
||||
dim=embed_dims[k],
|
||||
num_heads=num_heads[k],
|
||||
mlp_ratio=mlp_ratios[k],
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
drop=drop_rate,
|
||||
attn_drop=attn_drop_rate,
|
||||
drop_path=dpr[cur + i],
|
||||
norm_layer=norm_layer,
|
||||
sr_ratio=sr_ratios[k]) for i in range(depths[k])
|
||||
])
|
||||
self.blocks.append(_block)
|
||||
cur += depths[k]
|
||||
|
||||
self.norm = norm_layer(embed_dims[-1])
|
||||
|
||||
# cls_token
|
||||
self.cls_token = self.create_parameter(
|
||||
shape=[1, 1, embed_dims[-1]],
|
||||
default_initializer=zeros_,
|
||||
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
|
||||
|
||||
# classification head
|
||||
self.head = nn.Linear(embed_dims[-1],
|
||||
class_num) if class_num > 0 else Identity()
|
||||
|
||||
# init weights
|
||||
for pos_emb in self.pos_embeds:
|
||||
trunc_normal_(pos_emb)
|
||||
self.apply(self._init_weights)
|
||||
|
||||
def _init_weights(self, m):
|
||||
if isinstance(m, nn.Linear):
|
||||
trunc_normal_(m.weight)
|
||||
if isinstance(m, nn.Linear) and m.bias is not None:
|
||||
zeros_(m.bias)
|
||||
elif isinstance(m, nn.LayerNorm):
|
||||
zeros_(m.bias)
|
||||
ones_(m.weight)
|
||||
|
||||
def forward_features(self, x):
|
||||
B = x.shape[0]
|
||||
for i in range(len(self.depths)):
|
||||
x, (H, W) = self.patch_embeds[i](x)
|
||||
if i == len(self.depths) - 1:
|
||||
cls_tokens = self.cls_token.expand([B, -1, -1])
|
||||
x = paddle.concat([cls_tokens, x], axis=1)
|
||||
x = x + self.pos_embeds[i]
|
||||
x = self.pos_drops[i](x)
|
||||
for blk in self.blocks[i]:
|
||||
x = blk(x, H, W)
|
||||
if i < len(self.depths) - 1:
|
||||
x = x.reshape([B, H, W, -1]).transpose([0, 3, 1, 2])
|
||||
x = self.norm(x)
|
||||
return x[:, 0]
|
||||
|
||||
def forward(self, x):
|
||||
x = self.forward_features(x)
|
||||
x = self.head(x)
|
||||
return x
|
||||
|
||||
|
||||
# PEG from https://arxiv.org/abs/2102.10882
|
||||
class PosCNN(nn.Layer):
|
||||
def __init__(self, in_chans, embed_dim=768, s=1):
|
||||
super().__init__()
|
||||
self.proj = nn.Sequential(
|
||||
nn.Conv2D(
|
||||
in_chans,
|
||||
embed_dim,
|
||||
3,
|
||||
s,
|
||||
1,
|
||||
bias_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)),
|
||||
groups=embed_dim,
|
||||
weight_attr=paddle.ParamAttr(regularizer=L2Decay(0.0)), ))
|
||||
self.s = s
|
||||
|
||||
def forward(self, x, H, W):
|
||||
B, N, C = x.shape
|
||||
feat_token = x
|
||||
cnn_feat = feat_token.transpose([0, 2, 1]).reshape([B, C, H, W])
|
||||
if self.s == 1:
|
||||
x = self.proj(cnn_feat) + cnn_feat
|
||||
else:
|
||||
x = self.proj(cnn_feat)
|
||||
x = x.flatten(2).transpose([0, 2, 1])
|
||||
return x
|
||||
|
||||
|
||||
class CPVTV2(PyramidVisionTransformer):
|
||||
"""
|
||||
Adopts PEG and GAP from CPVT, so the cls token is no longer required.
PEG encodes the position on the fly, which helps considerably when the
input resolution changes between training and downstream tasks such as
segmentation and detection.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=4,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dims=[64, 128, 256, 512],
|
||||
num_heads=[1, 2, 4, 8],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
block_cls=Block):
|
||||
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
|
||||
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
|
||||
attn_drop_rate, drop_path_rate, norm_layer, depths,
|
||||
sr_ratios, block_cls)
|
||||
del self.pos_embeds
|
||||
del self.cls_token
|
||||
self.pos_block = nn.LayerList(
|
||||
[PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims])
|
||||
self.apply(self._init_weights)
|
||||
|
||||
def _init_weights(self, m):
|
||||
import math
|
||||
if isinstance(m, nn.Linear):
|
||||
trunc_normal_(m.weight)
|
||||
if isinstance(m, nn.Linear) and m.bias is not None:
|
||||
zeros_(m.bias)
|
||||
elif isinstance(m, nn.LayerNorm):
|
||||
zeros_(m.bias)
|
||||
ones_(m.weight)
|
||||
elif isinstance(m, nn.Conv2D):
|
||||
fan_out = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
|
||||
fan_out //= m._groups
|
||||
normal_(0, math.sqrt(2.0 / fan_out))(m.weight)
|
||||
if m.bias is not None:
|
||||
zeros_(m.bias)
|
||||
elif isinstance(m, nn.BatchNorm2D):
|
||||
# use the initializers imported above rather than the torch-style .data API
ones_(m.weight)
zeros_(m.bias)
|
||||
|
||||
def forward_features(self, x):
|
||||
B = x.shape[0]
|
||||
|
||||
for i in range(len(self.depths)):
|
||||
x, (H, W) = self.patch_embeds[i](x)
|
||||
x = self.pos_drops[i](x)
|
||||
|
||||
for j, blk in enumerate(self.blocks[i]):
|
||||
x = blk(x, H, W)
|
||||
if j == 0:
|
||||
x = self.pos_block[i](x, H, W) # PEG here
|
||||
|
||||
if i < len(self.depths) - 1:
|
||||
x = x.reshape([B, H, W, x.shape[-1]]).transpose([0, 3, 1, 2])
|
||||
|
||||
x = self.norm(x)
|
||||
return x.mean(axis=1) # GAP here
|
||||
|
||||
|
||||
class PCPVT(CPVTV2):
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=4,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dims=[64, 128, 256],
|
||||
num_heads=[1, 2, 4],
|
||||
mlp_ratios=[4, 4, 4],
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
depths=[4, 4, 4],
|
||||
sr_ratios=[4, 2, 1],
|
||||
block_cls=SBlock):
|
||||
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
|
||||
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
|
||||
attn_drop_rate, drop_path_rate, norm_layer, depths,
|
||||
sr_ratios, block_cls)
|
||||
|
||||
|
||||
class ALTGVT(PCPVT):
|
||||
"""
|
||||
alias Twins-SVT
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=4,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dims=[64, 128, 256],
|
||||
num_heads=[1, 2, 4],
|
||||
mlp_ratios=[4, 4, 4],
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
depths=[4, 4, 4],
|
||||
sr_ratios=[4, 2, 1],
|
||||
block_cls=GroupBlock,
|
||||
wss=[7, 7, 7]):
|
||||
super().__init__(img_size, patch_size, in_chans, class_num, embed_dims,
|
||||
num_heads, mlp_ratios, qkv_bias, qk_scale, drop_rate,
|
||||
attn_drop_rate, drop_path_rate, norm_layer, depths,
|
||||
sr_ratios, block_cls)
|
||||
del self.blocks
|
||||
self.wss = wss
|
||||
# transformer encoder
|
||||
dpr = [
|
||||
x.numpy()[0]
|
||||
for x in paddle.linspace(0, drop_path_rate, sum(depths))
|
||||
] # stochastic depth decay rule
|
||||
cur = 0
|
||||
self.blocks = nn.LayerList()
|
||||
for k in range(len(depths)):
|
||||
_block = nn.LayerList([
|
||||
block_cls(
|
||||
dim=embed_dims[k],
|
||||
num_heads=num_heads[k],
|
||||
mlp_ratio=mlp_ratios[k],
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
drop=drop_rate,
|
||||
attn_drop=attn_drop_rate,
|
||||
drop_path=dpr[cur + i],
|
||||
norm_layer=norm_layer,
|
||||
sr_ratio=sr_ratios[k],
|
||||
ws=1 if i % 2 == 1 else wss[k]) for i in range(depths[k])
|
||||
])
|
||||
self.blocks.append(_block)
|
||||
cur += depths[k]
|
||||
self.apply(self._init_weights)
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def pcpvt_small(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = CPVTV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["pcpvt_small"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def pcpvt_base(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = CPVTV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 4, 18, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["pcpvt_base"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def pcpvt_large(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = CPVTV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 8, 27, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["pcpvt_large"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def alt_gvt_small(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ALTGVT(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 256, 512],
|
||||
num_heads=[2, 4, 8, 16],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[2, 2, 10, 4],
|
||||
wss=[7, 7, 7, 7],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["alt_gvt_small"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def alt_gvt_base(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ALTGVT(
|
||||
patch_size=4,
|
||||
embed_dims=[96, 192, 384, 768],
|
||||
num_heads=[3, 6, 12, 24],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[2, 2, 18, 2],
|
||||
wss=[7, 7, 7, 7],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["alt_gvt_base"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def alt_gvt_large(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ALTGVT(
|
||||
patch_size=4,
|
||||
embed_dims=[128, 256, 512, 1024],
|
||||
num_heads=[4, 8, 16, 32],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[2, 2, 18, 2],
|
||||
wss=[7, 7, 7, 7],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["alt_gvt_large"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,293 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/PingoLH/Pytorch-HarDNet
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
'HarDNet39_ds':
|
||||
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
|
||||
'HarDNet68_ds':
|
||||
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams',
|
||||
'HarDNet68':
|
||||
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams',
|
||||
'HarDNet85':
|
||||
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def ConvLayer(in_channels,
|
||||
out_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
bias_attr=False):
|
||||
layer = nn.Sequential(
|
||||
('conv', nn.Conv2D(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=kernel_size // 2,
|
||||
groups=1,
|
||||
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)),
|
||||
('relu', nn.ReLU6()))
|
||||
return layer
|
||||
|
||||
|
||||
def DWConvLayer(in_channels,
|
||||
out_channels,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
bias_attr=False):
|
||||
layer = nn.Sequential(
|
||||
('dwconv', nn.Conv2D(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
groups=out_channels,
|
||||
bias_attr=bias_attr)), ('norm', nn.BatchNorm2D(out_channels)))
|
||||
return layer
|
||||
|
||||
|
||||
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
|
||||
layer = nn.Sequential(
|
||||
('layer1', ConvLayer(
|
||||
in_channels, out_channels, kernel_size=kernel_size)),
|
||||
('layer2', DWConvLayer(
|
||||
out_channels, out_channels, stride=stride)))
|
||||
return layer
|
||||
|
||||
|
||||
class HarDBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
growth_rate,
|
||||
grmul,
|
||||
n_layers,
|
||||
keepBase=False,
|
||||
residual_out=False,
|
||||
dwconv=False):
|
||||
super().__init__()
|
||||
self.keepBase = keepBase
|
||||
self.links = []
|
||||
layers_ = []
|
||||
self.out_channels = 0 # if upsample else in_channels
|
||||
for i in range(n_layers):
|
||||
outch, inch, link = self.get_link(i + 1, in_channels, growth_rate,
|
||||
grmul)
|
||||
self.links.append(link)
|
||||
if dwconv:
|
||||
layers_.append(CombConvLayer(inch, outch))
|
||||
else:
|
||||
layers_.append(ConvLayer(inch, outch))
|
||||
|
||||
if (i % 2 == 0) or (i == n_layers - 1):
|
||||
self.out_channels += outch
|
||||
# print("Blk out =",self.out_channels)
|
||||
self.layers = nn.LayerList(layers_)
|
||||
|
||||
def get_link(self, layer, base_ch, growth_rate, grmul):
|
||||
if layer == 0:
|
||||
return base_ch, 0, []
|
||||
out_channels = growth_rate
|
||||
|
||||
link = []
|
||||
for i in range(10):
|
||||
dv = 2**i
|
||||
if layer % dv == 0:
|
||||
k = layer - dv
|
||||
link.append(k)
|
||||
if i > 0:
|
||||
out_channels *= grmul
|
||||
|
||||
out_channels = int(int(out_channels + 1) / 2) * 2
|
||||
in_channels = 0
|
||||
|
||||
for i in link:
|
||||
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
|
||||
in_channels += ch
|
||||
|
||||
return out_channels, in_channels, link
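        # Worked example (illustrative): for layer 4, the loop above links back
        # to layers 3, 2 and 0 (k = 4 - 2**i for i = 0, 1, 2), and out_channels
        # starts from growth_rate, is multiplied by grmul once for each
        # power-of-two link beyond the first, then rounded to an even integer.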
|
||||
|
||||
def forward(self, x):
|
||||
layers_ = [x]
|
||||
|
||||
for layer in range(len(self.layers)):
|
||||
link = self.links[layer]
|
||||
tin = []
|
||||
for i in link:
|
||||
tin.append(layers_[i])
|
||||
if len(tin) > 1:
|
||||
x = paddle.concat(tin, 1)
|
||||
else:
|
||||
x = tin[0]
|
||||
out = self.layers[layer](x)
|
||||
layers_.append(out)
|
||||
|
||||
t = len(layers_)
|
||||
out_ = []
|
||||
for i in range(t):
|
||||
if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
|
||||
out_.append(layers_[i])
|
||||
out = paddle.concat(out_, 1)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class HarDNet(nn.Layer):
|
||||
def __init__(self,
|
||||
depth_wise=False,
|
||||
arch=85,
|
||||
class_num=1000,
|
||||
with_pool=True):
|
||||
super().__init__()
|
||||
first_ch = [32, 64]
|
||||
second_kernel = 3
|
||||
max_pool = True
|
||||
grmul = 1.7
|
||||
drop_rate = 0.1
|
||||
|
||||
# HarDNet68
|
||||
ch_list = [128, 256, 320, 640, 1024]
|
||||
gr = [14, 16, 20, 40, 160]
|
||||
n_layers = [8, 16, 16, 16, 4]
|
||||
downSamp = [1, 0, 1, 1, 0]
|
||||
|
||||
if arch == 85:
|
||||
# HarDNet85
|
||||
first_ch = [48, 96]
|
||||
ch_list = [192, 256, 320, 480, 720, 1280]
|
||||
gr = [24, 24, 28, 36, 48, 256]
|
||||
n_layers = [8, 16, 16, 16, 16, 4]
|
||||
downSamp = [1, 0, 1, 0, 1, 0]
|
||||
drop_rate = 0.2
|
||||
|
||||
elif arch == 39:
|
||||
# HarDNet39
|
||||
first_ch = [24, 48]
|
||||
ch_list = [96, 320, 640, 1024]
|
||||
grmul = 1.6
|
||||
gr = [16, 20, 64, 160]
|
||||
n_layers = [4, 16, 8, 4]
|
||||
downSamp = [1, 1, 1, 0]
|
||||
|
||||
if depth_wise:
|
||||
second_kernel = 1
|
||||
max_pool = False
|
||||
drop_rate = 0.05
|
||||
|
||||
blks = len(n_layers)
|
||||
self.base = nn.LayerList([])
|
||||
|
||||
# First Layer: Standard Conv3x3, Stride=2
|
||||
self.base.append(
|
||||
ConvLayer(
|
||||
in_channels=3,
|
||||
out_channels=first_ch[0],
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
bias_attr=False))
|
||||
|
||||
# Second Layer
|
||||
self.base.append(
|
||||
ConvLayer(
|
||||
first_ch[0], first_ch[1], kernel_size=second_kernel))
|
||||
|
||||
# Maxpooling or DWConv3x3 downsampling
|
||||
if max_pool:
|
||||
self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
|
||||
else:
|
||||
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
|
||||
|
||||
# Build all HarDNet blocks
|
||||
ch = first_ch[1]
|
||||
for i in range(blks):
|
||||
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
|
||||
ch = blk.out_channels
|
||||
self.base.append(blk)
|
||||
|
||||
if i == blks - 1 and arch == 85:
|
||||
self.base.append(nn.Dropout(0.1))
|
||||
|
||||
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
|
||||
ch = ch_list[i]
|
||||
if downSamp[i] == 1:
|
||||
if max_pool:
|
||||
self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
|
||||
else:
|
||||
self.base.append(DWConvLayer(ch, ch, stride=2))
|
||||
|
||||
ch = ch_list[blks - 1]
|
||||
|
||||
layers = []
|
||||
|
||||
if with_pool:
|
||||
layers.append(nn.AdaptiveAvgPool2D((1, 1)))
|
||||
|
||||
if class_num > 0:
|
||||
layers.append(nn.Flatten())
|
||||
layers.append(nn.Dropout(drop_rate))
|
||||
layers.append(nn.Linear(ch, class_num))
|
||||
|
||||
self.base.append(nn.Sequential(*layers))
|
||||
|
||||
def forward(self, x):
|
||||
for layer in self.base:
|
||||
x = layer(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def HarDNet39_ds(pretrained=False, **kwargs):
|
||||
model = HarDNet(arch=39, depth_wise=True, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
|
||||
return model
|
||||
|
||||
|
||||
def HarDNet68_ds(pretrained=False, **kwargs):
|
||||
model = HarDNet(arch=68, depth_wise=True, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
|
||||
return model
|
||||
|
||||
|
||||
def HarDNet68(pretrained=False, **kwargs):
|
||||
model = HarDNet(arch=68, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
|
||||
return model
|
||||
|
||||
|
||||
def HarDNet85(pretrained=False, **kwargs):
|
||||
model = HarDNet(arch=85, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
|
||||
return model
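# Usage sketch: a minimal forward pass through one of the HarDNet builders
# above. Illustrative only; assumes `paddle` is imported at module level and
# that a 224x224 input is appropriate.
if __name__ == "__main__":
    net = HarDNet68(pretrained=False)
    net.eval()
    logits = net(paddle.rand([1, 3, 224, 224]))
    print(logits.shape)  # expected: [1, 1000]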
|
||||
@ -0,0 +1,477 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"InceptionV4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
act='relu',
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
bn_name = name + "_bn"
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + "_scale"),
|
||||
bias_attr=ParamAttr(name=bn_name + "_offset"),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class InceptionStem(nn.Layer):
|
||||
def __init__(self):
|
||||
super(InceptionStem, self).__init__()
|
||||
self._conv_1 = ConvBNLayer(
|
||||
3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2")
|
||||
self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1")
|
||||
self._conv_3 = ConvBNLayer(
|
||||
32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1")
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
|
||||
self._conv2 = ConvBNLayer(
|
||||
64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2")
|
||||
self._conv1_1 = ConvBNLayer(
|
||||
160, 64, 1, act="relu", name="inception_stem2_3x3_reduce")
|
||||
self._conv1_2 = ConvBNLayer(
|
||||
64, 96, 3, act="relu", name="inception_stem2_3x3")
|
||||
self._conv2_1 = ConvBNLayer(
|
||||
160, 64, 1, act="relu", name="inception_stem2_1x7_reduce")
|
||||
self._conv2_2 = ConvBNLayer(
|
||||
64,
|
||||
64, (7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu",
|
||||
name="inception_stem2_1x7")
|
||||
self._conv2_3 = ConvBNLayer(
|
||||
64,
|
||||
64, (1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu",
|
||||
name="inception_stem2_7x1")
|
||||
self._conv2_4 = ConvBNLayer(
|
||||
64, 96, 3, act="relu", name="inception_stem2_3x3_2")
|
||||
self._conv3 = ConvBNLayer(
|
||||
192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2")
|
||||
|
||||
def forward(self, inputs):
|
||||
conv = self._conv_1(inputs)
|
||||
conv = self._conv_2(conv)
|
||||
conv = self._conv_3(conv)
|
||||
|
||||
pool1 = self._pool(conv)
|
||||
conv2 = self._conv2(conv)
|
||||
concat = paddle.concat([pool1, conv2], axis=1)
|
||||
|
||||
conv1 = self._conv1_1(concat)
|
||||
conv1 = self._conv1_2(conv1)
|
||||
|
||||
conv2 = self._conv2_1(concat)
|
||||
conv2 = self._conv2_2(conv2)
|
||||
conv2 = self._conv2_3(conv2)
|
||||
conv2 = self._conv2_4(conv2)
|
||||
|
||||
concat = paddle.concat([conv1, conv2], axis=1)
|
||||
|
||||
conv1 = self._conv3(concat)
|
||||
pool1 = self._pool(concat)
|
||||
|
||||
concat = paddle.concat([conv1, pool1], axis=1)
|
||||
return concat
|
||||
|
||||
|
||||
class InceptionA(nn.Layer):
|
||||
def __init__(self, name):
|
||||
super(InceptionA, self).__init__()
|
||||
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
|
||||
self._conv1 = ConvBNLayer(
|
||||
384, 96, 1, act="relu", name="inception_a" + name + "_1x1")
|
||||
self._conv2 = ConvBNLayer(
|
||||
384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2")
|
||||
self._conv3_1 = ConvBNLayer(
|
||||
384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce")
|
||||
self._conv3_2 = ConvBNLayer(
|
||||
64,
|
||||
96,
|
||||
3,
|
||||
padding=1,
|
||||
act="relu",
|
||||
name="inception_a" + name + "_3x3")
|
||||
self._conv4_1 = ConvBNLayer(
|
||||
384,
|
||||
64,
|
||||
1,
|
||||
act="relu",
|
||||
name="inception_a" + name + "_3x3_2_reduce")
|
||||
self._conv4_2 = ConvBNLayer(
|
||||
64,
|
||||
96,
|
||||
3,
|
||||
padding=1,
|
||||
act="relu",
|
||||
name="inception_a" + name + "_3x3_2")
|
||||
self._conv4_3 = ConvBNLayer(
|
||||
96,
|
||||
96,
|
||||
3,
|
||||
padding=1,
|
||||
act="relu",
|
||||
name="inception_a" + name + "_3x3_3")
|
||||
|
||||
def forward(self, inputs):
|
||||
pool1 = self._pool(inputs)
|
||||
conv1 = self._conv1(pool1)
|
||||
|
||||
conv2 = self._conv2(inputs)
|
||||
|
||||
conv3 = self._conv3_1(inputs)
|
||||
conv3 = self._conv3_2(conv3)
|
||||
|
||||
conv4 = self._conv4_1(inputs)
|
||||
conv4 = self._conv4_2(conv4)
|
||||
conv4 = self._conv4_3(conv4)
|
||||
|
||||
concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
|
||||
return concat
|
||||
|
||||
|
||||
class ReductionA(nn.Layer):
|
||||
def __init__(self):
|
||||
super(ReductionA, self).__init__()
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
|
||||
self._conv2 = ConvBNLayer(
|
||||
384, 384, 3, stride=2, act="relu", name="reduction_a_3x3")
|
||||
self._conv3_1 = ConvBNLayer(
|
||||
384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce")
|
||||
self._conv3_2 = ConvBNLayer(
|
||||
192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2")
|
||||
self._conv3_3 = ConvBNLayer(
|
||||
224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3")
|
||||
|
||||
def forward(self, inputs):
|
||||
pool1 = self._pool(inputs)
|
||||
conv2 = self._conv2(inputs)
|
||||
conv3 = self._conv3_1(inputs)
|
||||
conv3 = self._conv3_2(conv3)
|
||||
conv3 = self._conv3_3(conv3)
|
||||
concat = paddle.concat([pool1, conv2, conv3], axis=1)
|
||||
return concat
|
||||
|
||||
|
||||
class InceptionB(nn.Layer):
|
||||
def __init__(self, name=None):
|
||||
super(InceptionB, self).__init__()
|
||||
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
|
||||
self._conv1 = ConvBNLayer(
|
||||
1024, 128, 1, act="relu", name="inception_b" + name + "_1x1")
|
||||
self._conv2 = ConvBNLayer(
|
||||
1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2")
|
||||
self._conv3_1 = ConvBNLayer(
|
||||
1024,
|
||||
192,
|
||||
1,
|
||||
act="relu",
|
||||
name="inception_b" + name + "_1x7_reduce")
|
||||
self._conv3_2 = ConvBNLayer(
|
||||
192,
|
||||
224, (1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_1x7")
|
||||
self._conv3_3 = ConvBNLayer(
|
||||
224,
|
||||
256, (7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_7x1")
|
||||
self._conv4_1 = ConvBNLayer(
|
||||
1024,
|
||||
192,
|
||||
1,
|
||||
act="relu",
|
||||
name="inception_b" + name + "_7x1_2_reduce")
|
||||
self._conv4_2 = ConvBNLayer(
|
||||
192,
|
||||
192, (1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_1x7_2")
|
||||
self._conv4_3 = ConvBNLayer(
|
||||
192,
|
||||
224, (7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_7x1_2")
|
||||
self._conv4_4 = ConvBNLayer(
|
||||
224,
|
||||
224, (1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_1x7_3")
|
||||
self._conv4_5 = ConvBNLayer(
|
||||
224,
|
||||
256, (7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu",
|
||||
name="inception_b" + name + "_7x1_3")
|
||||
|
||||
def forward(self, inputs):
|
||||
pool1 = self._pool(inputs)
|
||||
conv1 = self._conv1(pool1)
|
||||
|
||||
conv2 = self._conv2(inputs)
|
||||
|
||||
conv3 = self._conv3_1(inputs)
|
||||
conv3 = self._conv3_2(conv3)
|
||||
conv3 = self._conv3_3(conv3)
|
||||
|
||||
conv4 = self._conv4_1(inputs)
|
||||
conv4 = self._conv4_2(conv4)
|
||||
conv4 = self._conv4_3(conv4)
|
||||
conv4 = self._conv4_4(conv4)
|
||||
conv4 = self._conv4_5(conv4)
|
||||
|
||||
concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1)
|
||||
return concat
|
||||
|
||||
|
||||
class ReductionB(nn.Layer):
|
||||
def __init__(self):
|
||||
super(ReductionB, self).__init__()
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
|
||||
self._conv2_1 = ConvBNLayer(
|
||||
1024, 192, 1, act="relu", name="reduction_b_3x3_reduce")
|
||||
self._conv2_2 = ConvBNLayer(
|
||||
192, 192, 3, stride=2, act="relu", name="reduction_b_3x3")
|
||||
self._conv3_1 = ConvBNLayer(
|
||||
1024, 256, 1, act="relu", name="reduction_b_1x7_reduce")
|
||||
self._conv3_2 = ConvBNLayer(
|
||||
256,
|
||||
256, (1, 7),
|
||||
padding=(0, 3),
|
||||
act="relu",
|
||||
name="reduction_b_1x7")
|
||||
self._conv3_3 = ConvBNLayer(
|
||||
256,
|
||||
320, (7, 1),
|
||||
padding=(3, 0),
|
||||
act="relu",
|
||||
name="reduction_b_7x1")
|
||||
self._conv3_4 = ConvBNLayer(
|
||||
320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2")
|
||||
|
||||
def forward(self, inputs):
|
||||
pool1 = self._pool(inputs)
|
||||
|
||||
conv2 = self._conv2_1(inputs)
|
||||
conv2 = self._conv2_2(conv2)
|
||||
|
||||
conv3 = self._conv3_1(inputs)
|
||||
conv3 = self._conv3_2(conv3)
|
||||
conv3 = self._conv3_3(conv3)
|
||||
conv3 = self._conv3_4(conv3)
|
||||
|
||||
concat = paddle.concat([pool1, conv2, conv3], axis=1)
|
||||
|
||||
return concat
|
||||
|
||||
|
||||
class InceptionC(nn.Layer):
|
||||
def __init__(self, name=None):
|
||||
super(InceptionC, self).__init__()
|
||||
self._pool = AvgPool2D(kernel_size=3, stride=1, padding=1)
|
||||
self._conv1 = ConvBNLayer(
|
||||
1536, 256, 1, act="relu", name="inception_c" + name + "_1x1")
|
||||
self._conv2 = ConvBNLayer(
|
||||
1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2")
|
||||
self._conv3_0 = ConvBNLayer(
|
||||
1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3")
|
||||
self._conv3_1 = ConvBNLayer(
|
||||
384,
|
||||
256, (1, 3),
|
||||
padding=(0, 1),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_1x3")
|
||||
self._conv3_2 = ConvBNLayer(
|
||||
384,
|
||||
256, (3, 1),
|
||||
padding=(1, 0),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_3x1")
|
||||
self._conv4_0 = ConvBNLayer(
|
||||
1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4")
|
||||
self._conv4_00 = ConvBNLayer(
|
||||
384,
|
||||
448, (1, 3),
|
||||
padding=(0, 1),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_1x3_2")
|
||||
self._conv4_000 = ConvBNLayer(
|
||||
448,
|
||||
512, (3, 1),
|
||||
padding=(1, 0),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_3x1_2")
|
||||
self._conv4_1 = ConvBNLayer(
|
||||
512,
|
||||
256, (1, 3),
|
||||
padding=(0, 1),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_1x3_3")
|
||||
self._conv4_2 = ConvBNLayer(
|
||||
512,
|
||||
256, (3, 1),
|
||||
padding=(1, 0),
|
||||
act="relu",
|
||||
name="inception_c" + name + "_3x1_3")
|
||||
|
||||
def forward(self, inputs):
|
||||
pool1 = self._pool(inputs)
|
||||
conv1 = self._conv1(pool1)
|
||||
|
||||
conv2 = self._conv2(inputs)
|
||||
|
||||
conv3 = self._conv3_0(inputs)
|
||||
conv3_1 = self._conv3_1(conv3)
|
||||
conv3_2 = self._conv3_2(conv3)
|
||||
|
||||
conv4 = self._conv4_0(inputs)
|
||||
conv4 = self._conv4_00(conv4)
|
||||
conv4 = self._conv4_000(conv4)
|
||||
conv4_1 = self._conv4_1(conv4)
|
||||
conv4_2 = self._conv4_2(conv4)
|
||||
|
||||
concat = paddle.concat(
|
||||
[conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1)
|
||||
|
||||
return concat
|
||||
|
||||
|
||||
class InceptionV4DY(nn.Layer):
|
||||
def __init__(self, class_num=1000):
|
||||
super(InceptionV4DY, self).__init__()
|
||||
self._inception_stem = InceptionStem()
|
||||
|
||||
self._inceptionA_1 = InceptionA(name="1")
|
||||
self._inceptionA_2 = InceptionA(name="2")
|
||||
self._inceptionA_3 = InceptionA(name="3")
|
||||
self._inceptionA_4 = InceptionA(name="4")
|
||||
self._reductionA = ReductionA()
|
||||
|
||||
self._inceptionB_1 = InceptionB(name="1")
|
||||
self._inceptionB_2 = InceptionB(name="2")
|
||||
self._inceptionB_3 = InceptionB(name="3")
|
||||
self._inceptionB_4 = InceptionB(name="4")
|
||||
self._inceptionB_5 = InceptionB(name="5")
|
||||
self._inceptionB_6 = InceptionB(name="6")
|
||||
self._inceptionB_7 = InceptionB(name="7")
|
||||
self._reductionB = ReductionB()
|
||||
|
||||
self._inceptionC_1 = InceptionC(name="1")
|
||||
self._inceptionC_2 = InceptionC(name="2")
|
||||
self._inceptionC_3 = InceptionC(name="3")
|
||||
|
||||
self.avg_pool = AdaptiveAvgPool2D(1)
|
||||
self._drop = Dropout(p=0.2, mode="downscale_in_infer")
|
||||
stdv = 1.0 / math.sqrt(1536 * 1.0)
|
||||
self.out = Linear(
|
||||
1536,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="final_fc_weights"),
|
||||
bias_attr=ParamAttr(name="final_fc_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._inception_stem(inputs)
|
||||
|
||||
x = self._inceptionA_1(x)
|
||||
x = self._inceptionA_2(x)
|
||||
x = self._inceptionA_3(x)
|
||||
x = self._inceptionA_4(x)
|
||||
x = self._reductionA(x)
|
||||
|
||||
x = self._inceptionB_1(x)
|
||||
x = self._inceptionB_2(x)
|
||||
x = self._inceptionB_3(x)
|
||||
x = self._inceptionB_4(x)
|
||||
x = self._inceptionB_5(x)
|
||||
x = self._inceptionB_6(x)
|
||||
x = self._inceptionB_7(x)
|
||||
x = self._reductionB(x)
|
||||
|
||||
x = self._inceptionC_1(x)
|
||||
x = self._inceptionC_2(x)
|
||||
x = self._inceptionC_3(x)
|
||||
|
||||
x = self.avg_pool(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
x = self._drop(x)
|
||||
x = self.out(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = InceptionV4DY(**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
|
||||
return model
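
# --- Usage sketch (added for illustration; not part of the original file) ----
# A minimal, hedged example of how the factory above is typically called:
# build the backbone with a custom head size and run a dummy forward pass.
# The 299x299 input resolution is an assumption (the usual InceptionV4
# training size); any resolution large enough to survive the stem's stride-2
# stages also works.
if __name__ == "__main__":
    m = InceptionV4(pretrained=False, class_num=100)
    dummy = paddle.randn([1, 3, 299, 299])
    print(m(dummy).shape)  # expected: [1, 100]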
|
||||
@ -0,0 +1,589 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/facebookresearch/LeViT
|
||||
|
||||
import itertools
|
||||
import math
|
||||
import warnings
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import TruncatedNormal, Constant
|
||||
from paddle.regularizer import L2Decay
|
||||
|
||||
from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"LeViT_128S":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
|
||||
"LeViT_128":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
|
||||
"LeViT_192":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
|
||||
"LeViT_256":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
|
||||
"LeViT_384":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def cal_attention_biases(attention_biases, attention_bias_idxs):
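    # (comment added) Gathers the learned relative-position biases for every
    # (query, key) pair: `attention_biases` is [num_heads, num_unique_offsets]
    # and `attention_bias_idxs` maps each pair to one of those offsets, so the
    # result is [num_heads, num_queries, num_keys] and can be added directly
    # to the attention logits.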
|
||||
gather_list = []
|
||||
attention_bias_t = paddle.transpose(attention_biases, (1, 0))
|
||||
nums = attention_bias_idxs.shape[0]
|
||||
for idx in range(nums):
|
||||
gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx])
|
||||
gather_list.append(gather)
|
||||
shape0, shape1 = attention_bias_idxs.shape
|
||||
gather = paddle.concat(gather_list)
|
||||
return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1))
|
||||
|
||||
|
||||
class Conv2d_BN(nn.Sequential):
|
||||
def __init__(self,
|
||||
a,
|
||||
b,
|
||||
ks=1,
|
||||
stride=1,
|
||||
pad=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bn_weight_init=1,
|
||||
resolution=-10000):
|
||||
super().__init__()
|
||||
self.add_sublayer(
|
||||
'c',
|
||||
nn.Conv2D(
|
||||
a, b, ks, stride, pad, dilation, groups, bias_attr=False))
|
||||
bn = nn.BatchNorm2D(b)
|
||||
ones_(bn.weight)
|
||||
zeros_(bn.bias)
|
||||
self.add_sublayer('bn', bn)
|
||||
|
||||
|
||||
class Linear_BN(nn.Sequential):
|
||||
def __init__(self, a, b, bn_weight_init=1):
|
||||
super().__init__()
|
||||
self.add_sublayer('c', nn.Linear(a, b, bias_attr=False))
|
||||
bn = nn.BatchNorm1D(b)
|
||||
if bn_weight_init == 0:
|
||||
zeros_(bn.weight)
|
||||
else:
|
||||
ones_(bn.weight)
|
||||
zeros_(bn.bias)
|
||||
self.add_sublayer('bn', bn)
|
||||
|
||||
def forward(self, x):
|
||||
l, bn = self._sub_layers.values()
|
||||
x = l(x)
|
||||
return paddle.reshape(bn(x.flatten(0, 1)), x.shape)
|
||||
|
||||
|
||||
class BN_Linear(nn.Sequential):
|
||||
def __init__(self, a, b, bias=True, std=0.02):
|
||||
super().__init__()
|
||||
self.add_sublayer('bn', nn.BatchNorm1D(a))
|
||||
l = nn.Linear(a, b, bias_attr=bias)
|
||||
trunc_normal_(l.weight)
|
||||
if bias:
|
||||
zeros_(l.bias)
|
||||
self.add_sublayer('l', l)
|
||||
|
||||
|
||||
def b16(n, activation, resolution=224):
|
||||
return nn.Sequential(
|
||||
Conv2d_BN(
|
||||
3, n // 8, 3, 2, 1, resolution=resolution),
|
||||
activation(),
|
||||
Conv2d_BN(
|
||||
n // 8, n // 4, 3, 2, 1, resolution=resolution // 2),
|
||||
activation(),
|
||||
Conv2d_BN(
|
||||
n // 4, n // 2, 3, 2, 1, resolution=resolution // 4),
|
||||
activation(),
|
||||
Conv2d_BN(
|
||||
n // 2, n, 3, 2, 1, resolution=resolution // 8))
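
# (comment added) b16() is the convolutional "patch embedding" used by LeViT:
# four stride-2 Conv2d_BN stages, each followed by the given activation,
# reduce the spatial size 16x while growing channels n/8 -> n/4 -> n/2 -> n,
# so a 224x224 image becomes a 14x14 grid of n-dimensional tokens.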
|
||||
|
||||
|
||||
class Residual(nn.Layer):
|
||||
def __init__(self, m, drop):
|
||||
super().__init__()
|
||||
self.m = m
|
||||
self.drop = drop
|
||||
|
||||
def forward(self, x):
|
||||
if self.training and self.drop > 0:
|
||||
y = paddle.rand(
|
||||
shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32")
|
||||
y = y.divide(paddle.full_like(y, 1 - self.drop))
|
||||
            # apply the residual branch and scale it by the stochastic-depth
            # keep mask; the original line dropped the branch output entirely.
            return paddle.add(x, self.m(x) * y)
|
||||
else:
|
||||
return paddle.add(x, self.m(x))
|
||||
|
||||
|
||||
class Attention(nn.Layer):
|
||||
def __init__(self,
|
||||
dim,
|
||||
key_dim,
|
||||
num_heads=8,
|
||||
attn_ratio=4,
|
||||
activation=None,
|
||||
resolution=14):
|
||||
super().__init__()
|
||||
self.num_heads = num_heads
|
||||
self.scale = key_dim**-0.5
|
||||
self.key_dim = key_dim
|
||||
self.nh_kd = nh_kd = key_dim * num_heads
|
||||
self.d = int(attn_ratio * key_dim)
|
||||
self.dh = int(attn_ratio * key_dim) * num_heads
|
||||
self.attn_ratio = attn_ratio
|
||||
self.h = self.dh + nh_kd * 2
|
||||
self.qkv = Linear_BN(dim, self.h)
|
||||
self.proj = nn.Sequential(
|
||||
activation(), Linear_BN(
|
||||
self.dh, dim, bn_weight_init=0))
|
||||
points = list(itertools.product(range(resolution), range(resolution)))
|
||||
N = len(points)
|
||||
attention_offsets = {}
|
||||
idxs = []
|
||||
for p1 in points:
|
||||
for p2 in points:
|
||||
offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
|
||||
if offset not in attention_offsets:
|
||||
attention_offsets[offset] = len(attention_offsets)
|
||||
idxs.append(attention_offsets[offset])
|
||||
self.attention_biases = self.create_parameter(
|
||||
shape=(num_heads, len(attention_offsets)),
|
||||
default_initializer=zeros_,
|
||||
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
|
||||
tensor_idxs = paddle.to_tensor(idxs, dtype='int64')
|
||||
self.register_buffer('attention_bias_idxs',
|
||||
paddle.reshape(tensor_idxs, [N, N]))
|
||||
|
||||
@paddle.no_grad()
|
||||
def train(self, mode=True):
|
||||
if mode:
|
||||
super().train()
|
||||
else:
|
||||
super().eval()
|
||||
if mode and hasattr(self, 'ab'):
|
||||
del self.ab
|
||||
else:
|
||||
self.ab = cal_attention_biases(self.attention_biases,
|
||||
self.attention_bias_idxs)
|
||||
|
||||
def forward(self, x):
|
||||
self.training = True
|
||||
B, N, C = x.shape
|
||||
qkv = self.qkv(x)
|
||||
qkv = paddle.reshape(qkv,
|
||||
[B, N, self.num_heads, self.h // self.num_heads])
|
||||
q, k, v = paddle.split(
|
||||
qkv, [self.key_dim, self.key_dim, self.d], axis=3)
|
||||
q = paddle.transpose(q, perm=[0, 2, 1, 3])
|
||||
k = paddle.transpose(k, perm=[0, 2, 1, 3])
|
||||
v = paddle.transpose(v, perm=[0, 2, 1, 3])
|
||||
k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2])
|
||||
|
||||
if self.training:
|
||||
attention_biases = cal_attention_biases(self.attention_biases,
|
||||
self.attention_bias_idxs)
|
||||
else:
|
||||
attention_biases = self.ab
|
||||
attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases)
|
||||
attn = F.softmax(attn)
|
||||
x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3])
|
||||
x = paddle.reshape(x, [B, N, self.dh])
|
||||
x = self.proj(x)
|
||||
return x
|
||||
|
||||
|
||||
class Subsample(nn.Layer):
|
||||
def __init__(self, stride, resolution):
|
||||
super().__init__()
|
||||
self.stride = stride
|
||||
self.resolution = resolution
|
||||
|
||||
def forward(self, x):
|
||||
B, N, C = x.shape
|
||||
x = paddle.reshape(x, [B, self.resolution, self.resolution, C])
|
||||
end1, end2 = x.shape[1], x.shape[2]
|
||||
x = x[:, 0:end1:self.stride, 0:end2:self.stride]
|
||||
x = paddle.reshape(x, [B, -1, C])
|
||||
return x
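
# (comment added) Subsample treats the token sequence as a resolution x
# resolution grid, keeps every `stride`-th row and column, and flattens back,
# so with stride=2 the token count drops from N to roughly N / 4.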
|
||||
|
||||
|
||||
class AttentionSubsample(nn.Layer):
|
||||
def __init__(self,
|
||||
in_dim,
|
||||
out_dim,
|
||||
key_dim,
|
||||
num_heads=8,
|
||||
attn_ratio=2,
|
||||
activation=None,
|
||||
stride=2,
|
||||
resolution=14,
|
||||
resolution_=7):
|
||||
super().__init__()
|
||||
self.num_heads = num_heads
|
||||
self.scale = key_dim**-0.5
|
||||
self.key_dim = key_dim
|
||||
self.nh_kd = nh_kd = key_dim * num_heads
|
||||
self.d = int(attn_ratio * key_dim)
|
||||
self.dh = int(attn_ratio * key_dim) * self.num_heads
|
||||
self.attn_ratio = attn_ratio
|
||||
self.resolution_ = resolution_
|
||||
self.resolution_2 = resolution_**2
|
||||
self.training = True
|
||||
h = self.dh + nh_kd
|
||||
self.kv = Linear_BN(in_dim, h)
|
||||
|
||||
self.q = nn.Sequential(
|
||||
Subsample(stride, resolution), Linear_BN(in_dim, nh_kd))
|
||||
self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim))
|
||||
|
||||
self.stride = stride
|
||||
self.resolution = resolution
|
||||
points = list(itertools.product(range(resolution), range(resolution)))
|
||||
points_ = list(
|
||||
itertools.product(range(resolution_), range(resolution_)))
|
||||
|
||||
N = len(points)
|
||||
N_ = len(points_)
|
||||
attention_offsets = {}
|
||||
idxs = []
|
||||
i = 0
|
||||
j = 0
|
||||
for p1 in points_:
|
||||
i += 1
|
||||
for p2 in points:
|
||||
j += 1
|
||||
size = 1
|
||||
offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2),
|
||||
abs(p1[1] * stride - p2[1] + (size - 1) / 2))
|
||||
if offset not in attention_offsets:
|
||||
attention_offsets[offset] = len(attention_offsets)
|
||||
idxs.append(attention_offsets[offset])
|
||||
self.attention_biases = self.create_parameter(
|
||||
shape=(num_heads, len(attention_offsets)),
|
||||
default_initializer=zeros_,
|
||||
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
|
||||
|
||||
tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64')
|
||||
self.register_buffer('attention_bias_idxs',
|
||||
paddle.reshape(tensor_idxs_, [N_, N]))
|
||||
|
||||
@paddle.no_grad()
|
||||
def train(self, mode=True):
|
||||
if mode:
|
||||
super().train()
|
||||
else:
|
||||
super().eval()
|
||||
if mode and hasattr(self, 'ab'):
|
||||
del self.ab
|
||||
else:
|
||||
self.ab = cal_attention_biases(self.attention_biases,
|
||||
self.attention_bias_idxs)
|
||||
|
||||
def forward(self, x):
|
||||
self.training = True
|
||||
B, N, C = x.shape
|
||||
kv = self.kv(x)
|
||||
kv = paddle.reshape(kv, [B, N, self.num_heads, -1])
|
||||
k, v = paddle.split(kv, [self.key_dim, self.d], axis=3)
|
||||
k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC
|
||||
v = paddle.transpose(v, perm=[0, 2, 1, 3])
|
||||
q = paddle.reshape(
|
||||
self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim])
|
||||
q = paddle.transpose(q, perm=[0, 2, 1, 3])
|
||||
|
||||
if self.training:
|
||||
attention_biases = cal_attention_biases(self.attention_biases,
|
||||
self.attention_bias_idxs)
|
||||
else:
|
||||
attention_biases = self.ab
|
||||
|
||||
attn = (paddle.matmul(
|
||||
q, paddle.transpose(
|
||||
k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases
|
||||
attn = F.softmax(attn)
|
||||
|
||||
x = paddle.reshape(
|
||||
paddle.transpose(
|
||||
paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
|
||||
x = self.proj(x)
|
||||
return x
|
||||
|
||||
|
||||
class LeViT(nn.Layer):
|
||||
""" Vision Transformer with support for patch or hybrid CNN input stage
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dim=[192],
|
||||
key_dim=[64],
|
||||
depth=[12],
|
||||
num_heads=[3],
|
||||
attn_ratio=[2],
|
||||
mlp_ratio=[2],
|
||||
hybrid_backbone=None,
|
||||
down_ops=[],
|
||||
attention_activation=nn.Hardswish,
|
||||
mlp_activation=nn.Hardswish,
|
||||
distillation=True,
|
||||
drop_path=0):
|
||||
super().__init__()
|
||||
|
||||
self.class_num = class_num
|
||||
self.num_features = embed_dim[-1]
|
||||
self.embed_dim = embed_dim
|
||||
self.distillation = distillation
|
||||
|
||||
self.patch_embed = hybrid_backbone
|
||||
|
||||
self.blocks = []
|
||||
down_ops.append([''])
|
||||
resolution = img_size // patch_size
|
||||
for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate(
|
||||
zip(embed_dim, key_dim, depth, num_heads, attn_ratio,
|
||||
mlp_ratio, down_ops)):
|
||||
for _ in range(dpth):
|
||||
self.blocks.append(
|
||||
Residual(
|
||||
Attention(
|
||||
ed,
|
||||
kd,
|
||||
nh,
|
||||
attn_ratio=ar,
|
||||
activation=attention_activation,
|
||||
resolution=resolution, ),
|
||||
drop_path))
|
||||
if mr > 0:
|
||||
h = int(ed * mr)
|
||||
self.blocks.append(
|
||||
Residual(
|
||||
nn.Sequential(
|
||||
Linear_BN(ed, h),
|
||||
mlp_activation(),
|
||||
Linear_BN(
|
||||
h, ed, bn_weight_init=0), ),
|
||||
drop_path))
|
||||
if do[0] == 'Subsample':
|
||||
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
|
||||
resolution_ = (resolution - 1) // do[5] + 1
|
||||
self.blocks.append(
|
||||
AttentionSubsample(
|
||||
*embed_dim[i:i + 2],
|
||||
key_dim=do[1],
|
||||
num_heads=do[2],
|
||||
attn_ratio=do[3],
|
||||
activation=attention_activation,
|
||||
stride=do[5],
|
||||
resolution=resolution,
|
||||
resolution_=resolution_))
|
||||
resolution = resolution_
|
||||
if do[4] > 0: # mlp_ratio
|
||||
h = int(embed_dim[i + 1] * do[4])
|
||||
self.blocks.append(
|
||||
Residual(
|
||||
nn.Sequential(
|
||||
Linear_BN(embed_dim[i + 1], h),
|
||||
mlp_activation(),
|
||||
Linear_BN(
|
||||
h, embed_dim[i + 1], bn_weight_init=0), ),
|
||||
drop_path))
|
||||
self.blocks = nn.Sequential(*self.blocks)
|
||||
|
||||
# Classifier head
|
||||
self.head = BN_Linear(embed_dim[-1],
|
||||
class_num) if class_num > 0 else Identity()
|
||||
if distillation:
|
||||
self.head_dist = BN_Linear(
|
||||
embed_dim[-1], class_num) if class_num > 0 else Identity()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.patch_embed(x)
|
||||
x = x.flatten(2)
|
||||
x = paddle.transpose(x, perm=[0, 2, 1])
|
||||
x = self.blocks(x)
|
||||
x = x.mean(1)
|
||||
|
||||
x = paddle.reshape(x, [-1, self.embed_dim[-1]])
|
||||
if self.distillation:
|
||||
x = self.head(x), self.head_dist(x)
|
||||
if not self.training:
|
||||
x = (x[0] + x[1]) / 2
|
||||
else:
|
||||
x = self.head(x)
|
||||
return x
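
    # (comment added) With distillation=True the model returns a tuple of
    # (classification logits, distillation logits) while training; in eval
    # mode the two heads are averaged into a single prediction, in the spirit
    # of DeiT-style distillation.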
|
||||
|
||||
|
||||
def model_factory(C, D, X, N, drop_path, class_num, distillation):
|
||||
embed_dim = [int(x) for x in C.split('_')]
|
||||
num_heads = [int(x) for x in N.split('_')]
|
||||
depth = [int(x) for x in X.split('_')]
|
||||
act = nn.Hardswish
|
||||
model = LeViT(
|
||||
patch_size=16,
|
||||
embed_dim=embed_dim,
|
||||
num_heads=num_heads,
|
||||
key_dim=[D] * 3,
|
||||
depth=depth,
|
||||
attn_ratio=[2, 2, 2],
|
||||
mlp_ratio=[2, 2, 2],
|
||||
down_ops=[
|
||||
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
|
||||
['Subsample', D, embed_dim[0] // D, 4, 2, 2],
|
||||
['Subsample', D, embed_dim[1] // D, 4, 2, 2],
|
||||
],
|
||||
attention_activation=act,
|
||||
mlp_activation=act,
|
||||
hybrid_backbone=b16(embed_dim[0], activation=act),
|
||||
class_num=class_num,
|
||||
drop_path=drop_path,
|
||||
distillation=distillation)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
specification = {
|
||||
'LeViT_128S': {
|
||||
'C': '128_256_384',
|
||||
'D': 16,
|
||||
'N': '4_6_8',
|
||||
'X': '2_3_4',
|
||||
'drop_path': 0
|
||||
},
|
||||
'LeViT_128': {
|
||||
'C': '128_256_384',
|
||||
'D': 16,
|
||||
'N': '4_8_12',
|
||||
'X': '4_4_4',
|
||||
'drop_path': 0
|
||||
},
|
||||
'LeViT_192': {
|
||||
'C': '192_288_384',
|
||||
'D': 32,
|
||||
'N': '3_5_6',
|
||||
'X': '4_4_4',
|
||||
'drop_path': 0
|
||||
},
|
||||
'LeViT_256': {
|
||||
'C': '256_384_512',
|
||||
'D': 32,
|
||||
'N': '4_6_8',
|
||||
'X': '4_4_4',
|
||||
'drop_path': 0
|
||||
},
|
||||
'LeViT_384': {
|
||||
'C': '384_512_768',
|
||||
'D': 32,
|
||||
'N': '6_9_12',
|
||||
'X': '4_4_4',
|
||||
'drop_path': 0.1
|
||||
},
|
||||
}
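
# (comment added) Each entry encodes the three LeViT stages as
# underscore-joined strings: 'C' is the per-stage embedding dim, 'N' the
# number of heads, 'X' the depth, and 'D' the shared key dim. model_factory()
# splits them, e.g. LeViT_128S becomes embed_dim=[128, 256, 384],
# num_heads=[4, 6, 8], depth=[2, 3, 4] with key_dim=[16, 16, 16].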
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def LeViT_128S(pretrained=False,
|
||||
use_ssld=False,
|
||||
class_num=1000,
|
||||
distillation=False,
|
||||
**kwargs):
|
||||
model = model_factory(
|
||||
**specification['LeViT_128S'],
|
||||
class_num=class_num,
|
||||
distillation=distillation)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def LeViT_128(pretrained=False,
|
||||
use_ssld=False,
|
||||
class_num=1000,
|
||||
distillation=False,
|
||||
**kwargs):
|
||||
model = model_factory(
|
||||
**specification['LeViT_128'],
|
||||
class_num=class_num,
|
||||
distillation=distillation)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def LeViT_192(pretrained=False,
|
||||
use_ssld=False,
|
||||
class_num=1000,
|
||||
distillation=False,
|
||||
**kwargs):
|
||||
model = model_factory(
|
||||
**specification['LeViT_192'],
|
||||
class_num=class_num,
|
||||
distillation=distillation)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def LeViT_256(pretrained=False,
|
||||
use_ssld=False,
|
||||
class_num=1000,
|
||||
distillation=False,
|
||||
**kwargs):
|
||||
model = model_factory(
|
||||
**specification['LeViT_256'],
|
||||
class_num=class_num,
|
||||
distillation=distillation)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def LeViT_384(pretrained=False,
|
||||
use_ssld=False,
|
||||
class_num=1000,
|
||||
distillation=False,
|
||||
**kwargs):
|
||||
model = model_factory(
|
||||
**specification['LeViT_384'],
|
||||
class_num=class_num,
|
||||
distillation=distillation)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,815 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
MixNet for ImageNet-1K, implemented in Paddle.
|
||||
Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
"""
|
||||
|
||||
import os
|
||||
from inspect import isfunction
|
||||
from functools import reduce
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"MixNet_S":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
|
||||
"MixNet_M":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
|
||||
"MixNet_L":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class Identity(nn.Layer):
|
||||
"""
|
||||
Identity block.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(Identity, self).__init__()
|
||||
|
||||
def forward(self, x):
|
||||
return x
|
||||
|
||||
|
||||
def round_channels(channels, divisor=8):
|
||||
"""
|
||||
Round weighted channel number (make divisible operation).
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
channels : int or float
|
||||
Original number of channels.
|
||||
divisor : int, default 8
|
||||
Alignment value.
|
||||
|
||||
Returns:
|
||||
-------
|
||||
int
|
||||
Weighted number of channels.
|
||||
"""
|
||||
rounded_channels = max(
|
||||
int(channels + divisor / 2.0) // divisor * divisor, divisor)
|
||||
if float(rounded_channels) < 0.9 * channels:
|
||||
rounded_channels += divisor
|
||||
return rounded_channels
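
# Worked examples (added for illustration; values follow from the code above):
#   round_channels(31.2) -> 32   # nearest multiple of 8
#   round_channels(100)  -> 104  # the +divisor/2 makes exact midpoints go up
#   round_channels(9)    -> 16   # bumped up so the result never falls more
#                                # than 10% below the requested width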
|
||||
|
||||
|
||||
def get_activation_layer(activation):
|
||||
"""
|
||||
Create activation layer from string/function.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
activation : function, or str, or nn.Module
|
||||
Activation function or name of activation function.
|
||||
|
||||
Returns:
|
||||
-------
|
||||
nn.Module
|
||||
Activation layer.
|
||||
"""
|
||||
assert activation is not None
|
||||
if isfunction(activation):
|
||||
return activation()
|
||||
elif isinstance(activation, str):
|
||||
if activation == "relu":
|
||||
return nn.ReLU()
|
||||
elif activation == "relu6":
|
||||
return nn.ReLU6()
|
||||
elif activation == "swish":
|
||||
return nn.Swish()
|
||||
elif activation == "hswish":
|
||||
return nn.Hardswish()
|
||||
elif activation == "sigmoid":
|
||||
return nn.Sigmoid()
|
||||
elif activation == "hsigmoid":
|
||||
return nn.Hardsigmoid()
|
||||
elif activation == "identity":
|
||||
return Identity()
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
else:
|
||||
assert isinstance(activation, nn.Layer)
|
||||
return activation
|
||||
|
||||
|
||||
class ConvBlock(nn.Layer):
|
||||
"""
|
||||
Standard convolution block with Batch normalization and activation.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
Number of output channels.
|
||||
kernel_size : int or tuple/list of 2 int
|
||||
Convolution window size.
|
||||
stride : int or tuple/list of 2 int
|
||||
Strides of the convolution.
|
||||
padding : int, or tuple/list of 2 int, or tuple/list of 4 int
|
||||
Padding value for convolution layer.
|
||||
dilation : int or tuple/list of 2 int, default 1
|
||||
Dilation value for convolution layer.
|
||||
groups : int, default 1
|
||||
Number of groups.
|
||||
bias : bool, default False
|
||||
Whether the layer uses a bias vector.
|
||||
use_bn : bool, default True
|
||||
Whether to use BatchNorm layer.
|
||||
bn_eps : float, default 1e-5
|
||||
Small float added to variance in Batch norm.
|
||||
activation : function or str or None, default nn.ReLU()
|
||||
Activation function or name of activation function.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=nn.ReLU()):
|
||||
super(ConvBlock, self).__init__()
|
||||
self.activate = (activation is not None)
|
||||
self.use_bn = use_bn
|
||||
self.use_pad = (isinstance(padding, (list, tuple)) and
|
||||
(len(padding) == 4))
|
||||
|
||||
if self.use_pad:
|
||||
self.pad = padding
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
bias_attr=bias,
|
||||
weight_attr=None)
|
||||
if self.use_bn:
|
||||
self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
|
||||
if self.activate:
|
||||
self.activ = get_activation_layer(activation)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
if self.use_bn:
|
||||
x = self.bn(x)
|
||||
if self.activate:
|
||||
x = self.activ(x)
|
||||
return x
|
||||
|
||||
|
||||
class SEBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
channels,
|
||||
reduction=16,
|
||||
mid_channels=None,
|
||||
round_mid=False,
|
||||
use_conv=True,
|
||||
mid_activation=nn.ReLU(),
|
||||
out_activation=nn.Sigmoid()):
|
||||
super(SEBlock, self).__init__()
|
||||
self.use_conv = use_conv
|
||||
if mid_channels is None:
|
||||
mid_channels = channels // reduction if not round_mid else round_channels(
|
||||
float(channels) / reduction)
|
||||
|
||||
self.pool = nn.AdaptiveAvgPool2D(output_size=1)
|
||||
if use_conv:
|
||||
self.conv1 = nn.Conv2D(
|
||||
in_channels=channels,
|
||||
out_channels=mid_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
bias_attr=True,
|
||||
weight_attr=None)
|
||||
|
||||
else:
|
||||
self.fc1 = nn.Linear(
|
||||
in_features=channels, out_features=mid_channels)
|
||||
self.activ = get_activation_layer(mid_activation)
|
||||
if use_conv:
|
||||
self.conv2 = nn.Conv2D(
|
||||
in_channels=mid_channels,
|
||||
out_channels=channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
bias_attr=True,
|
||||
weight_attr=None)
|
||||
else:
|
||||
self.fc2 = nn.Linear(
|
||||
in_features=mid_channels, out_features=channels)
|
||||
self.sigmoid = get_activation_layer(out_activation)
|
||||
|
||||
def forward(self, x):
|
||||
w = self.pool(x)
|
||||
if not self.use_conv:
|
||||
w = w.reshape(shape=[w.shape[0], -1])
|
||||
w = self.conv1(w) if self.use_conv else self.fc1(w)
|
||||
w = self.activ(w)
|
||||
w = self.conv2(w) if self.use_conv else self.fc2(w)
|
||||
w = self.sigmoid(w)
|
||||
if not self.use_conv:
|
||||
w = w.unsqueeze(2).unsqueeze(3)
|
||||
x = x * w
|
||||
return x
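
# (comment added) SEBlock is a standard squeeze-and-excitation gate: global
# average pooling squeezes each channel to one value, two 1x1 convs (or
# linear layers when use_conv=False) form a bottleneck of channels//reduction,
# and the sigmoid output re-weights the input channels multiplicatively.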
|
||||
|
||||
|
||||
class MixConv(nn.Layer):
|
||||
"""
|
||||
Mixed convolution layer from 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
Number of output channels.
|
||||
kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
|
||||
Convolution window size.
|
||||
stride : int or tuple/list of 2 int
|
||||
Strides of the convolution.
|
||||
padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
|
||||
Padding value for convolution layer.
|
||||
dilation : int or tuple/list of 2 int, default 1
|
||||
Dilation value for convolution layer.
|
||||
groups : int, default 1
|
||||
Number of groups.
|
||||
bias : bool, default False
|
||||
Whether the layer uses a bias vector.
|
||||
axis : int, default 1
|
||||
The axis on which to concatenate the outputs.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=False,
|
||||
axis=1):
|
||||
super(MixConv, self).__init__()
|
||||
kernel_size = kernel_size if isinstance(kernel_size,
|
||||
list) else [kernel_size]
|
||||
padding = padding if isinstance(padding, list) else [padding]
|
||||
kernel_count = len(kernel_size)
|
||||
self.splitted_in_channels = self.split_channels(in_channels,
|
||||
kernel_count)
|
||||
splitted_out_channels = self.split_channels(out_channels, kernel_count)
|
||||
for i, kernel_size_i in enumerate(kernel_size):
|
||||
in_channels_i = self.splitted_in_channels[i]
|
||||
out_channels_i = splitted_out_channels[i]
|
||||
padding_i = padding[i]
|
||||
_ = self.add_sublayer(
|
||||
name=str(i),
|
||||
sublayer=nn.Conv2D(
|
||||
in_channels=in_channels_i,
|
||||
out_channels=out_channels_i,
|
||||
kernel_size=kernel_size_i,
|
||||
stride=stride,
|
||||
padding=padding_i,
|
||||
dilation=dilation,
|
||||
groups=(out_channels_i
|
||||
if out_channels == groups else groups),
|
||||
bias_attr=bias,
|
||||
weight_attr=None))
|
||||
self.axis = axis
|
||||
|
||||
def forward(self, x):
|
||||
xx = paddle.split(x, self.splitted_in_channels, axis=self.axis)
|
||||
out = [
|
||||
conv_i(x_i) for x_i, conv_i in zip(xx, self._sub_layers.values())
|
||||
]
|
||||
x = paddle.concat(tuple(out), axis=self.axis)
|
||||
return x
|
||||
|
||||
@staticmethod
|
||||
def split_channels(channels, kernel_count):
|
||||
splitted_channels = [channels // kernel_count] * kernel_count
|
||||
splitted_channels[0] += channels - sum(splitted_channels)
|
||||
return splitted_channels
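
# (comment added) Illustration of the channel split: a MixConv over 26 input
# channels with kernel sizes [3, 5, 7] divides them as
# split_channels(26, 3) == [10, 8, 8] (the remainder goes to the first group),
# runs one grouped Conv2D per kernel size, and concatenates the group outputs
# back along the channel axis.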
|
||||
|
||||
|
||||
class MixConvBlock(nn.Layer):
|
||||
"""
|
||||
Mixed convolution block with Batch normalization and activation.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
Number of output channels.
|
||||
kernel_size : int or tuple/list of int, or tuple/list of tuple/list of 2 int
|
||||
Convolution window size.
|
||||
stride : int or tuple/list of 2 int
|
||||
Strides of the convolution.
|
||||
padding : int or tuple/list of int, or tuple/list of tuple/list of 2 int
|
||||
Padding value for convolution layer.
|
||||
dilation : int or tuple/list of 2 int, default 1
|
||||
Dilation value for convolution layer.
|
||||
groups : int, default 1
|
||||
Number of groups.
|
||||
bias : bool, default False
|
||||
Whether the layer uses a bias vector.
|
||||
use_bn : bool, default True
|
||||
Whether to use BatchNorm layer.
|
||||
bn_eps : float, default 1e-5
|
||||
Small float added to variance in Batch norm.
|
||||
activation : function or str or None, default nn.ReLU()
|
||||
Activation function or name of activation function.
|
||||
activate : bool, default True
|
||||
Whether activate the convolution block.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=nn.ReLU()):
|
||||
super(MixConvBlock, self).__init__()
|
||||
self.activate = (activation is not None)
|
||||
self.use_bn = use_bn
|
||||
|
||||
self.conv = MixConv(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
bias=bias)
|
||||
if self.use_bn:
|
||||
self.bn = nn.BatchNorm2D(num_features=out_channels, epsilon=bn_eps)
|
||||
if self.activate:
|
||||
self.activ = get_activation_layer(activation)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
if self.use_bn:
|
||||
x = self.bn(x)
|
||||
if self.activate:
|
||||
x = self.activ(x)
|
||||
return x
|
||||
|
||||
|
||||
def mixconv1x1_block(in_channels,
|
||||
out_channels,
|
||||
kernel_count,
|
||||
stride=1,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=nn.ReLU()):
|
||||
"""
|
||||
1x1 version of the mixed convolution block.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
Number of output channels.
|
||||
kernel_count : int
|
||||
Kernel count.
|
||||
stride : int or tuple/list of 2 int, default 1
|
||||
Strides of the convolution.
|
||||
groups : int, default 1
|
||||
Number of groups.
|
||||
bias : bool, default False
|
||||
Whether the layer uses a bias vector.
|
||||
use_bn : bool, default True
|
||||
Whether to use BatchNorm layer.
|
||||
bn_eps : float, default 1e-5
|
||||
Small float added to variance in Batch norm.
|
||||
activation : function or str, or None, default nn.ReLU()
|
||||
Activation function or name of activation function.
|
||||
"""
|
||||
return MixConvBlock(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=([1] * kernel_count),
|
||||
stride=stride,
|
||||
padding=([0] * kernel_count),
|
||||
groups=groups,
|
||||
bias=bias,
|
||||
use_bn=use_bn,
|
||||
bn_eps=bn_eps,
|
||||
activation=activation)
|
||||
|
||||
|
||||
class MixUnit(nn.Layer):
|
||||
"""
|
||||
MixNet unit.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
        Number of output channels.
    exp_channels : int
|
||||
Number of middle (expanded) channels.
|
||||
stride : int or tuple/list of 2 int
|
||||
Strides of the second convolution layer.
|
||||
exp_kernel_count : int
|
||||
Expansion convolution kernel count for each unit.
|
||||
conv1_kernel_count : int
|
||||
Conv1 kernel count for each unit.
|
||||
conv2_kernel_count : int
|
||||
Conv2 kernel count for each unit.
|
||||
exp_factor : int
|
||||
Expansion factor for each unit.
|
||||
se_factor : int
|
||||
SE reduction factor for each unit.
|
||||
activation : str
|
||||
Activation function or name of activation function.
|
||||
"""
|
||||
|
||||
def __init__(self, in_channels, out_channels, stride, exp_kernel_count,
|
||||
conv1_kernel_count, conv2_kernel_count, exp_factor, se_factor,
|
||||
activation):
|
||||
super(MixUnit, self).__init__()
|
||||
assert exp_factor >= 1
|
||||
assert se_factor >= 0
|
||||
self.residual = (in_channels == out_channels) and (stride == 1)
|
||||
self.use_se = se_factor > 0
|
||||
mid_channels = exp_factor * in_channels
|
||||
self.use_exp_conv = exp_factor > 1
|
||||
|
||||
if self.use_exp_conv:
|
||||
if exp_kernel_count == 1:
|
||||
self.exp_conv = ConvBlock(
|
||||
in_channels=in_channels,
|
||||
out_channels=mid_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=activation)
|
||||
else:
|
||||
self.exp_conv = mixconv1x1_block(
|
||||
in_channels=in_channels,
|
||||
out_channels=mid_channels,
|
||||
kernel_count=exp_kernel_count,
|
||||
activation=activation)
|
||||
if conv1_kernel_count == 1:
|
||||
self.conv1 = ConvBlock(
|
||||
in_channels=mid_channels,
|
||||
out_channels=mid_channels,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
dilation=1,
|
||||
groups=mid_channels,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=activation)
|
||||
else:
|
||||
self.conv1 = MixConvBlock(
|
||||
in_channels=mid_channels,
|
||||
out_channels=mid_channels,
|
||||
kernel_size=[3 + 2 * i for i in range(conv1_kernel_count)],
|
||||
stride=stride,
|
||||
padding=[1 + i for i in range(conv1_kernel_count)],
|
||||
groups=mid_channels,
|
||||
activation=activation)
|
||||
if self.use_se:
|
||||
self.se = SEBlock(
|
||||
channels=mid_channels,
|
||||
reduction=(exp_factor * se_factor),
|
||||
round_mid=False,
|
||||
mid_activation=activation)
|
||||
if conv2_kernel_count == 1:
|
||||
self.conv2 = ConvBlock(
|
||||
in_channels=mid_channels,
|
||||
out_channels=out_channels,
|
||||
activation=None,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5)
|
||||
else:
|
||||
self.conv2 = mixconv1x1_block(
|
||||
in_channels=mid_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_count=conv2_kernel_count,
|
||||
activation=None)
|
||||
|
||||
def forward(self, x):
|
||||
if self.residual:
|
||||
identity = x
|
||||
if self.use_exp_conv:
|
||||
x = self.exp_conv(x)
|
||||
x = self.conv1(x)
|
||||
if self.use_se:
|
||||
x = self.se(x)
|
||||
x = self.conv2(x)
|
||||
if self.residual:
|
||||
x = x + identity
|
||||
return x
|
||||
|
||||
|
||||
class MixInitBlock(nn.Layer):
|
||||
"""
|
||||
MixNet specific initial block.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
in_channels : int
|
||||
Number of input channels.
|
||||
out_channels : int
|
||||
Number of output channels.
|
||||
"""
|
||||
|
||||
def __init__(self, in_channels, out_channels):
|
||||
super(MixInitBlock, self).__init__()
|
||||
self.conv1 = ConvBlock(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
stride=2,
|
||||
kernel_size=3,
|
||||
padding=1)
|
||||
self.conv2 = MixUnit(
|
||||
in_channels=out_channels,
|
||||
out_channels=out_channels,
|
||||
stride=1,
|
||||
exp_kernel_count=1,
|
||||
conv1_kernel_count=1,
|
||||
conv2_kernel_count=1,
|
||||
exp_factor=1,
|
||||
se_factor=0,
|
||||
activation="relu")
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
return x
|
||||
|
||||
|
||||
class MixNet(nn.Layer):
|
||||
"""
|
||||
MixNet model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
channels : list of list of int
|
||||
Number of output channels for each unit.
|
||||
init_block_channels : int
|
||||
Number of output channels for the initial unit.
|
||||
final_block_channels : int
|
||||
Number of output channels for the final block of the feature extractor.
|
||||
exp_kernel_counts : list of list of int
|
||||
Expansion convolution kernel count for each unit.
|
||||
conv1_kernel_counts : list of list of int
|
||||
Conv1 kernel count for each unit.
|
||||
conv2_kernel_counts : list of list of int
|
||||
Conv2 kernel count for each unit.
|
||||
exp_factors : list of list of int
|
||||
Expansion factor for each unit.
|
||||
se_factors : list of list of int
|
||||
SE reduction factor for each unit.
|
||||
in_channels : int, default 3
|
||||
Number of input channels.
|
||||
in_size : tuple of two ints, default (224, 224)
|
||||
Spatial size of the expected input image.
|
||||
class_num : int, default 1000
|
||||
Number of classification classes.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
channels,
|
||||
init_block_channels,
|
||||
final_block_channels,
|
||||
exp_kernel_counts,
|
||||
conv1_kernel_counts,
|
||||
conv2_kernel_counts,
|
||||
exp_factors,
|
||||
se_factors,
|
||||
in_channels=3,
|
||||
in_size=(224, 224),
|
||||
class_num=1000):
|
||||
super(MixNet, self).__init__()
|
||||
self.in_size = in_size
|
||||
self.class_num = class_num
|
||||
|
||||
self.features = nn.Sequential()
|
||||
self.features.add_sublayer(
|
||||
"init_block",
|
||||
MixInitBlock(
|
||||
in_channels=in_channels, out_channels=init_block_channels))
|
||||
in_channels = init_block_channels
|
||||
for i, channels_per_stage in enumerate(channels):
|
||||
stage = nn.Sequential()
|
||||
for j, out_channels in enumerate(channels_per_stage):
|
||||
stride = 2 if ((j == 0) and (i != 3)) or (
|
||||
(j == len(channels_per_stage) // 2) and (i == 3)) else 1
|
||||
exp_kernel_count = exp_kernel_counts[i][j]
|
||||
conv1_kernel_count = conv1_kernel_counts[i][j]
|
||||
conv2_kernel_count = conv2_kernel_counts[i][j]
|
||||
exp_factor = exp_factors[i][j]
|
||||
se_factor = se_factors[i][j]
|
||||
activation = "relu" if i == 0 else "swish"
|
||||
stage.add_sublayer(
|
||||
"unit{}".format(j + 1),
|
||||
MixUnit(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
stride=stride,
|
||||
exp_kernel_count=exp_kernel_count,
|
||||
conv1_kernel_count=conv1_kernel_count,
|
||||
conv2_kernel_count=conv2_kernel_count,
|
||||
exp_factor=exp_factor,
|
||||
se_factor=se_factor,
|
||||
activation=activation))
|
||||
in_channels = out_channels
|
||||
self.features.add_sublayer("stage{}".format(i + 1), stage)
|
||||
self.features.add_sublayer(
|
||||
"final_block",
|
||||
ConvBlock(
|
||||
in_channels=in_channels,
|
||||
out_channels=final_block_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
bias=False,
|
||||
use_bn=True,
|
||||
bn_eps=1e-5,
|
||||
activation=nn.ReLU()))
|
||||
in_channels = final_block_channels
|
||||
self.features.add_sublayer(
|
||||
"final_pool", nn.AvgPool2D(
|
||||
kernel_size=7, stride=1))
|
||||
|
||||
self.output = nn.Linear(
|
||||
in_features=in_channels, out_features=class_num)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.features(x)
|
||||
reshape_dim = reduce(lambda x, y: x * y, x.shape[1:])
|
||||
x = x.reshape(shape=[x.shape[0], reshape_dim])
|
||||
x = self.output(x)
|
||||
return x
|
||||
|
||||
|
||||
def get_mixnet(version, width_scale, model_name=None, **kwargs):
|
||||
"""
|
||||
Create MixNet model with specific parameters.
|
||||
|
||||
Parameters:
|
||||
----------
|
||||
version : str
|
||||
        Version of MixNet ('s' or 'm').
|
||||
width_scale : float
|
||||
Scale factor for width of layers.
|
||||
model_name : str or None, default None
|
||||
Model name.
|
||||
"""
|
||||
|
||||
if version == "s":
|
||||
init_block_channels = 16
|
||||
channels = [[24, 24], [40, 40, 40, 40], [80, 80, 80],
|
||||
[120, 120, 120, 200, 200, 200]]
|
||||
exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 1, 1],
|
||||
[2, 2, 2, 1, 1, 1]]
|
||||
conv1_kernel_counts = [[1, 1], [3, 2, 2, 2], [3, 2, 2],
|
||||
[3, 4, 4, 5, 4, 4]]
|
||||
conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [2, 2, 2],
|
||||
[2, 2, 2, 1, 2, 2]]
|
||||
exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6], [6, 3, 3, 6, 6, 6]]
|
||||
se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4], [2, 2, 2, 2, 2, 2]]
|
||||
elif version == "m":
|
||||
init_block_channels = 24
|
||||
channels = [[32, 32], [40, 40, 40, 40], [80, 80, 80, 80],
|
||||
[120, 120, 120, 120, 200, 200, 200, 200]]
|
||||
exp_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
|
||||
[1, 2, 2, 2, 1, 1, 1, 1]]
|
||||
conv1_kernel_counts = [[3, 1], [4, 2, 2, 2], [3, 4, 4, 4],
|
||||
[1, 4, 4, 4, 4, 4, 4, 4]]
|
||||
conv2_kernel_counts = [[2, 2], [1, 2, 2, 2], [1, 2, 2, 2],
|
||||
[1, 2, 2, 2, 1, 2, 2, 2]]
|
||||
exp_factors = [[6, 3], [6, 6, 6, 6], [6, 6, 6, 6],
|
||||
[6, 3, 3, 3, 6, 6, 6, 6]]
|
||||
se_factors = [[0, 0], [2, 2, 2, 2], [4, 4, 4, 4],
|
||||
[2, 2, 2, 2, 2, 2, 2, 2]]
|
||||
else:
|
||||
raise ValueError("Unsupported MixNet version {}".format(version))
|
||||
|
||||
final_block_channels = 1536
|
||||
|
||||
if width_scale != 1.0:
|
||||
channels = [[round_channels(cij * width_scale) for cij in ci]
|
||||
for ci in channels]
|
||||
init_block_channels = round_channels(init_block_channels * width_scale)
|
||||
|
||||
net = MixNet(
|
||||
channels=channels,
|
||||
init_block_channels=init_block_channels,
|
||||
final_block_channels=final_block_channels,
|
||||
exp_kernel_counts=exp_kernel_counts,
|
||||
conv1_kernel_counts=conv1_kernel_counts,
|
||||
conv2_kernel_counts=conv2_kernel_counts,
|
||||
exp_factors=exp_factors,
|
||||
se_factors=se_factors,
|
||||
**kwargs)
|
||||
|
||||
return net
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
"""
|
||||
model = get_mixnet(
|
||||
version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MixNet_M(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
"""
|
||||
model = get_mixnet(
|
||||
version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MixNet_L(pretrained=False, use_ssld=False, **kwargs):
|
||||
"""
|
||||
    MixNet-L model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
|
||||
https://arxiv.org/abs/1907.09595.
|
||||
"""
|
||||
model = get_mixnet(
|
||||
version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,287 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"MobileNetV2_x0_25":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
|
||||
"MobileNetV2_x0_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
|
||||
"MobileNetV2_x0_75":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
|
||||
"MobileNetV2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
|
||||
"MobileNetV2_x1_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
|
||||
"MobileNetV2_x2_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
filter_size,
|
||||
num_filters,
|
||||
stride,
|
||||
padding,
|
||||
channels=None,
|
||||
num_groups=1,
|
||||
name=None,
|
||||
use_cudnn=True):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=num_groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
param_attr=ParamAttr(name=name + "_bn_scale"),
|
||||
bias_attr=ParamAttr(name=name + "_bn_offset"),
|
||||
moving_mean_name=name + "_bn_mean",
|
||||
moving_variance_name=name + "_bn_variance")
|
||||
|
||||
def forward(self, inputs, if_act=True):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
if if_act:
|
||||
y = F.relu6(y)
|
||||
return y
|
||||
|
||||
|
||||
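# An inverted residual unit: a 1x1 expansion conv (ReLU6) widens the input by
# `expansion_factor`, a depthwise conv filters it spatially, and a linear 1x1
# projection reduces it back; the residual add is applied only when
# `ifshortcut` is True (stride-1 blocks with matching channels in the caller).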
class InvertedResidualUnit(nn.Layer):
|
||||
def __init__(self, num_channels, num_in_filter, num_filters, stride,
|
||||
filter_size, padding, expansion_factor, name):
|
||||
super(InvertedResidualUnit, self).__init__()
|
||||
num_expfilter = int(round(num_in_filter * expansion_factor))
|
||||
self._expand_conv = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_expfilter,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
num_groups=1,
|
||||
name=name + "_expand")
|
||||
|
||||
self._bottleneck_conv = ConvBNLayer(
|
||||
num_channels=num_expfilter,
|
||||
num_filters=num_expfilter,
|
||||
filter_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
num_groups=num_expfilter,
|
||||
use_cudnn=False,
|
||||
name=name + "_dwise")
|
||||
|
||||
self._linear_conv = ConvBNLayer(
|
||||
num_channels=num_expfilter,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
num_groups=1,
|
||||
name=name + "_linear")
|
||||
|
||||
def forward(self, inputs, ifshortcut):
|
||||
y = self._expand_conv(inputs, if_act=True)
|
||||
y = self._bottleneck_conv(y, if_act=True)
|
||||
y = self._linear_conv(y, if_act=False)
|
||||
if ifshortcut:
|
||||
y = paddle.add(inputs, y)
|
||||
return y
|
||||
|
||||
|
||||
class InvresiBlocks(nn.Layer):
|
||||
def __init__(self, in_c, t, c, n, s, name):
|
||||
super(InvresiBlocks, self).__init__()
|
||||
|
||||
self._first_block = InvertedResidualUnit(
|
||||
num_channels=in_c,
|
||||
num_in_filter=in_c,
|
||||
num_filters=c,
|
||||
stride=s,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
expansion_factor=t,
|
||||
name=name + "_1")
|
||||
|
||||
self._block_list = []
|
||||
for i in range(1, n):
|
||||
block = self.add_sublayer(
|
||||
name + "_" + str(i + 1),
|
||||
sublayer=InvertedResidualUnit(
|
||||
num_channels=c,
|
||||
num_in_filter=c,
|
||||
num_filters=c,
|
||||
stride=1,
|
||||
filter_size=3,
|
||||
padding=1,
|
||||
expansion_factor=t,
|
||||
name=name + "_" + str(i + 1)))
|
||||
self._block_list.append(block)
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._first_block(inputs, ifshortcut=False)
|
||||
for block in self._block_list:
|
||||
y = block(y, ifshortcut=True)
|
||||
return y
|
||||
|
||||
|
||||
class MobileNet(nn.Layer):
|
||||
def __init__(self, class_num=1000, scale=1.0, prefix_name=""):
|
||||
super(MobileNet, self).__init__()
|
||||
self.scale = scale
|
||||
self.class_num = class_num
|
||||
|
||||
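# Each tuple is (t, c, n, s): expansion factor, output channels (before width
# scaling), number of repeated blocks, and stride of the first block in the
# stage -- the standard MobileNetV2 configuration.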
bottleneck_params_list = [
|
||||
(1, 16, 1, 1),
|
||||
(6, 24, 2, 2),
|
||||
(6, 32, 3, 2),
|
||||
(6, 64, 4, 2),
|
||||
(6, 96, 3, 1),
|
||||
(6, 160, 3, 2),
|
||||
(6, 320, 1, 1),
|
||||
]
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=int(32 * scale),
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
name=prefix_name + "conv1_1")
|
||||
|
||||
self.block_list = []
|
||||
i = 1
|
||||
in_c = int(32 * scale)
|
||||
for layer_setting in bottleneck_params_list:
|
||||
t, c, n, s = layer_setting
|
||||
i += 1
|
||||
block = self.add_sublayer(
|
||||
prefix_name + "conv" + str(i),
|
||||
sublayer=InvresiBlocks(
|
||||
in_c=in_c,
|
||||
t=t,
|
||||
c=int(c * scale),
|
||||
n=n,
|
||||
s=s,
|
||||
name=prefix_name + "conv" + str(i)))
|
||||
self.block_list.append(block)
|
||||
in_c = int(c * scale)
|
||||
|
||||
self.out_c = int(1280 * scale) if scale > 1.0 else 1280
|
||||
self.conv9 = ConvBNLayer(
|
||||
num_channels=in_c,
|
||||
num_filters=self.out_c,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
name=prefix_name + "conv9")
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.out = Linear(
|
||||
self.out_c,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(name=prefix_name + "fc10_weights"),
|
||||
bias_attr=ParamAttr(name=prefix_name + "fc10_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv1(inputs, if_act=True)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.conv9(y, if_act=True)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=0.25, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=0.5, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=0.75, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=1.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
|
||||
return model
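# Usage sketch (illustrative only): scale=1.0 builds the standard network, and
# extra keyword arguments such as `class_num` are forwarded to MobileNet, e.g.
#   model = MobileNetV2(pretrained=True, class_num=100)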
|
||||
|
||||
|
||||
def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=1.5, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = MobileNet(scale=2.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,492 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was heavily based on https://github.com/whai362/PVT
|
||||
|
||||
from functools import partial
|
||||
import math
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn.initializer import TruncatedNormal, Constant
|
||||
|
||||
from .vision_transformer import trunc_normal_, zeros_, ones_, to_2tuple, DropPath, Identity, drop_path
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"PVT_V2_B0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B0_pretrained.pdparams",
|
||||
"PVT_V2_B1":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B1_pretrained.pdparams",
|
||||
"PVT_V2_B2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_pretrained.pdparams",
|
||||
"PVT_V2_B2_Linear":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B2_Linear_pretrained.pdparams",
|
||||
"PVT_V2_B3":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B3_pretrained.pdparams",
|
||||
"PVT_V2_B4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams",
|
||||
"PVT_V2_B5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
@paddle.jit.not_to_static
|
||||
def swapdim(x, dim1, dim2):
|
||||
a = list(range(len(x.shape)))
|
||||
a[dim1], a[dim2] = a[dim2], a[dim1]
|
||||
return x.transpose(a)
|
||||
|
||||
|
||||
class Mlp(nn.Layer):
|
||||
def __init__(self,
|
||||
in_features,
|
||||
hidden_features=None,
|
||||
out_features=None,
|
||||
act_layer=nn.GELU,
|
||||
drop=0.,
|
||||
linear=False):
|
||||
super().__init__()
|
||||
out_features = out_features or in_features
|
||||
hidden_features = hidden_features or in_features
|
||||
self.fc1 = nn.Linear(in_features, hidden_features)
|
||||
self.dwconv = DWConv(hidden_features)
|
||||
self.act = act_layer()
|
||||
self.fc2 = nn.Linear(hidden_features, out_features)
|
||||
self.drop = nn.Dropout(drop)
|
||||
self.linear = linear
|
||||
if self.linear:
|
||||
self.relu = nn.ReLU()
|
||||
|
||||
def forward(self, x, H, W):
|
||||
x = self.fc1(x)
|
||||
if self.linear:
|
||||
x = self.relu(x)
|
||||
x = self.dwconv(x, H, W)
|
||||
x = self.act(x)
|
||||
x = self.drop(x)
|
||||
x = self.fc2(x)
|
||||
x = self.drop(x)
|
||||
return x
|
||||
|
||||
|
||||
class Attention(nn.Layer):
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads=8,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.,
|
||||
sr_ratio=1,
|
||||
linear=False):
|
||||
super().__init__()
|
||||
assert dim % num_heads == 0
|
||||
|
||||
self.dim = dim
|
||||
self.num_heads = num_heads
|
||||
head_dim = dim // num_heads
|
||||
self.scale = qk_scale or head_dim**-0.5
|
||||
|
||||
self.q = nn.Linear(dim, dim, bias_attr=qkv_bias)
|
||||
self.kv = nn.Linear(dim, dim * 2, bias_attr=qkv_bias)
|
||||
self.attn_drop = nn.Dropout(attn_drop)
|
||||
self.proj = nn.Linear(dim, dim)
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
|
||||
self.linear = linear
|
||||
self.sr_ratio = sr_ratio
|
||||
if not linear:
|
||||
if sr_ratio > 1:
|
||||
self.sr = nn.Conv2D(
|
||||
dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
|
||||
self.norm = nn.LayerNorm(dim)
|
||||
else:
|
||||
self.pool = nn.AdaptiveAvgPool2D(7)
|
||||
self.sr = nn.Conv2D(dim, dim, kernel_size=1, stride=1)
|
||||
self.norm = nn.LayerNorm(dim)
|
||||
self.act = nn.GELU()
|
||||
|
||||
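# Spatial-reduction attention: queries keep the full token resolution, while
# keys/values are computed from a spatially reduced map -- a strided conv when
# sr_ratio > 1, or (in the `linear` variant) a 7x7 adaptive average pool
# followed by a 1x1 conv, LayerNorm and GELU.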
def forward(self, x, H, W):
|
||||
B, N, C = x.shape
|
||||
q = self.q(x).reshape(
|
||||
[B, N, self.num_heads, C // self.num_heads]).transpose(
|
||||
[0, 2, 1, 3])
|
||||
|
||||
if not self.linear:
|
||||
if self.sr_ratio > 1:
|
||||
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
|
||||
x_ = self.sr(x_)
|
||||
h_, w_ = x_.shape[-2:]
|
||||
x_ = x_.reshape([B, C, h_ * w_]).transpose([0, 2, 1])
|
||||
x_ = self.norm(x_)
|
||||
kv = self.kv(x_)
|
||||
kv = kv.reshape([
|
||||
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
|
||||
C // self.num_heads
|
||||
]).transpose([2, 0, 3, 1, 4])
|
||||
else:
|
||||
kv = self.kv(x)
|
||||
kv = kv.reshape([
|
||||
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
|
||||
C // self.num_heads
|
||||
]).transpose([2, 0, 3, 1, 4])
|
||||
else:
|
||||
x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
|
||||
x_ = self.sr(self.pool(x_))
|
||||
x_ = x_.reshape([B, C, x_.shape[2] * x_.shape[3]]).transpose(
|
||||
[0, 2, 1])
|
||||
x_ = self.norm(x_)
|
||||
x_ = self.act(x_)
|
||||
kv = self.kv(x_)
|
||||
kv = kv.reshape([
|
||||
B, kv.shape[2] * kv.shape[1] // 2 // C, 2, self.num_heads,
|
||||
C // self.num_heads
|
||||
]).transpose([2, 0, 3, 1, 4])
|
||||
k, v = kv[0], kv[1]
|
||||
|
||||
attn = (q @ swapdim(k, -2, -1)) * self.scale
|
||||
attn = F.softmax(attn, axis=-1)
|
||||
attn = self.attn_drop(attn)
|
||||
|
||||
x = swapdim(attn @ v, 1, 2).reshape([B, N, C])
|
||||
x = self.proj(x)
|
||||
x = self.proj_drop(x)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class Block(nn.Layer):
|
||||
def __init__(self,
|
||||
dim,
|
||||
num_heads,
|
||||
mlp_ratio=4.,
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop=0.,
|
||||
attn_drop=0.,
|
||||
drop_path=0.,
|
||||
act_layer=nn.GELU,
|
||||
norm_layer=nn.LayerNorm,
|
||||
sr_ratio=1,
|
||||
linear=False):
|
||||
super().__init__()
|
||||
self.norm1 = norm_layer(dim)
|
||||
self.attn = Attention(
|
||||
dim,
|
||||
num_heads=num_heads,
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
attn_drop=attn_drop,
|
||||
proj_drop=drop,
|
||||
sr_ratio=sr_ratio,
|
||||
linear=linear)
|
||||
self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
|
||||
self.norm2 = norm_layer(dim)
|
||||
mlp_hidden_dim = int(dim * mlp_ratio)
|
||||
self.mlp = Mlp(in_features=dim,
|
||||
hidden_features=mlp_hidden_dim,
|
||||
act_layer=act_layer,
|
||||
drop=drop,
|
||||
linear=linear)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
x = x + self.drop_path(self.attn(self.norm1(x), H, W))
|
||||
x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class OverlapPatchEmbed(nn.Layer):
|
||||
""" Image to Patch Embedding
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=7,
|
||||
stride=4,
|
||||
in_chans=3,
|
||||
embed_dim=768):
|
||||
super().__init__()
|
||||
img_size = to_2tuple(img_size)
|
||||
patch_size = to_2tuple(patch_size)
|
||||
|
||||
self.img_size = img_size
|
||||
self.patch_size = patch_size
|
||||
self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
|
||||
self.num_patches = self.H * self.W
|
||||
self.proj = nn.Conv2D(
|
||||
in_chans,
|
||||
embed_dim,
|
||||
kernel_size=patch_size,
|
||||
stride=stride,
|
||||
padding=(patch_size[0] // 2, patch_size[1] // 2))
|
||||
self.norm = nn.LayerNorm(embed_dim)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.proj(x)
|
||||
_, _, H, W = x.shape
|
||||
x = x.flatten(2)
|
||||
x = swapdim(x, 1, 2)
|
||||
x = self.norm(x)
|
||||
|
||||
return x, H, W
|
||||
|
||||
|
||||
class PyramidVisionTransformerV2(nn.Layer):
|
||||
def __init__(self,
|
||||
img_size=224,
|
||||
patch_size=16,
|
||||
in_chans=3,
|
||||
class_num=1000,
|
||||
embed_dims=[64, 128, 256, 512],
|
||||
num_heads=[1, 2, 4, 8],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=False,
|
||||
qk_scale=None,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
norm_layer=nn.LayerNorm,
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
num_stages=4,
|
||||
linear=False):
|
||||
super().__init__()
|
||||
self.class_num = class_num
|
||||
self.depths = depths
|
||||
self.num_stages = num_stages
|
||||
|
||||
dpr = [x for x in paddle.linspace(0, drop_path_rate, sum(depths))
|
||||
] # stochastic depth decay rule
|
||||
cur = 0
|
||||
|
||||
for i in range(num_stages):
|
||||
patch_embed = OverlapPatchEmbed(
|
||||
img_size=img_size if i == 0 else img_size // (2**(i + 1)),
|
||||
patch_size=7 if i == 0 else 3,
|
||||
stride=4 if i == 0 else 2,
|
||||
in_chans=in_chans if i == 0 else embed_dims[i - 1],
|
||||
embed_dim=embed_dims[i])
|
||||
|
||||
block = nn.LayerList([
|
||||
Block(
|
||||
dim=embed_dims[i],
|
||||
num_heads=num_heads[i],
|
||||
mlp_ratio=mlp_ratios[i],
|
||||
qkv_bias=qkv_bias,
|
||||
qk_scale=qk_scale,
|
||||
drop=drop_rate,
|
||||
attn_drop=attn_drop_rate,
|
||||
drop_path=dpr[cur + j],
|
||||
norm_layer=norm_layer,
|
||||
sr_ratio=sr_ratios[i],
|
||||
linear=linear) for j in range(depths[i])
|
||||
])
|
||||
norm = norm_layer(embed_dims[i])
|
||||
cur += depths[i]
|
||||
|
||||
setattr(self, f"patch_embed{i + 1}", patch_embed)
|
||||
setattr(self, f"block{i + 1}", block)
|
||||
setattr(self, f"norm{i + 1}", norm)
|
||||
|
||||
# classification head
|
||||
self.head = nn.Linear(embed_dims[3],
|
||||
class_num) if class_num > 0 else Identity()
|
||||
|
||||
self.apply(self._init_weights)
|
||||
|
||||
def _init_weights(self, m):
|
||||
if isinstance(m, nn.Linear):
|
||||
trunc_normal_(m.weight)
|
||||
if isinstance(m, nn.Linear) and m.bias is not None:
|
||||
zeros_(m.bias)
|
||||
elif isinstance(m, nn.LayerNorm):
|
||||
zeros_(m.bias)
|
||||
ones_(m.weight)
|
||||
|
||||
def forward_features(self, x):
|
||||
B = x.shape[0]
|
||||
|
||||
for i in range(self.num_stages):
|
||||
patch_embed = getattr(self, f"patch_embed{i + 1}")
|
||||
block = getattr(self, f"block{i + 1}")
|
||||
norm = getattr(self, f"norm{i + 1}")
|
||||
x, H, W = patch_embed(x)
|
||||
for blk in block:
|
||||
x = blk(x, H, W)
|
||||
x = norm(x)
|
||||
if i != self.num_stages - 1:
|
||||
x = x.reshape([B, H, W, x.shape[2]]).transpose([0, 3, 1, 2])
|
||||
|
||||
return x.mean(axis=1)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.forward_features(x)
|
||||
x = self.head(x)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
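# DWConv restores the (B, C, H, W) layout from the token sequence and applies
# a 3x3 depthwise conv, acting as a lightweight local (positional) mixing step
# inside the MLP.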
class DWConv(nn.Layer):
|
||||
def __init__(self, dim=768):
|
||||
super().__init__()
|
||||
self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim)
|
||||
|
||||
def forward(self, x, H, W):
|
||||
B, N, C = x.shape
|
||||
x = swapdim(x, 1, 2)
|
||||
x = x.reshape([B, C, H, W])
|
||||
x = self.dwconv(x)
|
||||
x = x.flatten(2)
|
||||
x = swapdim(x, 1, 2)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def PVT_V2_B0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[32, 64, 160, 256],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[2, 2, 2, 2],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B1(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[2, 2, 2, 2],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B1"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B2(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B2"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B3(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 4, 18, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B3"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B4(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 8, 27, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B4"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B5(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[4, 4, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 6, 40, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B5"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def PVT_V2_B2_Linear(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = PyramidVisionTransformerV2(
|
||||
patch_size=4,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
qkv_bias=True,
|
||||
norm_layer=partial(
|
||||
nn.LayerNorm, epsilon=1e-6),
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
linear=True,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["PVT_V2_B2_Linear"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,203 @@
|
||||
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/d-li14/involution
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from paddle.vision.models import resnet
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"RedNet26":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams",
|
||||
"RedNet38":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet38_pretrained.pdparams",
|
||||
"RedNet50":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet50_pretrained.pdparams",
|
||||
"RedNet101":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet101_pretrained.pdparams",
|
||||
"RedNet152":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams"
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
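# Involution generates a per-location kernel from the input itself: conv1 and
# conv2 predict kernel_size**2 weights per group at every spatial position,
# the input is unfolded into kernel_size**2 shifted copies, and the weighted
# sum over that axis produces the output (channels share weights within
# groups of 16).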
class Involution(nn.Layer):
|
||||
def __init__(self, channels, kernel_size, stride):
|
||||
super(Involution, self).__init__()
|
||||
self.kernel_size = kernel_size
|
||||
self.stride = stride
|
||||
self.channels = channels
|
||||
reduction_ratio = 4
|
||||
self.group_channels = 16
|
||||
self.groups = self.channels // self.group_channels
|
||||
self.conv1 = nn.Sequential(
|
||||
('conv', nn.Conv2D(
|
||||
in_channels=channels,
|
||||
out_channels=channels // reduction_ratio,
|
||||
kernel_size=1,
|
||||
bias_attr=False)),
|
||||
('bn', nn.BatchNorm2D(channels // reduction_ratio)),
|
||||
('activate', nn.ReLU()))
|
||||
self.conv2 = nn.Sequential(('conv', nn.Conv2D(
|
||||
in_channels=channels // reduction_ratio,
|
||||
out_channels=kernel_size**2 * self.groups,
|
||||
kernel_size=1,
|
||||
stride=1)))
|
||||
if stride > 1:
|
||||
self.avgpool = nn.AvgPool2D(stride, stride)
|
||||
|
||||
def forward(self, x):
|
||||
weight = self.conv2(
|
||||
self.conv1(x if self.stride == 1 else self.avgpool(x)))
|
||||
b, c, h, w = weight.shape
|
||||
weight = weight.reshape(
|
||||
(b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)
|
||||
|
||||
out = nn.functional.unfold(x, self.kernel_size, self.stride,
|
||||
(self.kernel_size - 1) // 2, 1)
|
||||
out = out.reshape(
|
||||
(b, self.groups, self.group_channels, self.kernel_size**2, h, w))
|
||||
out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
|
||||
return out
|
||||
|
||||
|
||||
class BottleneckBlock(resnet.BottleneckBlock):
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
planes,
|
||||
stride=1,
|
||||
downsample=None,
|
||||
groups=1,
|
||||
base_width=64,
|
||||
dilation=1,
|
||||
norm_layer=None):
|
||||
super(BottleneckBlock, self).__init__(inplanes, planes, stride,
|
||||
downsample, groups, base_width,
|
||||
dilation, norm_layer)
|
||||
width = int(planes * (base_width / 64.)) * groups
|
||||
self.conv2 = Involution(width, 7, stride)
|
||||
|
||||
|
||||
class RedNet(resnet.ResNet):
|
||||
def __init__(self, block, depth, class_num=1000, with_pool=True):
|
||||
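# The base ResNet is constructed with depth=50 only to satisfy its
# constructor; the stem and the four stages are rebuilt below from layer_cfg,
# so the requested `depth` controls the actual layer counts.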
super(RedNet, self).__init__(
|
||||
block=block, depth=50, num_classes=class_num, with_pool=with_pool)
|
||||
layer_cfg = {
|
||||
26: [1, 2, 4, 1],
|
||||
38: [2, 3, 5, 2],
|
||||
50: [3, 4, 6, 3],
|
||||
101: [3, 4, 23, 3],
|
||||
152: [3, 8, 36, 3]
|
||||
}
|
||||
layers = layer_cfg[depth]
|
||||
|
||||
self.conv1 = None
|
||||
self.bn1 = None
|
||||
self.relu = None
|
||||
self.inplanes = 64
|
||||
self.class_num = class_num
|
||||
self.stem = nn.Sequential(
|
||||
nn.Sequential(
|
||||
('conv', nn.Conv2D(
|
||||
in_channels=3,
|
||||
out_channels=self.inplanes // 2,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
bias_attr=False)),
|
||||
('bn', nn.BatchNorm2D(self.inplanes // 2)),
|
||||
('activate', nn.ReLU())),
|
||||
Involution(self.inplanes // 2, 3, 1),
|
||||
nn.BatchNorm2D(self.inplanes // 2),
|
||||
nn.ReLU(),
|
||||
nn.Sequential(
|
||||
('conv', nn.Conv2D(
|
||||
in_channels=self.inplanes // 2,
|
||||
out_channels=self.inplanes,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
bias_attr=False)), ('bn', nn.BatchNorm2D(self.inplanes)),
|
||||
('activate', nn.ReLU())))
|
||||
|
||||
self.layer1 = self._make_layer(block, 64, layers[0])
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.stem(x)
|
||||
x = self.maxpool(x)
|
||||
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
if self.with_pool:
|
||||
x = self.avgpool(x)
|
||||
|
||||
if self.class_num > 0:
|
||||
x = paddle.flatten(x, 1)
|
||||
x = self.fc(x)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def RedNet26(pretrained=False, **kwargs):
|
||||
model = RedNet(BottleneckBlock, 26, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
|
||||
return model
|
||||
|
||||
|
||||
def RedNet38(pretrained=False, **kwargs):
|
||||
model = RedNet(BottleneckBlock, 38, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
|
||||
return model
|
||||
|
||||
|
||||
def RedNet50(pretrained=False, **kwargs):
|
||||
model = RedNet(BottleneckBlock, 50, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
|
||||
return model
|
||||
|
||||
|
||||
def RedNet101(pretrained=False, **kwargs):
|
||||
model = RedNet(BottleneckBlock, 101, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
|
||||
return model
|
||||
|
||||
|
||||
def RedNet152(pretrained=False, **kwargs):
|
||||
model = RedNet(BottleneckBlock, 152, **kwargs)
|
||||
_load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
|
||||
return model
|
||||
@ -0,0 +1,431 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/facebookresearch/pycls
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"RegNetX_200MF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
|
||||
"RegNetX_4GF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
|
||||
"RegNetX_32GF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
|
||||
"RegNetY_200MF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_200MF_pretrained.pdparams",
|
||||
"RegNetY_4GF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
|
||||
"RegNetY_32GF":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def quantize_float(f, q):
|
||||
"""Converts a float to closest non-zero int divisible by q."""
|
||||
return int(round(f / q) * q)
|
||||
|
||||
|
||||
def adjust_ws_gs_comp(ws, bms, gs):
|
||||
"""Adjusts the compatibility of widths and groups."""
|
||||
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
|
||||
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
|
||||
ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
|
||||
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
|
||||
return ws, gs
|
||||
|
||||
|
||||
def get_stages_from_blocks(ws, rs):
|
||||
"""Gets ws/ds of network at each stage from per block values."""
|
||||
ts = [
|
||||
w != wp or r != rp
|
||||
for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
|
||||
]
|
||||
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
|
||||
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
|
||||
return s_ws, s_ds
|
||||
|
||||
|
||||
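# generate_regnet implements the RegNet design-space rule: block widths follow
# the line w_0 + w_a * j, are snapped to the nearest power of w_m relative to
# w_0, and are then rounded to multiples of q (8 by default).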
def generate_regnet(w_a, w_0, w_m, d, q=8):
|
||||
"""Generates per block ws from RegNet parameters."""
|
||||
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
|
||||
ws_cont = np.arange(d) * w_a + w_0
|
||||
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
|
||||
ws = w_0 * np.power(w_m, ks)
|
||||
ws = np.round(np.divide(ws, q)) * q
|
||||
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
|
||||
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
|
||||
return ws, num_stages, max_stage, ws_cont
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
padding=0,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"),
|
||||
bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"))
|
||||
bn_name = name + "_bn"
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + ".output.1.w_0"),
|
||||
bias_attr=ParamAttr(bn_name + ".output.1.b_0"),
|
||||
moving_mean_name=bn_name + "_mean",
|
||||
moving_variance_name=bn_name + "_variance")
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
bm,
|
||||
gw,
|
||||
se_on,
|
||||
se_r,
|
||||
shortcut=True,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
# Compute the bottleneck width
|
||||
w_b = int(round(num_filters * bm))
|
||||
# Compute the number of groups
|
||||
num_gs = w_b // gw
|
||||
self.se_on = se_on
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=w_b,
|
||||
filter_size=1,
|
||||
padding=0,
|
||||
act="relu",
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=w_b,
|
||||
num_filters=w_b,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
groups=num_gs,
|
||||
act="relu",
|
||||
name=name + "_branch2b")
|
||||
if se_on:
|
||||
w_se = int(round(num_channels * se_r))
|
||||
self.se_block = SELayer(
|
||||
num_channels=w_b,
|
||||
num_filters=w_b,
|
||||
reduction_ratio=w_se,
|
||||
name=name + "_branch2se")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=w_b,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
if self.se_on:
|
||||
conv1 = self.se_block(conv1)
|
||||
conv2 = self.conv2(conv1)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
|
||||
y = paddle.add(x=short, y=conv2)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class SELayer(nn.Layer):
|
||||
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
|
||||
super(SELayer, self).__init__()
|
||||
|
||||
self.pool2d_gap = AdaptiveAvgPool2D(1)
|
||||
|
||||
self._num_channels = num_channels
|
||||
|
||||
med_ch = int(num_channels / reduction_ratio)
|
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0)
|
||||
self.squeeze = Linear(
|
||||
num_channels,
|
||||
med_ch,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
|
||||
bias_attr=ParamAttr(name=name + "_sqz_offset"))
|
||||
|
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0)
|
||||
self.excitation = Linear(
|
||||
med_ch,
|
||||
num_filters,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
|
||||
bias_attr=ParamAttr(name=name + "_exc_offset"))
|
||||
|
||||
def forward(self, input):
|
||||
pool = self.pool2d_gap(input)
|
||||
pool = paddle.reshape(pool, shape=[-1, self._num_channels])
|
||||
squeeze = self.squeeze(pool)
|
||||
squeeze = F.relu(squeeze)
|
||||
excitation = self.excitation(squeeze)
|
||||
excitation = F.sigmoid(excitation)
|
||||
excitation = paddle.reshape(
|
||||
excitation, shape=[-1, self._num_channels, 1, 1])
|
||||
out = input * excitation
|
||||
return out
|
||||
|
||||
|
||||
class RegNet(nn.Layer):
|
||||
def __init__(self,
|
||||
w_a,
|
||||
w_0,
|
||||
w_m,
|
||||
d,
|
||||
group_w,
|
||||
bot_mul,
|
||||
q=8,
|
||||
se_on=False,
|
||||
class_num=1000):
|
||||
super(RegNet, self).__init__()
|
||||
|
||||
# Generate RegNet ws per block
|
||||
b_ws, num_s, max_s, ws_cont = generate_regnet(w_a, w_0, w_m, d, q)
|
||||
# Convert to per stage format
|
||||
ws, ds = get_stages_from_blocks(b_ws, b_ws)
|
||||
# Generate group widths and bot muls
|
||||
gws = [group_w for _ in range(num_s)]
|
||||
bms = [bot_mul for _ in range(num_s)]
|
||||
# Adjust the compatibility of ws and gws
|
||||
ws, gws = adjust_ws_gs_comp(ws, bms, gws)
|
||||
# Use the same stride for each stage
|
||||
ss = [2 for _ in range(num_s)]
|
||||
# Use SE for RegNetY
|
||||
se_r = 0.25
|
||||
# Construct the model
|
||||
# Group params by stage
|
||||
stage_params = list(zip(ds, ws, ss, bms, gws))
|
||||
# Construct the stem
|
||||
stem_type = "simple_stem_in"
|
||||
stem_w = 32
|
||||
block_type = "res_bottleneck_block"
|
||||
|
||||
self.conv = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=stem_w,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
act="relu",
|
||||
name="stem_conv")
|
||||
|
||||
self.block_list = []
|
||||
for block, (d, w_out, stride, bm, gw) in enumerate(stage_params):
|
||||
shortcut = False
|
||||
for i in range(d):
|
||||
num_channels = stem_w if block == i == 0 else in_channels
|
||||
# The stride applies only to the first block of each stage
|
||||
b_stride = stride if i == 0 else 1
|
||||
conv_name = "s" + str(block + 1) + "_b" + str(i +
|
||||
1) # chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
conv_name,
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels,
|
||||
num_filters=w_out,
|
||||
stride=b_stride,
|
||||
bm=bm,
|
||||
gw=gw,
|
||||
se_on=se_on,
|
||||
se_r=se_r,
|
||||
shortcut=shortcut,
|
||||
name=conv_name))
|
||||
in_channels = w_out
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = w_out
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
|
||||
bias_attr=ParamAttr(name="fc_0.b_0"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv(inputs)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=36.44,
|
||||
w_0=24,
|
||||
w_m=2.49,
|
||||
d=13,
|
||||
group_w=8,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_200MF"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=38.65,
|
||||
w_0=96,
|
||||
w_m=2.43,
|
||||
d=23,
|
||||
group_w=40,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_4GF"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=69.86,
|
||||
w_0=320,
|
||||
w_m=2.0,
|
||||
d=23,
|
||||
group_w=168,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=36.44,
|
||||
w_0=24,
|
||||
w_m=2.49,
|
||||
d=13,
|
||||
group_w=8,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
se_on=True,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=31.41,
|
||||
w_0=96,
|
||||
w_m=2.24,
|
||||
d=22,
|
||||
group_w=64,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
se_on=True,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RegNet(
|
||||
w_a=115.89,
|
||||
w_0=232,
|
||||
w_m=2.53,
|
||||
d=20,
|
||||
group_w=232,
|
||||
bot_mul=1.0,
|
||||
q=8,
|
||||
se_on=True,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,382 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/DingXiaoH/RepVGG
|
||||
|
||||
import paddle.nn as nn
|
||||
import paddle
|
||||
import numpy as np
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"RepVGG_A0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams",
|
||||
"RepVGG_A1":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams",
|
||||
"RepVGG_A2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams",
|
||||
"RepVGG_B0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams",
|
||||
"RepVGG_B1":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams",
|
||||
"RepVGG_B2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams",
|
||||
"RepVGG_B1g2":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams",
|
||||
"RepVGG_B1g4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
|
||||
"RepVGG_B2g4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
|
||||
"RepVGG_B3g4":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
|
||||
g2_map = {l: 2 for l in optional_groupwise_layers}
|
||||
g4_map = {l: 4 for l in optional_groupwise_layers}
|
||||
|
||||
|
||||
class ConvBN(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
groups=1):
|
||||
super(ConvBN, self).__init__()
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups,
|
||||
bias_attr=False)
|
||||
self.bn = nn.BatchNorm2D(num_features=out_channels)
|
||||
|
||||
def forward(self, x):
|
||||
y = self.conv(x)
|
||||
y = self.bn(y)
|
||||
return y
|
||||
|
||||
|
||||
class RepVGGBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
padding_mode='zeros'):
|
||||
super(RepVGGBlock, self).__init__()
|
||||
self.is_repped = False
|
||||
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.kernel_size = kernel_size
|
||||
self.stride = stride
|
||||
self.padding = padding
|
||||
self.dilation = dilation
|
||||
self.groups = groups
|
||||
self.padding_mode = padding_mode
|
||||
|
||||
assert kernel_size == 3
|
||||
assert padding == 1
|
||||
|
||||
padding_11 = padding - kernel_size // 2
|
||||
|
||||
self.nonlinearity = nn.ReLU()
|
||||
|
||||
self.rbr_identity = nn.BatchNorm2D(
|
||||
num_features=in_channels
|
||||
) if out_channels == in_channels and stride == 1 else None
|
||||
self.rbr_dense = ConvBN(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
groups=groups)
|
||||
self.rbr_1x1 = ConvBN(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=1,
|
||||
stride=stride,
|
||||
padding=padding_11,
|
||||
groups=groups)
|
||||
|
||||
def forward(self, inputs):
|
||||
if not self.training and not self.is_repped:
|
||||
self.rep()
|
||||
self.is_repped = True
|
||||
if self.training and self.is_repped:
|
||||
self.is_repped = False
|
||||
|
||||
if not self.training:
|
||||
return self.nonlinearity(self.rbr_reparam(inputs))
|
||||
|
||||
if self.rbr_identity is None:
|
||||
id_out = 0
|
||||
else:
|
||||
id_out = self.rbr_identity(inputs)
|
||||
return self.nonlinearity(
|
||||
self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
|
||||
|
||||
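# rep() folds the three training-time branches (3x3 conv+BN, 1x1 conv+BN and
# the identity BN) into a single 3x3 convolution so inference runs a plain
# VGG-style stack; forward() triggers this lazily the first time the block is
# used in eval mode.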
def rep(self):
|
||||
if not hasattr(self, 'rbr_reparam'):
|
||||
self.rbr_reparam = nn.Conv2D(
|
||||
in_channels=self.in_channels,
|
||||
out_channels=self.out_channels,
|
||||
kernel_size=self.kernel_size,
|
||||
stride=self.stride,
|
||||
padding=self.padding,
|
||||
dilation=self.dilation,
|
||||
groups=self.groups,
|
||||
padding_mode=self.padding_mode)
|
||||
kernel, bias = self.get_equivalent_kernel_bias()
|
||||
self.rbr_reparam.weight.set_value(kernel)
|
||||
self.rbr_reparam.bias.set_value(bias)
|
||||
|
||||
def get_equivalent_kernel_bias(self):
|
||||
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
|
||||
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
|
||||
kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
|
||||
return kernel3x3 + self._pad_1x1_to_3x3_tensor(
|
||||
kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
|
||||
|
||||
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
|
||||
if kernel1x1 is None:
|
||||
return 0
|
||||
else:
|
||||
return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
|
||||
|
||||
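# Folding a BatchNorm into the preceding conv: with std = sqrt(var + eps), the
# fused kernel is W * (gamma / std) and the fused bias is
# beta - running_mean * gamma / std; the identity branch is first expressed as
# a 3x3 kernel with a 1 at the center of each channel's own group slot.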
def _fuse_bn_tensor(self, branch):
|
||||
if branch is None:
|
||||
return 0, 0
|
||||
if isinstance(branch, ConvBN):
|
||||
kernel = branch.conv.weight
|
||||
running_mean = branch.bn._mean
|
||||
running_var = branch.bn._variance
|
||||
gamma = branch.bn.weight
|
||||
beta = branch.bn.bias
|
||||
eps = branch.bn._epsilon
|
||||
else:
|
||||
assert isinstance(branch, nn.BatchNorm2D)
|
||||
if not hasattr(self, 'id_tensor'):
|
||||
input_dim = self.in_channels // self.groups
|
||||
kernel_value = np.zeros(
|
||||
(self.in_channels, input_dim, 3, 3), dtype=np.float32)
|
||||
for i in range(self.in_channels):
|
||||
kernel_value[i, i % input_dim, 1, 1] = 1
|
||||
self.id_tensor = paddle.to_tensor(kernel_value)
|
||||
kernel = self.id_tensor
|
||||
running_mean = branch._mean
|
||||
running_var = branch._variance
|
||||
gamma = branch.weight
|
||||
beta = branch.bias
|
||||
eps = branch._epsilon
|
||||
std = (running_var + eps).sqrt()
|
||||
t = (gamma / std).reshape((-1, 1, 1, 1))
|
||||
return kernel * t, beta - running_mean * gamma / std
|
||||
|
||||
|
||||
class RepVGG(nn.Layer):
|
||||
def __init__(self,
|
||||
num_blocks,
|
||||
width_multiplier=None,
|
||||
override_groups_map=None,
|
||||
class_num=1000):
|
||||
super(RepVGG, self).__init__()
|
||||
|
||||
assert len(width_multiplier) == 4
|
||||
self.override_groups_map = override_groups_map or dict()
|
||||
|
||||
assert 0 not in self.override_groups_map
|
||||
|
||||
self.in_planes = min(64, int(64 * width_multiplier[0]))
|
||||
|
||||
self.stage0 = RepVGGBlock(
|
||||
in_channels=3,
|
||||
out_channels=self.in_planes,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
padding=1)
|
||||
self.cur_layer_idx = 1
|
||||
self.stage1 = self._make_stage(
|
||||
int(64 * width_multiplier[0]), num_blocks[0], stride=2)
|
||||
self.stage2 = self._make_stage(
|
||||
int(128 * width_multiplier[1]), num_blocks[1], stride=2)
|
||||
self.stage3 = self._make_stage(
|
||||
int(256 * width_multiplier[2]), num_blocks[2], stride=2)
|
||||
self.stage4 = self._make_stage(
|
||||
int(512 * width_multiplier[3]), num_blocks[3], stride=2)
|
||||
self.gap = nn.AdaptiveAvgPool2D(output_size=1)
|
||||
self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)
|
||||
|
||||
def _make_stage(self, planes, num_blocks, stride):
|
||||
strides = [stride] + [1] * (num_blocks - 1)
|
||||
blocks = []
|
||||
for stride in strides:
|
||||
cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
|
||||
blocks.append(
|
||||
RepVGGBlock(
|
||||
in_channels=self.in_planes,
|
||||
out_channels=planes,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=1,
|
||||
groups=cur_groups))
|
||||
self.in_planes = planes
|
||||
self.cur_layer_idx += 1
|
||||
return nn.Sequential(*blocks)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.stage0(x)
|
||||
out = self.stage1(out)
|
||||
out = self.stage2(out)
|
||||
out = self.stage3(out)
|
||||
out = self.stage4(out)
|
||||
out = self.gap(out)
|
||||
out = paddle.flatten(out, start_axis=1)
|
||||
out = self.linear(out)
|
||||
return out
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def RepVGG_A0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[2, 4, 14, 1],
|
||||
width_multiplier=[0.75, 0.75, 0.75, 2.5],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_A0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_A1(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[2, 4, 14, 1],
|
||||
width_multiplier=[1, 1, 1, 2.5],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_A1"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_A2(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[2, 4, 14, 1],
|
||||
width_multiplier=[1.5, 1.5, 1.5, 2.75],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_A2"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[1, 1, 1, 2.5],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B1(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[2, 2, 2, 4],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B1"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B1g2(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[2, 2, 2, 4],
|
||||
override_groups_map=g2_map,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B1g2"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B1g4(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[2, 2, 2, 4],
|
||||
override_groups_map=g4_map,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B1g4"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[2.5, 2.5, 2.5, 5],
|
||||
override_groups_map=None,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B2"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[2.5, 2.5, 2.5, 5],
|
||||
override_groups_map=g4_map,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B2g4"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = RepVGG(
|
||||
num_blocks=[4, 6, 16, 1],
|
||||
width_multiplier=[3, 3, 3, 5],
|
||||
override_groups_map=g4_map,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,264 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"Res2Net50_26w_4s":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams",
|
||||
"Res2Net50_14w_8s":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(
|
||||
self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None, ):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels1,
|
||||
num_channels2,
|
||||
num_filters,
|
||||
stride,
|
||||
scales,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
self.stride = stride
|
||||
self.scales = scales
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels1,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1_list = []
|
||||
for s in range(scales - 1):
|
||||
conv1 = self.add_sublayer(
|
||||
name + '_branch2b_' + str(s + 1),
|
||||
ConvBNLayer(
|
||||
num_channels=num_filters // scales,
|
||||
num_filters=num_filters // scales,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + '_branch2b_' + str(s + 1)))
|
||||
self.conv1_list.append(conv1)
|
||||
self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
|
||||
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_channels2,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels1,
|
||||
num_filters=num_channels2,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
    def forward(self, inputs):
        y = self.conv0(inputs)
        xs = paddle.split(y, self.scales, 1)
        ys = []
        for s, conv1 in enumerate(self.conv1_list):
            if s == 0 or self.stride == 2:
                ys.append(conv1(xs[s]))
            else:
                ys.append(conv1(paddle.add(xs[s], ys[-1])))
        if self.stride == 1:
            ys.append(xs[-1])
        else:
            ys.append(self.pool2d_avg(xs[-1]))
        conv1 = paddle.concat(ys, axis=1)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y

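# Added sketch (not in the original file): the BottleneckBlock forward above in
# plain tensor ops, with identity in place of the 3x3 ConvBNLayers, to show how
# each scale branch receives the previous branch's output added to its own
# channel split when stride == 1 (the "hierarchical residual" idea of Res2Net).
def _res2net_split_sketch(y, scales=4):
    xs = paddle.split(y, scales, axis=1)  # split channels into `scales` groups
    ys = [xs[0]]                          # first branch: no carry-over
    for s in range(1, scales - 1):
        ys.append(xs[s] + ys[-1])         # later branches: add previous output
    ys.append(xs[-1])                     # last split passes through untouched
    return paddle.concat(ys, axis=1)      # channel count is restored
# e.g. _res2net_split_sketch(paddle.ones([1, 8, 4, 4])).shape == [1, 8, 4, 4]

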
class Res2Net(nn.Layer):
|
||||
def __init__(self, layers=50, scales=4, width=26, class_num=1000):
|
||||
super(Res2Net, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.scales = scales
|
||||
self.width = width
|
||||
basic_width = self.width * self.scales
|
||||
supported_layers = [50, 101, 152, 200]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
|
||||
if layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
elif layers == 200:
|
||||
depth = [3, 12, 48, 3]
|
||||
num_channels = [64, 256, 512, 1024]
|
||||
num_channels2 = [256, 512, 1024, 2048]
|
||||
num_filters = [basic_width * t for t in [1, 2, 4, 8]]
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=64,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1")
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_list = []
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels1=num_channels[block]
|
||||
if i == 0 else num_channels2[block],
|
||||
num_channels2=num_channels2[block],
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
scales=scales,
|
||||
shortcut=shortcut,
|
||||
if_first=block == i == 0,
|
||||
name=conv_name))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv1(inputs)
|
||||
y = self.pool2d_max(y)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def Res2Net50_26w_4s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net(layers=50, scales=4, width=26, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["Res2Net50_26w_4s"], use_ssld=use_ssld)
    return model


def Res2Net50_14w_8s(pretrained=False, use_ssld=False, **kwargs):
    model = Res2Net(layers=50, scales=8, width=14, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["Res2Net50_14w_8s"], use_ssld=use_ssld)
    return model
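
# Added usage sketch (not part of the original diff): a quick offline shape
# check of one factory; pretrained=False so no weight download is attempted,
# and class_num=100 stands in for a custom classification head.
if __name__ == "__main__":
    model = Res2Net50_26w_4s(pretrained=False, class_num=100)
    model.eval()
    with paddle.no_grad():
        print(model(paddle.ones([1, 3, 224, 224])).shape)  # expected: [1, 100]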
@ -0,0 +1,305 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"Res2Net50_vd_26w_4s":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams",
|
||||
"Res2Net101_vd_26w_4s":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams",
|
||||
"Res2Net200_vd_26w_4s":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(
|
||||
self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
is_vd_mode=False,
|
||||
act=None,
|
||||
name=None, ):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self.is_vd_mode = is_vd_mode
|
||||
self._pool2d_avg = AvgPool2D(
|
||||
kernel_size=2, stride=2, padding=0, ceil_mode=True)
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y

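# Added note (not part of the original diff): `is_vd_mode` implements the
# ResNet-D style shortcut. Spatial downsampling is done by a 2x2 stride-2
# average pool before the 1x1 convolution (which then keeps stride 1), instead
# of a strided 1x1 convolution that would discard 3 of every 4 activations.
# A minimal shape check of that pooling step:
def _vd_shortcut_shape_sketch():
    pool = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
    feat = paddle.ones([1, 64, 56, 56])
    return pool(feat).shape  # [1, 64, 28, 28]; the 1x1 conv after it uses stride 1

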
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels1,
|
||||
num_channels2,
|
||||
num_filters,
|
||||
stride,
|
||||
scales,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
self.stride = stride
|
||||
self.scales = scales
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels1,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1_list = []
|
||||
for s in range(scales - 1):
|
||||
conv1 = self.add_sublayer(
|
||||
name + '_branch2b_' + str(s + 1),
|
||||
ConvBNLayer(
|
||||
num_channels=num_filters // scales,
|
||||
num_filters=num_filters // scales,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + '_branch2b_' + str(s + 1)))
|
||||
self.conv1_list.append(conv1)
|
||||
self.pool2d_avg = AvgPool2D(kernel_size=3, stride=stride, padding=1)
|
||||
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_channels2,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels1,
|
||||
num_filters=num_channels2,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
xs = paddle.split(y, self.scales, 1)
|
||||
ys = []
|
||||
for s, conv1 in enumerate(self.conv1_list):
|
||||
if s == 0 or self.stride == 2:
|
||||
ys.append(conv1(xs[s]))
|
||||
else:
|
||||
ys.append(conv1(xs[s] + ys[-1]))
|
||||
if self.stride == 1:
|
||||
ys.append(xs[-1])
|
||||
else:
|
||||
ys.append(self.pool2d_avg(xs[-1]))
|
||||
conv1 = paddle.concat(ys, axis=1)
|
||||
conv2 = self.conv2(conv1)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
y = paddle.add(x=short, y=conv2)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class Res2Net_vd(nn.Layer):
|
||||
def __init__(self, layers=50, scales=4, width=26, class_num=1000):
|
||||
super(Res2Net_vd, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.scales = scales
|
||||
self.width = width
|
||||
basic_width = self.width * self.scales
|
||||
supported_layers = [50, 101, 152, 200]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
|
||||
if layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
elif layers == 200:
|
||||
depth = [3, 12, 48, 3]
|
||||
num_channels = [64, 256, 512, 1024]
|
||||
num_channels2 = [256, 512, 1024, 2048]
|
||||
num_filters = [basic_width * t for t in [1, 2, 4, 8]]
|
||||
|
||||
self.conv1_1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1_1")
|
||||
self.conv1_2 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_2")
|
||||
self.conv1_3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_3")
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_list = []
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152, 200] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels1=num_channels[block]
|
||||
if i == 0 else num_channels2[block],
|
||||
num_channels2=num_channels2[block],
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
scales=scales,
|
||||
shortcut=shortcut,
|
||||
if_first=block == i == 0,
|
||||
name=conv_name))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv1_1(inputs)
|
||||
y = self.conv1_2(y)
|
||||
y = self.conv1_3(y)
|
||||
y = self.pool2d_max(y)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def Res2Net50_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = Res2Net_vd(layers=50, scales=4, width=26, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["Res2Net50_vd_26w_4s"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def Res2Net101_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = Res2Net_vd(layers=101, scales=4, width=26, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["Res2Net101_vd_26w_4s"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def Res2Net200_vd_26w_4s(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = Res2Net_vd(layers=200, scales=4, width=26, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["Res2Net200_vd_26w_4s"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
@ -0,0 +1,740 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Code was based on https://github.com/zhanghang1989/ResNeSt
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import math
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.regularizer import L2Decay
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNeSt50_fast_1s1x64d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams",
|
||||
"ResNeSt50":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
|
||||
"ResNeSt101":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
bn_decay = 0.0
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weight"),
|
||||
bias_attr=False)
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(
|
||||
name=name + "_scale", regularizer=L2Decay(bn_decay)),
|
||||
bias_attr=ParamAttr(
|
||||
name + "_offset", regularizer=L2Decay(bn_decay)),
|
||||
moving_mean_name=name + "_mean",
|
||||
moving_variance_name=name + "_variance")
|
||||
|
||||
def forward(self, x):
|
||||
x = self._conv(x)
|
||||
x = self._batch_norm(x)
|
||||
return x
|
||||
|
||||
|
||||
class rSoftmax(nn.Layer):
|
||||
def __init__(self, radix, cardinality):
|
||||
super(rSoftmax, self).__init__()
|
||||
self.radix = radix
|
||||
self.cardinality = cardinality
|
||||
|
||||
    def forward(self, x):
        cardinality = self.cardinality
        radix = self.radix

        batch, r, h, w = x.shape
        if self.radix > 1:
            x = paddle.reshape(
                x=x,
                shape=[
                    batch, cardinality, radix,
                    int(r * h * w / cardinality / radix)
                ])
            x = paddle.transpose(x=x, perm=[0, 2, 1, 3])
            x = nn.functional.softmax(x, axis=1)
            x = paddle.reshape(x=x, shape=[batch, r * h * w, 1, 1])
        else:
            x = nn.functional.sigmoid(x)
        return x

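# Added sketch (not part of the original diff): what the reshape/transpose in
# rSoftmax does for radix=2, cardinality=1 on a [1, radix*C, 1, 1] attention
# tensor. Softmax runs over the radix axis, so the candidate weights for each
# channel compete with each other and sum to 1 across the radix splits.
def _rsoftmax_sketch(radix=2, cardinality=1, channels=4):
    atten = paddle.ones([1, radix * channels, 1, 1])
    x = paddle.reshape(atten, [1, cardinality, radix, channels])
    x = paddle.transpose(x, perm=[0, 2, 1, 3])              # [1, radix, cardinality, channels]
    x = nn.functional.softmax(x, axis=1)                     # normalize across radix
    return paddle.reshape(x, [1, radix * channels, 1, 1])    # back to conv layout
# With the all-ones input above, every returned weight comes out as 0.5.

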
class SplatConv(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=True,
|
||||
radix=2,
|
||||
reduction_factor=4,
|
||||
rectify_avg=False,
|
||||
name=None):
|
||||
super(SplatConv, self).__init__()
|
||||
|
||||
self.radix = radix
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=in_channels,
|
||||
num_filters=channels * radix,
|
||||
filter_size=kernel_size,
|
||||
stride=stride,
|
||||
groups=groups * radix,
|
||||
act="relu",
|
||||
name=name + "_1_weights")
|
||||
|
||||
self.avg_pool2d = AdaptiveAvgPool2D(1)
|
||||
|
||||
inter_channels = int(max(in_channels * radix // reduction_factor, 32))
|
||||
|
||||
# to calc gap
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=channels,
|
||||
num_filters=inter_channels,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
groups=groups,
|
||||
act="relu",
|
||||
name=name + "_2_weights")
|
||||
|
||||
# to calc atten
|
||||
self.conv3 = Conv2D(
|
||||
in_channels=inter_channels,
|
||||
out_channels=channels * radix,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(
|
||||
name=name + "_weights", initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
|
||||
self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)
|
||||
|
||||
    def forward(self, x):
        x = self.conv1(x)

        if self.radix > 1:
            splited = paddle.split(x, num_or_sections=self.radix, axis=1)
            gap = paddle.add_n(splited)
        else:
            gap = x

        gap = self.avg_pool2d(gap)
        gap = self.conv2(gap)

        atten = self.conv3(gap)
        atten = self.rsoftmax(atten)

        if self.radix > 1:
            attens = paddle.split(atten, num_or_sections=self.radix, axis=1)
            y = paddle.add_n([
                paddle.multiply(split, att)
                for (att, split) in zip(attens, splited)
            ])
        else:
            y = paddle.multiply(x, atten)

        return y

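# Added sketch (not part of the original diff): the radix > 1 path of
# SplatConv.forward in plain tensor ops. The radix splits are summed into a
# single descriptor ("gap"), which conv2/conv3/rSoftmax turn into per-split
# attention weights; the output is the attention-weighted sum over the splits.
def _split_attention_sketch(radix=2):
    x = paddle.ones([1, 4 * radix, 8, 8])                        # stands in for conv1 output
    splited = paddle.split(x, num_or_sections=radix, axis=1)
    gap = paddle.add_n(splited)                                   # would feed conv2 -> conv3 -> rsoftmax
    atten = paddle.ones([1, 4 * radix, 1, 1]) * (1.0 / radix)     # stand-in for rsoftmax(conv3(conv2(gap)))
    attens = paddle.split(atten, num_or_sections=radix, axis=1)
    y = paddle.add_n([paddle.multiply(s, a) for (a, s) in zip(attens, splited)])
    return y.shape  # [1, 4, 8, 8]: one split's worth of channels

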
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
planes,
|
||||
stride=1,
|
||||
radix=1,
|
||||
cardinality=1,
|
||||
bottleneck_width=64,
|
||||
avd=False,
|
||||
avd_first=False,
|
||||
dilation=1,
|
||||
is_first=False,
|
||||
rectify_avg=False,
|
||||
last_gamma=False,
|
||||
avg_down=False,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
self.inplanes = inplanes
|
||||
self.planes = planes
|
||||
self.stride = stride
|
||||
self.radix = radix
|
||||
self.cardinality = cardinality
|
||||
self.avd = avd
|
||||
self.avd_first = avd_first
|
||||
self.dilation = dilation
|
||||
self.is_first = is_first
|
||||
self.rectify_avg = rectify_avg
|
||||
self.last_gamma = last_gamma
|
||||
self.avg_down = avg_down
|
||||
|
||||
group_width = int(planes * (bottleneck_width / 64.)) * cardinality
|
||||
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=self.inplanes,
|
||||
num_filters=group_width,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act="relu",
|
||||
name=name + "_conv1")
|
||||
|
||||
if avd and avd_first and (stride > 1 or is_first):
|
||||
self.avg_pool2d_1 = AvgPool2D(
|
||||
kernel_size=3, stride=stride, padding=1)
|
||||
|
||||
if radix >= 1:
|
||||
self.conv2 = SplatConv(
|
||||
in_channels=group_width,
|
||||
channels=group_width,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=dilation,
|
||||
dilation=dilation,
|
||||
groups=cardinality,
|
||||
bias=False,
|
||||
radix=radix,
|
||||
rectify_avg=rectify_avg,
|
||||
name=name + "_splat")
|
||||
else:
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=group_width,
|
||||
num_filters=group_width,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
dilation=dilation,
|
||||
groups=cardinality,
|
||||
act="relu",
|
||||
name=name + "_conv2")
|
||||
|
||||
if avd and avd_first == False and (stride > 1 or is_first):
|
||||
self.avg_pool2d_2 = AvgPool2D(
|
||||
kernel_size=3, stride=stride, padding=1)
|
||||
|
||||
self.conv3 = ConvBNLayer(
|
||||
num_channels=group_width,
|
||||
num_filters=planes * 4,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=name + "_conv3")
|
||||
|
||||
if stride != 1 or self.inplanes != self.planes * 4:
|
||||
if avg_down:
|
||||
if dilation == 1:
|
||||
self.avg_pool2d_3 = AvgPool2D(
|
||||
kernel_size=stride, stride=stride, padding=0)
|
||||
else:
|
||||
self.avg_pool2d_3 = AvgPool2D(
|
||||
kernel_size=1, stride=1, padding=0, ceil_mode=True)
|
||||
|
||||
self.conv4 = Conv2D(
|
||||
in_channels=self.inplanes,
|
||||
out_channels=planes * 4,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
groups=1,
|
||||
weight_attr=ParamAttr(
|
||||
name=name + "_weights", initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
else:
|
||||
self.conv4 = Conv2D(
|
||||
in_channels=self.inplanes,
|
||||
out_channels=planes * 4,
|
||||
kernel_size=1,
|
||||
stride=stride,
|
||||
padding=0,
|
||||
groups=1,
|
||||
weight_attr=ParamAttr(
|
||||
name=name + "_shortcut_weights",
|
||||
initializer=KaimingNormal()),
|
||||
bias_attr=False)
|
||||
|
||||
bn_decay = 0.0
|
||||
self._batch_norm = BatchNorm(
|
||||
planes * 4,
|
||||
act=None,
|
||||
param_attr=ParamAttr(
|
||||
name=name + "_shortcut_scale",
|
||||
regularizer=L2Decay(bn_decay)),
|
||||
bias_attr=ParamAttr(
|
||||
name + "_shortcut_offset", regularizer=L2Decay(bn_decay)),
|
||||
moving_mean_name=name + "_shortcut_mean",
|
||||
moving_variance_name=name + "_shortcut_variance")
|
||||
|
||||
def forward(self, x):
|
||||
short = x
|
||||
|
||||
x = self.conv1(x)
|
||||
if self.avd and self.avd_first and (self.stride > 1 or self.is_first):
|
||||
x = self.avg_pool2d_1(x)
|
||||
|
||||
x = self.conv2(x)
|
||||
|
||||
if self.avd and self.avd_first == False and (self.stride > 1 or
|
||||
self.is_first):
|
||||
x = self.avg_pool2d_2(x)
|
||||
|
||||
x = self.conv3(x)
|
||||
|
||||
if self.stride != 1 or self.inplanes != self.planes * 4:
|
||||
if self.avg_down:
|
||||
short = self.avg_pool2d_3(short)
|
||||
|
||||
short = self.conv4(short)
|
||||
|
||||
short = self._batch_norm(short)
|
||||
|
||||
y = paddle.add(x=short, y=x)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class ResNeStLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
planes,
|
||||
blocks,
|
||||
radix,
|
||||
cardinality,
|
||||
bottleneck_width,
|
||||
avg_down,
|
||||
avd,
|
||||
avd_first,
|
||||
rectify_avg,
|
||||
last_gamma,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
is_first=True,
|
||||
name=None):
|
||||
super(ResNeStLayer, self).__init__()
|
||||
self.inplanes = inplanes
|
||||
self.planes = planes
|
||||
self.blocks = blocks
|
||||
self.radix = radix
|
||||
self.cardinality = cardinality
|
||||
self.bottleneck_width = bottleneck_width
|
||||
self.avg_down = avg_down
|
||||
self.avd = avd
|
||||
self.avd_first = avd_first
|
||||
self.rectify_avg = rectify_avg
|
||||
self.last_gamma = last_gamma
|
||||
self.is_first = is_first
|
||||
|
||||
if dilation == 1 or dilation == 2:
|
||||
bottleneck_func = self.add_sublayer(
|
||||
name + "_bottleneck_0",
|
||||
BottleneckBlock(
|
||||
inplanes=self.inplanes,
|
||||
planes=planes,
|
||||
stride=stride,
|
||||
radix=radix,
|
||||
cardinality=cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
dilation=1,
|
||||
is_first=is_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
name=name + "_bottleneck_0"))
|
||||
elif dilation == 4:
|
||||
bottleneck_func = self.add_sublayer(
|
||||
name + "_bottleneck_0",
|
||||
BottleneckBlock(
|
||||
inplanes=self.inplanes,
|
||||
planes=planes,
|
||||
stride=stride,
|
||||
radix=radix,
|
||||
cardinality=cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
dilation=2,
|
||||
is_first=is_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
name=name + "_bottleneck_0"))
|
||||
else:
|
||||
raise RuntimeError("=>unknown dilation size")
|
||||
|
||||
self.inplanes = planes * 4
|
||||
self.bottleneck_block_list = [bottleneck_func]
|
||||
for i in range(1, blocks):
|
||||
curr_name = name + "_bottleneck_" + str(i)
|
||||
|
||||
bottleneck_func = self.add_sublayer(
|
||||
curr_name,
|
||||
BottleneckBlock(
|
||||
inplanes=self.inplanes,
|
||||
planes=planes,
|
||||
radix=radix,
|
||||
cardinality=cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
dilation=dilation,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
name=curr_name))
|
||||
self.bottleneck_block_list.append(bottleneck_func)
|
||||
|
||||
def forward(self, x):
|
||||
for bottleneck_block in self.bottleneck_block_list:
|
||||
x = bottleneck_block(x)
|
||||
return x
|
||||
|
||||
|
||||
class ResNeSt(nn.Layer):
|
||||
def __init__(self,
|
||||
layers,
|
||||
radix=1,
|
||||
groups=1,
|
||||
bottleneck_width=64,
|
||||
dilated=False,
|
||||
dilation=1,
|
||||
deep_stem=False,
|
||||
stem_width=64,
|
||||
avg_down=False,
|
||||
rectify_avg=False,
|
||||
avd=False,
|
||||
avd_first=False,
|
||||
final_drop=0.0,
|
||||
last_gamma=False,
|
||||
class_num=1000):
|
||||
super(ResNeSt, self).__init__()
|
||||
|
||||
self.cardinality = groups
|
||||
self.bottleneck_width = bottleneck_width
|
||||
# ResNet-D params
|
||||
self.inplanes = stem_width * 2 if deep_stem else 64
|
||||
self.avg_down = avg_down
|
||||
self.last_gamma = last_gamma
|
||||
# ResNeSt params
|
||||
self.radix = radix
|
||||
self.avd = avd
|
||||
self.avd_first = avd_first
|
||||
|
||||
self.deep_stem = deep_stem
|
||||
self.stem_width = stem_width
|
||||
self.layers = layers
|
||||
self.final_drop = final_drop
|
||||
self.dilated = dilated
|
||||
self.dilation = dilation
|
||||
|
||||
self.rectify_avg = rectify_avg
|
||||
|
||||
if self.deep_stem:
|
||||
self.stem = nn.Sequential(
|
||||
("conv1", ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=stem_width,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act="relu",
|
||||
name="conv1")), ("conv2", ConvBNLayer(
|
||||
num_channels=stem_width,
|
||||
num_filters=stem_width,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act="relu",
|
||||
name="conv2")), ("conv3", ConvBNLayer(
|
||||
num_channels=stem_width,
|
||||
num_filters=stem_width * 2,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act="relu",
|
||||
name="conv3")))
|
||||
else:
|
||||
self.stem = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=stem_width,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
act="relu",
|
||||
name="conv1")
|
||||
|
||||
self.max_pool2d = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.layer1 = ResNeStLayer(
|
||||
inplanes=self.stem_width * 2
|
||||
if self.deep_stem else self.stem_width,
|
||||
planes=64,
|
||||
blocks=self.layers[0],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
is_first=False,
|
||||
name="layer1")
|
||||
|
||||
# return
|
||||
|
||||
self.layer2 = ResNeStLayer(
|
||||
inplanes=256,
|
||||
planes=128,
|
||||
blocks=self.layers[1],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=2,
|
||||
name="layer2")
|
||||
|
||||
if self.dilated or self.dilation == 4:
|
||||
self.layer3 = ResNeStLayer(
|
||||
inplanes=512,
|
||||
planes=256,
|
||||
blocks=self.layers[2],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=1,
|
||||
dilation=2,
|
||||
name="layer3")
|
||||
self.layer4 = ResNeStLayer(
|
||||
inplanes=1024,
|
||||
planes=512,
|
||||
blocks=self.layers[3],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=1,
|
||||
dilation=4,
|
||||
name="layer4")
|
||||
elif self.dilation == 2:
|
||||
self.layer3 = ResNeStLayer(
|
||||
inplanes=512,
|
||||
planes=256,
|
||||
blocks=self.layers[2],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=2,
|
||||
dilation=1,
|
||||
name="layer3")
|
||||
self.layer4 = ResNeStLayer(
|
||||
inplanes=1024,
|
||||
planes=512,
|
||||
blocks=self.layers[3],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=1,
|
||||
dilation=2,
|
||||
name="layer4")
|
||||
else:
|
||||
self.layer3 = ResNeStLayer(
|
||||
inplanes=512,
|
||||
planes=256,
|
||||
blocks=self.layers[2],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=2,
|
||||
name="layer3")
|
||||
self.layer4 = ResNeStLayer(
|
||||
inplanes=1024,
|
||||
planes=512,
|
||||
blocks=self.layers[3],
|
||||
radix=radix,
|
||||
cardinality=self.cardinality,
|
||||
bottleneck_width=bottleneck_width,
|
||||
avg_down=self.avg_down,
|
||||
avd=avd,
|
||||
avd_first=avd_first,
|
||||
rectify_avg=rectify_avg,
|
||||
last_gamma=last_gamma,
|
||||
stride=2,
|
||||
name="layer4")
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.out_channels = 2048
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.out_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.out_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=nn.initializer.Uniform(-stdv, stdv),
|
||||
name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
def forward(self, x):
|
||||
x = self.stem(x)
|
||||
x = self.max_pool2d(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
|
||||
x = self.layer3(x)
|
||||
|
||||
x = self.layer4(x)
|
||||
x = self.pool2d_avg(x)
|
||||
x = paddle.reshape(x, shape=[-1, self.out_channels])
|
||||
x = self.out(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ResNeSt50_fast_1s1x64d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeSt(
|
||||
layers=[3, 4, 6, 3],
|
||||
radix=1,
|
||||
groups=1,
|
||||
bottleneck_width=64,
|
||||
deep_stem=True,
|
||||
stem_width=32,
|
||||
avg_down=True,
|
||||
avd=True,
|
||||
avd_first=True,
|
||||
final_drop=0.0,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeSt50_fast_1s1x64d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeSt50(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeSt(
|
||||
layers=[3, 4, 6, 3],
|
||||
radix=2,
|
||||
groups=1,
|
||||
bottleneck_width=64,
|
||||
deep_stem=True,
|
||||
stem_width=32,
|
||||
avg_down=True,
|
||||
avd=True,
|
||||
avd_first=False,
|
||||
final_drop=0.0,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ResNeSt50"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeSt(
|
||||
layers=[3, 4, 23, 3],
|
||||
radix=2,
|
||||
groups=1,
|
||||
bottleneck_width=64,
|
||||
deep_stem=True,
|
||||
stem_width=64,
|
||||
avg_down=True,
|
||||
avd=True,
|
||||
avd_first=False,
|
||||
final_drop=0.0,
|
||||
**kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
|
||||
return model
|
||||
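
# Added note (not part of the original diff): the three factories above differ
# only in depth and split-attention settings:
#   ResNeSt50_fast_1s1x64d : layers [3, 4, 6, 3],  radix=1, avd_first=True,  stem_width=32
#   ResNeSt50              : layers [3, 4, 6, 3],  radix=2, avd_first=False, stem_width=32
#   ResNeSt101             : layers [3, 4, 23, 3], radix=2, avd_first=False, stem_width=64
# A quick offline parameter count (no download, no forward pass):
if __name__ == "__main__":
    import numpy as np
    model = ResNeSt50(pretrained=False)
    n_params = int(sum(np.prod(p.shape) for p in model.parameters()))
    print(f"ResNeSt50 parameters: {n_params / 1e6:.1f} M")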
@ -0,0 +1,309 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNet50_vc":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2b")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
self._num_channels_out = num_filters * 4
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
conv2 = self.conv2(conv1)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
|
||||
y = paddle.add(x=short, y=conv2)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
name=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.stride = stride
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
act=None,
|
||||
name=name + "_branch2b")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
y = paddle.add(x=short, y=conv1)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class ResNet_vc(nn.Layer):
|
||||
def __init__(self, layers=50, class_num=1000):
|
||||
super(ResNet_vc, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
supported_layers = [18, 34, 50, 101, 152]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
|
||||
if layers == 18:
|
||||
depth = [2, 2, 2, 2]
|
||||
elif layers == 34 or layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
num_channels = [64, 256, 512,
|
||||
1024] if layers >= 50 else [64, 64, 128, 256]
|
||||
num_filters = [64, 128, 256, 512]
|
||||
|
||||
self.conv1_1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1_1")
|
||||
self.conv1_2 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_2")
|
||||
self.conv1_3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_3")
|
||||
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_list = []
|
||||
if layers >= 50:
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels[block]
|
||||
if i == 0 else num_filters[block] * 4,
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
name=conv_name))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
else:
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
basic_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BasicBlock(
|
||||
num_channels=num_channels[block]
|
||||
if i == 0 else num_filters[block],
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
name=conv_name))
|
||||
self.block_list.append(basic_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_0.w_0"),
|
||||
bias_attr=ParamAttr(name="fc_0.b_0"))
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv1_1(inputs)
|
||||
y = self.conv1_2(y)
|
||||
y = self.conv1_3(y)
|
||||
y = self.pool2d_max(y)
|
||||
for block in self.block_list:
|
||||
y = block(y)
|
||||
y = self.pool2d_avg(y)
|
||||
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
|
||||
y = self.out(y)
|
||||
return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def ResNet50_vc(pretrained=False, use_ssld=False, **kwargs):
    model = ResNet_vc(layers=50, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNet50_vc"], use_ssld=use_ssld)
    return model
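
# Added note (not part of the original diff): the "vc" variant swaps the usual
# single 7x7 stride-2 stem convolution for three stacked 3x3 convolutions
# (32, 32, 64 filters); downsampling still happens once in the stem (stride 2
# on conv1_1) and once more in pool2d_max. A rough stem weight comparison,
# ignoring BatchNorm parameters:
if __name__ == "__main__":
    weights_7x7_stem = 3 * 64 * 7 * 7                                      # 9,408
    weights_vc_stem = 3 * 32 * 3 * 3 + 32 * 32 * 3 * 3 + 32 * 64 * 3 * 3   # 28,512
    print(weights_7x7_stem, weights_vc_stem)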
@ -0,0 +1,298 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNeXt50_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams",
|
||||
"ResNeXt50_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams",
|
||||
"ResNeXt101_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams",
|
||||
"ResNeXt101_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams",
|
||||
"ResNeXt152_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams",
|
||||
"ResNeXt152_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False,
|
||||
data_format=data_format)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance',
|
||||
data_layout=data_format)
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
cardinality,
|
||||
shortcut=True,
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a",
|
||||
data_format=data_format)
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
groups=cardinality,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2b",
|
||||
data_format=data_format)
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c",
|
||||
data_format=data_format)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 2
|
||||
if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name=name + "_branch1",
|
||||
data_format=data_format)
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
conv2 = self.conv2(conv1)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
|
||||
y = paddle.add(x=short, y=conv2)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
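# Added note (not part of the original diff): the conv2 output width above
# depends on cardinality because the ResNeXt class below defines num_filters
# differently for the two variants ([128, 256, 512, 1024] for 32x4d versus
# [256, 512, 1024, 2048] for 64x4d). Both conventions end a given stage at the
# same channel count, e.g. for stage "res2":
def _resnext_stage_width_sketch():
    filters_32x4d, filters_64x4d = 128, 256   # stage "res2" entries of num_filters
    out_32x4d = filters_32x4d * 2             # cardinality == 32: expand by 2
    out_64x4d = filters_64x4d                 # cardinality == 64: already expanded
    return out_32x4d, out_64x4d               # (256, 256)

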
class ResNeXt(nn.Layer):
|
||||
def __init__(self,
|
||||
layers=50,
|
||||
class_num=1000,
|
||||
cardinality=32,
|
||||
input_image_channel=3,
|
||||
data_format="NCHW"):
|
||||
super(ResNeXt, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.data_format = data_format
|
||||
self.input_image_channel = input_image_channel
|
||||
self.cardinality = cardinality
|
||||
supported_layers = [50, 101, 152]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
supported_cardinality = [32, 64]
|
||||
assert cardinality in supported_cardinality, \
|
||||
"supported cardinality is {} but input cardinality is {}" \
|
||||
.format(supported_cardinality, cardinality)
|
||||
if layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
num_channels = [64, 256, 512, 1024]
|
||||
num_filters = [128, 256, 512,
|
||||
1024] if cardinality == 32 else [256, 512, 1024, 2048]
|
||||
|
||||
self.conv = ConvBNLayer(
|
||||
num_channels=self.input_image_channel,
|
||||
num_filters=64,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="res_conv1",
|
||||
data_format=self.data_format)
|
||||
self.pool2d_max = MaxPool2D(
|
||||
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
|
||||
|
||||
self.block_list = []
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels[block] if i == 0 else
|
||||
num_filters[block] * int(64 // self.cardinality),
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
cardinality=self.cardinality,
|
||||
shortcut=shortcut,
|
||||
name=conv_name,
|
||||
data_format=self.data_format))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
    def forward(self, inputs):
        with paddle.static.amp.fp16_guard():
            if self.data_format == "NHWC":
                inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
                inputs.stop_gradient = True
            y = self.conv(inputs)
            y = self.pool2d_max(y)
            for block in self.block_list:
                y = block(y)
            y = self.pool2d_avg(y)
            y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
            y = self.out(y)
            return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=32, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt50_32x4d"], use_ssld=use_ssld)
    return model


def ResNeXt50_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=50, cardinality=64, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt50_64x4d"], use_ssld=use_ssld)
    return model


def ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=32, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt101_32x4d"], use_ssld=use_ssld)
    return model


def ResNeXt101_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=101, cardinality=64, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt101_64x4d"], use_ssld=use_ssld)
    return model


def ResNeXt152_32x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=32, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt152_32x4d"], use_ssld=use_ssld)
    return model


def ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
    model = ResNeXt(layers=152, cardinality=64, **kwargs)
    _load_pretrained(
        pretrained, model, MODEL_URLS["ResNeXt152_64x4d"], use_ssld=use_ssld)
    return model
|
||||
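The six factory functions above differ only in depth and cardinality. A minimal smoke-test sketch (an addition, not part of this diff) could look like the following; the import path is an assumption based on the usual PaddleClas layout, and class_num is simply forwarded through **kwargs.

# Sketch only: module path and test shapes are assumptions, not part of the repo.
import paddle
from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d  # assumed path

model = ResNeXt50_32x4d(pretrained=False, class_num=100)
model.eval()
x = paddle.randn([1, 3, 224, 224])      # NCHW dummy batch
with paddle.no_grad():
    logits = model(x)
print(logits.shape)                      # expected: [1, 100]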
@ -0,0 +1,490 @@
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNeXt101_32x8d_wsl":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams",
|
||||
"ResNeXt101_32x16d_wsl":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16_wsl_pretrained.pdparams",
|
||||
"ResNeXt101_32x32d_wsl":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams",
|
||||
"ResNeXt101_32x48d_wsl":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
if "downsample" in name:
|
||||
conv_name = name + ".0"
|
||||
else:
|
||||
conv_name = name
|
||||
self._conv = Conv2D(
|
||||
in_channels=input_channels,
|
||||
out_channels=output_channels,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=conv_name + ".weight"),
|
||||
bias_attr=False)
|
||||
if "downsample" in name:
|
||||
bn_name = name[:9] + "downsample.1"
|
||||
else:
|
||||
if "conv1" == name:
|
||||
bn_name = "bn" + name[-1]
|
||||
else:
|
||||
bn_name = (name[:10] if name[7:9].isdigit() else name[:9]
|
||||
) + "bn" + name[-1]
|
||||
self._bn = BatchNorm(
|
||||
num_channels=output_channels,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + ".weight"),
|
||||
bias_attr=ParamAttr(name=bn_name + ".bias"),
|
||||
moving_mean_name=bn_name + ".running_mean",
|
||||
moving_variance_name=bn_name + ".running_var")
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv(inputs)
|
||||
x = self._bn(x)
|
||||
return x
|
||||
|
||||
|
||||
class ShortCut(nn.Layer):
    def __init__(self, input_channels, output_channels, stride, name=None):
        super(ShortCut, self).__init__()

        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        if input_channels != output_channels or stride != 1:
            self._conv = ConvBNLayer(
                input_channels,
                output_channels,
                filter_size=1,
                stride=stride,
                name=name)

    def forward(self, inputs):
        if self.input_channels != self.output_channels or self.stride != 1:
            return self._conv(inputs)
        return inputs
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self, input_channels, output_channels, stride, cardinality,
|
||||
width, name):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self._conv0 = ConvBNLayer(
|
||||
input_channels,
|
||||
output_channels,
|
||||
filter_size=1,
|
||||
act="relu",
|
||||
name=name + ".conv1")
|
||||
self._conv1 = ConvBNLayer(
|
||||
output_channels,
|
||||
output_channels,
|
||||
filter_size=3,
|
||||
act="relu",
|
||||
stride=stride,
|
||||
groups=cardinality,
|
||||
name=name + ".conv2")
|
||||
self._conv2 = ConvBNLayer(
|
||||
output_channels,
|
||||
output_channels // (width // 8),
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + ".conv3")
|
||||
self._short = ShortCut(
|
||||
input_channels,
|
||||
output_channels // (width // 8),
|
||||
stride=stride,
|
||||
name=name + ".downsample")
|
||||
|
||||
    def forward(self, inputs):
        x = self._conv0(inputs)
        x = self._conv1(x)
        x = self._conv2(x)
        y = self._short(inputs)
        y = paddle.add(x, y)
        y = F.relu(y)
        return y
|
||||
|
||||
|
||||
class ResNeXt101WSL(nn.Layer):
|
||||
def __init__(self, layers=101, cardinality=32, width=48, class_num=1000):
|
||||
super(ResNeXt101WSL, self).__init__()
|
||||
|
||||
self.class_num = class_num
|
||||
|
||||
self.layers = layers
|
||||
self.cardinality = cardinality
|
||||
self.width = width
|
||||
self.scale = width // 8
|
||||
|
||||
self.depth = [3, 4, 23, 3]
|
||||
self.base_width = cardinality * width
|
||||
num_filters = [self.base_width * i
|
||||
for i in [1, 2, 4, 8]] # [256, 512, 1024, 2048]
|
||||
self._conv_stem = ConvBNLayer(
|
||||
3, 64, 7, stride=2, act="relu", name="conv1")
|
||||
self._pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self._conv1_0 = BottleneckBlock(
|
||||
64,
|
||||
num_filters[0],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer1.0")
|
||||
self._conv1_1 = BottleneckBlock(
|
||||
num_filters[0] // (width // 8),
|
||||
num_filters[0],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer1.1")
|
||||
self._conv1_2 = BottleneckBlock(
|
||||
num_filters[0] // (width // 8),
|
||||
num_filters[0],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer1.2")
|
||||
|
||||
self._conv2_0 = BottleneckBlock(
|
||||
num_filters[0] // (width // 8),
|
||||
num_filters[1],
|
||||
stride=2,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer2.0")
|
||||
self._conv2_1 = BottleneckBlock(
|
||||
num_filters[1] // (width // 8),
|
||||
num_filters[1],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer2.1")
|
||||
self._conv2_2 = BottleneckBlock(
|
||||
num_filters[1] // (width // 8),
|
||||
num_filters[1],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer2.2")
|
||||
self._conv2_3 = BottleneckBlock(
|
||||
num_filters[1] // (width // 8),
|
||||
num_filters[1],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer2.3")
|
||||
|
||||
self._conv3_0 = BottleneckBlock(
|
||||
num_filters[1] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=2,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.0")
|
||||
self._conv3_1 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.1")
|
||||
self._conv3_2 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.2")
|
||||
self._conv3_3 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.3")
|
||||
self._conv3_4 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.4")
|
||||
self._conv3_5 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.5")
|
||||
self._conv3_6 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.6")
|
||||
self._conv3_7 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.7")
|
||||
self._conv3_8 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.8")
|
||||
self._conv3_9 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.9")
|
||||
self._conv3_10 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.10")
|
||||
self._conv3_11 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.11")
|
||||
self._conv3_12 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.12")
|
||||
self._conv3_13 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.13")
|
||||
self._conv3_14 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.14")
|
||||
self._conv3_15 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.15")
|
||||
self._conv3_16 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.16")
|
||||
self._conv3_17 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.17")
|
||||
self._conv3_18 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.18")
|
||||
self._conv3_19 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.19")
|
||||
self._conv3_20 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.20")
|
||||
self._conv3_21 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.21")
|
||||
self._conv3_22 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[2],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer3.22")
|
||||
|
||||
self._conv4_0 = BottleneckBlock(
|
||||
num_filters[2] // (width // 8),
|
||||
num_filters[3],
|
||||
stride=2,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer4.0")
|
||||
self._conv4_1 = BottleneckBlock(
|
||||
num_filters[3] // (width // 8),
|
||||
num_filters[3],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer4.1")
|
||||
self._conv4_2 = BottleneckBlock(
|
||||
num_filters[3] // (width // 8),
|
||||
num_filters[3],
|
||||
stride=1,
|
||||
cardinality=self.cardinality,
|
||||
width=self.width,
|
||||
name="layer4.2")
|
||||
|
||||
self._avg_pool = AdaptiveAvgPool2D(1)
|
||||
self._out = Linear(
|
||||
num_filters[3] // (width // 8),
|
||||
class_num,
|
||||
weight_attr=ParamAttr(name="fc.weight"),
|
||||
bias_attr=ParamAttr(name="fc.bias"))
|
||||
|
||||
def forward(self, inputs):
|
||||
x = self._conv_stem(inputs)
|
||||
x = self._pool(x)
|
||||
|
||||
x = self._conv1_0(x)
|
||||
x = self._conv1_1(x)
|
||||
x = self._conv1_2(x)
|
||||
|
||||
x = self._conv2_0(x)
|
||||
x = self._conv2_1(x)
|
||||
x = self._conv2_2(x)
|
||||
x = self._conv2_3(x)
|
||||
|
||||
x = self._conv3_0(x)
|
||||
x = self._conv3_1(x)
|
||||
x = self._conv3_2(x)
|
||||
x = self._conv3_3(x)
|
||||
x = self._conv3_4(x)
|
||||
x = self._conv3_5(x)
|
||||
x = self._conv3_6(x)
|
||||
x = self._conv3_7(x)
|
||||
x = self._conv3_8(x)
|
||||
x = self._conv3_9(x)
|
||||
x = self._conv3_10(x)
|
||||
x = self._conv3_11(x)
|
||||
x = self._conv3_12(x)
|
||||
x = self._conv3_13(x)
|
||||
x = self._conv3_14(x)
|
||||
x = self._conv3_15(x)
|
||||
x = self._conv3_16(x)
|
||||
x = self._conv3_17(x)
|
||||
x = self._conv3_18(x)
|
||||
x = self._conv3_19(x)
|
||||
x = self._conv3_20(x)
|
||||
x = self._conv3_21(x)
|
||||
x = self._conv3_22(x)
|
||||
|
||||
x = self._conv4_0(x)
|
||||
x = self._conv4_1(x)
|
||||
x = self._conv4_2(x)
|
||||
|
||||
x = self._avg_pool(x)
|
||||
x = paddle.squeeze(x, axis=[2, 3])
|
||||
x = self._out(x)
|
||||
return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ResNeXt101_32x8d_wsl(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt101WSL(cardinality=32, width=8, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_32x8d_wsl"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt101_32x16d_wsl(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt101WSL(cardinality=32, width=16, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_32x16d_wsl"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt101_32x32d_wsl(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt101WSL(cardinality=32, width=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_32x32d_wsl"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt101_32x48d_wsl(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt101WSL(cardinality=32, width=48, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_32x48d_wsl"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
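The "32xNd" naming of the WSL factories above maps directly onto the stage widths computed in ResNeXt101WSL.__init__ (base_width = cardinality * width). A small sketch of that arithmetic, added here for illustration only:

# Sketch only: mirrors the width computation in ResNeXt101WSL above.
def wsl_stage_widths(cardinality, width):
    base_width = cardinality * width
    return [base_width * m for m in (1, 2, 4, 8)]

print(wsl_stage_widths(32, 8))    # [256, 512, 1024, 2048]    -> ResNeXt101_32x8d_wsl
print(wsl_stage_widths(32, 48))   # [1536, 3072, 6144, 12288] -> ResNeXt101_32x48d_wsl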
@ -0,0 +1,317 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ResNeXt50_vd_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams",
|
||||
"ResNeXt50_vd_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams",
|
||||
"ResNeXt101_vd_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams",
|
||||
"ResNeXt101_vd_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams",
|
||||
"ResNeXt152_vd_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams",
|
||||
"ResNeXt152_vd_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(
|
||||
self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
is_vd_mode=False,
|
||||
act=None,
|
||||
name=None, ):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self.is_vd_mode = is_vd_mode
|
||||
self._pool2d_avg = AvgPool2D(
|
||||
kernel_size=2, stride=2, padding=0, ceil_mode=True)
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
cardinality,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
groups=cardinality,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2b")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 2
|
||||
if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y
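The block's last 1x1 conv doubles num_filters when cardinality is 32 and keeps it unchanged for cardinality 64; together with the num_filters lists picked in the ResNeXt class below, both settings end up with identical per-stage output widths. A tiny sketch of that check (an addition, for illustration only):

# Sketch only: reproduces the channel rule used by BottleneckBlock above.
def block_out_channels(num_filters, cardinality):
    return num_filters * 2 if cardinality == 32 else num_filters

# stage 0: num_filters is 128 for 32x4d and 256 for 64x4d
assert block_out_channels(128, 32) == block_out_channels(256, 64) == 256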
|
||||
|
||||
|
||||
class ResNeXt(nn.Layer):
|
||||
def __init__(self, layers=50, class_num=1000, cardinality=32):
|
||||
super(ResNeXt, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.cardinality = cardinality
|
||||
supported_layers = [50, 101, 152]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
supported_cardinality = [32, 64]
|
||||
assert cardinality in supported_cardinality, \
|
||||
"supported cardinality is {} but input cardinality is {}" \
|
||||
.format(supported_cardinality, cardinality)
|
||||
if layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
num_channels = [64, 256, 512, 1024]
|
||||
num_filters = [128, 256, 512,
|
||||
1024] if cardinality == 32 else [256, 512, 1024, 2048]
|
||||
|
||||
self.conv1_1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1_1")
|
||||
self.conv1_2 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_2")
|
||||
self.conv1_3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_3")
|
||||
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_list = []
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels[block] if i == 0 else
|
||||
num_filters[block] * int(64 // self.cardinality),
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
cardinality=self.cardinality,
|
||||
shortcut=shortcut,
|
||||
if_first=block == i == 0,
|
||||
name=conv_name))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc_weights"),
|
||||
bias_attr=ParamAttr(name="fc_offset"))
|
||||
|
||||
    def forward(self, inputs):
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        for block in self.block_list:
            y = block(y)
        y = self.pool2d_avg(y)
        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
        y = self.out(y)
        return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ResNeXt50_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=50, cardinality=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ResNeXt50_vd_32x4d"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt50_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=50, cardinality=64, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ResNeXt50_vd_64x4d"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt101_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=101, cardinality=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_vd_32x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt101_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=101, cardinality=64, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt101_vd_64x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt152_vd_32x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=152, cardinality=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt152_vd_32x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ResNeXt152_vd_64x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=152, cardinality=64, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["ResNeXt152_vd_64x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
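A quick way to sanity-check the vd variants defined above is to compare parameter counts; the sketch below is an addition and assumes the factories are importable from a PaddleClas checkout (the module path is a guess).

# Sketch only: import path and the use of numpy for counting are assumptions.
import numpy as np
from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d  # assumed path

def count_params(model):
    return int(sum(np.prod(p.shape) for p in model.parameters()))

print(count_params(ResNeXt50_vd_32x4d(pretrained=False)))
print(count_params(ResNeXt50_vd_64x4d(pretrained=False)))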
@ -0,0 +1,281 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
from math import ceil
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"ReXNet_1_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams",
|
||||
"ReXNet_1_3":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams",
|
||||
"ReXNet_1_5":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams",
|
||||
"ReXNet_2_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams",
|
||||
"ReXNet_3_0":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
def conv_bn_act(out,
|
||||
in_channels,
|
||||
channels,
|
||||
kernel=1,
|
||||
stride=1,
|
||||
pad=0,
|
||||
num_group=1,
|
||||
active=True,
|
||||
relu6=False):
|
||||
out.append(
|
||||
nn.Conv2D(
|
||||
in_channels,
|
||||
channels,
|
||||
kernel,
|
||||
stride,
|
||||
pad,
|
||||
groups=num_group,
|
||||
bias_attr=False))
|
||||
out.append(nn.BatchNorm2D(channels))
|
||||
if active:
|
||||
out.append(nn.ReLU6() if relu6 else nn.ReLU())
|
||||
|
||||
|
||||
def conv_bn_swish(out,
|
||||
in_channels,
|
||||
channels,
|
||||
kernel=1,
|
||||
stride=1,
|
||||
pad=0,
|
||||
num_group=1):
|
||||
out.append(
|
||||
nn.Conv2D(
|
||||
in_channels,
|
||||
channels,
|
||||
kernel,
|
||||
stride,
|
||||
pad,
|
||||
groups=num_group,
|
||||
bias_attr=False))
|
||||
out.append(nn.BatchNorm2D(channels))
|
||||
out.append(nn.Swish())
|
||||
|
||||
|
||||
class SE(nn.Layer):
    def __init__(self, in_channels, channels, se_ratio=12):
        super(SE, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.fc = nn.Sequential(
            nn.Conv2D(
                in_channels, channels // se_ratio, kernel_size=1, padding=0),
            nn.BatchNorm2D(channels // se_ratio),
            nn.ReLU(),
            nn.Conv2D(
                channels // se_ratio, channels, kernel_size=1, padding=0),
            nn.Sigmoid())

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.fc(y)
        return x * y
|
||||
|
||||
|
||||
class LinearBottleneck(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
channels,
|
||||
t,
|
||||
stride,
|
||||
use_se=True,
|
||||
se_ratio=12,
|
||||
**kwargs):
|
||||
super(LinearBottleneck, self).__init__(**kwargs)
|
||||
self.use_shortcut = stride == 1 and in_channels <= channels
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = channels
|
||||
|
||||
out = []
|
||||
if t != 1:
|
||||
dw_channels = in_channels * t
|
||||
conv_bn_swish(out, in_channels=in_channels, channels=dw_channels)
|
||||
else:
|
||||
dw_channels = in_channels
|
||||
|
||||
conv_bn_act(
|
||||
out,
|
||||
in_channels=dw_channels,
|
||||
channels=dw_channels,
|
||||
kernel=3,
|
||||
stride=stride,
|
||||
pad=1,
|
||||
num_group=dw_channels,
|
||||
active=False)
|
||||
|
||||
if use_se:
|
||||
out.append(SE(dw_channels, dw_channels, se_ratio))
|
||||
|
||||
out.append(nn.ReLU6())
|
||||
conv_bn_act(
|
||||
out,
|
||||
in_channels=dw_channels,
|
||||
channels=channels,
|
||||
active=False,
|
||||
relu6=True)
|
||||
self.out = nn.Sequential(*out)
|
||||
|
||||
    def forward(self, x):
        out = self.out(x)
        if self.use_shortcut:
            out[:, 0:self.in_channels] += x

        return out
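The shortcut above is "partial": the input is added only to the first in_channels channels of the output, which lets ReXNet widen the block output while still keeping a residual path. A standalone illustration (added, not from the repo):

# Sketch only: toy tensors, same slicing pattern as LinearBottleneck.forward above.
import paddle

x = paddle.ones([1, 16, 8, 8])       # block input, 16 channels
out = paddle.zeros([1, 24, 8, 8])    # block output, 24 channels (wider than the input)
out[:, 0:16] += x                    # residual is added to the first 16 channels only
print(out[0, 0, 0, 0], out[0, 20, 0, 0])   # 1.0 vs 0.0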
|
||||
|
||||
|
||||
class ReXNetV1(nn.Layer):
|
||||
def __init__(self,
|
||||
input_ch=16,
|
||||
final_ch=180,
|
||||
width_mult=1.0,
|
||||
depth_mult=1.0,
|
||||
class_num=1000,
|
||||
use_se=True,
|
||||
se_ratio=12,
|
||||
dropout_ratio=0.2,
|
||||
bn_momentum=0.9):
|
||||
super(ReXNetV1, self).__init__()
|
||||
|
||||
layers = [1, 2, 2, 3, 3, 5]
|
||||
strides = [1, 2, 2, 2, 1, 2]
|
||||
use_ses = [False, False, True, True, True, True]
|
||||
|
||||
layers = [ceil(element * depth_mult) for element in layers]
|
||||
strides = sum([[element] + [1] * (layers[idx] - 1)
|
||||
for idx, element in enumerate(strides)], [])
|
||||
if use_se:
|
||||
use_ses = sum([[element] * layers[idx]
|
||||
for idx, element in enumerate(use_ses)], [])
|
||||
else:
|
||||
use_ses = [False] * sum(layers[:])
|
||||
ts = [1] * layers[0] + [6] * sum(layers[1:])
|
||||
|
||||
self.depth = sum(layers[:]) * 3
|
||||
stem_channel = 32 / width_mult if width_mult < 1.0 else 32
|
||||
inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch
|
||||
|
||||
features = []
|
||||
in_channels_group = []
|
||||
channels_group = []
|
||||
|
||||
# The following channel configuration is a simple instance to make each layer become an expand layer.
|
||||
for i in range(self.depth // 3):
|
||||
if i == 0:
|
||||
in_channels_group.append(int(round(stem_channel * width_mult)))
|
||||
channels_group.append(int(round(inplanes * width_mult)))
|
||||
else:
|
||||
in_channels_group.append(int(round(inplanes * width_mult)))
|
||||
inplanes += final_ch / (self.depth // 3 * 1.0)
|
||||
channels_group.append(int(round(inplanes * width_mult)))
|
||||
|
||||
conv_bn_swish(
|
||||
features,
|
||||
3,
|
||||
int(round(stem_channel * width_mult)),
|
||||
kernel=3,
|
||||
stride=2,
|
||||
pad=1)
|
||||
|
||||
for block_idx, (in_c, c, t, s, se) in enumerate(
|
||||
zip(in_channels_group, channels_group, ts, strides, use_ses)):
|
||||
features.append(
|
||||
LinearBottleneck(
|
||||
in_channels=in_c,
|
||||
channels=c,
|
||||
t=t,
|
||||
stride=s,
|
||||
use_se=se,
|
||||
se_ratio=se_ratio))
|
||||
|
||||
pen_channels = int(1280 * width_mult)
|
||||
conv_bn_swish(features, c, pen_channels)
|
||||
|
||||
features.append(nn.AdaptiveAvgPool2D(1))
|
||||
self.features = nn.Sequential(*features)
|
||||
self.output = nn.Sequential(
|
||||
nn.Dropout(dropout_ratio),
|
||||
nn.Conv2D(
|
||||
pen_channels, class_num, 1, bias_attr=True))
|
||||
|
||||
    def forward(self, x):
        x = self.features(x)
        x = self.output(x).squeeze(axis=-1).squeeze(axis=-1)
        return x
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def ReXNet_1_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ReXNetV1(width_mult=1.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ReXNet_1_0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ReXNet_1_3(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ReXNetV1(width_mult=1.3, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ReXNet_1_3"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ReXNet_1_5(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ReXNetV1(width_mult=1.5, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ReXNet_1_5"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ReXNet_2_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ReXNetV1(width_mult=2.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ReXNet_2_0"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def ReXNet_3_0(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ReXNetV1(width_mult=3.0, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["ReXNet_3_0"], use_ssld=use_ssld)
|
||||
return model
|
||||
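The width_mult argument in the factories above scales both the stem and the penultimate 1x1 conv of ReXNetV1; the sketch below (an addition) simply replays that arithmetic.

# Sketch only: mirrors the channel arithmetic in ReXNetV1.__init__ above.
def rexnet_widths(width_mult):
    stem_channel = 32 / width_mult if width_mult < 1.0 else 32
    stem = int(round(stem_channel * width_mult))   # first conv
    pen = int(1280 * width_mult)                   # penultimate 1x1 conv
    return stem, pen

print(rexnet_widths(1.0))   # (32, 1280) -> ReXNet_1_0
print(rexnet_widths(1.3))   # (42, 1664) -> ReXNet_1_3
print(rexnet_widths(3.0))   # (96, 3840) -> ReXNet_3_0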
@ -0,0 +1,390 @@
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"SE_ResNet18_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams",
|
||||
"SE_ResNet34_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams",
|
||||
"SE_ResNet50_vd":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(
|
||||
self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
is_vd_mode=False,
|
||||
act=None,
|
||||
name=None, ):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self.is_vd_mode = is_vd_mode
|
||||
self._pool2d_avg = AvgPool2D(
|
||||
kernel_size=2, stride=2, padding=0, ceil_mode=True)
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False)
|
||||
if name == "conv1":
|
||||
bn_name = "bn_" + name
|
||||
else:
|
||||
bn_name = "bn" + name[3:]
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance')
|
||||
|
||||
def forward(self, inputs):
|
||||
if self.is_vd_mode:
|
||||
inputs = self._pool2d_avg(inputs)
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
reduction_ratio=16,
|
||||
name=None):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2b")
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name=name + "_branch2c")
|
||||
self.scale = SELayer(
|
||||
num_channels=num_filters * 4,
|
||||
num_filters=num_filters * 4,
|
||||
reduction_ratio=reduction_ratio,
|
||||
name='fc_' + name)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 4,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
conv2 = self.conv2(conv1)
|
||||
scale = self.scale(conv2)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
y = paddle.add(x=short, y=scale)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class BasicBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
reduction_ratio=16,
|
||||
name=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.stride = stride
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name=name + "_branch2a")
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
act=None,
|
||||
name=name + "_branch2b")
|
||||
|
||||
self.scale = SELayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
reduction_ratio=reduction_ratio,
|
||||
name='fc_' + name)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
is_vd_mode=False if if_first else True,
|
||||
name=name + "_branch1")
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
scale = self.scale(conv1)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
y = paddle.add(x=short, y=scale)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class SELayer(nn.Layer):
|
||||
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
|
||||
super(SELayer, self).__init__()
|
||||
|
||||
self.pool2d_gap = AdaptiveAvgPool2D(1)
|
||||
|
||||
self._num_channels = num_channels
|
||||
|
||||
med_ch = int(num_channels / reduction_ratio)
|
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0)
|
||||
self.squeeze = Linear(
|
||||
num_channels,
|
||||
med_ch,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
|
||||
bias_attr=ParamAttr(name=name + '_sqz_offset'))
|
||||
|
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0)
|
||||
self.excitation = Linear(
|
||||
med_ch,
|
||||
num_filters,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
|
||||
bias_attr=ParamAttr(name=name + '_exc_offset'))
|
||||
|
||||
    def forward(self, input):
        pool = self.pool2d_gap(input)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = F.sigmoid(excitation)
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = input * excitation
        return out
|
||||
|
||||
|
||||
class SE_ResNet_vd(nn.Layer):
|
||||
def __init__(self, layers=50, class_num=1000):
|
||||
super(SE_ResNet_vd, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
supported_layers = [18, 34, 50, 101, 152, 200]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
|
||||
if layers == 18:
|
||||
depth = [2, 2, 2, 2]
|
||||
elif layers == 34 or layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
elif layers == 200:
|
||||
depth = [3, 12, 48, 3]
|
||||
num_channels = [64, 256, 512,
|
||||
1024] if layers >= 50 else [64, 64, 128, 256]
|
||||
num_filters = [64, 128, 256, 512]
|
||||
|
||||
self.conv1_1 = ConvBNLayer(
|
||||
num_channels=3,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1_1")
|
||||
self.conv1_2 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=32,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_2")
|
||||
self.conv1_3 = ConvBNLayer(
|
||||
num_channels=32,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv1_3")
|
||||
self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.block_list = []
|
||||
if layers >= 50:
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
if layers in [101, 152] and block == 2:
|
||||
if i == 0:
|
||||
conv_name = "res" + str(block + 2) + "a"
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + "b" + str(i)
|
||||
else:
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels[block]
|
||||
if i == 0 else num_filters[block] * 4,
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
if_first=block == i == 0,
|
||||
name=conv_name))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
else:
|
||||
for block in range(len(depth)):
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
conv_name = "res" + str(block + 2) + chr(97 + i)
|
||||
basic_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BasicBlock(
|
||||
num_channels=num_channels[block]
|
||||
if i == 0 else num_filters[block],
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
shortcut=shortcut,
|
||||
if_first=block == i == 0,
|
||||
name=conv_name))
|
||||
self.block_list.append(basic_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc6_weights"),
|
||||
bias_attr=ParamAttr(name="fc6_offset"))
|
||||
|
||||
    def forward(self, inputs):
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        for block in self.block_list:
            y = block(y)
        y = self.pool2d_avg(y)
        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
        y = self.out(y)
        return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def SE_ResNet18_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = SE_ResNet_vd(layers=18, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["SE_ResNet18_vd"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_ResNet34_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = SE_ResNet_vd(layers=34, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["SE_ResNet34_vd"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_ResNet50_vd(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = SE_ResNet_vd(layers=50, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["SE_ResNet50_vd"], use_ssld=use_ssld)
|
||||
return model
|
||||
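For reference, the branches in SE_ResNet_vd.__init__ above amount to the depth table below: layers under 50 use BasicBlock, the rest use BottleneckBlock, and only the 18/34/50 variants have URLs in MODEL_URLS. The dict is an added summary, not code from the repo.

# Sketch only: summary of the depth configuration handled in SE_ResNet_vd above.
SE_RESNET_VD_DEPTHS = {
    18: ("BasicBlock", [2, 2, 2, 2]),
    34: ("BasicBlock", [3, 4, 6, 3]),
    50: ("BottleneckBlock", [3, 4, 6, 3]),
    101: ("BottleneckBlock", [3, 4, 23, 3]),
    152: ("BottleneckBlock", [3, 8, 36, 3]),
    200: ("BottleneckBlock", [3, 12, 48, 3]),
}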
@ -0,0 +1,364 @@
|
||||
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from paddle import ParamAttr
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
|
||||
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
|
||||
from paddle.nn.initializer import Uniform
|
||||
|
||||
import math
|
||||
|
||||
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
|
||||
|
||||
MODEL_URLS = {
|
||||
"SE_ResNeXt50_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams",
|
||||
"SE_ResNeXt101_32x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams",
|
||||
"SE_ResNeXt152_64x4d":
|
||||
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt152_64x4d_pretrained.pdparams",
|
||||
}
|
||||
|
||||
__all__ = list(MODEL_URLS.keys())
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
filter_size,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=None,
|
||||
name=None,
|
||||
data_format='NCHW'):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
|
||||
self._conv = Conv2D(
|
||||
in_channels=num_channels,
|
||||
out_channels=num_filters,
|
||||
kernel_size=filter_size,
|
||||
stride=stride,
|
||||
padding=(filter_size - 1) // 2,
|
||||
groups=groups,
|
||||
weight_attr=ParamAttr(name=name + "_weights"),
|
||||
bias_attr=False,
|
||||
data_format=data_format)
|
||||
bn_name = name + '_bn'
|
||||
self._batch_norm = BatchNorm(
|
||||
num_filters,
|
||||
act=act,
|
||||
param_attr=ParamAttr(name=bn_name + '_scale'),
|
||||
bias_attr=ParamAttr(bn_name + '_offset'),
|
||||
moving_mean_name=bn_name + '_mean',
|
||||
moving_variance_name=bn_name + '_variance',
|
||||
data_layout=data_format)
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self._conv(inputs)
|
||||
y = self._batch_norm(y)
|
||||
return y
|
||||
|
||||
|
||||
class BottleneckBlock(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
stride,
|
||||
cardinality,
|
||||
reduction_ratio,
|
||||
shortcut=True,
|
||||
if_first=False,
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(BottleneckBlock, self).__init__()
|
||||
|
||||
self.conv0 = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters,
|
||||
filter_size=1,
|
||||
act='relu',
|
||||
name='conv' + name + '_x1',
|
||||
data_format=data_format)
|
||||
self.conv1 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters,
|
||||
filter_size=3,
|
||||
groups=cardinality,
|
||||
stride=stride,
|
||||
act='relu',
|
||||
name='conv' + name + '_x2',
|
||||
data_format=data_format)
|
||||
self.conv2 = ConvBNLayer(
|
||||
num_channels=num_filters,
|
||||
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
act=None,
|
||||
name='conv' + name + '_x3',
|
||||
data_format=data_format)
|
||||
self.scale = SELayer(
|
||||
num_channels=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
num_filters=num_filters * 2 if cardinality == 32 else num_filters,
|
||||
reduction_ratio=reduction_ratio,
|
||||
name='fc' + name,
|
||||
data_format=data_format)
|
||||
|
||||
if not shortcut:
|
||||
self.short = ConvBNLayer(
|
||||
num_channels=num_channels,
|
||||
num_filters=num_filters * 2
|
||||
if cardinality == 32 else num_filters,
|
||||
filter_size=1,
|
||||
stride=stride,
|
||||
name='conv' + name + '_prj',
|
||||
data_format=data_format)
|
||||
|
||||
self.shortcut = shortcut
|
||||
|
||||
def forward(self, inputs):
|
||||
y = self.conv0(inputs)
|
||||
conv1 = self.conv1(y)
|
||||
conv2 = self.conv2(conv1)
|
||||
scale = self.scale(conv2)
|
||||
|
||||
if self.shortcut:
|
||||
short = inputs
|
||||
else:
|
||||
short = self.short(inputs)
|
||||
y = paddle.add(x=short, y=scale)
|
||||
y = F.relu(y)
|
||||
return y
|
||||
|
||||
|
||||
class SELayer(nn.Layer):
|
||||
def __init__(self,
|
||||
num_channels,
|
||||
num_filters,
|
||||
reduction_ratio,
|
||||
name=None,
|
||||
data_format="NCHW"):
|
||||
super(SELayer, self).__init__()
|
||||
|
||||
self.data_format = data_format
|
||||
self.pool2d_gap = AdaptiveAvgPool2D(1, data_format=self.data_format)
|
||||
|
||||
self._num_channels = num_channels
|
||||
|
||||
med_ch = int(num_channels / reduction_ratio)
|
||||
stdv = 1.0 / math.sqrt(num_channels * 1.0)
|
||||
self.squeeze = Linear(
|
||||
num_channels,
|
||||
med_ch,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
|
||||
bias_attr=ParamAttr(name=name + '_sqz_offset'))
|
||||
self.relu = nn.ReLU()
|
||||
stdv = 1.0 / math.sqrt(med_ch * 1.0)
|
||||
self.excitation = Linear(
|
||||
med_ch,
|
||||
num_filters,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
|
||||
bias_attr=ParamAttr(name=name + '_exc_offset'))
|
||||
self.sigmoid = nn.Sigmoid()
|
||||
|
||||
    def forward(self, input):
        pool = self.pool2d_gap(input)
        if self.data_format == "NHWC":
            pool = paddle.squeeze(pool, axis=[1, 2])
        else:
            pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = self.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = self.sigmoid(excitation)
        if self.data_format == "NHWC":
            excitation = paddle.unsqueeze(excitation, axis=[1, 2])
        else:
            excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = input * excitation
        return out
|
||||
|
||||
|
||||
class ResNeXt(nn.Layer):
|
||||
def __init__(self,
|
||||
layers=50,
|
||||
class_num=1000,
|
||||
cardinality=32,
|
||||
input_image_channel=3,
|
||||
data_format="NCHW"):
|
||||
super(ResNeXt, self).__init__()
|
||||
|
||||
self.layers = layers
|
||||
self.cardinality = cardinality
|
||||
self.reduction_ratio = 16
|
||||
self.data_format = data_format
|
||||
self.input_image_channel = input_image_channel
|
||||
|
||||
supported_layers = [50, 101, 152]
|
||||
assert layers in supported_layers, \
|
||||
"supported layers are {} but input layer is {}".format(
|
||||
supported_layers, layers)
|
||||
supported_cardinality = [32, 64]
|
||||
assert cardinality in supported_cardinality, \
|
||||
"supported cardinality is {} but input cardinality is {}" \
|
||||
.format(supported_cardinality, cardinality)
|
||||
if layers == 50:
|
||||
depth = [3, 4, 6, 3]
|
||||
elif layers == 101:
|
||||
depth = [3, 4, 23, 3]
|
||||
elif layers == 152:
|
||||
depth = [3, 8, 36, 3]
|
||||
num_channels = [64, 256, 512, 1024]
|
||||
num_filters = [128, 256, 512,
|
||||
1024] if cardinality == 32 else [256, 512, 1024, 2048]
|
||||
if layers < 152:
|
||||
self.conv = ConvBNLayer(
|
||||
num_channels=self.input_image_channel,
|
||||
num_filters=64,
|
||||
filter_size=7,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1",
|
||||
data_format=self.data_format)
|
||||
else:
|
||||
self.conv1_1 = ConvBNLayer(
|
||||
num_channels=self.input_image_channel,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
act='relu',
|
||||
name="conv1",
|
||||
data_format=self.data_format)
|
||||
self.conv1_2 = ConvBNLayer(
|
||||
num_channels=64,
|
||||
num_filters=64,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv2",
|
||||
data_format=self.data_format)
|
||||
self.conv1_3 = ConvBNLayer(
|
||||
num_channels=64,
|
||||
num_filters=128,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
act='relu',
|
||||
name="conv3",
|
||||
data_format=self.data_format)
|
||||
|
||||
self.pool2d_max = MaxPool2D(
|
||||
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
|
||||
|
||||
self.block_list = []
|
||||
n = 1 if layers == 50 or layers == 101 else 3
|
||||
for block in range(len(depth)):
|
||||
n += 1
|
||||
shortcut = False
|
||||
for i in range(depth[block]):
|
||||
bottleneck_block = self.add_sublayer(
|
||||
'bb_%d_%d' % (block, i),
|
||||
BottleneckBlock(
|
||||
num_channels=num_channels[block] if i == 0 else
|
||||
num_filters[block] * int(64 // self.cardinality),
|
||||
num_filters=num_filters[block],
|
||||
stride=2 if i == 0 and block != 0 else 1,
|
||||
cardinality=self.cardinality,
|
||||
reduction_ratio=self.reduction_ratio,
|
||||
shortcut=shortcut,
|
||||
if_first=block == 0,
|
||||
name=str(n) + '_' + str(i + 1),
|
||||
data_format=self.data_format))
|
||||
self.block_list.append(bottleneck_block)
|
||||
shortcut = True
|
||||
|
||||
self.pool2d_avg = AdaptiveAvgPool2D(1, data_format=self.data_format)
|
||||
|
||||
self.pool2d_avg_channels = num_channels[-1] * 2
|
||||
|
||||
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
|
||||
|
||||
self.out = Linear(
|
||||
self.pool2d_avg_channels,
|
||||
class_num,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=Uniform(-stdv, stdv), name="fc6_weights"),
|
||||
bias_attr=ParamAttr(name="fc6_offset"))
|
||||
|
||||
    def forward(self, inputs):
        with paddle.static.amp.fp16_guard():
            if self.data_format == "NHWC":
                # Input is assumed NCHW; switch to channels-last once at the top.
                inputs = paddle.tensor.transpose(inputs, [0, 2, 3, 1])
                inputs.stop_gradient = True
            if self.layers < 152:
                y = self.conv(inputs)
            else:
                # The 152-layer variant uses a deeper three-conv stem.
                y = self.conv1_1(inputs)
                y = self.conv1_2(y)
                y = self.conv1_3(y)
            y = self.pool2d_max(y)
            for i, block in enumerate(self.block_list):
                y = block(y)
            y = self.pool2d_avg(y)
            y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
            y = self.out(y)
            return y
|
||||
|
||||
|
||||
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
|
||||
if pretrained is False:
|
||||
pass
|
||||
elif pretrained is True:
|
||||
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
|
||||
elif isinstance(pretrained, str):
|
||||
load_dygraph_pretrain(model, pretrained)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"pretrained type is not available. Please use `string` or `boolean` type."
|
||||
)
|
||||
|
||||
|
||||
def SE_ResNeXt50_32x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=50, cardinality=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained, model, MODEL_URLS["SE_ResNeXt50_32x4d"], use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_ResNeXt101_32x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=101, cardinality=32, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["SE_ResNeXt101_32x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
|
||||
|
||||
def SE_ResNeXt152_64x4d(pretrained=False, use_ssld=False, **kwargs):
|
||||
model = ResNeXt(layers=152, cardinality=64, **kwargs)
|
||||
_load_pretrained(
|
||||
pretrained,
|
||||
model,
|
||||
MODEL_URLS["SE_ResNeXt152_64x4d"],
|
||||
use_ssld=use_ssld)
|
||||
return model
|
||||
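Even when this SE-ResNeXt is built with data_format="NHWC", its forward pass still expects NCHW input and performs the transpose itself under the AMP fp16 guard. A short usage sketch (an addition; the import path is assumed):

# Sketch only: module path is an assumption based on the PaddleClas layout.
import paddle
from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d  # assumed path

model = SE_ResNeXt50_32x4d(pretrained=False, class_num=1000)
model.eval()
x = paddle.randn([2, 3, 224, 224])   # NCHW, the default data_format
with paddle.no_grad():
    logits = model(x)
print(logits.shape)                   # expected: [2, 1000]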