# Configuration for training and animation on the taichi dataset at 256x256 resolution.
---
# Dataset parameters
dataset_params:
  # Path to data; data can be stored in several formats: .mp4 or .gif videos, stacked .png images or folders with frames.
  root_dir: data/taichi-png
  # Image shape, needed for stacked .png format.
  frame_shape: [256, 256, 3]
  # In case of TaiChi a single video can be split into many chunks, or there may be several videos for a single person.
  # In this case an epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the name of the video is '12335#adsbf.mp4' the id is assumed to be 12335.
  id_sampling: true
  # List with pairs for animation, None for random pairs
  pairs_list: data/taichi256.csv
  # Augmentation parameters; see augmentation.py for all possible augmentations
  augmentation_params:
    flip_param:
      horizontal_flip: true
      time_flip: true
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1
# Defines model architecture
model_params:
  common_params:
    # Number of keypoints
    num_kp: 10
    # Number of channels per image
    num_channels: 3
    # Using first order (true) or zero order (false) model
    estimate_jacobian: true
  kp_detector_params:
    # Softmax temperature for keypoint heatmaps
    temperature: 0.1
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 1024
    # Number of blocks in Unet. Can be increased or decreased depending on resolution.
    num_blocks: 5
    # Keypoints are predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
    scale_factor: 0.25
  generator_params:
    # Number of features multiplier
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling blocks in Johnson architecture.
    # Can be increased or decreased depending on resolution.
    num_down_blocks: 2
    # Number of ResBlocks in Johnson architecture.
    num_bottleneck_blocks: 6
    # Use occlusion map or not
    estimate_occlusion_map: true
    dense_motion_params:
      # Number of features multiplier
      block_expansion: 64
      # Maximum allowed number of features
      max_features: 1024
      # Number of blocks in Unet. Can be increased or decreased depending on resolution.
      num_blocks: 5
      # Dense motion is predicted on smaller images for better performance;
      # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
      scale_factor: 0.25
  discriminator_params:
    # Discriminator can be multiscale; if you want 2 discriminators on the original
    # resolution and half of the original, specify scales: [1, 0.5]
    scales: [1]
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 512
    # Number of blocks. Can be increased or decreased depending on resolution.
    num_blocks: 4
    use_kp: true
# Parameters of training
train_params:
  # Number of training epochs
  num_epochs: 150
  # For better i/o performance when the number of videos is small, the number of epochs can be multiplied by this number.
  # Thus effectively with num_repeats=100 each epoch is 100 times larger.
  num_repeats: 150
  # Drop learning rate by 10 times after these epochs
  epoch_milestones: []
  # Initial learning rate for all modules
  lr_generator: 2.0e-4
  lr_discriminator: 2.0e-4
  lr_kp_detector: 0
  batch_size: 27
  # Scales for perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125] and image resolution is 256x256,
  # then the loss will be computed on resolutions 256x256, 128x128, 64x64, 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Save a checkpoint this frequently. If checkpoint_freq=50, a checkpoint will be saved every 50 epochs.
  checkpoint_freq: 50
  # Parameters of transform for equivariance loss
  transform_params:
    # Sigma for affine part
    sigma_affine: 0.05
    # Sigma for deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
  loss_weights:
    # Weight for LSGAN loss in generator
    generator_gan: 1
    # Weight for LSGAN loss in discriminator
    discriminator_gan: 1
    # Weights for feature matching loss; the number should be the same as the number of blocks in discriminator.
    feature_matching: [10, 10, 10, 10]
    # Weights for perceptual loss.
    perceptual: [10, 10, 10, 10, 10]
    # Weight for value equivariance.
    equivariance_value: 10
    # Weight for jacobian equivariance.
    equivariance_jacobian: 10
# Parameters of reconstruction
reconstruction_params:
  # Maximum number of videos for reconstruction
  num_videos: 1000
  # Format for visualization; note that results will also be stored in stacked .png.
  format: '.mp4'
# Parameters of animation
animate_params:
  # Maximum number of pairs for animation; the pairs will be either taken from pairs_list or random.
  num_pairs: 50
  # Format for visualization; note that results will also be stored in stacked .png.
  format: '.mp4'
  # Normalization of driving keypoints
  normalization_params:
    # Increase or decrease relative movement scale depending on the size of the object
    adapt_movement_scale: false
    # Apply only relative displacement of the keypoint
    use_relative_movement: true
    # Apply only relative change in jacobian
    use_relative_jacobian: true
# Visualization parameters
visualizer_params:
  # Draw keypoints of this size; increase or decrease depending on resolution
  kp_size: 5
  # Draw white border around images
  draw_border: true
  # Color map for keypoints
  colormap: 'gist_rainbow'