import numpy as np
import pandas as pd
import imageio
import os
import subprocess
import warnings
import glob
import time
from multiprocessing import Pool
from itertools import cycle
from tqdm import tqdm
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage.transform import resize

warnings.filterwarnings("ignore")

DEVNULL = open(os.devnull, 'wb')

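# Overview: for every video_id in the metadata CSV, download the source video from
# YouTube (if it is not already in --video_folder), crop each annotated chunk with its
# bounding box, optionally resize the crops, and save them under
# <out_folder>/<partition>/. The metadata is expected to provide the columns used
# below: video_id, start, end, bbox, fps, height, width and partition.
#
# Example invocation (the script filename is a placeholder; the flags match the
# ArgumentParser defined at the bottom of this file):
#   python load_videos.py --metadata taichi-metadata-new.csv --video_folder youtube-taichi \
#       --out_folder taichi-png --format .png --workers 8 --youtube ./youtube-dl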
def save(path, frames, format):
    if format == '.mp4':
        imageio.mimsave(path, frames)
    elif format == '.png':
        if os.path.exists(path):
            print("Warning: skipping video %s" % os.path.basename(path))
            return
        else:
            os.makedirs(path)
        for j, frame in enumerate(frames):
            imageio.imsave(os.path.join(path, str(j).zfill(7) + '.png'), frame)
    else:
        print("Unknown format %s" % format)
        exit()

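# Download a single video with youtube-dl (requesting English subtitles as well),
# writing it to <video_folder>/<video_id>.mp4 and silencing youtube-dl's output.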
def download(video_id, args):
    video_path = os.path.join(args.video_folder, video_id + ".mp4")
    subprocess.call([args.youtube, '-f', "''best/mp4''", '--write-auto-sub', '--write-sub',
                     '--sub-lang', 'en', '--skip-unavailable-fragments',
                     "https://www.youtube.com/watch?v=" + video_id, "--output",
                     video_path], stdout=DEVNULL, stderr=DEVNULL)
    return video_path

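# Worker entry point used by the multiprocessing pool: make sure the source video is
# on disk, then cut out every chunk annotated for this video_id and save it.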
def run(data):
    video_id, args = data
    if not os.path.exists(os.path.join(args.video_folder, video_id.split('#')[0] + '.mp4')):
        download(video_id.split('#')[0], args)

    if not os.path.exists(os.path.join(args.video_folder, video_id.split('#')[0] + '.mp4')):
        print('Cannot load video %s, broken link' % video_id.split('#')[0])
        return

    reader = imageio.get_reader(os.path.join(args.video_folder, video_id.split('#')[0] + '.mp4'))
    fps = reader.get_meta_data()['fps']

    df = pd.read_csv(args.metadata)
    df = df[df['video_id'] == video_id]

    all_chunks_dict = [{'start': df['start'].iloc[j], 'end': df['end'].iloc[j],
                        'bbox': list(map(int, df['bbox'].iloc[j].split('-'))), 'frames': []}
                       for j in range(df.shape[0])]
    ref_fps = df['fps'].iloc[0]
    ref_height = df['height'].iloc[0]
    ref_width = df['width'].iloc[0]
    partition = df['partition'].iloc[0]

    try:
        for i, frame in enumerate(reader):
            for entry in all_chunks_dict:
                if (i * ref_fps >= entry['start'] * fps) and (i * ref_fps < entry['end'] * fps):
                    # Rescale the annotated bounding box from the reference resolution
                    # to the resolution of the downloaded video before cropping.
                    left, top, right, bot = entry['bbox']
                    left = int(left / (ref_width / frame.shape[1]))
                    top = int(top / (ref_height / frame.shape[0]))
                    right = int(right / (ref_width / frame.shape[1]))
                    bot = int(bot / (ref_height / frame.shape[0]))
                    crop = frame[top:bot, left:right]
                    if args.image_shape is not None:
                        crop = img_as_ubyte(resize(crop, args.image_shape, anti_aliasing=True))
                    entry['frames'].append(crop)
    except imageio.core.format.CannotReadFrameError:
        # Some downloads are truncated; keep whatever frames were read so far.
        pass

    for entry in all_chunks_dict:
        first_part = '#'.join(video_id.split('#')[::-1])
        path = first_part + '#' + str(entry['start']).zfill(6) + '#' + str(entry['end']).zfill(6) + '.mp4'
        save(os.path.join(args.out_folder, partition, path), entry['frames'], args.format)

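# Command-line entry point: build the output folder layout and process all video ids
# from the metadata CSV with a pool of --workers processes.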
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--video_folder", default='youtube-taichi', help='Path to youtube videos')
    parser.add_argument("--metadata", default='taichi-metadata-new.csv', help='Path to metadata')
    parser.add_argument("--out_folder", default='taichi-png', help='Path to output')
    parser.add_argument("--format", default='.png', help='Storing format')
    parser.add_argument("--workers", default=1, type=int, help='Number of workers')
    parser.add_argument("--youtube", default='./youtube-dl', help='Path to youtube-dl')
    parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))),
                        help="Image shape, None for no resize")

    args = parser.parse_args()
    if not os.path.exists(args.video_folder):
        os.makedirs(args.video_folder)
    if not os.path.exists(args.out_folder):
        os.makedirs(args.out_folder)
    for partition in ['test', 'train']:
        if not os.path.exists(os.path.join(args.out_folder, partition)):
            os.makedirs(os.path.join(args.out_folder, partition))

    df = pd.read_csv(args.metadata)
    video_ids = set(df['video_id'])
    pool = Pool(processes=args.workers)
    args_list = cycle([args])
    for chunks_data in tqdm(pool.imap_unordered(run, zip(video_ids, args_list))):
        # Iterate only to drive the pool and update the progress bar.
        pass