Project repository: https://github.com/anandpawara/Real_Time_Image_Animation
Environment: Google Colab
Step 1: Clone the repo
- !git clone https://github.com/anandpawara/Real_Time_Image_Animation.git
- %cd Real_Time_Image_Animation
Step 2: Install required modules
Before installing, requirements.txt needs a small edit: our environment is Linux, but the file pins a couple of Windows-only packages. Delete the following two lines from requirements.txt (or use the sed one-liner shown after this list):
- pywin32==227
- pywinpty==0.5.7
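If you prefer to make this edit from a Colab cell instead of opening the file, a sed one-liner does the same thing (a minimal sketch, assuming both entries begin exactly with these package names):
- !sed -i '/^pywin32/d;/^pywinpty/d' requirements.txt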
Then run:
- !pip install -r requirements.txt
- !pip install torch===1.0.0 torchvision===0.2.1 -f https://download.pytorch.org/whl/cu100/torch_stable.html
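Before moving on, it is worth confirming that this rather old torch build imports cleanly and can see the Colab GPU (make sure the runtime type is set to GPU); a quick sanity check:
- import torch
- print(torch.__version__)          # expect 1.0.0
- print(torch.cuda.is_available())  # should print True on a GPU runtime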
Step 3: Modify image_animation.py
The project only processes the video frames and throws the audio away. So we first save the audio track, then merge it back into the processed video at the end. Replace the contents of image_animation.py with the following:
- import imageio
- import torch
- from animate import normalize_kp
- from demo import load_checkpoints
- import numpy as np
- from skimage import img_as_ubyte
- from skimage.transform import resize
- import cv2
- import os
- import argparse
- import subprocess
-
- def video2mp3(file_name):
-     # extract the audio track of the input video into an .mp3
-     outfile_name = file_name.split('.')[0] + '.mp3'
-     cmd = 'ffmpeg -i ' + file_name + ' -f mp3 ' + outfile_name
-     print(cmd)
-     subprocess.call(cmd, shell=True)
-
- def video_add_mp3(file_name, mp3_file):
-     # mux the saved .mp3 back into the generated video as <name>-f.mp4
-     outfile_name = file_name.split('.')[0] + '-f.mp4'
-     subprocess.call('ffmpeg -i ' + file_name
-                     + ' -i ' + mp3_file + ' -strict -2 -f mp4 '
-                     + outfile_name, shell=True)
-
- ap = argparse.ArgumentParser()
- ap.add_argument("-i", "--input_image", required=True, help="Path to image to animate")
- ap.add_argument("-c", "--checkpoint", required=True, help="Path to checkpoint")
- ap.add_argument("-v", "--input_video", required=False, help="Path to video input")
- args = vars(ap.parse_args())
-
- print("[INFO] loading source image and checkpoint...")
- source_path = args['input_image']
- checkpoint_path = args['checkpoint']
- if args['input_video']:
-     video_path = args['input_video']
- else:
-     video_path = None
- source_image = imageio.imread(source_path)
- source_image = resize(source_image, (256, 256))[..., :3]
-
- generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml', checkpoint_path=checkpoint_path)
-
- if not os.path.exists('output'):
-     os.mkdir('output')
-
- relative = True
- adapt_movement_scale = True
- cpu = False
-
- if video_path:
-     cap = cv2.VideoCapture(video_path)
-     print("[INFO] Loading video from the given path")
- else:
-     cap = cv2.VideoCapture(0)
-     print("[INFO] Initializing front camera...")
-
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- if video_path:
-     video2mp3(file_name=video_path)  # save the audio track before processing
-
- # the generator outputs 256x256 frames, so the writer must be sized to match
- fourcc = cv2.VideoWriter_fourcc('M', 'P', 'E', 'G')
- out1 = cv2.VideoWriter('output/test.mp4', fourcc, fps, (256, 256), True)
-
- with torch.no_grad():
-     predictions = []
-     source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
-     if not cpu:
-         source = source.cuda()
-     kp_source = kp_detector(source)
-     count = 0
-     while True:
-         ret, frame = cap.read()
-         if not ret:
-             break
-         frame = cv2.flip(frame, 1)
-         if not video_path:
-             # webcam input: crop a square region around the face
-             x, y, w, h = 143, 87, 322, 322
-             frame = frame[y:y + h, x:x + w]
-         frame1 = resize(frame, (256, 256))[..., :3]
-
-         if count == 0:
-             # keypoints of the first driving frame, used as the reference
-             # for relative motion transfer
-             source1 = torch.tensor(frame1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
-             if not cpu:
-                 source1 = source1.cuda()
-             kp_driving_initial = kp_detector(source1)
-
-         driving_frame = torch.tensor(frame1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
-         if not cpu:
-             driving_frame = driving_frame.cuda()
-         kp_driving = kp_detector(driving_frame)
-         kp_norm = normalize_kp(kp_source=kp_source,
-                                kp_driving=kp_driving,
-                                kp_driving_initial=kp_driving_initial,
-                                use_relative_movement=relative,
-                                use_relative_jacobian=relative,
-                                adapt_movement_scale=adapt_movement_scale)
-         out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
-         im = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
-         predictions.append(im)
-         im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
-         out1.write(img_as_ubyte(im))
-         count += 1
-
- cap.release()
- out1.release()
- cv2.destroyAllWindows()
-
- if video_path:
-     # merge the saved audio back into the processed video
-     video_add_mp3(file_name='output/test.mp4', mp3_file=video_path.split('.')[0] + '.mp3')
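One caveat about the two ffmpeg helpers above: building the command by string concatenation breaks on paths that contain spaces. If that bites you, the argument-list form of subprocess.call avoids the shell entirely; a minimal alternative sketch of video2mp3 (the -y overwrite flag is an addition, not in the original):
- import subprocess
-
- def video2mp3(file_name):
-     # argument list instead of a shell string: no quoting issues
-     outfile_name = file_name.rsplit('.', 1)[0] + '.mp3'
-     subprocess.call(['ffmpeg', '-y', '-i', file_name, '-f', 'mp3', outfile_name])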
Step 4: Download the cascade file, weights, and model
Download the model and weight files the algorithm needs:
- !gdown --id 1wCzJP1XJNB04vEORZvPjNz6drkXm5AUK
- !unzip checkpoints.zip
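The archive is expected to unpack into the working directory; a quick check that the checkpoint used in Step 5 actually landed there:
- import os
- print(os.path.exists('vox-cpk.pth.tar'))  # should print True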
Video and image assets: https://pan.baidu.com/s/1aur-vfTSJE9ix9afIuZLRQ (extraction code: p3so)
Upload the video asset 1.mp4 to the Real_Time_Image_Animation directory, and the image asset pdd.png to the Real_Time_Image_Animation/Inputs directory.
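If the assets are on your local machine, Colab's files helper is the quickest way to get them into the runtime; it uploads into the current working directory, so move the files afterwards if needed:
- from google.colab import files
- files.upload()  # pick 1.mp4 / pdd.png in the dialog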
Step 5: Run the project
Command template:
- python image_animation.py -i path_to_input_file -c path_to_checkpoint -v path_to_video_file
Here path_to_input_file is the template image to animate, path_to_checkpoint is the weights file, and path_to_video_file is the driving video.
Concretely, run the following:
- !python image_animation.py -i Inputs/pdd.png -c vox-cpk.pth.tar -v 1.mp4
The generated video is saved as test-f.mp4 in the Real_Time_Image_Animation/output directory.
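To confirm that the audio track survived the merge, and to save the result to your machine, you can probe the file with ffprobe (bundled with ffmpeg) and then use Colab's download helper:
- !ffprobe output/test-f.mp4
- from google.colab import files
- files.download('output/test-f.mp4')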