latent deepfrying
Destroy image quality by repeatedly encoding and then decoding an image with a VAE.
example
code
import imageio
import numpy as np
import torch
from diffusers import AutoencoderKL
from PIL import Image
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# script still works (more slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained autoencoder and move it to the selected device.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to(device)

# Starting frame: replace "your_image_here" with the path of the image to
# process. The image is converted to RGB and resized to 512x512.
image = Image.open("your_image_here").convert("RGB")
image = image.resize((512, 512))
def encode(image):
    """Encode a PIL image into a scaled latent sampled from the VAE.

    The image is resized to 512x512, converted to a float32 batch of one,
    moved to ``device`` and rescaled from [0, 1] to [-1, 1] before being
    passed to ``vae.encode``.

    Args:
        image: A PIL image. Presumably 3-channel RGB, as produced by the
            loader and by ``decode`` -- TODO confirm for other callers.

    Returns:
        The result of ``latent_dist.sample()`` multiplied by 0.18215, the
        same constant ``decode`` divides out again. NOTE(review): the value
        is presumably the VAE's latent scaling factor, and the sample is
        presumably a random draw -- confirm against the diffusers docs.
    """
    resized = image.resize((512, 512))
    pixels = np.array(resized).astype(np.float32) / 255.0
    # Add a leading batch axis, then move the last axis to position 1
    # (assumed H, W, C -> 1, C, H, W).
    batch = np.transpose(pixels[None], (0, 3, 1, 2))
    tensor = torch.from_numpy(batch).to(device)
    with torch.no_grad():
        posterior = vae.encode(tensor * 2.0 - 1.0).latent_dist
    return posterior.sample() * 0.18215
def decode(latents):
    """Decode scaled latents back into a PIL image.

    The latents are multiplied by ``1 / 0.18215`` (undoing the factor applied
    in ``encode``), passed through ``vae.decode``, mapped from [-1, 1] to
    [0, 1] with clamping, and quantised to 8-bit.

    Args:
        latents: A latent tensor such as the one returned by ``encode``.

    Returns:
        A PIL image built from the first item of the decoded batch.
    """
    unscaled = latents * (1 / 0.18215)
    with torch.no_grad():
        decoded = vae.decode(unscaled).sample
    # Map to [0, 1]; the clamp discards any decoder overshoot.
    unit_range = (decoded / 2 + 0.5).clamp(0, 1)
    # Move axis 1 to the end (assumed N, C, H, W -> N, H, W, C) for PIL.
    channels_last = unit_range.detach().permute(0, 2, 3, 1)
    pixels = channels_last.float().cpu().numpy()
    pixels = (pixels * 255).round().astype("uint8")
    return Image.fromarray(pixels[0])
# Round-trip the image through the VAE 100 times, feeding each decoded frame
# back in as the next input and appending it to the video at 10 fps.
# The writer is used as a context manager so it is closed (and the file
# finalized) even if encoding, decoding or writing raises part-way through.
with imageio.get_writer("./latent_deepfrying.mp4", fps=10) as writer:
    for _ in range(100):
        latents = encode(image)
        image = decode(latents)
        image_np = np.array(image)
        writer.append_data(image_np)