"""Generate an image from a text prompt with a three-stage diffusion cascade.

Stages 1 and 2 are DeepFloyd IF pipelines and stage 3 is the Stable Diffusion
x4 upscaler. The result of every stage is saved as a PNG in the current
working directory (``if_stage_I.png``, ``if_stage_II.png``,
``if_stage_III.png``).
"""

from diffusers import DiffusionPipeline
from diffusers.utils import pt_to_pil
import torch


# Stage 1: base DeepFloyd IF model, loaded from the fp16 weight variant.
stage_1 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-I-M-v1.0", variant="fp16", torch_dtype=torch.float16
)
# NOTE: this call needs the optional `xformers` package to be installed.
stage_1.enable_xformers_memory_efficient_attention()
# Offload sub-models to the CPU while they are idle to lower GPU memory use.
stage_1.enable_model_cpu_offload()


# Stage 2: loaded without a text encoder because the prompt embeddings
# computed by stage 1 are passed in directly further down.
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-M-v1.0",
    text_encoder=None,
    variant="fp16",
    torch_dtype=torch.float16,
)
stage_2.enable_xformers_memory_efficient_attention()
stage_2.enable_model_cpu_offload()


# Stage 3: x4 upscaler that reuses stage 1's feature extractor, safety checker
# and watermarker instead of loading its own.
safety_modules = {
    "feature_extractor": stage_1.feature_extractor,
    "safety_checker": stage_1.safety_checker,
    "watermarker": stage_1.watermarker,
}
stage_3 = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler",
    **safety_modules,
    torch_dtype=torch.float16,
)
stage_3.enable_xformers_memory_efficient_attention()
stage_3.enable_model_cpu_offload()


prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'


# Encode the prompt once; the embeddings are shared by stages 1 and 2.
prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)


# Seed the RNG so runs are repeatable. The same generator object is handed to
# all three stages, so each stage continues from the state the previous left.
generator = torch.manual_seed(0)


# Stage 1 inference. output_type="pt" keeps the result as a tensor so it can
# be fed straight into stage 2; pt_to_pil converts it for saving.
image = stage_1(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
pt_to_pil(image)[0].save("./if_stage_I.png")


# Stage 2 inference, conditioned on the stage 1 output and the same embeddings.
image = stage_2(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
pt_to_pil(image)[0].save("./if_stage_II.png")


# Stage 3 inference. The upscaler takes the raw text prompt rather than the
# precomputed embeddings; its output is saved directly without pt_to_pil
# (presumably already PIL images, since no output_type is requested).
image = stage_3(prompt=prompt, image=image, generator=generator, noise_level=100).images
image[0].save("./if_stage_III.png")