How can I run Flux2 inference on 2 GPUs?
I am trying to run Flux2 inference on 2 GPUs with the following script:
import torch
from diffusers import Flux2Pipeline
from accelerate import PartialState
import argparse
from pathlib import Path
def main():
    parser = argparse.ArgumentParser(description='Generate images using FLUX.2-dev with multi-GPU support')
    parser.add_argument('--prompt', type=str,
                        default="Futuristic city",
                        help='Text prompt for image generation')
    parser.add_argument('--output', type=str, default='flux2_output2.png',
                        help='Output image filename')
    parser.add_argument('--steps', type=int, default=28,
                        help='Number of inference steps (default: 28, max recommended: 50)')
    parser.add_argument('--guidance-scale', type=float, default=4.0,
                        help='Guidance scale for generation (default: 4.0)')
    parser.add_argument('--seed', type=int, default=42,
                        help='Random seed for reproducibility')
    parser.add_argument('--height', type=int, default=1024,
                        help='Output image height')
    parser.add_argument('--width', type=int, default=1024,
                        help='Output image width')
    args = parser.parse_args()

    print("=" * 80)
    print("FLUX.2-dev Image Generation")
    print("=" * 80)
    print(f"\nPrompt: {args.prompt}")
    print(f"Output: {args.output}")
    print(f"Steps: {args.steps}")
    print(f"Guidance Scale: {args.guidance_scale}")
    print(f"Seed: {args.seed}")
    print(f"Size: {args.width}x{args.height}")
    print("\n" + "=" * 80)

    # Model repository
    model_id = "black-forest-labs/FLUX.2-dev"

    print("\nLoading FLUX.2-dev model...")
    print("This will distribute the model across your 2 A100 GPUs automatically...")

    # Load the pipeline with device_map="balanced" to distribute across GPUs
    # Using bfloat16 for A100s (optimal precision)
    pipe = Flux2Pipeline.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="balanced"  # Distributes model across available GPUs
    )

    # Enable memory efficient attention
    pipe.enable_attention_slicing()

    print("\nModel loaded successfully!")
    print(f"Model distributed across GPUs: {torch.cuda.device_count()} GPUs detected")

    # Print GPU memory allocation
    for i in range(torch.cuda.device_count()):
        allocated = torch.cuda.memory_allocated(i) / 1024**3
        reserved = torch.cuda.memory_reserved(i) / 1024**3
        print(f"  GPU {i}: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")

    print("\nGenerating image...")

    # Set up generator for reproducibility
    # Note: For multi-GPU, we set the generator on cuda:0
    generator = torch.Generator(device="cuda:0").manual_seed(args.seed)

    # Generate image
    output = pipe(
        prompt=args.prompt,
        height=args.height,
        width=args.width,
        num_inference_steps=args.steps,
        guidance_scale=args.guidance_scale,
        generator=generator,
    )
    image = output.images[0]

    # Save the image
    output_path = Path(args.output)
    image.save(output_path)

    print("\nImage generated successfully!")
    print(f"Saved to: {output_path.absolute()}")

    # Print final GPU memory usage
    print("\nFinal GPU Memory Usage:")
    for i in range(torch.cuda.device_count()):
        allocated = torch.cuda.memory_allocated(i) / 1024**3
        reserved = torch.cuda.memory_reserved(i) / 1024**3
        print(f"  GPU {i}: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")

    print("\n" + "=" * 80)


if __name__ == "__main__":
    main()
requirements.txt:
torch>=2.0.0
diffusers>=0.32.0
transformers>=4.40.0
accelerate>=0.26.0
bitsandbytes>=0.43.0
sentencepiece>=0.1.99
protobuf>=3.20.0
Pillow>=10.0.0
huggingface_hub>=0.20.0
However, while the model does take up memory on both GPUs, only one GPU is actually utilized during inference when monitoring with watch nvidia-smi. How can I run Flux2 inference on 2 GPUs?
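
In case it helps with diagnosing this, the placement chosen by device_map="balanced" can be printed out. This is a minimal sketch and makes an assumption: that the pipeline and/or its sub-models expose the hf_device_map attribute that Accelerate attaches when a device map is used, which may differ between diffusers versions.

# Sketch: inspect where Accelerate placed the pipeline's components.
# Assumption: hf_device_map is present on the pipeline and/or its sub-models;
# otherwise fall back to reporting each component's device, if it has one.
print("pipeline:", getattr(pipe, "hf_device_map", "no hf_device_map attribute"))
for name, component in pipe.components.items():
    if hasattr(component, "hf_device_map"):
        print(name, component.hf_device_map)
    elif hasattr(component, "device"):
        print(name, component.device)

This should at least show whether the transformer itself is split across cuda:0 and cuda:1, or whether whole components simply ended up on different GPUs.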
