How can I run Flux2 inference on 2 GPUs?

I am trying to run Flux2 inference on 2 GPUs with the following script:

import torch
from diffusers import Flux2Pipeline
from accelerate import PartialState
import argparse
from pathlib import Path

def main():
    parser = argparse.ArgumentParser(description='Generate images using FLUX.2-dev with multi-GPU support')
    parser.add_argument('--prompt', type=str, 
                        default="Futuristic city",
                        help='Text prompt for image generation')
    parser.add_argument('--output', type=str, default='flux2_output2.png',
                        help='Output image filename')
    parser.add_argument('--steps', type=int, default=28,
                        help='Number of inference steps (default: 28, max recommended: 50)')
    parser.add_argument('--guidance-scale', type=float, default=4.0,
                        help='Guidance scale for generation (default: 4.0)')
    parser.add_argument('--seed', type=int, default=42,
                        help='Random seed for reproducibility')
    parser.add_argument('--height', type=int, default=1024,
                        help='Output image height')
    parser.add_argument('--width', type=int, default=1024,
                        help='Output image width')
    
    args = parser.parse_args()
    
    print("=" * 80)
    print("FLUX.2-dev Image Generation")
    print("=" * 80)
    print(f"\nPrompt: {args.prompt}")
    print(f"Output: {args.output}")
    print(f"Steps: {args.steps}")
    print(f"Guidance Scale: {args.guidance_scale}")
    print(f"Seed: {args.seed}")
    print(f"Size: {args.width}x{args.height}")
    print("\n" + "=" * 80)
    
    # Model repository
    model_id = "black-forest-labs/FLUX.2-dev"
    
    print("\nLoading FLUX.2-dev model...")
    print("This will distribute the model across your 2 A100 GPUs automatically...")
    
    # Load the pipeline with device_map="balanced" to distribute across GPUs
    # Using bfloat16 for A100s (optimal precision)
    pipe = Flux2Pipeline.from_pretrained(
        model_id,        
        torch_dtype=torch.bfloat16,
        device_map="balanced"  # Distributes model across available GPUs
    )
    
    # Enable memory efficient attention
    pipe.enable_attention_slicing()
    
    print("\n Model loaded successfully!")
    print(f" Model distributed across GPUs: {torch.cuda.device_count()} GPUs detected")
    
    # Print GPU memory allocation
    for i in range(torch.cuda.device_count()):
        allocated = torch.cuda.memory_allocated(i) / 1024**3
        reserved = torch.cuda.memory_reserved(i) / 1024**3
        print(f"  GPU {i}: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
    
    print("\nGenerating image...")
    
    # Set up generator for reproducibility
    # Note: For multi-GPU, we set the generator on cuda:0
    generator = torch.Generator(device="cuda:0").manual_seed(args.seed)
    
    # Generate image
    output = pipe(
        prompt=args.prompt,
        height=args.height,
        width=args.width,
        num_inference_steps=args.steps,
        guidance_scale=args.guidance_scale,
        generator=generator,
    )
    
    image = output.images[0]
    
    # Save the image
    output_path = Path(args.output)
    image.save(output_path)
    
    print(f"\nImage generated successfully!")
    print(f"Saved to: {output_path.absolute()}")
    
    # Print final GPU memory usage
    print("\nFinal GPU Memory Usage:")
    for i in range(torch.cuda.device_count()):
        allocated = torch.cuda.memory_allocated(i) / 1024**3
        reserved = torch.cuda.memory_reserved(i) / 1024**3
        print(f"  GPU {i}: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
    
    print("\n" + "=" * 80)

if __name__ == "__main__":
    main()
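
To see where each component actually lands, I also print the placement right after loading (separate from the script above; if I understand the docs correctly, diffusers records the component-to-device assignment in pipe.hf_device_map when a device_map is passed):

# Run right after Flux2Pipeline.from_pretrained(...) above.
# diffusers records which device each pipeline component was placed on.
print(pipe.hf_device_map)

# Cross-check by asking each torch module where its parameters live.
for name, component in pipe.components.items():
    if isinstance(component, torch.nn.Module):
        devices = {p.device for p in component.parameters()}
        print(f"{name}: {devices}")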

requirements.txt:

torch>=2.0.0
diffusers>=0.32.0
transformers>=4.40.0
accelerate>=0.26.0
bitsandbytes>=0.43.0
sentencepiece>=0.1.99
protobuf>=3.20.0
Pillow>=10.0.0
huggingface_hub>=0.20.0

However, while the model does occupy memory on both GPUs, only one GPU shows compute utilization at any given moment in watch nvidia-smi while inference runs. How can I run Flux2 inference on 2 GPUs?
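
For completeness, here is a rough sketch of how per-GPU utilization can be sampled from Python while the pipeline runs, beyond eyeballing nvidia-smi (torch.cuda.utilization wraps NVML and needs the nvidia-ml-py package installed):

import threading
import time
import torch

def log_utilization(stop_event, interval=0.5):
    # Poll NVML-reported utilization for every visible GPU until stopped.
    while not stop_event.is_set():
        readings = [torch.cuda.utilization(i) for i in range(torch.cuda.device_count())]
        print("GPU utilization (%):", readings)
        time.sleep(interval)

stop_event = threading.Event()
sampler = threading.Thread(target=log_utilization, args=(stop_event,), daemon=True)
sampler.start()
# ... the pipe(...) call from the script above goes here ...
stop_event.set()
sampler.join()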
