1
1
from typing import List , Union
2
- from diffusers import StableDiffusionPipeline
3
- from diffusers .pipelines .stable_diffusion import (
4
- StableDiffusionImg2ImgPipeline ,
5
- StableDiffusionInpaintPipeline ,
6
- )
7
2
import torch
8
3
from PIL import Image
9
- from diffusers .pipelines .stable_diffusion .safety_checker import (
10
- StableDiffusionSafetyChecker ,
11
- )
12
4
import os
13
- from utils import ModelParts2GPUsAssigner , get_gpu_setting , dummy_checker , remove_nsfw
5
+ from utils import ModelParts2GPUsAssigner , get_gpu_setting
14
6
from parallel import StableDiffusionModelParallel , StableDiffusionMultiProcessing
15
- from schedulers import schedulers
16
7
import numpy as np
8
+ from sb import DiffusionModel
17
9
10
+ # read env variables
18
11
TOKEN = os .environ .get ("TOKEN" , None )
19
12
MODEL_ID = os .environ .get ("MODEL_ID" , "stabilityai/stable-diffusion-2-base" )
20
13
14
+ # If you are limited by GPU memory (e.g. <10GB VRAM), please make sure to load the model in fp16 precision
21
15
fp16 = bool (int (os .environ .get ("FP16" , 1 )))
22
16
# MP = bool(int(os.environ.get("MODEL_PARALLEL", 0)))
23
17
MP = False # disabled
24
18
MIN_INPAINT_MASK_PERCENT = 0.1
25
19
26
- from diffusers .pipelines .stable_diffusion import StableDiffusionPipeline
27
-
28
20
# FIXME: devices=0,1 causes a CUDA memory access error (cause not yet understood)
29
- # create and move model to GPU(s), defaults to GPU 0
30
- multi , devices = get_gpu_setting (os .environ .get ("DEVICES" , "0" ))
31
- # If you are limited by GPU memory and have less than 10GB of GPU RAM available, please make sure to load the StableDiffusionPipeline in float16 precision
32
- kwargs = dict (
33
- pretrained_model_name_or_path = MODEL_ID ,
34
- revision = "fp16" if fp16 else None ,
35
- torch_dtype = torch .float16 if fp16 else None ,
36
- use_auth_token = TOKEN ,
37
- requires_safety_checker = False ,
38
- )
39
-
40
- pipe , safety , safety_extractor = None , None , None
41
-
42
-
43
- def load_pipeline (model_or_path , devices : List [int ]):
44
- global pipe , safety , safety_extractor
45
- if pipe is not None and pipe ._pipe_name == model_or_path :
46
- # avoid re-loading same model
47
- return
48
-
21
+ IS_MULTI , DEVICES = get_gpu_setting (os .environ .get ("DEVICES" , "0" ))
22
+
23
+ # TODO docs
24
+ def init_pipeline (model_or_path = MODEL_ID , devices : List [int ]= DEVICES )-> Union [DiffusionModel , StableDiffusionMultiProcessing ]:
25
+ kwargs = dict (
26
+ pretrained_model_name_or_path = model_or_path ,
27
+ revision = "fp16" if fp16 else None ,
28
+ torch_dtype = torch .float16 if fp16 else None ,
29
+ use_auth_token = TOKEN ,
30
+ requires_safety_checker = False ,
31
+ )
49
32
model_ass = None
50
- print (f"Loading { model_or_path } from disk.." )
51
- kwargs ["pretrained_model_name_or_path" ] = model_or_path
52
33
# single-gpu multiple models currently disabled
53
34
if MP and len (devices ) > 1 :
54
35
# setup for model parallel: find model parts->gpus assignment
@@ -63,31 +44,27 @@ def load_pipeline(model_or_path, devices: List[int]):
63
44
)
64
45
print ("Assignments:" , model_ass )
65
46
66
- if multi and pipe is not None :
47
+ # TODO move logic
48
+ # if multi and pipe is not None:
67
49
# avoid re-creating processes in multi-gpu mode, have them reload a different model
68
- pipe .reload_model (model_or_path )
69
- elif multi :
50
+ # pipe.reload_model(model_or_path)
51
+ if IS_MULTI :
70
52
# DataParallel: one process *per GPU* (each has a copy of the model)
71
53
# ModelParallel: one process *per model*, each model (possibly) on multiple GPUs
72
54
n_procs = len (devices ) if not MP else len (model_ass )
73
55
pipe = StableDiffusionMultiProcessing .from_pretrained (
74
56
n_procs , devices , model_parallel_assignment = model_ass , ** kwargs
75
57
)
76
58
else :
77
- pipe = StableDiffusionPipeline .from_pretrained (** kwargs )
78
- # remove safety checker so it doesn't use up GPU memory
79
- safety , safety_extractor = remove_nsfw (pipe )
59
+ pipe = DiffusionModel .from_pretrained (** kwargs )
80
60
if len (devices ):
81
61
pipe .to (f"cuda:{ devices [0 ]} " )
82
62
83
- pipe ._pipe_name = model_or_path
84
- print ("Model Loaded!" )
85
-
86
-
87
- load_pipeline (MODEL_ID , devices )
63
+ return pipe
88
64
89
65
90
66
def inference (
67
+ pipe : DiffusionModel ,
91
68
prompt ,
92
69
num_images = 1 ,
93
70
num_inference_steps = 50 ,
@@ -105,29 +82,28 @@ def inference(
105
82
):
106
83
prompt = [prompt ] * num_images
107
84
input_kwargs = dict (
85
+ inference_type = "text" ,
108
86
prompt = prompt ,
87
+ # number of denoising steps run during inference (the higher the better)
109
88
num_inference_steps = num_inference_steps ,
110
89
height = height ,
111
90
width = width ,
112
91
guidance_scale = guidance_scale ,
113
- generator = None ,
92
+ # NOTE: with multiple GPUs, the seed will be different for each GPU but still fixed!
93
+ generator = seed ,
114
94
)
115
95
# input sketch has priority over input image
116
96
if input_sketch is not None :
117
97
input_image = input_sketch
118
98
119
- # Img2Img: to avoid re-loading the model, we ""cast"" the pipeline
120
99
# TODO batch images by providing a torch tensor
121
100
if input_image is not None :
122
- input_image = input_image .resize ((width , height ))
123
101
# image guided generation
124
- if multi :
125
- pipe .change_pipeline_type ("img2img" )
126
- else :
127
- pipe .__class__ = StableDiffusionImg2ImgPipeline
102
+ input_image = input_image .resize ((width , height ))
128
103
# TODO negative prompt?
129
104
input_kwargs ["init_image" ] = input_image
130
105
input_kwargs ["strength" ] = 1.0 - inv_strenght
106
+ input_kwargs ["inference_type" ] = "img2img"
131
107
elif masked_image is not None :
132
108
# resize to specified shape
133
109
masked_image = {
@@ -138,61 +114,20 @@ def inference(
138
114
if np .count_nonzero (masked_image ["mask" ].convert ("1" )) < (
139
115
width * height * MIN_INPAINT_MASK_PERCENT
140
116
):
141
- # FIXME error handling
142
- raise Exception ("ERROR: mask is too small!" )
143
- if multi :
144
- pipe .change_pipeline_type ("inpaint" )
145
- else :
146
- pipe .__class__ = StableDiffusionInpaintPipeline
117
+ raise ValueError ("Mask is too small. Please paint-over a larger area" )
147
118
input_kwargs ["image" ] = masked_image ["image" ]
148
119
input_kwargs ["mask_image" ] = masked_image ["mask" ]
149
- elif multi :
150
- # default mode
151
- pipe .change_pipeline_type ("text" )
152
- else :
153
- pipe .__class__ = StableDiffusionPipeline
154
-
155
- # for repeatable results; tensor generated on cpu for model parallel
156
- if multi :
157
- # generator cant be pickled
158
- # NOTE fixed seed with multiples gpus will be different for each one but fixed!
159
- input_kwargs ["generator" ] = seed
160
- elif seed is not None and seed > 0 :
161
- input_kwargs ["generator" ] = torch .Generator (
162
- f"cuda:{ devices [0 ]} " if not MP else "cpu"
163
- ).manual_seed (seed )
164
-
165
- if nsfw_filter :
166
- if multi :
167
- pipe .safety_checker = None
168
- else :
169
- pipe .safety_checker = safety .to (f"cuda:{ devices [0 ]} " )
170
- pipe .feature_extractor = safety_extractor
171
- else :
172
- if multi :
173
- pipe .safety_checker = dummy_checker
174
- else :
175
- # remove safety network from gpu
176
- remove_nsfw (pipe )
177
-
178
- if low_vram :
179
- # needed on 16GB RAM 768x768 fp32
180
- pipe .enable_attention_slicing ()
181
- else :
182
- pipe .disable_attention_slicing ()
120
+ input_kwargs ["inference_type" ] = "inpaint"
121
+
122
+ pipe .set_nsfw (nsfw_filter )
123
+
124
+ # attention slicing is needed on 16GB RAM at 768x768 in fp32
125
+ pipe .enable_attention_slicing ("auto" if low_vram else None )
183
126
184
127
# set noise scheduler for inference
185
- if noise_scheduler is not None and noise_scheduler in schedulers :
186
- if multi :
187
- pipe .scheduler = noise_scheduler
188
- else :
189
- # load scheduler from pre-trained config
190
- s = getattr (schedulers [noise_scheduler ], "from_config" )(
191
- pipe .scheduler .config
192
- )
193
- pipe .scheduler = s
128
+ if noise_scheduler is not None :
129
+ pipe .scheduler = noise_scheduler
194
130
195
- # number of denoising steps run during inference (the higher the better)
196
131
with torch .autocast ("cuda" ):
197
132
images : List [Image .Image ] = pipe (** input_kwargs )["images" ]
198
133
return images
0 commit comments