diff --git a/gliner_inference_server/main.py b/gliner_inference_server/main.py
index 8c31301..fcac631 100644
--- a/gliner_inference_server/main.py
+++ b/gliner_inference_server/main.py
@@ -24,7 +24,10 @@ async def lifespan(app: FastAPI):
     print("Loading GLiNER model...")
     model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = GLiNER.from_pretrained(model_name).to(device)
+    model = GLiNER.from_pretrained(
+        model_name,
+        local_files_only = True
+    ).to(device)
     print(f"Model loaded on {device}")
     yield
     print("Shutting down...")