Add the lifespan to load model or to cleanup.

1 month ago · eab1d4ffd3
parent e40c0af2f7
commit eab1d4ffd3
1 changed files with 17 additions and 1 deletions
--- a/gliner_inference_server/main.py
+++ b/gliner_inference_server/main.py
@ -1,17 +1,33 @@
 from fastapi import FastAPI, HTTPException, status
-from pydantic import BaseModel, Field, field_validator
 from typing import List, Optional, Union, Dict, Any, Tuple
 from contextlib import asynccontextmanager
 import os
+import torch
+from gliner import GLiNER
+from .models import *

 # Global model instance
 model = None


+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load model on startup, cleanup on shutdown"""
+    global model
+    print("Loading GLiNER model...")
+    model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = GLiNER.from_pretrained(model_name).to(device)
+    print(f"Model loaded on {device}")
+    yield
+    print("Shutting down...")
+
+
 app = FastAPI(
    title="GLiNER Inference Server",
     description="Named Entity Recognition, Relation Extraction, and Summarization API",
     version="1.0.0",
+    lifespan=lifespan
 )