Add a lifespan handler to load the model on startup and clean up on shutdown.

main
KKlochko 1 month ago
parent e40c0af2f7
commit eab1d4ffd3
Signed by: KKlochko
GPG Key ID: 572ECCD219BBA91B

@@ -1,17 +1,33 @@
from fastapi import FastAPI, HTTPException, status from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel, Field, field_validator
from typing import List, Optional, Union, Dict, Any, Tuple from typing import List, Optional, Union, Dict, Any, Tuple
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
import os import os
import torch
from gliner import GLiNER
from .models import *
# Global model instance; stays None until the application lifespan loads it.
model = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the GLiNER model on startup and release it on shutdown.

    The checkpoint name is read from the ``MODEL_NAME`` environment variable,
    falling back to ``knowledgator/gliner-multitask-large-v0.5``. The model is
    moved to CUDA when ``torch.cuda.is_available()`` is true, otherwise to the
    CPU, and is stored in the module-level ``model`` global.

    Args:
        app: The FastAPI application this lifespan is attached to. It is not
            used by the body.

    Yields:
        None. Control returns to the framework while the application runs.
    """
    global model

    print("Loading GLiNER model...")
    model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = GLiNER.from_pretrained(model_name).to(device)
    print(f"Model loaded on {device}")

    try:
        yield
    finally:
        # Run the shutdown path even if the application exits with an error.
        print("Shutting down...")
        # Drop the global reference so the weights can be garbage-collected.
        model = None
        if device == "cuda":
            # Hand cached, now-unused GPU memory back to the driver.
            torch.cuda.empty_cache()
# ASGI application; `lifespan` handles model loading and shutdown.
app = FastAPI(
    title="GLiNER Inference Server",
    description="Named Entity Recognition, Relation Extraction, and Summarization API",
    version="1.0.0",
    lifespan=lifespan,
)

Loading…
Cancel
Save