from fastapi import FastAPI, HTTPException, status
from typing import List, Optional, Union, Dict, Any, Tuple
from contextlib import asynccontextmanager
import os
import torch
from gliner import GLiNER
from .models import *

# Global model instance, populated by the lifespan handler on startup.
model = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load model on startup, cleanup on shutdown"""
    global model
    print("Loading GLiNER model...")
    # MODEL_NAME lets deployments swap checkpoints without a code change.
    model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = GLiNER.from_pretrained(model_name).to(device)
    print(f"Model loaded on {device}")
    yield
    print("Shutting down...")


app = FastAPI(
    title="GLiNER Inference Server",
    description="Named Entity Recognition, Relation Extraction, and Summarization API",
    version="1.0.0",
    lifespan=lifespan
)


def _require_model() -> None:
    """Raise 503 if the model has not finished loading yet.

    Prevents endpoints from dereferencing the ``model`` global while it is
    still ``None`` (e.g. requests arriving during startup).
    """
    if model is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Model is not loaded yet",
        )


@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy", "model_loaded": model is not None}


@app.post("/general")
async def general_extraction(request: Dict[str, Any]):
    """Named Entity Recognition endpoint

    Expects a JSON body with:
        text (str): the input text to analyze.
        labels (list[str]): entity labels to extract.
        threshold (float, optional): confidence threshold (default 0.5).

    Returns a dict with an ``entities`` list as produced by
    ``GLiNER.predict_entities``.

    Raises:
        HTTPException 400: if ``text`` or ``labels`` is missing/empty.
        HTTPException 503: if the model is not loaded.
    """
    # NOTE(review): original handler took an unannotated `request` param
    # (query string) and returned null; a JSON body matches the POST intent.
    _require_model()
    text = request.get("text")
    labels = request.get("labels")
    if not text or not labels:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Request body must include non-empty 'text' and 'labels'",
        )
    threshold = request.get("threshold", 0.5)
    entities = model.predict_entities(text, labels, threshold=threshold)
    return {"entities": entities}


@app.post("/relation-extraction")
async def relation_extraction(request: Dict[str, Any]):
    """Relation Extraction endpoint

    Not implemented yet; responds 501 instead of silently returning null
    with a 200 status as the previous stub did.

    Raises:
        HTTPException 503: if the model is not loaded.
        HTTPException 501: always (pending implementation).
    """
    _require_model()
    # TODO: implement via the multitask model's relation-extraction prompting.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="Relation extraction is not implemented yet",
    )


@app.post("/summarization")
async def summarization(request: Dict[str, Any]):
    """Summarization endpoint

    Not implemented yet; responds 501 instead of silently returning null
    with a 200 status as the previous stub did.

    Raises:
        HTTPException 503: if the model is not loaded.
        HTTPException 501: always (pending implementation).
    """
    _require_model()
    # TODO: implement via the multitask model's summarization prompting.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="Summarization is not implemented yet",
    )


if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run(app, host="0.0.0.0", port=port)