You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
1.5 KiB

from fastapi import FastAPI, HTTPException, status
from typing import List, Optional, Union, Dict, Any, Tuple
from contextlib import asynccontextmanager
import os
import torch
from gliner import GLiNER
from .models import *
# Global model instance shared by the request handlers. It stays None until
# the lifespan handler assigns the loaded GLiNER model at startup; the
# /health endpoint reports whether it has been set.
model = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the GLiNER model on startup and release it on shutdown.

    The checkpoint name is read from the ``MODEL_NAME`` environment variable
    (default ``knowledgator/gliner-multitask-large-v0.5``). The model is moved
    to CUDA when ``torch.cuda.is_available()`` and to the CPU otherwise, then
    stored in the module-level ``model``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control returns to the framework while the application serves.
    """
    global model
    print("Loading GLiNER model...")
    model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = GLiNER.from_pretrained(model_name).to(device)
    print(f"Model loaded on {device}")
    try:
        yield
    finally:
        # Run the cleanup even when the application exits through an
        # exception, so the model is never left referenced after shutdown.
        print("Shutting down...")
        model = None
        if device == "cuda":
            # Hand cached GPU memory back to the driver once the model
            # reference has been dropped.
            torch.cuda.empty_cache()
# Application object: the route decorators below register handlers on it, and
# `lifespan` is passed in so the model is loaded before requests are served.
app = FastAPI(
    lifespan=lifespan,
    version="1.0.0",
    title="GLiNER Inference Server",
    description="Named Entity Recognition, Relation Extraction, and Summarization API",
)
@app.get("/health")
async def health_check():
    """Report that the service is up and whether the model has been loaded."""
    # The module-level `model` is None until the lifespan handler sets it.
    model_loaded = model is not None
    return {"status": "healthy", "model_loaded": model_loaded}
@app.post("/general")
async def general_extraction(request):
    """Named Entity Recognition endpoint (not implemented yet).

    Raises:
        HTTPException: Always, with status 501, until extraction is implemented.
    """
    # NOTE(review): `request` has no type annotation, so FastAPI will not parse
    # it as a JSON body; presumably it should be a schema from `.models` -- confirm.
    # Fail explicitly instead of answering 200 with a `null` body.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="Named entity recognition is not implemented yet",
    )
@app.post("/relation-extraction")
async def relation_extraction(request):
    """Relation Extraction endpoint (not implemented yet).

    Raises:
        HTTPException: Always, with status 501, until extraction is implemented.
    """
    # NOTE(review): `request` has no type annotation, so FastAPI will not parse
    # it as a JSON body; presumably it should be a schema from `.models` -- confirm.
    # Fail explicitly instead of answering 200 with a `null` body.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="Relation extraction is not implemented yet",
    )
@app.post("/summarization")
async def summarization(request):
    """Summarization endpoint (not implemented yet).

    Raises:
        HTTPException: Always, with status 501, until summarization is implemented.
    """
    # NOTE(review): `request` has no type annotation, so FastAPI will not parse
    # it as a JSON body; presumably it should be a schema from `.models` -- confirm.
    # Fail explicitly instead of answering 200 with a `null` body.
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="Summarization is not implemented yet",
    )
if __name__ == "__main__":
    # Script entry point: uvicorn is imported here, so it is only needed when
    # this module is executed directly rather than imported.
    import uvicorn

    # Listen on all interfaces; the port comes from PORT (default 8000).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000")))