@@ -1,17 +1,33 @@
 from fastapi import FastAPI, HTTPException, status
+from pydantic import BaseModel, Field, field_validator
 from typing import List, Optional, Union, Dict, Any, Tuple
 from contextlib import asynccontextmanager
 import os
 
+import torch
+from gliner import GLiNER
+from .models import *
 
 
 # Global model instance
 model = None
 
 
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load model on startup, cleanup on shutdown"""
+    global model
+    print("Loading GLiNER model...")
+    model_name = os.getenv("MODEL_NAME", "knowledgator/gliner-multitask-large-v0.5")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = GLiNER.from_pretrained(model_name).to(device)
+    print(f"Model loaded on {device}")
+    yield
+    print("Shutting down...")
+
 app = FastAPI(
     title="GLiNER Inference Server",
     description="Named Entity Recognition, Relation Extraction, and Summarization API",
     version="1.0.0",
+    lifespan=lifespan
 )
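
The hunk imports Pydantic's `BaseModel`, `Field`, and `field_validator` and wildcard-imports the request/response schemas from `.models`, but that module is not part of this diff. A minimal sketch of the kind of schema those imports suggest, with every name below hypothetical rather than the project's actual models:

    from typing import List
    from pydantic import BaseModel, Field, field_validator


    class NERRequest(BaseModel):
        # Hypothetical request schema; the real .models module is not shown here.
        text: str = Field(..., min_length=1)
        labels: List[str] = Field(..., min_length=1)
        threshold: float = Field(0.5, ge=0.0, le=1.0)

        @field_validator("labels")
        @classmethod
        def strip_labels(cls, v: List[str]) -> List[str]:
            # Normalize labels so "person " and "person" are treated alike.
            return [label.strip() for label in v]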
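
At request time, handlers read the module-level `model` that `lifespan` populated; the endpoints themselves sit outside this hunk. A sketch of one such route, assuming GLiNER's `predict_entities` API and the hypothetical `NERRequest` schema above:

    @app.post("/ner")
    async def extract_entities(request: NERRequest):
        # Reject requests that arrive before startup has finished loading.
        if model is None:
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail="Model not loaded",
            )
        # GLiNER's zero-shot NER call; each result is a dict with
        # "text", "label", "start", "end", and "score" keys.
        entities = model.predict_entities(
            request.text, request.labels, threshold=request.threshold
        )
        return {"entities": entities}

Because the checkpoint is pulled once in `lifespan` rather than per request, startup is slow but inference stays fast; the `MODEL_NAME` environment variable swaps in a different GLiNER checkpoint without a code change (e.g. `MODEL_NAME=urchade/gliner_base uvicorn app.main:app`, where the `app.main` module path is an assumption about the package layout).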