mirror of https://github.com/OneUptime/oneuptime
synced 2024-11-23 15:49:10 +00:00
97cc28b182
This commit updates the Dockerfile.tpl to use the huggingface/transformers-pytorch-gpu image instead of the continuumio/anaconda3 image. This change allows the Llama app to utilize GPU resources for improved performance in AI processing. Additionally, the unnecessary installation of the transformers and accelerate libraries is removed as they are already included in the huggingface/transformers-pytorch-gpu image.
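The Dockerfile.tpl itself is not shown here; as a rough illustration of the change the commit describes, the base-image swap might look like the sketch below. Only the two image names come from the commit message — the tag, WORKDIR, and COPY lines are illustrative assumptions, not the actual file contents.

# Before: FROM continuumio/anaconda3
# After (per the commit message; surrounding lines are assumptions):
FROM huggingface/transformers-pytorch-gpu

# transformers and accelerate ship with this base image,
# so they no longer need to be installed here.

WORKDIR /app
COPY . /app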
61 lines
1.2 KiB
Python
import time

import torch
import transformers
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel


# Pydantic model for the request body
class Prompt(BaseModel):
    prompt: str


model_path = "/app/Models/Meta-Llama-3-8B-Instruct"

pipe = transformers.pipeline(
    "text-generation",
    model=model_path,
    # Use the GPU if one is available, otherwise fall back to CPU
    device="cuda" if torch.cuda.is_available() else "cpu",
)

app = FastAPI()


@app.get("/")
async def root():
    return {"status": "ok"}


@app.post("/prompt/")
async def create_item(prompt: Prompt):
    # Record the request start time so we can report the duration
    start_time = time.time()

    # Log the incoming prompt to the console
    print(prompt)

    # Reject empty prompts with a 400 Bad Request.
    # (Check `prompt.prompt` rather than `prompt`: the Pydantic model
    # instance itself is always truthy once FastAPI has parsed the body.)
    if not prompt.prompt:
        raise HTTPException(status_code=400, detail="Prompt is required")

    # Wrap the prompt in the chat-style message format the pipeline expects
    messages = [
        {"role": "user", "content": prompt.prompt},
    ]

    outputs = pipe(messages)

    # Log the model output to the console
    print(outputs)

    end_time = time.time()
    response_time = end_time - start_time

    # Print the request duration to the console
    print(f"Request duration: {response_time}")

    # Return the generated output along with the request duration
    return {"response": outputs, "responseTime": response_time}
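As a quick usage sketch, the /prompt/ endpoint above can be exercised with the requests library. The host and port are assumptions (8000 is uvicorn's default); adjust them to wherever the service actually runs.

# Minimal client sketch for the /prompt/ endpoint.
# Host and port are assumptions, not taken from the repo.
import requests

resp = requests.post(
    "http://localhost:8000/prompt/",
    json={"prompt": "What is OneUptime?"},
)
print(resp.json())  # e.g. {"response": [...], "responseTime": ...}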