refactor: Update Dockerfile.tpl to expose port 8547 instead of port 80

This commit updates the EXPOSE directive in Dockerfile.tpl, changing the port from 80 to 8547 to match the port the Llama application listens on. This makes the Llama application reachable from outside the container on the correct port.
Simon Larsen 2024-06-18 18:42:11 +01:00
parent 20db81a5f6
commit 26bb6f1e74
5 changed files with 60 additions and 22 deletions

@@ -18,7 +18,7 @@ RUN pip install --no-cache-dir transformers
 # Install acceletate
 RUN pip install accelerate
-# Make port 80 available to the world outside this container
-EXPOSE 80
+# Make port 8547 available to the world outside this container
+EXPOSE 8547
 # Run app.py when the container launches
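The EXPOSE directive only documents the port; the container still has to be run with the port published for the app to be reachable from the host. A minimal sketch of doing that by hand, assuming the generated ./Llama/Dockerfile referenced later in the compose file (the image tag is a hypothetical name, not something defined in this commit):

```bash
# Build the Llama image and publish the newly exposed port to the host
# (the "oneuptime-llama" tag is hypothetical)
docker build -t oneuptime-llama -f Llama/Dockerfile .
docker run --rm -p 8547:8547 oneuptime-llama
```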

@@ -1 +1,14 @@
-Keep all llama models here for docker build.
+Keep all llama models here for docker build.
+# Downloading Model from Hugging Face
+Please make sure you have git lfs installed before cloning the model.
+```bash
+git lfs install
+```
+```bash
+# Here we are downloading the Meta-Llama-3-8B-Instruct model
+git clone https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+```
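Meta-Llama-3-8B-Instruct is a gated repository on Hugging Face, so the clone above only works once the model license has been accepted and git has valid credentials for the account. A quick way to confirm the weights were actually downloaded rather than left as LFS pointer files (paths assume the clone location shown above):

```bash
# Inside the cloned model directory, list LFS-tracked files and fetch any
# objects that are still pointers
cd Meta-Llama-3-8B-Instruct
git lfs ls-files
git lfs pull
```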

@@ -9,20 +9,17 @@ from pydantic import BaseModel
 class Prompt(BaseModel):
     prompt: str
-model_path = "./Models/Llama-2-7b-chat-hf"
-tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_path,
-    # torch_dtype=torch.float32, # for CPU
-    torch_dtype=torch.float16, # for GPU
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
     device_map="auto",
 )
 app = FastAPI()
 @app.post("/prompt/")
 async def create_item(prompt: Prompt):
@app.post("/prompt/")
async def create_item(prompt: Prompt):
@@ -30,22 +27,29 @@ async def create_item(prompt: Prompt):
     if not prompt:
         return {"error": "Prompt is required"}
-    sequences = pipeline(
-        prompt.prompt,
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": "Who are you?"},
+    ]
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        eos_token_id=terminators,
         do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+        temperature=0.6,
+        top_p=0.9,
     )
-    prompt_response_array = []
-    for seq in sequences:
-        print(f"Result: {seq['generated_text']}")
-        prompt_response_array.append(seq["generated_text"])
+    output = outputs[0]["generated_text"][-1]
     # return prompt response
-    return {"response": prompt_response_array}
+    return {"response": output}

@@ -176,7 +176,19 @@ services:
       driver: "local"
       options:
         max-size: "1000m"
+  llama:
+    networks:
+      - oneuptime
+    restart: always
+    environment:
+      <<: *common-server-variables
+      PORT: 8547
+    logging:
+      driver: "local"
+      options:
+        max-size: "1000m"
   admin-dashboard:
     networks:
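With the service declared in docker-compose.base.yml, it can be started on its own for a quick smoke test. The commands below are a sketch and assume they are run from the repository root with the project's usual compose setup in place.

```bash
# Start only the llama service and follow its logs (it listens on PORT 8547)
docker compose up -d llama
docker compose logs -f llama
```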

@@ -12,7 +12,16 @@ services:
       context: .
       dockerfile: ./Haraka/Dockerfile
+  llama:
+    extends:
+      file: ./docker-compose.base.yml
+      service: llama
+    build:
+      network: host
+      context: .
+      dockerfile: ./Llama/Dockerfile
   redis:
     ports:
       - '6310:6379'
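A sketch for checking that the extends and build sections resolve as intended, and for building the image; this assumes the repository's default compose file arrangement, which is not shown in this diff beyond docker-compose.base.yml.

```bash
# Render the fully merged definition of the llama service, then build it
docker compose config llama
docker compose build llama
```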