Mirror of https://github.com/OneUptime/oneuptime, synced 2024-11-21 22:59:07 +00:00
refactor: Update Dockerfile.tpl to expose port 8547 instead of port 80
This commit updates the EXPOSE directive in Dockerfile.tpl, changing the port from 80 to 8547 to match the port the Llama application listens on, so that the application is reachable from outside the container on the correct port.
This commit is contained in: parent 20db81a5f6 · commit 26bb6f1e74
Dockerfile.tpl

```diff
@@ -18,7 +18,7 @@ RUN pip install --no-cache-dir transformers
 # Install accelerate
 RUN pip install accelerate
 
-# Make port 80 available to the world outside this container
+# Make port 8547 available to the world outside this container
 EXPOSE 8547
 
 # Run app.py when the container launches
```
README for the Llama models directory

````diff
@@ -1 +1,14 @@
 Keep all llama models here for docker build.
+
+# Downloading Model from Hugging Face
+
+Please make sure you have git lfs installed before cloning the model.
+
+```bash
+git lfs install
+```
+
+```bash
+# Here we are downloading the Meta-Llama-3-8B-Instruct model
+git clone https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+```
````
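As an alternative to the git-lfs clone, the same snapshot could be fetched with the huggingface_hub client. A minimal sketch, not part of this commit; the target directory is an assumption:

```python
# Sketch: fetch the model with huggingface_hub instead of git-lfs (not in this commit).
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",  # gated repo: requires an accepted license and a HF token
    local_dir="./Models/Meta-Llama-3-8B-Instruct",  # assumed target path under the models directory
)
```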
Llama/app.py (40 changed lines)
```diff
@@ -9,20 +9,17 @@ from pydantic import BaseModel
 class Prompt(BaseModel):
     prompt: str
 
-model_path = "./Models/Llama-2-7b-chat-hf"
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
-
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_path,
-    # torch_dtype=torch.float32, # for CPU
-    torch_dtype=torch.float16, # for GPU
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
     device_map="auto",
 )
 
 app = FastAPI()
 
 
 @app.post("/prompt/")
 async def create_item(prompt: Prompt):
```
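Pulled out of the diff with its imports, a sketch of the pipeline the new code builds; the import lines are assumed from app.py's existing header:

```python
# Sketch of the new pipeline setup, with the imports app.py is assumed to already have.
import torch
import transformers

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,                                # weights resolved from the Hugging Face Hub
    model_kwargs={"torch_dtype": torch.bfloat16},  # load weights in bfloat16 to cut memory
    device_map="auto",                             # let accelerate place layers on available devices
)
# The standalone AutoTokenizer is gone: the pipeline loads its own tokenizer,
# available as pipeline.tokenizer (used below to build the terminator ids).
```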
```diff
@@ -30,22 +27,29 @@ async def create_item(prompt: Prompt):
     if not prompt:
         return {"error": "Prompt is required"}
 
-    sequences = pipeline(
-        prompt.prompt,
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": "Who are you?"},
+    ]
+
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        eos_token_id=terminators,
         do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+        temperature=0.6,
+        top_p=0.9,
     )
 
 
-    prompt_response_array = []
-
-    for seq in sequences:
-        print(f"Result: {seq['generated_text']}")
-        prompt_response_array.append(seq["generated_text"])
+    output = outputs[0]["generated_text"][-1]
 
     # return prompt response
-    return {"response": prompt_response_array}
+    return {"response": output}
 
+
```
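Note that the new handler hardcodes the user turn ("Who are you?") rather than passing prompt.prompt through. With chat-format (list-of-messages) input, the text-generation pipeline returns the whole conversation under generated_text, which is why the handler takes the last element. An illustrative sketch of the shape, with made-up values:

```python
# Shape of `outputs` for chat-format input (illustrative values, not real output):
# outputs = [
#     {
#         "generated_text": [
#             {"role": "system", "content": "You are a pirate chatbot ..."},
#             {"role": "user", "content": "Who are you?"},
#             {"role": "assistant", "content": "Arrr! I be ..."},  # the newly generated turn
#         ]
#     }
# ]
output = outputs[0]["generated_text"][-1]  # the assistant message dict
print(output["content"])                   # just the generated text
```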
docker-compose.base.yml

```diff
@@ -176,7 +176,19 @@ services:
       driver: "local"
       options:
         max-size: "1000m"
 
+  llama:
+    networks:
+      - oneuptime
+    restart: always
+    environment:
+      <<: *common-server-variables
+      PORT: 8547
+    logging:
+      driver: "local"
+      options:
+        max-size: "1000m"
+
   admin-dashboard:
     networks:
       - oneuptime
```
A second compose file wires the service into the stack by extending the base definition:

```diff
@@ -12,7 +12,16 @@ services:
       context: .
       dockerfile: ./Haraka/Dockerfile
 
+  llama:
+    extends:
+      file: ./docker-compose.base.yml
+      service: llama
+    build:
+      network: host
+      context: .
+      dockerfile: ./Llama/Dockerfile
+
   redis:
     ports:
       - '6310:6379'
 
```
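For reference, a hedged sketch of exercising the service from another container on the oneuptime network: the /prompt/ route and request body come from app.py above, while the hostname and the assumption that the app binds to the PORT value of 8547 come from the compose entries.

```python
# Sketch: call the llama service from inside the `oneuptime` network.
# Hostname "llama" and port 8547 are assumptions taken from the compose files.
import json
import urllib.request

req = urllib.request.Request(
    "http://llama:8547/prompt/",
    data=json.dumps({"prompt": "Say hello"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # expected shape: {"response": {...assistant message dict...}}
```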