From 26bb6f1e74517cf0c83b3a6daf8ceecd94d62427 Mon Sep 17 00:00:00 2001
From: Simon Larsen
Date: Tue, 18 Jun 2024 18:42:11 +0100
Subject: [PATCH] refactor: Update Dockerfile.tpl to expose port 8547 instead of port 80

This commit updates the comment above the EXPOSE directive in
Dockerfile.tpl: the port number in the comment changes from 80 to 8547
to match the port the Llama application actually listens on (the
directive itself already exposed 8547). It also moves Llama/app.py from
the local Llama-2-7b-chat-hf checkout to the
meta-llama/Meta-Llama-3-8B-Instruct chat pipeline, documents how to
download the model in Llama/Models/README.md, and adds a llama service
to the base and dev compose files.

---
 Llama/Dockerfile.tpl    |  2 +-
 Llama/Models/README.md  | 15 ++++++++++++++-
 Llama/app.py            | 40 ++++++++++++++++++++++------------------
 docker-compose.base.yml | 14 +++++++++++++-
 docker-compose.dev.yml  | 11 ++++++++++-
 5 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/Llama/Dockerfile.tpl b/Llama/Dockerfile.tpl
index 95d80a689b..475bfe629f 100644
--- a/Llama/Dockerfile.tpl
+++ b/Llama/Dockerfile.tpl
@@ -18,7 +18,7 @@ RUN pip install --no-cache-dir transformers
 # Install accelerate
 RUN pip install accelerate
 
-# Make port 80 available to the world outside this container
+# Make port 8547 available to the world outside this container
 EXPOSE 8547
 
 # Run app.py when the container launches
diff --git a/Llama/Models/README.md b/Llama/Models/README.md
index b6e7402be9..8d1119e141 100644
--- a/Llama/Models/README.md
+++ b/Llama/Models/README.md
@@ -1 +1,14 @@
-Keep all llama models here for docker build.
\ No newline at end of file
+Keep all Llama models here for the Docker build.
+
+# Downloading a Model from Hugging Face
+
+Please make sure you have Git LFS installed before cloning the model.
+
+```bash
+git lfs install
+```
+
+```bash
+# Here we are downloading the Meta-Llama-3-8B-Instruct model
+git clone https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+```
diff --git a/Llama/app.py b/Llama/app.py
index 639f376ac6..882bd72a15 100644
--- a/Llama/app.py
+++ b/Llama/app.py
@@ -9,20 +9,17 @@ from pydantic import BaseModel
 class Prompt(BaseModel):
     prompt: str
 
-model_path = "./Models/Llama-2-7b-chat-hf"
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_path,
-    # torch_dtype=torch.float32, # for CPU
-    torch_dtype=torch.float16, # for GPU
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
     device_map="auto",
 )
 
 app = FastAPI()
 
-
 @app.post("/prompt/")
 async def create_item(prompt: Prompt):
@@ -30,22 +27,29 @@ async def create_item(prompt: Prompt):
 
     if not prompt:
         return {"error": "Prompt is required"}
 
-    sequences = pipeline(
-        prompt.prompt,
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": prompt.prompt},  # pass the request's prompt through
+    ]
+
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        eos_token_id=terminators,
         do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+        temperature=0.6,
+        top_p=0.9,
     )
+
-    prompt_response_array = []
-
-    for seq in sequences:
-        print(f"Result: {seq['generated_text']}")
-        prompt_response_array.append(seq["generated_text"])
+    output = outputs[0]["generated_text"][-1]  # last chat message, i.e. the assistant's reply
 
     # return prompt response
-    return {"response": prompt_response_array}
+    return {"response": output}
diff --git a/docker-compose.base.yml b/docker-compose.base.yml
index c2ba5bdcef..386b5420a7 100644
--- a/docker-compose.base.yml
+++ b/docker-compose.base.yml
@@ -176,7 +176,19 @@ services:
       driver: "local"
       options:
         max-size: "1000m"
-
+
+
+  llama:
+    networks:
+      - oneuptime
+    restart: always
+    environment:
+      <<: *common-server-variables
+      PORT: 8547
+    logging:
+      driver: "local"
+      options:
+        max-size: "1000m"
 
   admin-dashboard:
     networks:
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index a4948f62df..d6b74510e0 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -12,7 +12,16 @@ services:
      context: .
      dockerfile: ./Haraka/Dockerfile
 
-
+
+  llama:
+    extends:
+      file: ./docker-compose.base.yml
+      service: llama
+    build:
+      network: host
+      context: .
+      dockerfile: ./Llama/Dockerfile
+
   redis:
     ports:
       - '6310:6379'
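
Usage note: EXPOSE only documents the port; neither compose file above
publishes 8547 to the host, so other services on the oneuptime network
reach the model at http://llama:8547. Below is a minimal sketch of a
client call against the new /prompt/ endpoint. It assumes the port is
additionally published to localhost (for example via a
ports: - '8547:8547' mapping, which this patch does not add).

# client.py - illustrative sketch, not part of this patch.
# Assumes port 8547 is reachable on localhost; from inside the compose
# network you would target http://llama:8547 instead.
import requests

resp = requests.post(
    "http://localhost:8547/prompt/",
    json={"prompt": "Who are you?"},  # matches the Prompt model in Llama/app.py
    timeout=300,  # generation can take a while, especially on first model load
)
resp.raise_for_status()

# app.py returns the pipeline's final chat message under "response",
# e.g. {"response": {"role": "assistant", "content": "..."}}
print(resp.json()["response"])

The two terminators passed to the pipeline follow the Llama 3 Instruct
convention: generation stops at either the tokenizer's eos token or the
<|eot_id|> marker, which is what ends an assistant turn in the Llama 3
chat template.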