diff --git a/Llama/Dockerfile.tpl b/Llama/Dockerfile.tpl
index 95d80a689b..475bfe629f 100644
--- a/Llama/Dockerfile.tpl
+++ b/Llama/Dockerfile.tpl
@@ -18,7 +18,7 @@ RUN pip install --no-cache-dir transformers
 # Install acceletate
 RUN pip install accelerate
 
-# Make port 80 available to the world outside this container
+# Make port 8547 available to the world outside this container
 EXPOSE 8547
 
 # Run app.py when the container launches
diff --git a/Llama/Models/README.md b/Llama/Models/README.md
index b6e7402be9..8d1119e141 100644
--- a/Llama/Models/README.md
+++ b/Llama/Models/README.md
@@ -1 +1,14 @@
-Keep all llama models here for docker build.
\ No newline at end of file
+Keep all llama models here for docker build.
+
+# Downloading Model from Hugging Face
+
+Please make sure you have git lfs installed before cloning the model.
+
+```bash
+git lfs install
+```
+
+```bash
+# Here we are downloading the Meta-Llama-3-8B-Instruct model
+git clone https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+```
diff --git a/Llama/app.py b/Llama/app.py
index 639f376ac6..882bd72a15 100644
--- a/Llama/app.py
+++ b/Llama/app.py
@@ -9,20 +9,17 @@ from pydantic import BaseModel
 class Prompt(BaseModel):
     prompt: str
 
-model_path = "./Models/Llama-2-7b-chat-hf"
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_path,
-    # torch_dtype=torch.float32, # for CPU
-    torch_dtype=torch.float16, # for GPU
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
     device_map="auto",
 )
 
 app = FastAPI()
 
-
 @app.post("/prompt/")
 async def create_item(prompt: Prompt):
@@ -30,22 +27,29 @@ async def create_item(prompt: Prompt):
 
     if not prompt:
         return {"error": "Prompt is required"}
 
-    sequences = pipeline(
-        prompt.prompt,
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": "Who are you?"},
+    ]
+
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        eos_token_id=terminators,
         do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+        temperature=0.6,
+        top_p=0.9,
     )
+
 
-    prompt_response_array = []
-
-    for seq in sequences:
-        print(f"Result: {seq['generated_text']}")
-        prompt_response_array.append(seq["generated_text"])
+    output = outputs[0]["generated_text"][-1]
 
     # return prompt response
-    return {"response": prompt_response_array}
+    return {"response": output}
 
diff --git a/docker-compose.base.yml b/docker-compose.base.yml
index c2ba5bdcef..386b5420a7 100644
--- a/docker-compose.base.yml
+++ b/docker-compose.base.yml
@@ -176,7 +176,19 @@ services:
       driver: "local"
       options:
         max-size: "1000m"
-
+
+
+  llama:
+    networks:
+      - oneuptime
+    restart: always
+    environment:
+      <<: *common-server-variables
+      PORT: 8547
+    logging:
+      driver: "local"
+      options:
+        max-size: "1000m"
 
   admin-dashboard:
     networks:
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index a4948f62df..d6b74510e0 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -12,7 +12,16 @@ services:
       context: .
       dockerfile: ./Haraka/Dockerfile
 
-
+
+  llama:
+    extends:
+      file: ./docker-compose.base.yml
+      service: llama
+    build:
+      network: host
+      context: .
+      dockerfile: ./Llama/Dockerfile
+
   redis:
     ports:
       - '6310:6379'
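
For local testing, one way to bring up just the new service is to merge the base and dev compose files and target `llama` directly. This is a sketch only: it assumes `Llama/Dockerfile` has already been generated from `Dockerfile.tpl`, that the Meta-Llama-3-8B-Instruct weights have been cloned into `Llama/Models/` as the README above describes, and that the repository's normal compose tooling is not required for these two files.

```bash
# Sketch, not the project's official workflow:
# assumes Llama/Dockerfile has been generated from Dockerfile.tpl
# and the model weights already sit under Llama/Models/.
docker compose -f docker-compose.base.yml -f docker-compose.dev.yml up --build llama
```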
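
Once the container is up, the reworked `/prompt/` endpoint can be exercised with a plain HTTP POST whose JSON body follows the `Prompt` model. `localhost:8547` is an assumption based on the `EXPOSE 8547` and `PORT: 8547` settings above; no host port mapping is visible in these hunks, so adjust as needed. Note that, as written, the handler only validates the posted `prompt` and generates from the hard-coded pirate `messages`, so the reply will not reflect the request text.

```bash
# Hypothetical request against the new endpoint; host and port are assumptions.
curl -X POST http://localhost:8547/prompt/ \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Who are you?"}'
```

The reply should come back as `{"response": {...}}`, where the inner object is the last chat message produced by the pipeline (roughly `{"role": "assistant", "content": "..."}`).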