Mirror of https://github.com/OneUptime/oneuptime, synced 2024-11-21 22:59:07 +00:00
refactor: Update Dockerfile.tpl to expose port 8547 instead of port 80
This commit updates the EXPOSE directive in Dockerfile.tpl, changing the port from 80 to 8547 to match the port the Llama application listens on, so that the application is reachable from outside the container on the correct port.
This commit is contained in: parent 20db81a5f6 · commit 26bb6f1e74
Dockerfile.tpl

```diff
@@ -18,7 +18,7 @@ RUN pip install --no-cache-dir transformers
 # Install accelerate
 RUN pip install accelerate
 
-# Make port 80 available to the world outside this container
+# Make port 8547 available to the world outside this container
 EXPOSE 8547
 
 # Run app.py when the container launches
```
README for the Llama models directory

````diff
@@ -1 +1,14 @@
 Keep all llama models here for docker build.
+
+# Downloading Model from Hugging Face
+
+Please make sure you have git lfs installed before cloning the model.
+
+```bash
+git lfs install
+```
+
+```bash
+# Here we are downloading the Meta-Llama-3-8B-Instruct model
+git clone https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
+```
````
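As an alternative to the git-lfs clone, the same snapshot could be fetched with the huggingface_hub client. A minimal sketch, not part of this commit; the target directory is an assumption:

```python
# Sketch: fetch the model with huggingface_hub instead of git-lfs (not in this commit).
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",  # gated repo: requires an accepted license and a HF token
    local_dir="./Models/Meta-Llama-3-8B-Instruct",  # assumed target path under the models directory
)
```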
Llama/app.py (40 changed lines)
```diff
@@ -9,20 +9,17 @@ from pydantic import BaseModel
 class Prompt(BaseModel):
     prompt: str
 
-model_path = "./Models/Llama-2-7b-chat-hf"
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
-
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_path,
-    # torch_dtype=torch.float32, # for CPU
-    torch_dtype=torch.float16, # for GPU
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
     device_map="auto",
 )
 
 app = FastAPI()
 
 
 @app.post("/prompt/")
 async def create_item(prompt: Prompt):
```
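Pulled out of the diff with its imports, a sketch of the pipeline the new code builds; the import lines are assumed from app.py's existing header:

```python
# Sketch of the new pipeline setup, with the imports app.py is assumed to already have.
import torch
import transformers

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,                                # weights resolved from the Hugging Face Hub
    model_kwargs={"torch_dtype": torch.bfloat16},  # load weights in bfloat16 to cut memory
    device_map="auto",                             # let accelerate place layers on available devices
)
# The standalone AutoTokenizer is gone: the pipeline loads its own tokenizer,
# available as pipeline.tokenizer (used below to build the terminator ids).
```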
```diff
@@ -30,22 +27,29 @@ async def create_item(prompt: Prompt):
     if not prompt:
         return {"error": "Prompt is required"}
 
-    sequences = pipeline(
-        prompt.prompt,
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": "Who are you?"},
+    ]
+
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    outputs = pipeline(
+        messages,
+        max_new_tokens=256,
+        eos_token_id=terminators,
         do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+        temperature=0.6,
+        top_p=0.9,
     )
 
 
-    prompt_response_array = []
-
-    for seq in sequences:
-        print(f"Result: {seq['generated_text']}")
-        prompt_response_array.append(seq["generated_text"])
+    output = outputs[0]["generated_text"][-1]
 
     # return prompt response
-    return {"response": prompt_response_array}
+    return {"response": output}
 
+
```
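Note that the new handler hardcodes the user turn ("Who are you?") rather than passing prompt.prompt through. With chat-format (list-of-messages) input, the text-generation pipeline returns the whole conversation under generated_text, which is why the handler takes the last element. An illustrative sketch of the shape, with made-up values:

```python
# Shape of `outputs` for chat-format input (illustrative values, not real output):
# outputs = [
#     {
#         "generated_text": [
#             {"role": "system", "content": "You are a pirate chatbot ..."},
#             {"role": "user", "content": "Who are you?"},
#             {"role": "assistant", "content": "Arrr! I be ..."},  # the newly generated turn
#         ]
#     }
# ]
output = outputs[0]["generated_text"][-1]  # the assistant message dict
print(output["content"])                   # just the generated text
```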
docker-compose.base.yml

```diff
@@ -176,7 +176,19 @@ services:
       driver: "local"
       options:
         max-size: "1000m"
 
+  llama:
+    networks:
+      - oneuptime
+    restart: always
+    environment:
+      <<: *common-server-variables
+      PORT: 8547
+    logging:
+      driver: "local"
+      options:
+        max-size: "1000m"
+
   admin-dashboard:
     networks:
       - oneuptime
```
A second compose file wires the service into the stack by extending the base definition:

```diff
@@ -12,7 +12,16 @@ services:
       context: .
       dockerfile: ./Haraka/Dockerfile
 
+  llama:
+    extends:
+      file: ./docker-compose.base.yml
+      service: llama
+    build:
+      network: host
+      context: .
+      dockerfile: ./Llama/Dockerfile
+
   redis:
     ports:
       - '6310:6379'
 
```
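For reference, a hedged sketch of exercising the service from another container on the oneuptime network: the /prompt/ route and request body come from app.py above, while the hostname and the assumption that the app binds to the PORT value of 8547 come from the compose entries.

```python
# Sketch: call the llama service from inside the `oneuptime` network.
# Hostname "llama" and port 8547 are assumptions taken from the compose files.
import json
import urllib.request

req = urllib.request.Request(
    "http://llama:8547/prompt/",
    data=json.dumps({"prompt": "Say hello"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # expected shape: {"response": {...assistant message dict...}}
```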