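"""Serve Tabby (TabbyML/StarCoder-1B) behind Modal on a T4 GPU.

Builds a container image from the official Tabby release, exposes `health` and
`complete` methods backed by a Tabby server launched inside the container, and
provides a local entrypoint that scores prompts from sample.jsonl into
output.jsonl.
"""
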
from modal import Image, Stub, gpu, method

GPU_CONFIG = gpu.T4()
MODEL_ID = "TabbyML/StarCoder-1B"
LAUNCH_FLAGS = ["serve", "--model", MODEL_ID, "--port", "8000", "--device", "cuda"]


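# Fetch the model weights with the Tabby CLI. This runs once at image build
# time (via `run_function` below), so containers boot with the model cached.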
def download_model():
    import subprocess

    subprocess.run(
        [
            "/opt/tabby/bin/tabby",
            "download",
            "--model",
            MODEL_ID,
        ]
    )


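# Container image: the official Tabby release pinned by digest, with Python
# 3.11 added, the Docker ENTRYPOINT cleared (Modal supplies its own), model
# weights baked in, and the experimental tabby-python-client installed.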
image = (
    Image.from_registry(
        "tabbyml/tabby@sha256:64d71ec4c7d9ae7269e6301ad4106baad70ee997408691a6af17d7186283a856",
        add_python="3.11",
    )
    .dockerfile_commands("ENTRYPOINT []")
    .run_function(download_model)
    .pip_install(
        "git+https://github.com/TabbyML/tabby.git#egg=tabby-python-client&subdirectory=experimental/eval/tabby-python-client"
    )
)

stub = Stub("tabby-" + MODEL_ID.split("/")[-1], image=image)


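# One Tabby server per container: up to 10 requests share a GPU concurrently,
# and an idle container stays warm for 10 minutes before shutting down.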
@stub.cls(
    gpu=GPU_CONFIG,
    allow_concurrent_inputs=10,
    container_idle_timeout=60 * 10,
    timeout=360,
)
class Model:
    def __enter__(self):
        import socket
        import subprocess
        import time

        from tabby_python_client import Client

        self.launcher = subprocess.Popen(["/opt/tabby/bin/tabby"] + LAUNCH_FLAGS)
        self.client = Client("http://127.0.0.1:8000")

        # Poll until the webserver at 127.0.0.1:8000 accepts connections
        # before running inputs.
        def webserver_ready():
            try:
                socket.create_connection(("127.0.0.1", 8000), timeout=1).close()
                return True
            except (socket.timeout, ConnectionRefusedError):
                # Check if the launcher webserving process has exited.
                # If so, a connection can never be made.
                retcode = self.launcher.poll()
                if retcode is not None:
                    raise RuntimeError(
                        f"launcher exited unexpectedly with code {retcode}"
                    )
                return False

        while not webserver_ready():
            time.sleep(1.0)

        print("Tabby server ready!")

    def __exit__(self, _exc_type, _exc_value, _traceback):
        self.launcher.terminate()

    @method()
    async def health(self):
        from tabby_python_client.api.v1 import health

        resp = await health.asyncio(client=self.client)
        return resp.to_dict()

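    # `DebugOptions.raw_prompt` sends the pre-built prompt to the model as-is,
    # rather than having Tabby assemble one from request segments.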
    @method()
    async def complete(self, language: str, prompt: str):
        from tabby_python_client.api.v1 import completion
        from tabby_python_client.models import (
            CompletionRequest,
            CompletionResponse,
            DebugOptions,
        )

        request = CompletionRequest(
            language=language, debug_options=DebugOptions(raw_prompt=prompt)
        )
        resp: CompletionResponse = await completion.asyncio(
            client=self.client, json_body=request
        )
        return resp.choices[0].text


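# Local driver: for each record in sample.jsonl, concatenate the cross-file
# context with the prompt, request a completion from the remote model, and
# write one JSON record per line to output.jsonl.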
@stub.local_entrypoint()
def main():
    import json

    model = Model()
    print(model.health.remote())

    with open("./output.jsonl", "w") as fout, open("./sample.jsonl") as fin:
        for line in fin:
            x = json.loads(line)
            prompt = x["crossfile_context"]["text"] + x["prompt"]
            label = x["groundtruth"]
            prediction = model.complete.remote("python", prompt)

            json.dump(dict(prompt=prompt, label=label, prediction=prediction), fout)
            fout.write("\n")  # newline after each record keeps the file valid JSONL
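

# Example (assumed) shape of a sample.jsonl record, inferred from the fields
# read above; the real evaluation data may carry additional keys:
#   {"crossfile_context": {"text": "..."}, "prompt": "...", "groundtruth": "..."}
#
# Run with the Modal CLI, substituting this file's actual name:
#   modal run tabby_modal_eval.py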