Skip to content

Commit

Permalink
Merge pull request #36 from montasaurus/adam/24/01/05/add-sentry
Browse files Browse the repository at this point in the history
add sentry to vllm container
  • Loading branch information
alexanderatallah authored Jan 5, 2024
2 parents 5d419dc + 437cff3 commit 7939545
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 22 deletions.
6 changes: 6 additions & 0 deletions modal/runner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ Before you start working with the OpenRouter Runner, it's crucial to set up your
```shell
modal secret create ext-api-key RUNNER_API_KEY=<generate a random key>
```
- **Sentry Configuration**:
Create another secret group for Sentry error tracking. Replace `<optional SENTRY_DSN>` with your DSN from sentry.io, or leave it blank to disable Sentry (e.g. `SENTRY_DSN=`). You can also set an environment name by appending `SENTRY_ENVIRONMENT=<environment name>` to the command.
```shell
modal secret create sentry SENTRY_DSN=<optional SENTRY_DSN>
```
Now, if you go to your dashboard on [Modal](https://modal.com/) and open the Secrets tab, you should see your keys deployed there.
Expand Down
56 changes: 34 additions & 22 deletions modal/runner/containers/vllm_unified.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from os import environ
from typing import Optional

import modal.gpu
from modal import Image
from modal import Image, Secret

from runner.engines.vllm import VllmEngine, VllmParams
from runner.shared.common import stub
Expand All @@ -10,9 +11,7 @@
_vllm_image = Image.from_registry(
"nvidia/cuda:12.1.0-base-ubuntu22.04",
add_python="3.10",
).pip_install(
"vllm==0.2.6",
)
).pip_install("vllm==0.2.6", "sentry-sdk==1.39.1")


def _make_container(
Expand All @@ -28,27 +27,39 @@ def __init__(
model_path: str,
max_model_len: Optional[int] = None,
):
if num_gpus > 1:
# Patch issue from https://github.com/vllm-project/vllm/issues/1116
import ray

ray.shutdown()
ray.init(num_gpus=num_gpus, ignore_reinit_error=True)

super().__init__(
VllmParams(
model=model_path,
tensor_parallel_size=num_gpus,
max_model_len=max_model_len,
)
import sentry_sdk

sentry_sdk.init(
dsn=environ.get("SENTRY_DSN"),
environment=environ.get("SENTRY_ENVIRONMENT") or "development",
)

# Performance improvement from https://github.com/vllm-project/vllm/issues/2073#issuecomment-1853422529
if num_gpus > 1:
import subprocess
try:
if num_gpus > 1:
# Patch issue from https://github.com/vllm-project/vllm/issues/1116
import ray

ray.shutdown()
ray.init(num_gpus=num_gpus, ignore_reinit_error=True)

super().__init__(
VllmParams(
model=model_path,
tensor_parallel_size=num_gpus,
max_model_len=max_model_len,
)
)

# Performance improvement from https://github.com/vllm-project/vllm/issues/2073#issuecomment-1853422529
if num_gpus > 1:
import subprocess

RAY_CORE_PIN_OVERRIDE = "cpuid=0 ; for pid in $(ps xo '%p %c' | grep ray:: | awk '{print $1;}') ; do taskset -cp $cpuid $pid ; cpuid=$(($cpuid + 1)) ; done"
subprocess.call(RAY_CORE_PIN_OVERRIDE, shell=True)
RAY_CORE_PIN_OVERRIDE = "cpuid=0 ; for pid in $(ps xo '%p %c' | grep ray:: | awk '{print $1;}') ; do taskset -cp $cpuid $pid ; cpuid=$(($cpuid + 1)) ; done"
subprocess.call(RAY_CORE_PIN_OVERRIDE, shell=True)
except Exception as e:
# We have to manually capture and re-raise because Modal catches the exception upstream
sentry_sdk.capture_exception(e)
raise e

_VllmContainer.__name__ = name

Expand All @@ -59,6 +70,7 @@ def __init__(
allow_concurrent_inputs=concurrent_inputs,
container_idle_timeout=20 * 60,
timeout=10 * 60,
secret=Secret.from_name("sentry"),
)
return wrap(_VllmContainer)

Expand Down

0 comments on commit 7939545

Please sign in to comment.