Skip to content

Commit 416e798

Browse files
committed
fix secret redaction
1 parent 8c0915f commit 416e798

File tree

8 files changed

+59
-22
lines changed

8 files changed

+59
-22
lines changed

Diff for: src/codegate/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"anthropic": "https://api.anthropic.com/v1",
2121
"vllm": "http://localhost:8000", # Base URL without /v1 path
2222
"ollama": "http://localhost:11434", # Default Ollama server URL
23+
"lm_studio": "http://localhost:1234"
2324
}
2425

2526

Diff for: src/codegate/pipeline/base.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,7 @@ async def process(
321321

322322
class InputPipelineInstance:
323323
def __init__(
324-
self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool
325-
):
324+
self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool):
326325
self.pipeline_steps = pipeline_steps
327326
self.secret_manager = secret_manager
328327
self.is_fim = is_fim
@@ -385,8 +384,7 @@ async def process_request(
385384

386385
class SequentialPipelineProcessor:
387386
def __init__(
388-
self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool
389-
):
387+
self, pipeline_steps: List[PipelineStep], secret_manager: SecretsManager, is_fim: bool):
390388
self.pipeline_steps = pipeline_steps
391389
self.secret_manager = secret_manager
392390
self.is_fim = is_fim

Diff for: src/codegate/pipeline/codegate_context_retriever/codegate.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,11 @@ async def process(
9191
) # type: ignore
9292
logger.info(f"Found {len(bad_snippet_packages)} bad packages in code snippets.")
9393

94-
# Remove code snippets from the user messages and search for bad packages
94+
# Remove code snippets and file listing from the user messages and search for bad packages
9595
# in the rest of the user query/messages
9696
user_messages = re.sub(r"```.*?```", "", user_message, flags=re.DOTALL)
9797
user_messages = re.sub(r"⋮...*?⋮...\n\n", "", user_messages, flags=re.DOTALL)
98+
user_messages = re.sub(r"<environment_details>.*?</environment_details>", "", user_messages, flags=re.DOTALL)
9899

99100
# split messages on double newlines, to avoid passing too much content to the search
100101
split_messages = re.split(r'</?task>|(\n\n)', user_messages)
@@ -126,10 +127,23 @@ async def process(
126127
# Make a copy of the request
127128
new_request = request.copy()
128129

129-
# Add the context to the last user message
130130
# Format: "Context: {context_str} \n Query: {last user message content}"
131131
message = new_request["messages"][last_user_idx]
132-
context_msg = f'Context: {context_str} \n\n Query: {message["content"]}' # type: ignore
132+
message_str = str(message["content"]) # type: ignore
133+
# Add the context to the last user message
134+
if message_str.strip().startswith("<task>"):
135+
# formatting of cline
136+
match = re.match(r"(<task>)(.*?)(</task>)(.*)", message_str, re.DOTALL)
137+
if match:
138+
task_start, task_content, task_end, rest_of_message = match.groups()
139+
140+
# Embed the context into the task block
141+
updated_task_content = f"{task_start}Context: {context_str}\nQuery: {task_content.strip()}</details>{task_end}"
142+
143+
# Combine the updated task block with the rest of the message
144+
context_msg = updated_task_content + rest_of_message
145+
else:
146+
context_msg = f'Context: {context_str} \n\n Query: {message_str}' # type: ignore
133147
message["content"] = context_msg
134148

135149
logger.debug("Final context message", context_message=context_msg)

Diff for: src/codegate/pipeline/secrets/secrets.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -451,17 +451,30 @@ async def process_chunk(
451451
):
452452
return [chunk]
453453

454+
is_cline_client = any(
455+
"Cline" in str(message.trigger_string or "") for message in input_context.alerts_raised or []
456+
)
457+
454458
# Check if this is the first chunk (delta role will be present, others will not)
455459
if len(chunk.choices) > 0 and chunk.choices[0].delta.role:
456460
redacted_count = input_context.metadata["redacted_secrets_count"]
457461
secret_text = "secret" if redacted_count == 1 else "secrets"
458462
# Create notification chunk
459-
notification_chunk = self._create_chunk(
460-
chunk,
461-
f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
462-
f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
463-
f"by redacting them.\n\n",
464-
)
463+
if is_cline_client:
464+
notification_chunk = self._create_chunk(
465+
chunk,
466+
f"<thinking>\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
467+
f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
468+
f"by redacting them.</thinking>\n\n",
469+
)
470+
notification_chunk.choices[0].delta.role = "assistant"
471+
else:
472+
notification_chunk = self._create_chunk(
473+
chunk,
474+
f"\n🛡️ [CodeGate prevented {redacted_count} {secret_text}]"
475+
f"(http://localhost:9090/?search=codegate-secrets) from being leaked "
476+
f"by redacting them.\n\n",
477+
)
465478

466479
# Reset the counter
467480
input_context.metadata["redacted_secrets_count"] = 0

Diff for: src/codegate/providers/base.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,7 @@ async def _cleanup_after_streaming(
199199
context.sensitive.secure_cleanup()
200200

201201
async def complete(
202-
self, data: Dict, api_key: Optional[str], is_fim_request: bool
203-
) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
202+
self, data: Dict, api_key: Optional[str], is_fim_request: bool) -> Union[ModelResponse, AsyncIterator[ModelResponse]]:
204203
"""
205204
Main completion flow with pipeline integration
206205
@@ -220,20 +219,21 @@ async def complete(
220219
data.get("base_url"),
221220
is_fim_request,
222221
)
223-
if input_pipeline_result.response:
222+
if input_pipeline_result.response and input_pipeline_result.context:
224223
return await self._pipeline_response_formatter.handle_pipeline_response(
225224
input_pipeline_result.response, streaming, context=input_pipeline_result.context
226225
)
227226

228-
provider_request = self._input_normalizer.denormalize(input_pipeline_result.request)
227+
if input_pipeline_result.request:
228+
provider_request = self._input_normalizer.denormalize(input_pipeline_result.request)
229229
if is_fim_request:
230-
provider_request = self._fim_normalizer.denormalize(provider_request)
230+
provider_request = self._fim_normalizer.denormalize(provider_request) # type: ignore
231231

232232
# Execute the completion and translate the response
233233
# This gives us either a single response or a stream of responses
234234
# based on the streaming flag
235235
model_response = await self._completion_handler.execute_completion(
236-
provider_request, api_key=api_key, stream=streaming, is_fim_request=is_fim_request
236+
provider_request, api_key=api_key, stream=streaming, is_fim_request=is_fim_request # type: ignore
237237
)
238238
if not streaming:
239239
normalized_response = self._output_normalizer.normalize(model_response)
@@ -242,9 +242,9 @@ async def complete(
242242
return self._output_normalizer.denormalize(pipeline_output)
243243

244244
pipeline_output_stream = await self._run_output_stream_pipeline(
245-
input_pipeline_result.context, model_response, is_fim_request=is_fim_request
245+
input_pipeline_result.context, model_response, is_fim_request=is_fim_request # type: ignore
246246
)
247-
return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context)
247+
return self._cleanup_after_streaming(pipeline_output_stream, input_pipeline_result.context) # type: ignore
248248

249249
def get_routes(self) -> APIRouter:
250250
return self.router

Diff for: src/codegate/providers/ollama/completion_handler.py

-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ async def ollama_stream_generator(
6363
for field in optional_fields:
6464
if field in chunk_dict:
6565
response[field] = chunk_dict[field]
66-
6766
yield f"data: {json.dumps(response)}\n"
6867
except Exception as e:
6968
logger.error(f"Error in stream generator: {str(e)}")

Diff for: src/codegate/providers/ollama/provider.py

+2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ async def show_model(request: Request):
8080
@self.router.post(f"/{self.provider_route_name}/v1/generate")
8181
async def create_completion(request: Request):
8282
body = await request.body()
83+
print("i request")
84+
print(body)
8385
data = json.loads(body)
8486
# `base_url` is used in the providers pipeline to do the packages lookup.
8587
# Force it to be the one that comes in the configuration.

Diff for: src/codegate/providers/openai/provider.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import structlog
55
from fastapi import Header, HTTPException, Request
66

7+
from codegate.config import Config
78
from codegate.pipeline.factory import PipelineFactory
89
from codegate.providers.base import BaseProvider
910
from codegate.providers.litellmshim import LiteLLmShim, sse_stream_generator
@@ -16,6 +17,11 @@ def __init__(
1617
pipeline_factory: PipelineFactory,
1718
):
1819
completion_handler = LiteLLmShim(stream_generator=sse_stream_generator)
20+
config = Config.get_config()
21+
if config is not None:
22+
provided_urls = config.provider_urls
23+
self.lm_studio_url = provided_urls.get("lm_studio", "http://localhost:11434/")
24+
1925
super().__init__(
2026
OpenAIInputNormalizer(),
2127
OpenAIOutputNormalizer(),
@@ -47,6 +53,10 @@ async def create_completion(
4753
api_key = authorization.split(" ")[1]
4854
body = await request.body()
4955
data = json.loads(body)
56+
57+
# if model starts with lm_studio, propagate it
58+
if data.get("model", "").startswith("lm_studio"):
59+
data["base_url"] = self.lm_studio_url+"/v1/"
5060
is_fim_request = self._is_fim_request(request, data)
5161
try:
5262
stream = await self.complete(data, api_key, is_fim_request=is_fim_request)

0 commit comments

Comments
 (0)