@@ -1,3 +1,4 @@
+import json
 from typing import AsyncIterator, Optional, Union

 import structlog
@@ -11,32 +12,90 @@


 async def ollama_stream_generator(
-    stream: AsyncIterator[ChatResponse],
+    stream: AsyncIterator[ChatResponse], is_cline_client: bool
 ) -> AsyncIterator[str]:
     """OpenAI-style SSE format"""
     try:
         async for chunk in stream:
             try:
-                yield f"{chunk.model_dump_json()}\n\n"
+                # TODO We should wire in the client info so we can respond with
+                # the correct format and start to handle multiple clients
+                # in a more robust way.
+                if not is_cline_client:
+                    yield f"{chunk.model_dump_json()}\n\n"
+                else:
+                    # First get the raw dict from the chunk
+                    chunk_dict = chunk.model_dump()
+                    # Create response dictionary in OpenAI-like format
+                    response = {
+                        "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
+                        "object": "chat.completion.chunk",
+                        "created": chunk_dict.get("created_at"),
+                        "model": chunk_dict.get("model"),
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "content": chunk_dict.get("message", {}).get("content", ""),
+                                    "role": chunk_dict.get("message", {}).get("role", "assistant"),
+                                },
+                                "finish_reason": (
+                                    chunk_dict.get("done_reason")
+                                    if chunk_dict.get("done", False)
+                                    else None
+                                ),
+                            }
+                        ],
+                    }
+                    # Preserve existing type or add default if missing
+                    response["type"] = chunk_dict.get("type", "stream")
+
+                    # Add optional fields that might be present in the final message
+                    optional_fields = [
+                        "total_duration",
+                        "load_duration",
+                        "prompt_eval_count",
+                        "prompt_eval_duration",
+                        "eval_count",
+                        "eval_duration",
+                    ]
+                    for field in optional_fields:
+                        if field in chunk_dict:
+                            response[field] = chunk_dict[field]
+
+                    yield f"data: {json.dumps(response)}\n\n"
             except Exception as e:
-                yield f"{str(e)}\n\n"
+                logger.error(f"Error in stream generator: {str(e)}")
+                yield f"data: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n\n"
     except Exception as e:
-        yield f"{str(e)}\n\n"
+        logger.error(f"Stream error: {str(e)}")
+        yield f"data: {json.dumps({'error': str(e), 'type': 'error', 'choices': []})}\n\n"


 class OllamaShim(BaseCompletionHandler):

     def __init__(self, base_url):
         self.client = AsyncClient(host=base_url, timeout=300)
+        self.is_cline_client = False

     async def execute_completion(
         self,
         request: ChatCompletionRequest,
         api_key: Optional[str],
         stream: bool = False,
         is_fim_request: bool = False,
+        is_cline_client: bool = False,
     ) -> Union[ChatResponse, GenerateResponse]:
         """Stream response directly from Ollama API."""
+
+        # TODO: I don't like this, but it's a quick fix for now until we start
+        # passing through the client info so we can respond with the correct
+        # format.
+        # Determine if the client is a Cline client
+        self.is_cline_client = any(
+            "Cline" in message["content"] for message in request.get("messages", [])
+        )
+
         if is_fim_request:
             prompt = request["messages"][0]["content"]
             response = await self.client.generate(
@@ -57,7 +116,7 @@ def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> Str
         is the format that FastAPI expects for streaming responses.
         """
         return StreamingResponse(
-            ollama_stream_generator(stream),
+            ollama_stream_generator(stream, self.is_cline_client),
             media_type="application/x-ndjson",
             headers={
                 "Cache-Control": "no-cache",
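And a minimal smoke test one could run against the generator in this diff. FakeChunk is a hypothetical stand-in that mimics only the two ChatResponse methods the generator calls (model_dump and model_dump_json); it assumes ollama_stream_generator is importable from the changed module.

import asyncio
import json


class FakeChunk:
    """Hypothetical stand-in for ollama.ChatResponse; not part of the library."""

    def __init__(self, data: dict):
        self._data = data

    def model_dump(self) -> dict:
        return self._data

    def model_dump_json(self) -> str:
        return json.dumps(self._data)


async def fake_stream():
    # A single "final" chunk so the Cline branch also sets finish_reason.
    yield FakeChunk(
        {
            "model": "llama3.2",
            "created_at": "2025-01-01T00:00:00Z",
            "message": {"role": "assistant", "content": "Hello"},
            "done": True,
            "done_reason": "stop",
            "eval_count": 1,
        }
    )


async def main() -> None:
    # is_cline_client=True exercises the OpenAI-style SSE branch added in this PR;
    # passing False yields the raw Ollama chunk as before.
    async for line in ollama_stream_generator(fake_stream(), is_cline_client=True):
        print(line, end="")


asyncio.run(main())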