camel-ai
diff --git a/camel/environments/base.py b/camel/environments/base.py
+16-8
diff --git a/camel/extractors/__init__.py b/camel/extractors/__init__.py
+2-2
diff --git a/camel/extractors/base.py b/camel/extractors/base.py
+86-64
@@ -151,20 +151,26 @@ def __init__(
         r"""Initialize the environment.
 
         Args:
-            dataset: Dataset to sample questions from.
-            verifier: Verifier to check responses.
-            extractor: Extractor to process LLM responses.
-            max_steps: Maximum steps per episode.
-            teacher_agent: Optional agent for reward shaping and hints
-            curriculum_config: Configuration for curriculum learning including:
+            dataset (BaseDataset): Dataset to sample questions from.
+            verifier (BaseVerifier): Verifier to check responses.
+            extractor (BaseExtractor): Extractor to process LLM responses.
+            max_steps (Optional[int]): Maximum steps per episode. (default:
+                :obj:`None`)
+            teacher_agent (Optional[ChatAgent]): Optional agent for reward
+                shaping and hints. (default: :obj:`None`)
+            curriculum_config (Optional[Dict[str, Any]]): Configuration for
+                curriculum learning including:
                 - difficulty_levels: List of available difficulty levels
                 - promotion_threshold: Score needed to advance
                 - demotion_threshold: Score triggering level decrease
                 - min_questions_per_level: Questions before promotion
-            practice_env_config: Configuration for practice environments:
+                (default: :obj:`None`)
+            practice_env_config (Optional[Dict[str, Any]]): Configuration for
+                practice environments:
                 - max_practice_envs: Maximum concurrent environments
                 - difficulty_range: Allowed difficulty variation
                 - focus_areas: Specific skills to practice
+                (default: :obj:`None`)
             **kwargs: Additional environment parameters.
         """
         self.dataset = dataset
@@ -289,7 +295,9 @@ async def step(self, action: Action) -> StepResult:
         # extract verifiable part from llm response
         extraction_result = await self.extractor.extract(action.llm_response)
 
-        # TODO: extract executable llm response specifically
+        # Extraction yields None on failure; pass "" to the verifier instead
+        if extraction_result is None:
+            extraction_result = ""
 
         # verify the extracted
         verification_result = await self.verifier.verify(
 
@@ -11,6 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from .base import BaseExtractor
+from .base import BaseExtractor, BaseExtractorStrategy
 
-__all__ = ["BaseExtractor"]
+__all__ = ["BaseExtractor", "BaseExtractorStrategy"]
@@ -12,28 +12,47 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
+import asyncio
 from abc import ABC, abstractmethod
 from types import TracebackType
-from typing import Any, Dict, Optional, Type
-
-from typing_extensions import Self
+from typing import Any, Dict, List, Optional, Type
 
 from camel.logger import get_logger
 from camel.utils import BatchProcessor
 
 logger = get_logger(__name__)
 
 
-class BaseExtractor(ABC):
-    r"""Base class for all response extractors.
+class BaseExtractorStrategy(ABC):
+    r"""Abstract base class for extraction strategies."""
+
+    @abstractmethod
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Asynchronously extracts relevant parts from text.
+
+        Args:
+            text (str): The input text to process.
+
+        Returns:
+            Optional[str]: The extracted string if successful, otherwise None.
+        """
+        pass
+
+
+class BaseExtractor:
+    r"""Base class for response extractors with a fixed strategy pipeline.
 
-    An extractor takes the response and extracts the relevant parts,
-    converting them into a format that the verifier can handle.
-    Implements async context manager protocol for proper resource management.
+    This extractor:
+    - Uses a **fixed multi-stage pipeline** of extraction strategies.
+    - Tries **each strategy in order** within a stage until one succeeds.
+    - Feeds the **output of one stage into the next** for processing.
+    - Supports **async execution** for efficient processing.
+    - Provides **batch processing and resource monitoring** options.
     """
 
     def __init__(
         self,
+        pipeline: List[List[BaseExtractorStrategy]],
         cache_templates: bool = True,
         max_cache_size: int = 1000,
         extraction_timeout: float = 30.0,
@@ -43,9 +62,12 @@ def __init__(
         memory_threshold: float = 85.0,
         **kwargs,
     ):
-        r"""Initialize the extractor.
+        r"""Initialize the extractor with a multi-stage strategy pipeline.
 
         Args:
+            pipeline (List[List[BaseExtractorStrategy]]):
+                Ordered list of stages; each stage is a list of strategies
+                tried in order until one returns a non-None result.
             cache_templates (bool): Whether to cache extraction templates.
                 (default: :obj:`True`)
             max_cache_size (int): Maximum number of templates to cache.
@@ -61,11 +83,8 @@ def __init__(
             memory_threshold (float): Memory usage percentage threshold for
                 scaling down. (default: :obj:`85.0`)
             **kwargs: Additional extractor parameters.
-
-        Raises:
-            ValueError: If invalid parameter values are provided
         """
-        # Store all parameters in metadata dict for compatibility
+
         self._metadata = {
             'cache_templates': cache_templates,
             'max_cache_size': max_cache_size,
@@ -81,14 +100,7 @@ def __init__(
         self._cache: Dict[str, Any] = {}
         self._batch_processor: Optional[BatchProcessor] = None
 
-        # Store configuration parameters
-        self._cache_templates = cache_templates
-        self._max_cache_size = max_cache_size
-        self._extraction_timeout = extraction_timeout
-        self._batch_size = batch_size
-        self._monitoring_interval = monitoring_interval
-        self._cpu_threshold = cpu_threshold
-        self._memory_threshold = memory_threshold
+        self._pipeline = pipeline
 
     async def setup(self) -> None:
         r"""Set up the extractor with necessary resources.
@@ -106,17 +118,15 @@ async def setup(self) -> None:
             return
 
         try:
-            # Initialize template cache if enabled
-            if self._cache_templates:
+            if self._metadata["cache_templates"]:
                 self._template_cache: Dict[str, Any] = {}
 
-            # Set up batch processing if needed
-            if self._batch_size > 1:
+            if self._metadata["batch_size"] > 1:
                 self._batch_processor = BatchProcessor(
-                    initial_batch_size=self._batch_size,
-                    monitoring_interval=self._monitoring_interval,
-                    cpu_threshold=self._cpu_threshold,
-                    memory_threshold=self._memory_threshold,
+                    initial_batch_size=self._metadata["batch_size"],
+                    monitoring_interval=self._metadata["monitoring_interval"],
+                    cpu_threshold=self._metadata["cpu_threshold"],
+                    memory_threshold=self._metadata["memory_threshold"],
                 )
 
             self._is_setup = True
@@ -171,13 +181,6 @@ async def cleanup(self) -> None:
                     )
 
             # Preserve init config in metadata
-            self._metadata = {
-                'cache_templates': self._cache_templates,
-                'max_cache_size': self._max_cache_size,
-                'extraction_timeout': self._extraction_timeout,
-                'batch_size': self._batch_size,
-            }
-
             if not errors:
                 logger.info(
                     f"{self.__class__.__name__} cleaned up successfully"
@@ -187,23 +190,19 @@ async def cleanup(self) -> None:
             errors.append(f"Unexpected error during cleanup: {e}")
 
         finally:
-            # Always mark as uninitialized, even if cleanup fails
             self._is_setup = False
             self._batch_processor = None
 
         if errors:
-            error_msg = (
-                f"Errors during {self.__class__.__name__} cleanup: "
-                f"{'; '.join(errors)}"
-            )
+            error_msg = f"Errors during cleanup: {'; '.join(errors)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
 
-    async def __aenter__(self) -> Self:
+    async def __aenter__(self) -> "BaseExtractor":
         r"""Async context manager entry.
 
         Returns:
-            Self reference for context manager usage.
+            BaseExtractor: The initialized extractor instance.
         """
         await self.setup()
         return self
@@ -226,38 +225,61 @@ async def __aexit__(
         """
         await self.cleanup()
 
-    @abstractmethod
-    async def extract(
-        self, response: str, context: Optional[Dict[str, Any]] = None
-    ) -> str:
-        r"""Extract relevant parts from a response.
-
-        Extracts:
-        1. Final answer or output
-        2. Chain of thought reasoning steps
-        3. Difficulty assessment
+    async def extract(self, response: str) -> Optional[str]:
+        r"""Extracts a normalized, comparable part of the LLM response
+        using the fixed multi-stage strategy pipeline.
 
         Args:
-            response (str): Raw response from agent generation.
-            context (Optional[Dict[str, Any]]): Optional context for
-            extraction like:
-                - final_answer
-                - rationale
-                - complexity
+            response (str): The raw response text.
 
         Returns:
-            str: Extracted content string.
+            Optional[str]: Final stage output, or None if any stage fails.
 
         Raises:
             ValueError: If response is empty or invalid.
-            NotImplementedError: If no implementation is provided.
             RuntimeError: If extractor is not initialized.
         """
         if not self._is_setup:
             raise RuntimeError(
-                f"{self.__class__.__name__} must be initialized "
-                "before extraction"
+                "Extractor must be initialized before extraction"
             )
         if not response or not response.strip():
             raise ValueError("Empty or whitespace-only response")
-        raise NotImplementedError("Subclasses must implement extract()")
+
+        current_input = response  # Initial input
+
+        for stage in self._pipeline:
+            stage_success = (
+                False  # Track if any strategy in the stage succeeds
+            )
+
+            for strategy in stage:
+                try:
+                    # Apply the extraction timeout
+                    result = await asyncio.wait_for(
+                        strategy.extract(current_input),
+                        timeout=self._metadata["extraction_timeout"],
+                    )
+
+                    if result is not None:
+                        current_input = result  # Feed into next stage
+                        stage_success = True
+                        break  # Move to next stage if valid extraction occurs
+
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} timed out "
+                        f"after {self._metadata['extraction_timeout']} seconds"
+                    )
+                except Exception as e:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} failed: {e}"
+                    )
+
+            if not stage_success:
+                logger.debug(
+                    "No strategy in stage succeeded, stopping extraction."
+                )
+                return None  # Stop processing if the stage fails
+
+        return current_input  # Final processed output