diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 929b9a7c0959..000000000000 --- a/.coveragerc +++ /dev/null @@ -1,6 +0,0 @@ -[run] -branch = True -source = autogen -omit = - *test* - *samples* diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 755531953f4e..439248e199b9 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,32 +1,6 @@ -#------------------------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE file in the project root for license information. -#------------------------------------------------------------------------------------------------------------- +# Note: You can use any Debian/Ubuntu based image you want. +FROM mcr.microsoft.com/devcontainers/base:bullseye -FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 - -# -# Update the OS and maybe install packages -# -ENV DEBIAN_FRONTEND=noninteractive - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get -y install --no-install-recommends build-essential npm git-lfs \ - && apt-get autoremove -y \ - && apt-get clean -y \ - && arch=$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) \ - && wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-${arch}.deb \ - && dpkg -i quarto-1.5.23-linux-${arch}.deb \ - && rm -rf /var/lib/apt/lists/* quarto-1.5.23-linux-${arch}.deb -ENV DEBIAN_FRONTEND=dialog - -# For docs -RUN npm install --global yarn -RUN pip install --upgrade pip -RUN pip install pydoc-markdown -RUN pip install pyyaml -RUN pip install colored +# [Optional] Uncomment this section to install additional OS packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/README.md b/.devcontainer/README.md deleted file mode 100644 index 8ae045f27d19..000000000000 --- a/.devcontainer/README.md +++ /dev/null @@ -1,96 +0,0 @@ -# Dockerfiles and Devcontainer Configurations for AutoGen - -Welcome to the `.devcontainer` directory! Here you'll find Dockerfiles and devcontainer configurations that are essential for setting up your AutoGen development environment. Each Dockerfile is tailored for different use cases and requirements. Below is a brief overview of each and how you can utilize them effectively. - -These configurations can be used with Codespaces and locally. - -## Dockerfile Descriptions - -### base - -- **Purpose**: This Dockerfile, i.e., `./Dockerfile`, is designed for basic setups. It includes common Python libraries and essential dependencies required for general usage of AutoGen. -- **Usage**: Ideal for those just starting with AutoGen or for general-purpose applications. -- **Building the Image**: Run `docker build -f ./Dockerfile -t autogen_base_img .` in this directory. -- **Using with Codespaces**: `Code > Codespaces > Click on +` By default + creates a Codespace on the current branch. - -### full - -- **Purpose**: This Dockerfile, i.e., `./full/Dockerfile` is for advanced features. It includes additional dependencies and is configured for more complex or feature-rich AutoGen applications. -- **Usage**: Suited for advanced users who need the full range of AutoGen's capabilities. 
- **Building the Image**: Execute `docker build -f full/Dockerfile -t autogen_full_img .`. - **Using with Codespaces**: `Code > Codespaces > Click on ...> New with options > Choose "full" as devcontainer configuration`. This image may require a Codespace with at least 64GB of disk space. - - ### dev - - - **Purpose**: Tailored for AutoGen project developers, this Dockerfile, i.e., `./dev/Dockerfile`, includes tools and configurations aiding in development and contribution. - - **Usage**: Recommended for developers who are contributing to the AutoGen project. - - **Building the Image**: Run `docker build -f dev/Dockerfile -t autogen_dev_img .`. - - **Using with Codespaces**: `Code > Codespaces > Click on ...> New with options > Choose "dev" as devcontainer configuration`. This image may require a Codespace with at least 64GB of disk space. - - **Before using**: We highly encourage all potential contributors to read the [AutoGen Contributing](https://microsoft.github.io/autogen/docs/Contribute) page prior to submitting any pull requests. - - - ### studio - - - **Purpose**: Tailored for AutoGen project developers, this Dockerfile, i.e., `./studio/Dockerfile`, includes tools and configurations aiding in development and contribution. - - **Usage**: Recommended for developers who are contributing to the AutoGen project. - - **Building the Image**: Run `docker build -f studio/Dockerfile -t autogen_studio_img .`. - - **Using with Codespaces**: `Code > Codespaces > Click on ...> New with options > Choose "studio" as devcontainer configuration`. - - **Before using**: We highly encourage all potential contributors to read the [AutoGen Contributing](https://microsoft.github.io/autogen/docs/Contribute) page prior to submitting any pull requests. - - - ## Customizing Dockerfiles - - Feel free to modify these Dockerfiles for your specific project needs. Here are some common customizations: - - - **Adding New Dependencies**: If your project requires additional Python packages, you can add them using the `RUN pip install` command. - - **Changing the Base Image**: You may change the base image (e.g., from a Python image to an Ubuntu image) to suit your project's requirements. - - **Changing the Python version**: Do you need a Python version other than 3.11? Just update the first line of each Dockerfile, for example: - `FROM python:3.11-slim-bookworm` to `FROM python:3.10-slim-bookworm` - - **Setting Environment Variables**: Add environment variables using the `ENV` command for any application-specific configurations. We have pre-staged the line needed to inject your OpenAI API key into the Docker environment as an environment variable. Others can be staged in the same way. Just uncomment the line: - `# ENV OPENAI_API_KEY="{OpenAI-API-Key}"` to `ENV OPENAI_API_KEY="{OpenAI-API-Key}"` - - **Need a less "advanced" AutoGen build**: If the `./full/Dockerfile` installs more than you need but the base image is not enough, update this line in the Dockerfile to install just what you need, for example change -`RUN pip install pyautogen[teachable,lmm,retrievechat,mathchat,blendsearch] autogenra` to `RUN pip install pyautogen[retrievechat,blendsearch] autogenra` - - **Can't dev without your favorite CLI tool?**: If you need particular OS tools installed in your Docker container, you can add those packages right after the `sudo` entry in the `./base/Dockerfile` and `./full/Dockerfile` files. In the example below we install net-tools and vim in the environment.
- - ```code - RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - software-properties-common sudo net-tools vim\ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - ``` - -### Managing Your Docker Environment - -After customizing your Dockerfile, build the Docker image using the `docker build` command as shown above. To run a container based on your new image, use: - -```bash -docker run -it -v $(pwd)/your_app:/app your_image_name -``` - -Replace `your_app` with your application directory and `your_image_name` with the name of the image you built. - -#### Closing for the Day - -- **Exit the container**: Type `exit`. -- **Stop the container**: Use `docker stop {application_project_name}`. - -#### Resuming Work - -- **Restart the container**: Use `docker start {application_project_name}`. -- **Access the container**: Execute `sudo docker exec -it {application_project_name} bash`. -- **Reactivate the environment**: Run `source /usr/src/app/autogen_env/bin/activate`. - -### Useful Docker Commands - -- **View running containers**: `docker ps -a`. -- **View Docker images**: `docker images`. -- **Restart container setup**: Stop (`docker stop my_container`), remove the container (`docker rm my_container`), and remove the image (`docker rmi my_image:latest`). - -#### Troubleshooting Common Issues - -- Check Docker daemon, port conflicts, and permissions issues. - -#### Additional Resources - -For more information on Docker usage and best practices, refer to the [official Docker documentation](https://docs.docker.com). diff --git a/.devcontainer/dev/Dockerfile b/.devcontainer/dev/Dockerfile deleted file mode 100644 index 04f4c54edf4e..000000000000 --- a/.devcontainer/dev/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -# Basic setup -FROM python:3.11-slim-bookworm - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -# Update and install necessary packages -RUN apt-get update && apt-get -y update -# added vim and nano for convenience -RUN apt-get install -y sudo git npm vim nano curl wget git-lfs - -# Setup a non-root user 'autogen' with sudo access -RUN adduser --disabled-password --gecos '' autogen -RUN adduser autogen sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -USER autogen -WORKDIR /home/autogen - -# Set environment variable -# ENV OPENAI_API_KEY="{OpenAI-API-Key}" - -# Clone the AutoGen repository -RUN git clone https://github.com/microsoft/autogen.git /home/autogen/autogen -WORKDIR /home/autogen/autogen - -# Install AutoGen in editable mode with extra components -RUN sudo pip install -e .[test,teachable,lmm,retrievechat,mathchat,blendsearch] - -# Install pre-commit hooks -RUN pre-commit install - -# Setup Docusaurus and Yarn for the documentation website -RUN sudo npm install --global yarn -RUN sudo pip install pydoc-markdown -RUN cd website -RUN yarn install --frozen-lockfile --ignore-engines - -RUN arch=$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) && \ - wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-${arch}.tar.gz && \ - mkdir -p /home/autogen/quarto/ && \ - tar -xzf quarto-1.5.23-linux-${arch}.tar.gz --directory /home/autogen/quarto/ && \ - rm quarto-1.5.23-linux-${arch}.tar.gz - -ENV PATH="${PATH}:/home/autogen/quarto/quarto-1.5.23/bin/" - -# Exposes the Yarn port for Docusaurus -EXPOSE 3000 - -# Pre-load popular Python packages -RUN pip install --upgrade pip -RUN pip install numpy pandas matplotlib 
seaborn scikit-learn requests urllib3 nltk pillow pytest beautifulsoup4 - -# Set the default command to bash -CMD ["/bin/bash"] diff --git a/.devcontainer/dev/devcontainer.json b/.devcontainer/dev/devcontainer.json deleted file mode 100644 index 9ebff28d5ca9..000000000000 --- a/.devcontainer/dev/devcontainer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "dockerFile": "Dockerfile" -} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 8ca4604d85eb..1bbf484de0f0 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,22 +1,53 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/docker-outside-of-docker-compose { - "customizations": { - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter", - "visualstudioexptteam.vscodeintellicode", - "GitHub.copilot" - ], - "settings": { - "terminal.integrated.profiles.linux": { - "bash": { - "path": "/bin/bash" - } - }, - "terminal.integrated.defaultProfile.linux": "bash" - } - } - }, - "dockerFile": "Dockerfile", - "updateContentCommand": "pip install -e . pre-commit && pre-commit install" -} + "name": "agnext devcontainer", + "dockerComposeFile": "docker-compose.yml", + "service": "devcontainer", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + + // Use this environment variable if you need to bind mount your local source code into a new container. + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + + "features": { + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { + "moby": true, + "installDockerBuildx": true, + "version": "latest", + "dockerDashComposeVersion": "none" + }, + "ghcr.io/devcontainers/features/python:1": { + "installTools": true, + "version": "latest" + }, + "ghcr.io/devcontainers/features/dotnet:2": { + "version": "8.0.302" + }, + "ghcr.io/elanhasson/devcontainer-features/dotnet-aspire-daily:1": {}, + "ghcr.io/devcontainers/features/azure-cli:1": {}, + "ghcr.io/azure/azure-dev/azd:0": {}, + "ghcr.io/devcontainers/features/node:1": {} + }, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "bash .devcontainer/startup.sh", + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + "remoteUser": "root", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.debugpy", + "GitHub.copilot", + "ms-dotnettools.csdevkit", + "ms-dotnettools.vscodeintellicode-csharp", + "github.vscode-github-actions" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 000000000000..b95cf48e56cd --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,26 @@ +version: '3' + +services: + devcontainer: + build: + context: . + dockerfile: Dockerfile + + volumes: + # Forwards the local Docker socket to the container. + - /var/run/docker.sock:/var/run/docker-host.sock + # Update this to wherever you want VS Code to mount the folder of your project + - ../..:/workspaces:cached + + # Overrides default command so things don't shut down after the process ends. 
+ entrypoint: /usr/local/share/docker-init.sh + command: sleep infinity + + # Uncomment the next four lines if you will use a ptrace-based debuggers like C++, Go, and Rust. + # cap_add: + # - SYS_PTRACE + # security_opt: + # - seccomp:unconfined + + # Use "forwardPorts" in **devcontainer.json** to forward an app port locally. + # (Adding the "ports" property to this file will not forward from a Codespace.) diff --git a/.devcontainer/full/Dockerfile b/.devcontainer/full/Dockerfile deleted file mode 100644 index 0787ad240272..000000000000 --- a/.devcontainer/full/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM python:3.11-slim-bookworm - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -# Update and install dependencies -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - software-properties-common sudo git-lfs \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Setup a non-root user 'autogen' with sudo access -RUN adduser --disabled-password --gecos '' autogen -RUN adduser autogen sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -USER autogen -WORKDIR /home/autogen - -# Set environment variable if needed -# ENV OPENAI_API_KEY="{OpenAI-API-Key}" - -# Install Python packages -RUN pip install --upgrade pip -RUN pip install pyautogen[teachable,lmm,retrievechat,mathchat,blendsearch] autogenra -RUN pip install numpy pandas matplotlib seaborn scikit-learn requests urllib3 nltk pillow pytest beautifulsoup4 - -# Expose port -EXPOSE 8081 - -# Start Command -CMD ["/bin/bash"] diff --git a/.devcontainer/full/devcontainer.json b/.devcontainer/full/devcontainer.json deleted file mode 100644 index 9ebff28d5ca9..000000000000 --- a/.devcontainer/full/devcontainer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "dockerFile": "Dockerfile" -} diff --git a/.devcontainer/startup.sh b/.devcontainer/startup.sh new file mode 100644 index 000000000000..ef05df76960f --- /dev/null +++ b/.devcontainer/startup.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# dotnet setup +dotnet workload update +dotnet dev-certs https --trust + +# python setup +pushd python +pip install uv +uv sync +source .venv/bin/activate +echo "export PATH=$PATH" >> ~/.bashrc +popd diff --git a/.devcontainer/studio/Dockerfile b/.devcontainer/studio/Dockerfile deleted file mode 100644 index 4a08aea98724..000000000000 --- a/.devcontainer/studio/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -#------------------------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See LICENSE file in the project root for license information. 
-#------------------------------------------------------------------------------------------------------------- - -FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 - -# -# Update the OS and maybe install packages -# -ENV DEBIAN_FRONTEND=noninteractive - -# add git lhs to apt -RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash - -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get -y install --no-install-recommends build-essential npm git-lfs \ - && apt-get autoremove -y \ - && apt-get clean -y \ - && rm -rf /var/lib/apt/lists/* -ENV DEBIAN_FRONTEND=dialog - -# For docs -RUN npm install --global yarn -RUN pip install --upgrade pip -RUN pip install pydoc-markdown diff --git a/.devcontainer/studio/devcontainer.json b/.devcontainer/studio/devcontainer.json deleted file mode 100644 index 23627237e201..000000000000 --- a/.devcontainer/studio/devcontainer.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "customizations": { - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter", - "visualstudioexptteam.vscodeintellicode" - ], - "settings": { - "terminal.integrated.profiles.linux": { - "bash": { - "path": "/bin/bash" - } - }, - "terminal.integrated.defaultProfile.linux": "bash" - } - } - }, - "dockerFile": "Dockerfile", - "updateContentCommand": "cd samples/apps/autogen-studio && pip install -e . && sudo npm install -g gatsby-cli && cd frontend && yarn install && yarn build" -} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index a92044f15b78..000000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,142 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Build - -on: - push: - branches: ["main"] - pull_request: - branches: ["main"] - merge_group: - types: [checks_requested] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} -permissions: {} -jobs: - paths-filter: - runs-on: ubuntu-latest - outputs: - hasChanges: ${{ steps.filter.outputs.autogen == 'true' || steps.filter.outputs.test == 'true' || steps.filter.outputs.workflows == 'true' || steps.filter.outputs.setup == 'true' }} - steps: - - uses: actions/checkout@v4 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - autogen: - - "autogen/**" - test: - - "test/**" - workflows: - - ".github/workflows/**" - setup: - - "setup.py" - - name: autogen has changes - run: echo "autogen has changes" - if: steps.filter.outputs.autogen == 'true' - - name: test has changes - run: echo "test has changes" - if: steps.filter.outputs.test == 'true' - - name: workflows has changes - run: echo "workflows has changes" - if: steps.filter.outputs.workflows == 'true' - - name: setup has changes - run: echo "setup has changes" - if: steps.filter.outputs.setup == 'true' - build: - needs: paths-filter - if: needs.paths-filter.outputs.hasChanges == 'true' - runs-on: ${{ matrix.os }} - env: - AUTOGEN_USE_DOCKER: ${{ matrix.os != 'ubuntu-latest' && 'False' }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.8" - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - - 
name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - python -m pip install --upgrade pip wheel - pip install -e .[cosmosdb] - python -c "import autogen" - pip install pytest-cov>=5 mock - - name: Install optional dependencies for code executors - # code executors and udfs auto skip without deps, so only run for python 3.11 - if: matrix.python-version == '3.11' - run: | - pip install -e ".[jupyter-executor,test]" - python -m ipykernel install --user --name python3 - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Test with pytest skipping openai tests - if: matrix.python-version != '3.10' && matrix.os == 'ubuntu-latest' - # Remove the line below once https://github.com/docker/docker-py/issues/3256 is merged - run: | - pip install "requests<2.32.0" - pytest test --ignore=test/agentchat/contrib --skip-openai --durations=10 --durations-min=1.0 - - name: Test with pytest skipping openai and docker tests - if: matrix.python-version != '3.10' && matrix.os != 'ubuntu-latest' - run: | - pytest test --ignore=test/agentchat/contrib --skip-openai --skip-docker --durations=10 --durations-min=1.0 - - name: Coverage with Redis - if: matrix.python-version == '3.10' - run: | - pip install -e .[test,redis,websockets] - pytest test --ignore=test/agentchat/contrib --skip-openai --durations=10 --durations-min=1.0 - - name: Test with Cosmos DB - run: | - pip install -e .[test,cosmosdb] - pytest test/cache/test_cosmos_db_cache.py --skip-openai --durations=10 --durations-min=1.0 - - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - build-check: - if: always() - runs-on: ubuntu-latest - needs: [build] - steps: - - name: Get Date - shell: bash - run: | - echo "date=$(date +'%m/%d/%Y %H:%M:%S')" >> "$GITHUB_ENV" - - - name: Run Type is ${{ github.event_name }} - if: ${{ github.event_name != 'schedule' && github.event_name != 'workflow_dispatch'}} - shell: bash - run: | - echo "run_type=${{ github.event_name }}" >> "$GITHUB_ENV" - - - name: Fail workflow if build failed - id: check_build_failed - if: contains(join(needs.*.result, ','), 'failure') - uses: actions/github-script@v6 - with: - script: core.setFailed('Build Failed!') - - - name: Fail workflow if build cancelled - id: check_build_cancelled - if: contains(join(needs.*.result, ','), 'cancelled') - uses: actions/github-script@v6 - with: - script: core.setFailed('Build Cancelled!') diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 000000000000..e7d0fe3f1aeb --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,161 @@ +name: Checks + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe fmt --check + working-directory: ./python + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: 
actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe lint + working-directory: ./python + + mypy: + runs-on: ubuntu-latest + strategy: + matrix: + package: + [ + "./packages/autogen-core", + "./packages/team-one", + "./packages/agbench", + "./packages/autogen-ext", + "./packages/autogen-agentchat", + ] + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe --directory ${{ matrix.package }} mypy + working-directory: ./python + + pyright: + runs-on: ubuntu-latest + strategy: + matrix: + package: + [ + "./packages/autogen-core", + "./packages/team-one", + "./packages/agbench", + "./packages/autogen-ext", + "./packages/autogen-agentchat", + ] + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe --directory ${{ matrix.package }} pyright + working-directory: ./python + + test: + runs-on: ubuntu-latest + strategy: + matrix: + package: + [ + "./packages/autogen-core", + "./packages/team-one", + "./packages/autogen-ext", + "./packages/autogen-agentchat", + ] + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe --directory ${{ matrix.package }} test + working-directory: ./python + + docs: + runs-on: ubuntu-latest + strategy: + matrix: + package: ["./packages/autogen-core"] + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe --directory ${{ matrix.package }} docs-check + working-directory: ./python + + check-proto-changes-python: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: uv sync --locked + working-directory: ./python + - name: Run task + run: | + source ${{ github.workspace }}/python/.venv/bin/activate + poe gen-proto + working-directory: ./python + - name: Evaluate if there are changes + run: | + if [[ `git status --porcelain` ]]; then + echo "There are changes that need to be generated and commit for the proto files" + exit 1 + fi + shell: bash diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml deleted file mode 100644 index 7e8fb0033177..000000000000 --- a/.github/workflows/contrib-openai.yml +++ /dev/null @@ -1,342 +0,0 @@ -# This workflow will install Python dependencies and run tests -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: OpenAI4ContribTests - -on: 
- pull_request: - branches: ["main"] - paths: - - "autogen/**" - - "test/agentchat/contrib/**" - - ".github/workflows/contrib-openai.yml" - - "setup.py" -permissions: - {} - # actions: read - # checks: read - # contents: read - # deployments: read -jobs: - RetrieveChatTest: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.10"] - runs-on: ${{ matrix.os }} - environment: openai1 - services: - pgvector: - image: ankane/pgvector - env: - POSTGRES_DB: postgres - POSTGRES_USER: postgres - POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }} - POSTGRES_HOST_AUTH_METHOD: trust - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . - python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Install packages for test when needed - run: | - pip install docker - pip install -e .[retrievechat,retrievechat-qdrant,retrievechat-pgvector] - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/retrievechat/ test/agentchat/contrib/retrievechat - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - AgentEvalTest: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.10"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . - python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/agent_eval/test_agent_eval.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - GPTAssistantAgent: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . 
- python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Install packages for test when needed - run: | - pip install docker - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/test_gpt_assistant.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - TeachableAgent: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e .[teachable] - python -c "import autogen" - pip install pytest-cov>=5 - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/capabilities/test_teachable_agent.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - AgentBuilder: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . 
- python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Install packages for test when needed - run: | - pip install -e .[autobuild] - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/test_agent_builder.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - WebSurfer: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e .[websurfer] - python -c "import autogen" - pip install pytest-cov>=5 - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - BING_API_KEY: ${{ secrets.BING_API_KEY }} - run: | - pytest test/agentchat/contrib/test_web_surfer.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - ImageGen: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.12"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e .[lmm] - python -c "import autogen" - pip install pytest-cov>=5 - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - pytest test/agentchat/contrib/capabilities/test_image_generation_capability.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - AgentOptimizer: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.11"] - runs-on: ${{ matrix.os }} - environment: openai1 - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e . 
- python -c "import autogen" - pip install pytest-cov>=5 - - name: Coverage - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test/agentchat/contrib/test_agent_optimizer.py - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml deleted file mode 100644 index d58098c98e76..000000000000 --- a/.github/workflows/contrib-tests.yml +++ /dev/null @@ -1,671 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: ContribTests - -on: - pull_request: - branches: ["main"] - paths: - - "autogen/**" - - "test/agentchat/contrib/**" - - "test/test_browser_utils.py" - - "test/test_retrieve_utils.py" - - ".github/workflows/contrib-tests.yml" - - "setup.py" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} -permissions: - {} - # actions: read - # checks: read - # contents: read - # deployments: read -jobs: - RetrieveChatTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [macos-latest, windows-2019] - python-version: ["3.9", "3.10", "3.11"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install qdrant_client when python-version is 3.10 - if: matrix.python-version == '3.10' - run: | - pip install -e .[retrievechat-qdrant] - - name: Install packages and dependencies for RetrieveChat - run: | - pip install -e .[retrievechat] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/test_retrieve_utils.py test/agentchat/contrib/retrievechat/test_retrievechat.py test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py test/agentchat/contrib/vectordb --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - RetrieveChatTest-Ubuntu: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11"] - services: - pgvector: - image: ankane/pgvector - env: - POSTGRES_DB: postgres - POSTGRES_USER: postgres - POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }} - POSTGRES_HOST_AUTH_METHOD: trust - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - mongodb: - image: mongodb/mongodb-atlas-local:latest - ports: - - 27017:27017 - couchbase: - image: couchbase:enterprise-7.6.3 - ports: - - "8091-8095:8091-8095" - - "11210:11210" - - "9102:9102" - healthcheck: # checks couchbase server is up - test: ["CMD", "curl", "-v", "http://localhost:8091/pools"] - interval: 20s - timeout: 20s - 
retries: 5 - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest - - name: Install qdrant_client when python-version is 3.10 - if: matrix.python-version == '3.10' - run: | - pip install -e .[retrievechat-qdrant] - - name: Install pgvector when on linux - run: | - pip install -e .[retrievechat-pgvector] - - name: Install mongodb when on linux - run: | - pip install -e .[retrievechat-mongodb] - - name: Install couchbase when on linux - run: | - pip install -e .[retrievechat-couchbase] - - name: Install unstructured when python-version is 3.9 and on linux - if: matrix.python-version == '3.9' - run: | - sudo apt-get update - sudo apt-get install -y tesseract-ocr poppler-utils - pip install --no-cache-dir unstructured[all-docs]==0.13.0 - - name: Install packages and dependencies for RetrieveChat - run: | - pip install -e .[retrievechat] - - name: Install packages and dependencies for WebSurfer and browser_utils - run: | - pip install -e .[test,websurfer] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - - name: Coverage - run: | - pip install pytest-cov>=5 - pytest test/test_retrieve_utils.py test/agentchat/contrib/retrievechat test/agentchat/contrib/vectordb --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - AgentEvalTest: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ["3.10"] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for AgentEval - run: | - pip install -e . - - name: Coverage - run: | - pytest test/agentchat/contrib/agent_eval/ --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - GPTAssistantAgent: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.10"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for GPTAssistantAgent - run: | - pip install -e . 
- - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/test_gpt_assistant.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - TeachableAgent: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.11"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Teachability - run: | - pip install -e .[teachable] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/capabilities/test_teachable_agent.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - WebSurfer: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.12"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for WebSurfer - run: | - pip install -e .[websurfer] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/browser_utils test/agentchat/contrib/test_web_surfer.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - LMMTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.12"] - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for LMM - run: | - pip install -e .[lmm] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/test_img_utils.py test/agentchat/contrib/test_lmm.py test/agentchat/contrib/test_llava.py test/agentchat/contrib/capabilities/test_vision_capability.py --skip-openai - - name: Image Gen Coverage - if: ${{ matrix.os != 'windows-2019' && matrix.python-version != '3.12' }} - run: | - pytest test/agentchat/contrib/capabilities/test_image_generation_capability.py --skip-openai - - 
name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - GeminiTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Gemini - run: | - pip install -e .[gemini,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_gemini.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - TransformMessages: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.11"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Transform Messages - run: | - pip install -e '.[long-context]' - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/agentchat/contrib/capabilities/test_transform_messages.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittest - - LlamaIndexAgent: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.11"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for LlamaIndexConverableAgent - run: | - pip install -e . 
- pip install llama-index - pip install llama-index-llms-openai - - name: Coverage - run: | - pytest test/agentchat/contrib/test_llamaindex_conversable_agent.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - AnthropicTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: ["ubuntu-latest", "windows-latest", "macos-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12"] - - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - - name: Install packages and dependencies for Anthropic - run: | - pip install -e .[test] - pip install -e .[anthropic] - - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - - name: Coverage - run: | - pytest test/oai/test_anthropic.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - MistralTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Mistral - run: | - pip install -e .[mistral,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_mistral.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - TogetherTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Together - run: | - pip install -e .[together,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_together.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - GroqTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, 
windows-2019] - python-version: ["3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Groq - run: | - pip install -e .[groq,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_groq.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - CohereTest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Cohere - run: | - pip install -e .[cohere,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_cohere.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests - - BedrockTest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install pytest-cov>=5 - - name: Install packages and dependencies for Amazon Bedrock - run: | - pip install -e .[boto3,test] - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Coverage - run: | - pytest test/oai/test_bedrock.py --skip-openai - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests diff --git a/.github/workflows/deploy-website.yml b/.github/workflows/deploy-website.yml deleted file mode 100644 index 2f2ba4d473f3..000000000000 --- a/.github/workflows/deploy-website.yml +++ /dev/null @@ -1,123 +0,0 @@ -name: docs - -on: - pull_request: - branches: [main] - path: - - "autogen/*" - - "website/*" - - ".github/workflows/deploy-website.yml" - push: - branches: [main] - path: - - "autogen/*" - - "website/*" - - ".github/workflows/deploy-website.yml" - workflow_dispatch: - merge_group: - types: [checks_requested] -permissions: - id-token: write - pages: write -jobs: - checks: - if: 
github.event_name != 'push' - runs-on: ubuntu-latest - defaults: - run: - working-directory: website - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - uses: actions/setup-node@v4 - with: - node-version: 18.x - - name: setup python - uses: actions/setup-python@v5 - with: - python-version: "3.8" - - name: pydoc-markdown install - run: | - python -m pip install --upgrade pip - pip install pydoc-markdown pyyaml termcolor - # Pin databind packages as version 4.5.0 is not compatible with pydoc-markdown. - pip install databind.core==4.4.2 databind.json==4.4.2 - - name: pydoc-markdown run - run: | - pydoc-markdown - - name: quarto install - working-directory: ${{ runner.temp }} - run: | - wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-amd64.tar.gz - tar -xzf quarto-1.5.23-linux-amd64.tar.gz - echo "$(pwd)/quarto-1.5.23/bin/" >> $GITHUB_PATH - - name: Process notebooks - run: | - python process_notebooks.py render - - name: Test Build - run: | - if [ -e yarn.lock ]; then - yarn install --frozen-lockfile --ignore-engines - yarn build - elif [ -e package-lock.json ]; then - npm ci - npm run build - else - npm i --legacy-peer-deps - npm run build - fi - gh-release: - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - defaults: - run: - working-directory: website - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - uses: actions/setup-node@v4 - with: - node-version: 18.x - - name: setup python - uses: actions/setup-python@v5 - with: - python-version: "3.8" - - name: pydoc-markdown install - run: | - python -m pip install --upgrade pip - pip install pydoc-markdown pyyaml termcolor - # Pin databind packages as version 4.5.0 is not compatible with pydoc-markdown. - pip install databind.core==4.4.2 databind.json==4.4.2 - - name: pydoc-markdown run - run: | - pydoc-markdown - - name: quarto install - working-directory: ${{ runner.temp }} - run: | - wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.5.23/quarto-1.5.23-linux-amd64.tar.gz - tar -xzf quarto-1.5.23-linux-amd64.tar.gz - echo "$(pwd)/quarto-1.5.23/bin/" >> $GITHUB_PATH - - name: Process notebooks - run: | - python process_notebooks.py render - - name: Build website - run: | - if [ -e yarn.lock ]; then - yarn install --frozen-lockfile --ignore-engines - yarn build - elif [ -e package-lock.json ]; then - npm ci - npm run build - else - npm i --legacy-peer-deps - npm run build - fi - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: "website/build" - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000000..04b3341fdc19 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,48 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Docs + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - run: | + uv sync --locked + source .venv/bin/activate + poe --directory ./packages/autogen-core docs-build + working-directory: ./python + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: "python/packages/autogen-core/docs/build" + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/dotnet-build-test-packages.yml b/.github/workflows/dotnet-build-test-packages.yml new file mode 100644 index 000000000000..faaa0e3edf53 --- /dev/null +++ b/.github/workflows/dotnet-build-test-packages.yml @@ -0,0 +1,31 @@ +name: '[AutoGen nuget] Build and test' + +on: + workflow_call: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + defaults: + run: + working-directory: dotnet + steps: + - uses: actions/checkout@v4 + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: 8.0.x + - name: Install .NET Aspire workload + run: dotnet workload install aspire + - name: Restore dependencies + run: dotnet restore AutoGen.sln + - name: Build + run: dotnet build AutoGen.sln + - name: Test + run: dotnet test --no-build --verbosity normal AutoGen.sln \ No newline at end of file diff --git a/.github/workflows/dotnet-publish-nuget.yml b/.github/workflows/dotnet-publish-nuget.yml new file mode 100644 index 000000000000..268700c60ad3 --- /dev/null +++ b/.github/workflows/dotnet-publish-nuget.yml @@ -0,0 +1,35 @@ +name: '[AutoGen nuget] Publish nuget package' + +on: + workflow_call: + inputs: + path: + required: true + type: string + version-prefix: + required: true + type: string + version-suffix: + required: true + type: string + secrets: + NUGET_API_KEY: + required: true + + # +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: 8.0.x + - name: Publish package + run: | + cd dotnet/src/${{ inputs.path }} + dotnet pack -o packages /p:PackageVersion=${{ inputs.version-prefix }}-alpha.${{ inputs.version-suffix }} + dotnet nuget push $GITHUB_WORKSPACE/dotnet/src/${{ inputs.path }}/packages/*.nupkg --api-key ${{ secrets.NUGET_API_KEY }} --source https://api.nuget.org/v3/index.json \ No newline at end of file diff --git a/.github/workflows/dotnet-publish-packages.yml b/.github/workflows/dotnet-publish-packages.yml new file mode 100644 index 000000000000..07d03baeedf4 --- /dev/null +++ b/.github/workflows/dotnet-publish-packages.yml @@ -0,0 +1,26 @@ +name: '[AutoGen nuget] Publish all packages' + +on: + push: + paths: + - 'dotnet/src/**' + - 'protos/**' + workflow_dispatch: + +permissions: + id-token: write + contents: read +jobs: + init: + runs-on: ubuntu-latest + outputs: + version_suffix: ${{ steps.set_version_suffix.outputs.VERSION_SUFFIX }} + version_prefix: '0.0.1' + steps: + - name: Set version suffix + id: set_version_suffix + run: echo "VERSION_SUFFIX=$(date +%Y%m%d%H%M%S)" >> "$GITHUB_OUTPUT" + build-test-packages: + uses: ./.github/workflows/dotnet-build-test-packages.yml + needs: init + secrets: inherit diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml 
deleted file mode 100644 index a9ab8e9e0c5f..000000000000 --- a/.github/workflows/openai.yml +++ /dev/null @@ -1,84 +0,0 @@ -# This workflow will install Python dependencies and run tests with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: OpenAI - -on: - pull_request: - branches: ["main"] - paths: - - "autogen/**" - - "test/**" - - "notebook/agentchat_auto_feedback_from_code_execution.ipynb" - - "notebook/agentchat_function_call.ipynb" - - "notebook/agentchat_groupchat_finite_state_machine.ipynb" - - ".github/workflows/openai.yml" -permissions: - {} - # actions: read - # checks: read - # contents: read - # deployments: read - -jobs: - test: - strategy: - matrix: - os: [ubuntu-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] - runs-on: ${{ matrix.os }} - environment: openai1 - services: - redis: - image: redis - ports: - - 6379:6379 - options: --entrypoint redis-server - steps: - # checkout to pr branch - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies - run: | - docker --version - python -m pip install --upgrade pip wheel - pip install -e. - python -c "import autogen" - pip install pytest-cov>=5 pytest-asyncio - - name: Install packages for test when needed - if: matrix.python-version == '3.9' - run: | - pip install docker - pip install -e .[redis] - - name: Coverage - if: matrix.python-version == '3.9' - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pytest test --ignore=test/agentchat/contrib --durations=10 --durations-min=1.0 - - name: Coverage and check notebook outputs - if: matrix.python-version != '3.9' - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} - WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }} - OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} - run: | - pip install nbconvert nbformat ipykernel - pytest test/test_notebook.py --durations=10 --durations-min=1.0 - cat "$(pwd)/test/executed_openai_notebook_output.txt" - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - flags: unittests diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index 8404de61154d..000000000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Code formatting - -# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows -on: # Trigger the workflow on pull request or merge - pull_request: - merge_group: - types: [checks_requested] - -defaults: - run: - shell: bash -permissions: {} - # actions: read - # checks: read - # contents: read - # deployments: read -jobs: - - pre-commit-check: - runs-on: ubuntu-latest - env: - SKIP: "mypy" - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - name: Set $PY environment variable - run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV - - uses: actions/cache@v4 - with: - path: ~/.cache/pre-commit - 
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} - - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index f2967c13f5f0..000000000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,56 +0,0 @@ -# This workflows will build and upload a Python Package using Twine when a release is published -# Conda-forge bot will pick up new PyPI version and automatically create new version -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: python-package - -on: - release: - types: [published] -permissions: {} - # actions: read - # checks: read - # contents: read - # deployments: read -jobs: - deploy: - strategy: - matrix: - os: ['ubuntu-latest'] - python-version: [3.10] - runs-on: ${{ matrix.os }} - environment: package - steps: - - name: Checkout - uses: actions/checkout@v4 - # - name: Cache conda - # uses: actions/cache@v4 - # with: - # path: ~/conda_pkgs_dir - # key: conda-${{ matrix.os }}-python-${{ matrix.python-version }}-${{ hashFiles('environment.yml') }} - # - name: Setup Miniconda - # uses: conda-incubator/setup-miniconda@v2 - # with: - # auto-update-conda: true - # auto-activate-base: false - # activate-environment: hcrystalball - # python-version: ${{ matrix.python-version }} - # use-only-tar-bz2: true - - name: Install from source - # This is required for the pre-commit tests - shell: pwsh - run: pip install . - # - name: Conda list - # shell: pwsh - # run: conda list - - name: Build - shell: pwsh - run: | - pip install twine - python setup.py sdist bdist_wheel - - name: Publish to PyPI - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - shell: pwsh - run: twine upload dist/* diff --git a/.github/workflows/samples-tools-tests.yml b/.github/workflows/samples-tools-tests.yml deleted file mode 100644 index e774e5cb0b1f..000000000000 --- a/.github/workflows/samples-tools-tests.yml +++ /dev/null @@ -1,49 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: SamplesToolsTests - -on: - pull_request: - branches: ["main"] - paths: - - "autogen/**" - - "samples/tools/**" - - ".github/workflows/samples-tools-tests.yml" - - "setup.py" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} -permissions: {} -jobs: - SamplesToolsFineTuningTests: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest] - python-version: ["3.9", "3.10", "3.11"] - exclude: - - os: macos-latest - python-version: "3.9" - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install packages and dependencies for all tests - run: | - python -m pip install --upgrade pip wheel - pip install -e . 
- pip install pytest-cov>=5 - - name: Set AUTOGEN_USE_DOCKER based on OS - shell: bash - run: | - if [[ ${{ matrix.os }} != ubuntu-latest ]]; then - echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV - fi - - name: Test finetuning tools - run: | - pytest samples/tools/finetuning/tests/ diff --git a/.github/workflows/type-check.yml b/.github/workflows/type-check.yml deleted file mode 100644 index c66fb6ad7b10..000000000000 --- a/.github/workflows/type-check.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Type check -# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows -on: # Trigger the workflow on pull request or merge - pull_request: - merge_group: - types: [checks_requested] -defaults: - run: - shell: bash -permissions: {} -jobs: - type-check: - strategy: - fail-fast: true - matrix: - version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.version }} - # All additional modules should be defined in setup.py - - run: pip install ".[types]" - # Any additional configuration should be defined in pyproject.toml - - run: | - mypy diff --git a/.gitignore b/.gitignore index 4c925f739ec6..fa3abc2ae986 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ node_modules/ # Project /.vs +# Visual Studio 2015/2017 cache/options directory +.vs/ + .vscode # Log files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 7cbe7df31cb1..000000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -default_language_version: - python: python3 -exclude: 'dotnet' -ci: - autofix_prs: true - autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' - autoupdate_schedule: 'monthly' - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - hooks: - - id: check-added-large-files - - id: check-ast - - id: check-yaml - - id: check-toml - - id: check-json - - id: check-byte-order-marker - exclude: .gitignore - - id: check-merge-conflict - - id: detect-private-key - - id: trailing-whitespace - - id: end-of-file-fixer - - id: no-commit-to-branch - - repo: https://github.com/psf/black - rev: 24.4.2 - hooks: - - id: black - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.8 - hooks: - - id: ruff - types_or: [ python, pyi, jupyter ] - args: ["--fix", "--ignore=E402"] - exclude: notebook/agentchat_databricks_dbrx.ipynb - - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 - hooks: - - id: codespell - args: ["-L", "ans,linar,nam,tread,ot,assertIn,dependin,socio-economic"] - exclude: | - (?x)^( - pyproject.toml | - website/static/img/ag.svg | - website/yarn.lock | - website/docs/tutorial/code-executors.ipynb | - website/docs/topics/code-execution/custom-executor.ipynb | - website/docs/topics/non-openai-models/cloud-gemini.ipynb | - notebook/.* | - test/browser_utils/test_files/.* - )$ - # See https://jaredkhan.com/blog/mypy-pre-commit - - repo: local - hooks: - - id: mypy - name: mypy - entry: "./scripts/pre-commit-mypy-run.sh" - language: python - # use your preferred Python version - # language_version: python3.8 - additional_dependencies: [] - types: [python] - # use require_serial so that script - # is only called once per commit - require_serial: true - # Print the number of files as a sanity-check - verbose: true - - repo: https://github.com/nbQA-dev/nbQA - rev: 1.8.5 - hooks: - - id: nbqa-black diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 
100644 index 000000000000..411a8da3f3c6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# Contributing + +The project welcomes contributions from developers and organizations worldwide. Our goal is to foster a collaborative and inclusive community where diverse perspectives and expertise can drive innovation and enhance the project's capabilities. Whether you are an individual contributor or represent an organization, we invite you to join us in shaping the future of this project. Possible contributions include but are not limited to: + +- Pushing patches. +- Code review of pull requests. +- Documentation, examples, and test cases. +- Readability improvements, e.g., improvements to docstrings and comments. +- Community participation in [issues](https://github.com/microsoft/autogen/issues), [discussions](https://github.com/microsoft/autogen/discussions), and [twitter](https://twitter.com/pyautogen). +- Tutorials, blog posts, and talks that promote the project. +- Sharing application scenarios and/or related research. + +Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +If you are new to GitHub, [here](https://help.github.com/categories/collaborating-with-issues-and-pull-requests/) is a detailed help source on getting involved with development on GitHub. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions +provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Roadmaps + +To see what we are working on and what we plan to work on, please check our +[Roadmap Issues](https://aka.ms/autogen-roadmap). + +## Becoming a Reviewer + +There is currently no formal reviewer solicitation process. Current reviewers identify reviewers from active contributors. diff --git a/OAI_CONFIG_LIST_sample b/OAI_CONFIG_LIST_sample deleted file mode 100644 index c1711acd7c65..000000000000 --- a/OAI_CONFIG_LIST_sample +++ /dev/null @@ -1,25 +0,0 @@ -// Please modify the content, remove these four lines of comment and rename this file to OAI_CONFIG_LIST to run the sample code. -// If using pyautogen v0.1.x with Azure OpenAI, please replace "base_url" with "api_base" (line 14 and line 21 below). Use "pip list" to check version of pyautogen installed. -// -// NOTE: This configuration lists GPT-4 as the default model, as this represents our current recommendation, and is known to work well with AutoGen. If you use a model other than GPT-4, you may need to revise various system prompts (especially if using weaker models like GPT-3.5-turbo). Moreover, if you use models other than those hosted by OpenAI or Azure, you may incur additional risks related to alignment and safety. Proceed with caution if updating this default.
-[ - { - "model": "gpt-4", - "api_key": "", - "tags": ["gpt-4", "tool"] - }, - { - "model": "", - "api_key": "", - "base_url": "", - "api_type": "azure", - "api_version": "" - }, - { - "model": "", - "api_key": "", - "base_url": "", - "api_type": "azure", - "api_version": "" - } -] diff --git a/README.md b/README.md index 8595bb60506c..0e70fa99d883 100644 --- a/README.md +++ b/README.md @@ -1,157 +1,51 @@ -
- AutoGen Logo -![Python Version](https://img.shields.io/badge/3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue) [![PyPI version](https://img.shields.io/badge/PyPI-v0.2.34-blue.svg)](https://pypi.org/project/pyautogen/) +[![PyPI version](https://badge.fury.io/py/autogen-agentchat.svg)](https://badge.fury.io/py/autogen-agentchat) [![NuGet version](https://badge.fury.io/nu/AutoGen.Core.svg)](https://badge.fury.io/nu/AutoGen.Core) - -[![Downloads](https://static.pepy.tech/badge/pyautogen/week)](https://pepy.tech/project/pyautogen) -[![Discord](https://img.shields.io/discord/1153072414184452236?logo=discord&style=flat)](https://aka.ms/autogen-dc) - [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40pyautogen)](https://twitter.com/pyautogen)
# AutoGen -AutoGen is an open-source programming framework for building AI agents and facilitating cooperation among multiple agents to solve tasks. AutoGen aims to streamline the development and research of agentic AI, much like PyTorch does for Deep Learning. It offers features such as agents capable of interacting with each other, facilitates the use of various large language models (LLMs) and tool use support, autonomous and human-in-the-loop workflows, and multi-agent conversation patterns. - -> [!IMPORTANT] -> *Note for contributors and users*: [microsoft/autogen](https://aka.ms/autogen-gh) is the official repository of AutoGen project and it is under active development and maintenance under MIT license. We welcome contributions from developers and organizations worldwide. Our goal is to foster a collaborative and inclusive community where diverse perspectives and expertise can drive innovation and enhance the project's capabilities. We acknowledge the invaluable contributions from our existing contributors, as listed in [contributors.md](./CONTRIBUTORS.md). Whether you are an individual contributor or represent an organization, we invite you to join us in shaping the future of this project. For further information please also see [Microsoft open-source contributing guidelines](https://github.com/microsoft/autogen?tab=readme-ov-file#contributing). -> -> -_Maintainers (Sept 6th, 2024)_ - - -![AutoGen Overview](https://github.com/microsoft/autogen/blob/main/website/static/img/autogen_agentchat.png) - -- AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses. -- It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy, - the number of agents, and agent conversation topology. -- It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns. -- AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc. - -AutoGen was created out of collaborative [research](https://microsoft.github.io/autogen/docs/Research) from Microsoft, Penn State University, and the University of Washington. - -

- - ↑ Back to Top ↑ - -

- - - -## News -
- -Expand - -:fire: June 6, 2024: WIRED publishes a new article on AutoGen: [Chatbot Teamwork Makes the AI Dream Work](https://www.wired.com/story/chatbot-teamwork-makes-the-ai-dream-work/) based on interview with [Adam Fourney](https://github.com/afourney). - -:fire: June 4th, 2024: Microsoft Research Forum publishes new update and video on [AutoGen and Complex Tasks](https://www.microsoft.com/en-us/research/video/autogen-update-complex-tasks-and-agents/) presented by [Adam Fourney](https://github.com/afourney). - -:fire: May 29, 2024: DeepLearning.ai launched a new short course [AI Agentic Design Patterns with AutoGen](https://www.deeplearning.ai/short-courses/ai-agentic-design-patterns-with-autogen), made in collaboration with Microsoft and Penn State University, and taught by AutoGen creators [Chi Wang](https://github.com/sonichi) and [Qingyun Wu](https://github.com/qingyun-wu). - -:fire: May 24, 2024: Foundation Capital published an article on [Forbes: The Promise of Multi-Agent AI](https://www.forbes.com/sites/joannechen/2024/05/24/the-promise-of-multi-agent-ai/?sh=2c1e4f454d97) and a video [AI in the Real World Episode 2: Exploring Multi-Agent AI and AutoGen with Chi Wang](https://www.youtube.com/watch?v=RLwyXRVvlNk). - -:fire: May 13, 2024: [The Economist](https://www.economist.com/science-and-technology/2024/05/13/todays-ai-models-are-impressive-teams-of-them-will-be-formidable) published an article about multi-agent systems (MAS) following a January 2024 interview with [Chi Wang](https://github.com/sonichi). - -:fire: May 11, 2024: [AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation](https://openreview.net/pdf?id=uAjxFFing2) received the best paper award at the [ICLR 2024 LLM Agents Workshop](https://llmagents.github.io/). - -:fire: Apr 26, 2024: [AutoGen.NET](https://microsoft.github.io/autogen-for-net/) is available for .NET developers! Thanks [XiaoYun Zhang](https://www.linkedin.com/in/xiaoyun-zhang-1b531013a/) +AutoGen is an open-source framework for building AI agent systems. +It simplifies the creation of event-driven, distributed, scalable, and resilient AI applications. +Using AutoGen, you can quickly build systems where AI agents collaborate +and perform tasks autonomously or with human oversight. -:fire: Apr 17, 2024: Andrew Ng cited AutoGen in [The Batch newsletter](https://www.deeplearning.ai/the-batch/issue-245/) and [What's next for AI agentic workflows](https://youtu.be/sal78ACtGTc?si=JduUzN_1kDnMq0vF) at Sequoia Capital's AI Ascent (Mar 26). +* [Installation](#install) +* [Quickstart](#quickstart) +* [Using AutoGen](#using-autogen) +* [Roadmap](#roadmap) +* [FAQs](#faqs) -:fire: Mar 3, 2024: What's new in AutoGen? 📰[Blog](https://microsoft.github.io/autogen/blog/2024/03/03/AutoGen-Update); 📺[Youtube](https://www.youtube.com/watch?v=j_mtwQiaLGU). +AutoGen streamlines AI development and research, enabling the use of multiple large language models (LLMs), integrated tools, and advanced multi-agent design patterns. +You can develop and test your agent systems locally, then seamlessly deploy to a distributed cloud environment as your needs grow. -:fire: Mar 1, 2024: the first AutoGen multi-agent experiment on the challenging [GAIA](https://huggingface.co/spaces/gaia-benchmark/leaderboard) benchmark achieved the No. 1 accuracy in all the three levels. +:fire: **September 18, 2024**: AutoGen 0.5 is a new architecture for AutoGen! 
This new version is in preview release and is being developed in the open over the next several weeks as we refine the documentation and samples and work with our users on evolving this new version. 🚀 - +- AutoGen 0.5 represents a rearchitecture of the system to make it more scalable, resilient, and interoperable across multiple programming languages. +- It is designed to be more modular and extensible, with a focus on enabling a wide range of applications and use cases. +- This redesign features full .NET and Python libraries, with more languages to come. Agents may be written in different languages and interoperate with one another over a common messaging protocol using the CloudEvents standard. -:tada: Dec 31, 2023: [AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework](https://arxiv.org/abs/2308.08155) is selected by [TheSequence: My Five Favorite AI Papers of 2023](https://thesequence.substack.com/p/my-five-favorite-ai-papers-of-2023). +## Install - +### Python - - -:tada: Nov 8, 2023: AutoGen is selected into [Open100: Top 100 Open Source achievements](https://www.benchcouncil.org/evaluation/opencs/annual.html) 35 days after spinoff from [FLAML](https://github.com/microsoft/FLAML). - - - - - - - - - -:tada: Mar 29, 2023: AutoGen is first created in [FLAML](https://github.com/microsoft/FLAML). - - - -
- -## Roadmaps - -To see what we are working on and what we plan to work on, please check our -[Roadmap Issues](https://aka.ms/autogen-roadmap). - -

- - ↑ Back to Top ↑ - -

- -## Quickstart -The easiest way to start playing is -1. Click below to use the GitHub Codespace - - [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/autogen?quickstart=1) - - 2. Copy OAI_CONFIG_LIST_sample to ./notebook folder, name to OAI_CONFIG_LIST, and set the correct configuration. - 3. Start playing with the notebooks! - -*NOTE*: OAI_CONFIG_LIST_sample lists GPT-4 as the default model, as this represents our current recommendation, and is known to work well with AutoGen. If you use a model other than GPT-4, you may need to revise various system prompts (especially if using weaker models like GPT-3.5-turbo). Moreover, if you use models other than those hosted by OpenAI or Azure, you may incur additional risks related to alignment and safety. Proceed with caution if updating this default. - -

- - ↑ Back to Top ↑ - -

- -## [Installation](https://microsoft.github.io/autogen/docs/Installation) -### Option 1. Install and Run AutoGen in Docker - -Find detailed instructions for users [here](https://microsoft.github.io/autogen/docs/installation/Docker#step-1-install-docker), and for developers [here](https://microsoft.github.io/autogen/docs/Contribute#docker-for-development). - -### Option 2. Install AutoGen Locally - -AutoGen requires **Python version >= 3.8, < 3.13**. It can be installed from pip: +AutoGen requires Python 3.10+. It has multiple packages. ```bash -pip install pyautogen +pip install autogen-agentchat autogen-core autogen-ext ``` -Minimal dependencies are installed without extra options. You can install extra options based on the feature you need. - - - -Find more options in [Installation](https://microsoft.github.io/autogen/docs/Installation#option-2-install-autogen-locally-using-virtual-environment). - - +See [packages](https://microsoft.github.io/agnext/packages) for more information about available packages. -Even if you are installing and running AutoGen locally outside of docker, the recommendation and default behavior of agents is to perform [code execution](https://microsoft.github.io/autogen/docs/FAQ/#code-execution) in docker. Find more instructions and how to change the default behaviour [here](https://microsoft.github.io/autogen/docs/Installation#code-execution-with-docker-(default)). +### .NET -For LLM inference configurations, check the [FAQs](https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints). +(Add .NET installation instruction here)
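A quick way to confirm that the Python packages from the `pip install` command above resolved correctly (a minimal sketch; the module names simply mirror the three package names used by the Python quickstart below, nothing else is assumed about the preview API):

```python
# Sanity check for the Python install: import the three AutoGen packages
# installed above and print their names to confirm they are importable.
import autogen_core
import autogen_agentchat
import autogen_ext

for module in (autogen_core, autogen_agentchat, autogen_ext):
    print(f"imported {module.__name__}")
```

If any of these imports fails, re-check that the install ran inside the same environment you use to launch Python.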

@@ -159,44 +53,34 @@ For LLM inference configurations, check the [FAQs](https://microsoft.github.io/a

-## Multi-Agent Conversation Framework - -Autogen enables the next-gen LLM applications with a generic [multi-agent conversation](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) framework. It offers customizable and conversable agents that integrate LLMs, tools, and humans. -By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code. - -Features of this use case include: - -- **Multi-agent conversations**: AutoGen agents can communicate with each other to solve tasks. This allows for more complex and sophisticated applications than would be possible with a single LLM. -- **Customization**: AutoGen agents can be customized to meet the specific needs of an application. This includes the ability to choose the LLMs to use, the types of human input to allow, and the tools to employ. -- **Human participation**: AutoGen seamlessly allows human participation. This means that humans can provide input and feedback to the agents as needed. +## Quickstart -For [example](https://github.com/microsoft/autogen/blob/main/test/twoagent.py), +### Python ```python -from autogen import AssistantAgent, UserProxyAgent, config_list_from_json -# Load LLM inference endpoints from an env variable or a file -# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints -# and OAI_CONFIG_LIST_sample -config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") -# You can also set config_list directly as a list, for example, config_list = [{'model': 'gpt-4', 'api_key': ''},] -assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}) -user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding", "use_docker": False}) # IMPORTANT: set to True to run code in docker, recommended -user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stock price change YTD.") -# This initiates an automated chat between the two agents to solve the task -``` +import asyncio +import os +from autogen_core.components.models import chat_completion_client_from_json as client_from_json +from autogen_agentchat import CodingAssistant, CodeExecutorAgent, RoundRobinTeam, console_output +from autogen_ext.code_executors import DockerCommandLineCodeExecutor -This example can be run with +async def main(): + chat_completion_client = client_from_json(os.environ["MODEL_CLIENT_JSON"]) + async with DockerCommandLineCodeExecutor(work_dir="coding") as executor: + assistant = CodingAssistant("assistant", chat_completion_client=chat_completion_client) + executor_agent = CodeExecutorAgent( + "code_executor", executor=executor, + ) + team = RoundRobinTeam(agents=[assistant, executor_agent]) + result = await team.run("Plot a chart of NVDA and TESLA stock price change YTD. Save the plot to a file called plot.png", output=console_output) -```python -python test/twoagent.py +if __name__ == "__main__": + asyncio.run(main()) ``` -After the repo is cloned. -The figure below shows an example conversation flow with AutoGen. -![Agent Chat Example](https://github.com/microsoft/autogen/blob/main/website/static/img/chat_example.png) +### C# -Alternatively, the [sample code](https://github.com/microsoft/autogen/blob/main/samples/simple_chat.py) here allows a user to chat with an AutoGen agent in ChatGPT style. -Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples#automated-multi-agent-chat) for this feature. 
+(Add .NET quickstart here)

@@ -204,28 +88,24 @@ Please find more [code examples](https://microsoft.github.io/autogen/docs/Exampl

-## Enhanced LLM Inferences +## Using AutoGen -Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification) with powerful functionalities like caching, error handling, multi-config inference and templating. +The version you are looking at is AutoGen 0.5, which introduces a new architecture. +The best place to start is the [documentation](https://microsoft.github.io/agnext). - +The current stable release is AutoGen 0.2. +You can find the documentation [here](https://microsoft.github.io/autogen).

@@ -233,19 +113,14 @@ Please find more [code examples](https://microsoft.github.io/autogen/docs/Exampl

-## Documentation - -You can find detailed documentation about AutoGen [here](https://microsoft.github.io/autogen/). - -In addition, you can find: - -- [Research](https://microsoft.github.io/autogen/docs/Research), [blogposts](https://microsoft.github.io/autogen/blog) around AutoGen, and [Transparency FAQs](https://github.com/microsoft/autogen/blob/main/TRANSPARENCY_FAQS.md) - -- [Discord](https://aka.ms/autogen-dc) +## Roadmap -- [Contributing guide](https://microsoft.github.io/autogen/docs/Contribute) - -- [Roadmap](https://github.com/orgs/microsoft/projects/989/views/3) +- [AutoGen 0.2] - This is the current stable release of AutoGen. We will continue to accept bug fixes and minor enhancements to this version. +- [AutoGen 0.5] - This is the first release of the new event-driven architecture. This release is still in preview. We will be focusing on the stability of the interfaces, documentation, tutorials, samples, and a collection of base agents from which you can inherit. We are also working on compatibility interfaces for those familiar with prior versions of AutoGen. +- [future] - We are excited to work with our community to define the future of AutoGen. We are looking for feedback and contributions to help shape it. Here are some major planned items: + - [ ] Add support for more languages + - [ ] Add support for more base agents and patterns + - [ ] Add compatibility with Bot Framework Activity Protocol

@@ -253,105 +128,46 @@ In addition, you can find:

-## Related Papers +## FAQs -[AutoGen Studio](https://www.microsoft.com/en-us/research/publication/autogen-studio-a-no-code-developer-tool-for-building-and-debugging-multi-agent-systems/) +Q: What is AutoGen 0.5? -``` -@inproceedings{dibia2024studio, - title={AutoGen Studio: A No-Code Developer Tool for Building and Debugging Multi-Agent Systems}, - author={Victor Dibia and Jingya Chen and Gagan Bansal and Suff Syed and Adam Fourney and Erkang (Eric) Zhu and Chi Wang and Saleema Amershi}, - year={2024}, - booktitle={Pre-Print} -} -``` +AutoGen 0.5 is a rewrite of AutoGen from the ground up to create a more robust, scalable, and easier-to-use cross-language library for building AI agents. -[AutoGen](https://aka.ms/autogen-pdf) +Q: Why these changes? -``` -@inproceedings{wu2023autogen, - title={AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation Framework}, - author={Qingyun Wu and Gagan Bansal and Jieyu Zhang and Yiran Wu and Beibin Li and Erkang Zhu and Li Jiang and Xiaoyun Zhang and Shaokun Zhang and Jiale Liu and Ahmed Hassan Awadallah and Ryen W White and Doug Burger and Chi Wang}, - year={2024}, - booktitle={COLM}, -} -``` +We listened to our AutoGen users, learned from what was working, and adapted to fix what wasn't. We brought together wide-ranging teams working on many different types of AI agents and collaborated to design an improved framework with a more flexible programming model and better scalability. -[EcoOptiGen](https://arxiv.org/abs/2303.04673) +Q: Who should use it? -``` -@inproceedings{wang2023EcoOptiGen, - title={Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference}, - author={Chi Wang and Susan Xueqing Liu and Ahmed H. Awadallah}, - year={2023}, - booktitle={AutoML'23}, -} -``` +This code is still experimental. We encourage adventurous early adopters to try it out, give us feedback, and contribute. -[MathChat](https://arxiv.org/abs/2306.01337) +Q: I'm using AutoGen 0.2, should I upgrade? -``` -@inproceedings{wu2023empirical, - title={An Empirical Study on Challenging Math Problem Solving with GPT-4}, - author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang}, - year={2023}, - booktitle={ArXiv preprint arXiv:2306.01337}, -} -``` +If you consider yourself an early adopter, are comfortable making some changes to your code, and are willing to try it out, then yes. -[AgentOptimizer](https://arxiv.org/pdf/2402.11359) +Q: How do I still use AutoGen 0.2? -``` -@article{zhang2024training, - title={Training Language Model Agents without Modifying Language Models}, - author={Zhang, Shaokun and Zhang, Jieyu and Liu, Jiale and Song, Linxin and Wang, Chi and Krishna, Ranjay and Wu, Qingyun}, - journal={ICML'24}, - year={2024} -} -``` +Just keep doing what you were doing before. -[StateFlow](https://arxiv.org/abs/2403.11322) -``` -@article{wu2024stateflow, - title={StateFlow: Enhancing LLM Task-Solving through State-Driven Workflows}, - author={Wu, Yiran and Yue, Tianwei and Zhang, Shaokun and Wang, Chi and Wu, Qingyun}, - journal={arXiv preprint arXiv:2403.11322}, - year={2024} -} -``` +Q: How do I migrate? -

- - ↑ Back to Top ↑ - -

- -## Contributing +We are working on a migration guide. Until then, see the [documentation](http://microsoft.github.io/agnext). -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit . +Q: What is happening next? When will this release be ready? -If you are new to GitHub, [here](https://opensource.guide/how-to-contribute/#how-to-submit-a-contribution) is a detailed help source on getting involved with development on GitHub. +We are still working on improving the documentation and samples and enhancing the code. We will prepare a release announcement when these things are completed in the next few weeks. -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +Q: What is the history of this project? -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +The rearchitecture of AutoGen came from multiple Microsoft teams coming together to build the next generation of the AI agent framework, merging ideas from several predecessor projects. +The team decided to bring this work to OSS as an evolution of AutoGen in September 2024. +Prior to that, AutoGen had been developed and maintained by [a community of contributors](CONTRIBUTORS.md). -

- - ↑ Back to Top ↑ - -

+Q: What is the official channel for support? -## Contributors Wall - - - +Use GitHub [Issues](https://github.com/microsoft/agnext/issues) for bug reports and feature requests. +Use GitHub [Discussions](https://github.com/microsoft/agnext/discussions) for general questions and discussions.

@@ -359,7 +175,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio

-# Legal Notices +## Legal Notices Microsoft and any contributors grant you a license to the Microsoft documentation and other content in this repository under the [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/legalcode), @@ -380,4 +196,4 @@ or trademarks, whether by implication, estoppel, or otherwise. ↑ Back to Top ↑ -

+

\ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md index 9dc631647210..3ec12044f6d8 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,20 +1,20 @@ - + ## Security -Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). -If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. ## Reporting Security Issues **Please do not report security vulnerabilities through public GitHub issues.** -Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). -If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). -You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: @@ -28,7 +28,7 @@ Please include the requested information listed below (as much as you can provid This information will help us triage your report more quickly. -If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 
+If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. ## Preferred Languages @@ -36,6 +36,6 @@ We prefer all communications to be in English. ## Policy -Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). - + \ No newline at end of file diff --git a/SUPPORT.md b/SUPPORT.md new file mode 100644 index 000000000000..eaf439aecca0 --- /dev/null +++ b/SUPPORT.md @@ -0,0 +1,25 @@ +# TODO: The maintainer of this repo has not yet edited this file + +**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? + +- **No CSS support:** Fill out this template with information about how to file issues and get help. +- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. +- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. + +*Then remove this first heading from this SUPPORT.MD file before publishing your repo.* + +# Support + +## How to file issues and get help + +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or +feature request as a new Issue. + +For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE +FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER +CHANNEL. WHERE WILL YOU HELP PEOPLE?**. + +## Microsoft Support Policy + +Support for this **PROJECT or PRODUCT** is limited to the resources listed above. diff --git a/TRANSPARENCY_FAQS.md b/TRANSPARENCY_FAQS.md index addf29d8b8d3..c88ddf622bfc 100644 --- a/TRANSPARENCY_FAQS.md +++ b/TRANSPARENCY_FAQS.md @@ -28,11 +28,10 @@ AutoGen is a generic infrastructure that can be used in multiple scenarios. The While AutoGen automates LLM workflows, decisions about how to use specific LLM outputs should always have a human in the loop. For example, you should not use AutoGen to automatically post LLM generated content to social media. ## How was AutoGen evaluated? What metrics are used to measure performance? -- Current version of AutoGen was evaluated on six applications to illustrate its potential in simplifying the development of high-performance multi-agent applications. These applications are selected based on their real-world relevance, problem difficulty and problem solving capabilities enabled by AutoGen, and innovative potential. -- These applications involve using AutoGen to solve math problems, question answering, decision making in text world environments, supply chain optimization, etc. For each of these domains AutoGen was evaluated on various success based metrics (i.e., how often the AutoGen based implementation solved the task). And, in some cases, AutoGen based approach was also evaluated on implementation efficiency (e.g., to track reductions in developer effort to build). More details can be found at: https://aka.ms/AutoGen/TechReport -- The team has conducted tests where a “red” agent attempts to get the default AutoGen assistant to break from its alignment and guardrails. 
The team has observed that out of 70 attempts to break guardrails, only 1 was successful in producing text that would have been flagged as problematic by Azure OpenAI filters. The team has not observed any evidence that AutoGen (or GPT models as hosted by OpenAI or Azure) can produce novel code exploits or jailbreak prompts, since direct prompts to “be a hacker”, “write exploits”, or “produce a phishing email” are refused by existing filters. -- We also evaluated [a team of AutoGen agents](https://github.com/microsoft/autogen/tree/gaia_multiagent_v01_march_1st/samples/tools/autogenbench/scenarios/GAIA/Templates/Orchestrator) on the [GAIA benchmarks](https://arxiv.org/abs/2311.12983), and got [SOTA results](https://huggingface.co/spaces/gaia-benchmark/leaderboard) as of - March 1, 2024. +- We performed testing for Responsible AI harms, e.g., cross-domain prompt injection, and all tests returned the expected results with no signs of jailbreak. +- AutoGen was evaluated on six applications to illustrate its potential in simplifying the development of high-performance multi-agent applications. These applications are selected based on their real-world relevance, problem difficulty and problem-solving capabilities enabled by AutoGen, and innovative potential. These applications involve using AutoGen to solve math problems, question answering, decision making in text world environments, supply chain optimization, etc. For each of these domains AutoGen was evaluated on various success-based metrics (i.e., how often the AutoGen based implementation solved the task). And, in some cases, AutoGen based approach was also evaluated on implementation efficiency (e.g., to track reductions in developer effort to build). More details can be found at: https://aka.ms/autogen-pdf. +- We evaluated [a team of AutoGen agents](https://github.com/microsoft/autogen/tree/gaia_multiagent_v01_march_1st/samples/tools/autogenbench/scenarios/GAIA/Templates/Orchestrator) on the [GAIA benchmark](https://arxiv.org/abs/2311.12983), and got [SOTA results](https://huggingface.co/spaces/gaia-benchmark/leaderboard) as of March 1, 2024. + ## What are the limitations of AutoGen? How can users minimize the impact of AutoGen’s limitations when using the system? AutoGen relies on existing LLMs. Experimenting with AutoGen would retain common limitations of large language models; including: @@ -53,7 +52,8 @@ Additionally, AutoGen’s multi-agent framework may amplify or introduce additio ## What operational factors and settings allow for effective and responsible use of AutoGen? - Code execution: AutoGen recommends using docker containers so that code execution can happen in a safer manner. Users can use function call instead of free-form code to execute pre-defined functions only. That helps increase the reliability and safety. Users can customize the code execution environment to tailor to their requirements. -- Human involvement: AutoGen prioritizes human involvement in multi agent conversation. The overseers can step in to give feedback to agents and steer them in the correct direction. By default, users get chance to confirm before code is executed. +- Human involvement: AutoGen prioritizes human involvement in multi agent conversation. The overseers can step in to give feedback to agents and steer them in the correct direction. In all examples, users confirm code before it is executed. - Agent modularity: Modularity allows agents to have different levels of information access.
Additional agents can assume roles that help keep other agents in check. For example, one can easily add a dedicated agent to play the role of safeguard. -- LLMs: Users can choose the LLM that is optimized for responsible use. The default LLM is GPT-4 which inherits the existing RAI mechanisms and filters from the LLM provider. Caching is enabled by default to increase reliability and control cost. We encourage developers to review [OpenAI’s Usage policies](https://openai.com/policies/usage-policies) and [Azure OpenAI’s Code of Conduct](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/code-of-conduct) when using GPT-4. -- Multi-agent setup: When using auto replies, the users can limit the number of auto replies, termination conditions etc. in the settings to increase reliability. +- LLMs: Users can choose the LLM that is optimized for responsible use. The default LLM in all examples is GPT-4o, which inherits the existing RAI mechanisms and filters from the LLM provider. We encourage developers to review [OpenAI’s Usage policies](https://openai.com/policies/usage-policies) and [Azure OpenAI’s Code of Conduct](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/code-of-conduct) when using GPT-4o. We encourage developers experimenting with agents to add content moderation and/or use safety metaprompts, as they would when using LLMs. + + diff --git a/autogen/__init__.py b/autogen/__init__.py deleted file mode 100644 index 02f956c4bcf6..000000000000 --- a/autogen/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import logging - -from .agentchat import * -from .code_utils import DEFAULT_MODEL, FAST_MODEL -from .exception_utils import * -from .oai import * -from .version import __version__ - -# Set the root logger.
-logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) diff --git a/autogen/_pydantic.py b/autogen/_pydantic.py deleted file mode 100644 index c463dbb38754..000000000000 --- a/autogen/_pydantic.py +++ /dev/null @@ -1,110 +0,0 @@ -from typing import Any, Dict, Optional, Tuple, Type, Union, get_args - -from pydantic import BaseModel -from pydantic.version import VERSION as PYDANTIC_VERSION -from typing_extensions import get_origin - -__all__ = ("JsonSchemaValue", "model_dump", "model_dump_json", "type2schema", "evaluate_forwardref") - -PYDANTIC_V1 = PYDANTIC_VERSION.startswith("1.") - -if not PYDANTIC_V1: - from pydantic import TypeAdapter - from pydantic._internal._typing_extra import eval_type_lenient as evaluate_forwardref - from pydantic.json_schema import JsonSchemaValue - - def type2schema(t: Any) -> JsonSchemaValue: - """Convert a type to a JSON schema - - Args: - t (Type): The type to convert - - Returns: - JsonSchemaValue: The JSON schema - """ - return TypeAdapter(t).json_schema() - - def model_dump(model: BaseModel) -> Dict[str, Any]: - """Convert a pydantic model to a dict - - Args: - model (BaseModel): The model to convert - - Returns: - Dict[str, Any]: The dict representation of the model - - """ - return model.model_dump() - - def model_dump_json(model: BaseModel) -> str: - """Convert a pydantic model to a JSON string - - Args: - model (BaseModel): The model to convert - - Returns: - str: The JSON string representation of the model - """ - return model.model_dump_json() - - -# Remove this once we drop support for pydantic 1.x -else: # pragma: no cover - from pydantic import schema_of - from pydantic.typing import evaluate_forwardref as evaluate_forwardref # type: ignore[no-redef] - - JsonSchemaValue = Dict[str, Any] # type: ignore[misc] - - def type2schema(t: Any) -> JsonSchemaValue: - """Convert a type to a JSON schema - - Args: - t (Type): The type to convert - - Returns: - JsonSchemaValue: The JSON schema - """ - - if t is None: - return {"type": "null"} - elif get_origin(t) is Union: - return {"anyOf": [type2schema(tt) for tt in get_args(t)]} - elif get_origin(t) in [Tuple, tuple]: - prefixItems = [type2schema(tt) for tt in get_args(t)] - return { - "maxItems": len(prefixItems), - "minItems": len(prefixItems), - "prefixItems": prefixItems, - "type": "array", - } - else: - d = schema_of(t) - if "title" in d: - d.pop("title") - if "description" in d: - d.pop("description") - - return d - - def model_dump(model: BaseModel) -> Dict[str, Any]: - """Convert a pydantic model to a dict - - Args: - model (BaseModel): The model to convert - - Returns: - Dict[str, Any]: The dict representation of the model - - """ - return model.dict() - - def model_dump_json(model: BaseModel) -> str: - """Convert a pydantic model to a JSON string - - Args: - model (BaseModel): The model to convert - - Returns: - str: The JSON string representation of the model - """ - return model.json() diff --git a/autogen/agentchat/__init__.py b/autogen/agentchat/__init__.py deleted file mode 100644 index d31a59d98fbf..000000000000 --- a/autogen/agentchat/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from .agent import Agent -from .assistant_agent import AssistantAgent -from .chat import ChatResult, initiate_chats -from .conversable_agent import ConversableAgent, register_function -from .groupchat import GroupChat, GroupChatManager -from .user_proxy_agent import UserProxyAgent -from .utils import gather_usage_summary - -__all__ = ( - "Agent", - "ConversableAgent", - "AssistantAgent", - 
"UserProxyAgent", - "GroupChat", - "GroupChatManager", - "register_function", - "initiate_chats", - "gather_usage_summary", - "ChatResult", -) diff --git a/autogen/agentchat/agent.py b/autogen/agentchat/agent.py deleted file mode 100644 index 410635bce6ea..000000000000 --- a/autogen/agentchat/agent.py +++ /dev/null @@ -1,136 +0,0 @@ -from typing import Any, Dict, List, Optional, Protocol, Union, runtime_checkable - - -@runtime_checkable -class Agent(Protocol): - """(In preview) A protocol for Agent. - - An agent can communicate with other agents and perform actions. - Different agents can differ in what actions they perform in the `receive` method. - """ - - @property - def name(self) -> str: - """The name of the agent.""" - ... - - @property - def description(self) -> str: - """The description of the agent. Used for the agent's introduction in - a group chat setting.""" - ... - - def send( - self, - message: Union[Dict[str, Any], str], - recipient: "Agent", - request_reply: Optional[bool] = None, - ) -> None: - """Send a message to another agent. - - Args: - message (dict or str): the message to send. If a dict, it should be - a JSON-serializable and follows the OpenAI's ChatCompletion schema. - recipient (Agent): the recipient of the message. - request_reply (bool): whether to request a reply from the recipient. - """ - ... - - async def a_send( - self, - message: Union[Dict[str, Any], str], - recipient: "Agent", - request_reply: Optional[bool] = None, - ) -> None: - """(Async) Send a message to another agent. - - Args: - message (dict or str): the message to send. If a dict, it should be - a JSON-serializable and follows the OpenAI's ChatCompletion schema. - recipient (Agent): the recipient of the message. - request_reply (bool): whether to request a reply from the recipient. - """ - ... - - def receive( - self, - message: Union[Dict[str, Any], str], - sender: "Agent", - request_reply: Optional[bool] = None, - ) -> None: - """Receive a message from another agent. - - Args: - message (dict or str): the message received. If a dict, it should be - a JSON-serializable and follows the OpenAI's ChatCompletion schema. - sender (Agent): the sender of the message. - request_reply (bool): whether the sender requests a reply. - """ - - async def a_receive( - self, - message: Union[Dict[str, Any], str], - sender: "Agent", - request_reply: Optional[bool] = None, - ) -> None: - """(Async) Receive a message from another agent. - - Args: - message (dict or str): the message received. If a dict, it should be - a JSON-serializable and follows the OpenAI's ChatCompletion schema. - sender (Agent): the sender of the message. - request_reply (bool): whether the sender requests a reply. - """ - ... - - def generate_reply( - self, - messages: Optional[List[Dict[str, Any]]] = None, - sender: Optional["Agent"] = None, - **kwargs: Any, - ) -> Union[str, Dict[str, Any], None]: - """Generate a reply based on the received messages. - - Args: - messages (list[dict]): a list of messages received from other agents. - The messages are dictionaries that are JSON-serializable and - follows the OpenAI's ChatCompletion schema. - sender: sender of an Agent instance. - - Returns: - str or dict or None: the generated reply. If None, no reply is generated. - """ - - async def a_generate_reply( - self, - messages: Optional[List[Dict[str, Any]]] = None, - sender: Optional["Agent"] = None, - **kwargs: Any, - ) -> Union[str, Dict[str, Any], None]: - """(Async) Generate a reply based on the received messages. 
- - Args: - messages (list[dict]): a list of messages received from other agents. - The messages are dictionaries that are JSON-serializable and - follows the OpenAI's ChatCompletion schema. - sender: sender of an Agent instance. - - Returns: - str or dict or None: the generated reply. If None, no reply is generated. - """ - - -@runtime_checkable -class LLMAgent(Agent, Protocol): - """(In preview) A protocol for an LLM agent.""" - - @property - def system_message(self) -> str: - """The system message of this agent.""" - - def update_system_message(self, system_message: str) -> None: - """Update this agent's system message. - - Args: - system_message (str): system message for inference. - """ diff --git a/autogen/agentchat/assistant_agent.py b/autogen/agentchat/assistant_agent.py deleted file mode 100644 index c1601ea9ba81..000000000000 --- a/autogen/agentchat/assistant_agent.py +++ /dev/null @@ -1,79 +0,0 @@ -from typing import Callable, Dict, Literal, Optional, Union - -from autogen.runtime_logging import log_new_agent, logging_enabled - -from .conversable_agent import ConversableAgent - - -class AssistantAgent(ConversableAgent): - """(In preview) Assistant agent, designed to solve a task with LLM. - - AssistantAgent is a subclass of ConversableAgent configured with a default system message. - The default system message is designed to solve a task with LLM, - including suggesting python code blocks and debugging. - `human_input_mode` is default to "NEVER" - and `code_execution_config` is default to False. - This agent doesn't execute code by default, and expects the user to execute the code. - """ - - DEFAULT_SYSTEM_MESSAGE = """You are a helpful AI assistant. -Solve tasks using your coding and language skills. -In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. - 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. - 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. -Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. -When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. -If you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user. -If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. 
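Because `Agent` is a `runtime_checkable` Protocol, any object exposing the expected attributes satisfies it without inheriting from an autogen class; a minimal structural sketch (the `EchoAgent` class is invented for illustration):

```python
from autogen.agentchat import Agent


class EchoAgent:
    """Toy agent that satisfies the Agent protocol structurally."""

    def __init__(self, name: str):
        self._name = name

    @property
    def name(self) -> str:
        return self._name

    @property
    def description(self) -> str:
        return "Echoes the last message back."

    def send(self, message, recipient, request_reply=None) -> None:
        recipient.receive(message, self, request_reply)

    async def a_send(self, message, recipient, request_reply=None) -> None:
        self.send(message, recipient, request_reply)

    def receive(self, message, sender, request_reply=None) -> None:
        pass  # a real agent would store or act on the message here

    async def a_receive(self, message, sender, request_reply=None) -> None:
        self.receive(message, sender, request_reply)

    def generate_reply(self, messages=None, sender=None, **kwargs):
        return messages[-1]["content"] if messages else None

    async def a_generate_reply(self, messages=None, sender=None, **kwargs):
        return self.generate_reply(messages, sender, **kwargs)


# runtime_checkable protocols check attribute presence, not signatures.
print(isinstance(EchoAgent("echo"), Agent))  # True
```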
If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. -When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. -Reply "TERMINATE" in the end when everything is done. - """ - - DEFAULT_DESCRIPTION = "A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills." - - def __init__( - self, - name: str, - system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE, - llm_config: Optional[Union[Dict, Literal[False]]] = None, - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", - description: Optional[str] = None, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the ChatCompletion inference. - Please override this attribute if you want to reprogram the agent. - llm_config (dict or False or None): llm inference configuration. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) - for available options. - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - max_consecutive_auto_reply (int): the maximum number of consecutive auto replies. - default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case). - The limit only plays a role when human_input_mode is not "ALWAYS". - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](conversable_agent#__init__). - """ - super().__init__( - name, - system_message, - is_termination_msg, - max_consecutive_auto_reply, - human_input_mode, - llm_config=llm_config, - description=description, - **kwargs, - ) - if logging_enabled(): - log_new_agent(self, locals()) - - # Update the provided description if None, and we are using the default system_message, - # then use the default description. - if description is None: - if system_message == self.DEFAULT_SYSTEM_MESSAGE: - self.description = self.DEFAULT_DESCRIPTION diff --git a/autogen/agentchat/chat.py b/autogen/agentchat/chat.py deleted file mode 100644 index d07b4d15cb62..000000000000 --- a/autogen/agentchat/chat.py +++ /dev/null @@ -1,300 +0,0 @@ -import asyncio -import datetime -import logging -import warnings -from collections import abc, defaultdict -from dataclasses import dataclass -from functools import partial -from typing import Any, Dict, List, Set, Tuple - -from ..formatting_utils import colored -from ..io.base import IOStream -from .utils import consolidate_chat_info - -logger = logging.getLogger(__name__) -Prerequisite = Tuple[int, int] - - -@dataclass -class ChatResult: - """(Experimental) The result of a chat. Almost certain to be changed.""" - - chat_id: int = None - """chat id""" - chat_history: List[Dict[str, Any]] = None - """The chat history.""" - summary: str = None - """A summary obtained from the chat.""" - cost: Dict[str, dict] = None # keys: "usage_including_cached_inference", "usage_excluding_cached_inference" - """The cost of the chat. 
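Typical two-agent usage of the removed `AssistantAgent`, including the reliability settings mentioned in the transparency notes above; a sketch assuming a valid `OAI_CONFIG_LIST` file (the task message is illustrative):

```python
import autogen

config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")

assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={"config_list": config_list, "cache_seed": 42},
)
# The assistant only suggests code; the user proxy executes it and can be
# bounded with an auto-reply limit and a termination condition.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
    code_execution_config={"work_dir": "coding", "use_docker": False},
)

chat_result = user_proxy.initiate_chat(
    assistant,
    message="How many days are left until the end of this year?",
)
print(chat_result.summary)
```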
- The value for each usage type is a dictionary containing cost information for that specific type. - - "usage_including_cached_inference": Cost information on the total usage, including the tokens in cached inference. - - "usage_excluding_cached_inference": Cost information on the usage of tokens, excluding the tokens in cache. No larger than "usage_including_cached_inference". - """ - human_input: List[str] = None - """A list of human input solicited during the chat.""" - - -def _validate_recipients(chat_queue: List[Dict[str, Any]]) -> None: - """ - Validate recipients exits and warn repetitive recipients. - """ - receipts_set = set() - for chat_info in chat_queue: - assert "recipient" in chat_info, "recipient must be provided." - receipts_set.add(chat_info["recipient"]) - if len(receipts_set) < len(chat_queue): - warnings.warn( - "Repetitive recipients detected: The chat history will be cleared by default if a recipient appears more than once. To retain the chat history, please set 'clear_history=False' in the configuration of the repeating agent.", - UserWarning, - ) - - -def __create_async_prerequisites(chat_queue: List[Dict[str, Any]]) -> List[Prerequisite]: - """ - Create list of Prerequisite (prerequisite_chat_id, chat_id) - """ - prerequisites = [] - for chat_info in chat_queue: - if "chat_id" not in chat_info: - raise ValueError("Each chat must have a unique id for async multi-chat execution.") - chat_id = chat_info["chat_id"] - pre_chats = chat_info.get("prerequisites", []) - for pre_chat_id in pre_chats: - if not isinstance(pre_chat_id, int): - raise ValueError("Prerequisite chat id is not int.") - prerequisites.append((chat_id, pre_chat_id)) - return prerequisites - - -def __find_async_chat_order(chat_ids: Set[int], prerequisites: List[Prerequisite]) -> List[int]: - """Find chat order for async execution based on the prerequisite chats - - args: - num_chats: number of chats - prerequisites: List of Prerequisite (prerequisite_chat_id, chat_id) - - returns: - list: a list of chat_id in order. - """ - edges = defaultdict(set) - indegree = defaultdict(int) - for pair in prerequisites: - chat, pre = pair[0], pair[1] - if chat not in edges[pre]: - indegree[chat] += 1 - edges[pre].add(chat) - bfs = [i for i in chat_ids if i not in indegree] - chat_order = [] - steps = len(indegree) - for _ in range(steps + 1): - if not bfs: - break - chat_order.extend(bfs) - nxt = [] - for node in bfs: - if node in edges: - for course in edges[node]: - indegree[course] -= 1 - if indegree[course] == 0: - nxt.append(course) - indegree.pop(course) - edges.pop(node) - bfs = nxt - - if indegree: - return [] - return chat_order - - -def _post_process_carryover_item(carryover_item): - if isinstance(carryover_item, str): - return carryover_item - elif isinstance(carryover_item, dict) and "content" in carryover_item: - return str(carryover_item["content"]) - else: - return str(carryover_item) - - -def __post_carryover_processing(chat_info: Dict[str, Any]) -> None: - iostream = IOStream.get_default() - - if "message" not in chat_info: - warnings.warn( - "message is not provided in a chat_queue entry. 
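The prerequisite handling above is a plain topological sort; a small worked example against the private helper defined above (the chat ids are illustrative, and the helper is imported here only to show the expected ordering):

```python
from autogen.agentchat.chat import __find_async_chat_order as find_order

# Each tuple pairs a chat with one of its prerequisites, as the function
# consumes them: chat 3 waits for chats 1 and 2, and chat 4 waits for chat 3.
order = find_order({1, 2, 3, 4}, [(3, 1), (3, 2), (4, 3)])
print(order)  # [1, 2, 3, 4] -- prerequisites always precede their dependents
```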
input() will be called to get the initial message.", - UserWarning, - ) - print_carryover = ( - ("\n").join([_post_process_carryover_item(t) for t in chat_info["carryover"]]) - if isinstance(chat_info["carryover"], list) - else chat_info["carryover"] - ) - message = chat_info.get("message") - if isinstance(message, str): - print_message = message - elif callable(message): - print_message = "Callable: " + message.__name__ - elif isinstance(message, dict): - print_message = "Dict: " + str(message) - elif message is None: - print_message = "None" - iostream.print(colored("\n" + "*" * 80, "blue"), flush=True, sep="") - iostream.print( - colored( - "Starting a new chat....", - "blue", - ), - flush=True, - ) - if chat_info.get("verbose", False): - iostream.print(colored("Message:\n" + print_message, "blue"), flush=True) - iostream.print(colored("Carryover:\n" + print_carryover, "blue"), flush=True) - iostream.print(colored("\n" + "*" * 80, "blue"), flush=True, sep="") - - -def initiate_chats(chat_queue: List[Dict[str, Any]]) -> List[ChatResult]: - """Initiate a list of chats. - Args: - chat_queue (List[Dict]): A list of dictionaries containing the information about the chats. - - Each dictionary should contain the input arguments for - [`ConversableAgent.initiate_chat`](/docs/reference/agentchat/conversable_agent#initiate_chat). - For example: - - `"sender"` - the sender agent. - - `"recipient"` - the recipient agent. - - `"clear_history"` (bool) - whether to clear the chat history with the agent. - Default is True. - - `"silent"` (bool or None) - (Experimental) whether to print the messages in this - conversation. Default is False. - - `"cache"` (Cache or None) - the cache client to use for this conversation. - Default is None. - - `"max_turns"` (int or None) - maximum number of turns for the chat. If None, the chat - will continue until a termination condition is met. Default is None. - - `"summary_method"` (str or callable) - a string or callable specifying the method to get - a summary from the chat. Default is DEFAULT_summary_method, i.e., "last_msg". - - `"summary_args"` (dict) - a dictionary of arguments to be passed to the summary_method. - Default is {}. - - `"message"` (str, callable or None) - if None, input() will be called to get the - initial message. - - `**context` - additional context information to be passed to the chat. - - `"carryover"` - It can be used to specify the carryover information to be passed - to this chat. If provided, we will combine this carryover with the "message" content when - generating the initial chat message in `generate_init_message`. - - `"finished_chat_indexes_to_exclude_from_carryover"` - It can be used by specifying a list of indexes of the finished_chats list, - from which to exclude the summaries for carryover. If 'finished_chat_indexes_to_exclude_from_carryover' is not provided or an empty list, - then summary from all the finished chats will be taken. - Returns: - (list): a list of ChatResult objects corresponding to the finished chats in the chat_queue. 
- """ - - consolidate_chat_info(chat_queue) - _validate_recipients(chat_queue) - current_chat_queue = chat_queue.copy() - finished_chats = [] - while current_chat_queue: - chat_info = current_chat_queue.pop(0) - _chat_carryover = chat_info.get("carryover", []) - finished_chat_indexes_to_exclude_from_carryover = chat_info.get( - "finished_chat_indexes_to_exclude_from_carryover", [] - ) - - if isinstance(_chat_carryover, str): - _chat_carryover = [_chat_carryover] - chat_info["carryover"] = _chat_carryover + [ - r.summary for i, r in enumerate(finished_chats) if i not in finished_chat_indexes_to_exclude_from_carryover - ] - - if not chat_info.get("silent", False): - __post_carryover_processing(chat_info) - - sender = chat_info["sender"] - chat_res = sender.initiate_chat(**chat_info) - finished_chats.append(chat_res) - return finished_chats - - -def __system_now_str(): - ct = datetime.datetime.now() - return f" System time at {ct}. " - - -def _on_chat_future_done(chat_future: asyncio.Future, chat_id: int): - """ - Update ChatResult when async Task for Chat is completed. - """ - logger.debug(f"Update chat {chat_id} result on task completion." + __system_now_str()) - chat_result = chat_future.result() - chat_result.chat_id = chat_id - - -async def _dependent_chat_future( - chat_id: int, chat_info: Dict[str, Any], prerequisite_chat_futures: Dict[int, asyncio.Future] -) -> asyncio.Task: - """ - Create an async Task for each chat. - """ - logger.debug(f"Create Task for chat {chat_id}." + __system_now_str()) - _chat_carryover = chat_info.get("carryover", []) - finished_chat_indexes_to_exclude_from_carryover = chat_info.get( - "finished_chat_indexes_to_exclude_from_carryover", [] - ) - finished_chats = dict() - for chat in prerequisite_chat_futures: - chat_future = prerequisite_chat_futures[chat] - if chat_future.cancelled(): - raise RuntimeError(f"Chat {chat} is cancelled.") - - # wait for prerequisite chat results for the new chat carryover - finished_chats[chat] = await chat_future - - if isinstance(_chat_carryover, str): - _chat_carryover = [_chat_carryover] - data = [ - chat_result.summary - for chat_id, chat_result in finished_chats.items() - if chat_id not in finished_chat_indexes_to_exclude_from_carryover - ] - chat_info["carryover"] = _chat_carryover + data - if not chat_info.get("silent", False): - __post_carryover_processing(chat_info) - - sender = chat_info["sender"] - chat_res_future = asyncio.create_task(sender.a_initiate_chat(**chat_info)) - call_back_with_args = partial(_on_chat_future_done, chat_id=chat_id) - chat_res_future.add_done_callback(call_back_with_args) - logger.debug(f"Task for chat {chat_id} created." + __system_now_str()) - return chat_res_future - - -async def a_initiate_chats(chat_queue: List[Dict[str, Any]]) -> Dict[int, ChatResult]: - """(async) Initiate a list of chats. - - args: - - Please refer to `initiate_chats`. - - - returns: - - (Dict): a dict of ChatId: ChatResult corresponding to the finished chats in the chat_queue. 
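A compact sketch of driving the removed sequential `initiate_chats` API (agents, messages, and summary settings are illustrative; each later chat receives the earlier summaries as carryover):

```python
import autogen

llm_config = {"config_list": autogen.config_list_from_json("OAI_CONFIG_LIST")}

researcher = autogen.AssistantAgent("researcher", llm_config=llm_config)
writer = autogen.AssistantAgent("writer", llm_config=llm_config)
user = autogen.UserProxyAgent("user", human_input_mode="NEVER", code_execution_config=False)

chat_results = autogen.initiate_chats(
    [
        {
            "sender": user,
            "recipient": researcher,
            "message": "Collect three recent facts about solar energy.",
            "max_turns": 2,
            "summary_method": "last_msg",
        },
        {
            "sender": user,
            "recipient": writer,
            "message": "Write a short paragraph using the carried-over facts.",
            "max_turns": 2,
            "summary_method": "reflection_with_llm",
        },
    ]
)
print([r.summary for r in chat_results])
```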
- """ - consolidate_chat_info(chat_queue) - _validate_recipients(chat_queue) - chat_book = {chat_info["chat_id"]: chat_info for chat_info in chat_queue} - num_chats = chat_book.keys() - prerequisites = __create_async_prerequisites(chat_queue) - chat_order_by_id = __find_async_chat_order(num_chats, prerequisites) - finished_chat_futures = dict() - for chat_id in chat_order_by_id: - chat_info = chat_book[chat_id] - prerequisite_chat_ids = chat_info.get("prerequisites", []) - pre_chat_futures = dict() - for pre_chat_id in prerequisite_chat_ids: - pre_chat_future = finished_chat_futures[pre_chat_id] - pre_chat_futures[pre_chat_id] = pre_chat_future - current_chat_future = await _dependent_chat_future(chat_id, chat_info, pre_chat_futures) - finished_chat_futures[chat_id] = current_chat_future - await asyncio.gather(*list(finished_chat_futures.values())) - finished_chats = dict() - for chat in finished_chat_futures: - chat_result = finished_chat_futures[chat].result() - finished_chats[chat] = chat_result - return finished_chats diff --git a/autogen/agentchat/contrib/agent_builder.py b/autogen/agentchat/contrib/agent_builder.py deleted file mode 100644 index 430017d13fc9..000000000000 --- a/autogen/agentchat/contrib/agent_builder.py +++ /dev/null @@ -1,779 +0,0 @@ -import hashlib -import importlib -import json -import logging -import re -import socket -import subprocess as sp -import time -from typing import Dict, List, Optional, Tuple, Union - -import requests -from termcolor import colored - -import autogen - -logger = logging.getLogger(__name__) - - -def _config_check(config: Dict): - # check config loading - assert config.get("coding", None) is not None, 'Missing "coding" in your config.' - assert config.get("default_llm_config", None) is not None, 'Missing "default_llm_config" in your config.' - assert config.get("code_execution_config", None) is not None, 'Missing "code_execution_config" in your config.' - - for agent_config in config["agent_configs"]: - assert agent_config.get("name", None) is not None, 'Missing agent "name" in your agent_configs.' - assert ( - agent_config.get("system_message", None) is not None - ), 'Missing agent "system_message" in your agent_configs.' - assert agent_config.get("description", None) is not None, 'Missing agent "description" in your agent_configs.' - - -def _retrieve_json(text): - match = re.findall(autogen.code_utils.CODE_BLOCK_PATTERN, text, flags=re.DOTALL) - if not match: - return text - code_blocks = [] - for _, code in match: - code_blocks.append(code) - return code_blocks[0] - - -class AgentBuilder: - """ - AgentBuilder can help user build an automatic task solving process powered by multi-agent system. - Specifically, our building pipeline includes initialize and build. - """ - - online_server_name = "online" - - DEFAULT_PROXY_AUTO_REPLY = 'There is no code from the last 1 message for me to execute. Group chat manager should let other participants to continue the conversation. If the group chat manager want to end the conversation, you should let other participant reply me only with "TERMINATE"' - - GROUP_CHAT_DESCRIPTION = """ # Group chat instruction -You are now working in a group chat with different expert and a group chat manager. -You should refer to the previous message from other participant members or yourself, follow their topic and reply to them. 
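The async variant requires an explicit `chat_id` per entry and lets chats declare `prerequisites`; a sketch (agents, ids, and messages are illustrative):

```python
import asyncio

import autogen
from autogen.agentchat.chat import a_initiate_chats


async def main():
    llm_config = {"config_list": autogen.config_list_from_json("OAI_CONFIG_LIST")}
    analyst = autogen.AssistantAgent("analyst", llm_config=llm_config)
    reviewer = autogen.AssistantAgent("reviewer", llm_config=llm_config)
    user = autogen.UserProxyAgent("user", human_input_mode="NEVER", code_execution_config=False)

    results = await a_initiate_chats(
        [
            {"chat_id": 1, "sender": user, "recipient": analyst,
             "message": "Summarize the quarterly numbers.", "max_turns": 2},
            # Chat 2 only starts once chat 1 has finished; its summary is carried over.
            {"chat_id": 2, "sender": user, "recipient": reviewer, "prerequisites": [1],
             "message": "Review the summary carried over from the first chat.", "max_turns": 2},
        ]
    )
    print(results[2].summary)  # the return value is a dict keyed by chat_id


asyncio.run(main())
```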
- -**Your role is**: {name} -Group chat members: {members}{user_proxy_desc} - -When the task is complete and the result has been carefully verified, after obtaining agreement from the other members, you can end the conversation by replying only with "TERMINATE". - -# Your profile -{sys_msg} -""" - - DEFAULT_DESCRIPTION = """## Your role -[Complete this part with expert's name and skill description] - -## Task and skill instructions -- [Complete this part with task description] -- [Complete this part with skill description] -- [(Optional) Complete this part with other information] -""" - - CODING_AND_TASK_SKILL_INSTRUCTION = """## Useful instructions for task-solving -- Solve the task step by step if you need to. -- When you find an answer, verify the answer carefully. Include verifiable evidence with a possible test case in your response if possible. -- All your replies should be based on the provided facts. - -## How to verify? -**You have to keep believing that everyone else's answers are wrong until they provide clear enough evidence.** -- Verify with step-by-step backward reasoning. -- Write test cases according to the general task. - -## How to use code? -- Suggest python code (in a python coding block) or shell script (in a sh coding block) for the Computer_terminal to execute. -- If python packages are missing, you can install them by suggesting `pip install` code in the ```sh ... ``` block. -- When using code, you must indicate the script type in the coding block. -- Do not suggest a coding block which requires users to modify it. -- Do not suggest a coding block if it's not intended to be executed by the Computer_terminal. -- The Computer_terminal cannot modify your code. -- **Use 'print' function for the output when relevant**. -- Check the execution result returned by the Computer_terminal. -- Do not ask the Computer_terminal to copy and paste the result. -- If the result indicates there is an error, fix the error and output the code again. """ - - CODING_PROMPT = """Does the following task need programming (i.e., accessing an external API or tool by coding) to solve, -or may coding help the following task become easier? - -TASK: {task} - -Answer only YES or NO. -""" - - AGENT_NAME_PROMPT = """# Your task -Suggest no more than {max_agents} experts with their names according to the following user requirement. - -## User requirement -{task} - -# Task requirement -- Expert's name should follow the format: [skill]_Expert. -- Only reply with the names of the experts, separated by ",". -For example: Python_Expert, Math_Expert, ... """ - - AGENT_SYS_MSG_PROMPT = """# Your goal -- According to the task and expert name, write a high-quality description for the expert by filling the given template. -- Ensure that your description is clear and unambiguous, and includes all necessary information. - -# Task -{task} - -# Expert name -{position} - -# Template -{default_sys_msg} -""" - - AGENT_DESCRIPTION_PROMPT = """# Your goal -Summarize the following expert's description in a sentence. - -# Expert name -{position} - -# Expert's description -{sys_msg} -""" - - AGENT_SEARCHING_PROMPT = """# Your goal -Considering the following task, what experts should be involved in the task? - -# TASK -{task} - -# EXPERT LIST -{agent_list} - -# Requirement -- You should consider whether the experts' names and profiles match the task. -- Considering the effort, you should select fewer than {max_agents} experts; fewer is better. -- Separate expert names by commas and use "_" instead of space.
For example, Product_manager,Programmer -- Only return the list of expert names. -""" - - AGENT_SELECTION_PROMPT = """# Your goal -Match roles in the role set to each expert in expert set. - -# Skill set -{skills} - -# Expert pool (formatting with name: description) -{expert_pool} - -# Answer format -```json -{{ - "skill_1 description": "expert_name: expert_description", // if there exists an expert that suitable for skill_1 - "skill_2 description": "None", // if there is no experts that suitable for skill_2 - ... -}} -``` -""" - - def __init__( - self, - config_file_or_env: Optional[str] = "OAI_CONFIG_LIST", - config_file_location: Optional[str] = "", - builder_model: Optional[Union[str, list]] = [], - agent_model: Optional[Union[str, list]] = [], - builder_model_tags: Optional[list] = [], - agent_model_tags: Optional[list] = [], - max_agents: Optional[int] = 5, - ): - """ - (These APIs are experimental and may change in the future.) - Args: - config_file_or_env: path or environment of the OpenAI api configs. - builder_model: specify a model as the backbone of build manager. - agent_model: specify a model as the backbone of participant agents. - endpoint_building_timeout: timeout for building up an endpoint server. - max_agents: max agents for each task. - """ - builder_model = builder_model if isinstance(builder_model, list) else [builder_model] - builder_filter_dict = {} - if len(builder_model) != 0: - builder_filter_dict.update({"model": builder_model}) - if len(builder_model_tags) != 0: - builder_filter_dict.update({"tags": builder_model_tags}) - builder_config_list = autogen.config_list_from_json(config_file_or_env, filter_dict=builder_filter_dict) - if len(builder_config_list) == 0: - raise RuntimeError( - f"Fail to initialize build manager: {builder_model}{builder_model_tags} does not exist in {config_file_or_env}. " - f'If you want to change this model, please specify the "builder_model" in the constructor.' - ) - self.builder_model = autogen.OpenAIWrapper(config_list=builder_config_list) - - self.agent_model = agent_model if isinstance(agent_model, list) else [agent_model] - self.agent_model_tags = agent_model_tags - self.config_file_or_env = config_file_or_env - self.config_file_location = config_file_location - - self.building_task: str = None - self.agent_configs: List[Dict] = [] - self.open_ports: List[str] = [] - self.agent_procs: Dict[str, Tuple[sp.Popen, str]] = {} - self.agent_procs_assign: Dict[str, Tuple[autogen.ConversableAgent, str]] = {} - self.cached_configs: Dict = {} - - self.max_agents = max_agents - - def set_builder_model(self, model: str): - self.builder_model = model - - def set_agent_model(self, model: str): - self.agent_model = model - - def _create_agent( - self, - agent_config: Dict, - member_name: List[str], - llm_config: dict, - use_oai_assistant: Optional[bool] = False, - ) -> autogen.AssistantAgent: - """ - Create a group chat participant agent. - - If the agent rely on an open-source model, this function will automatically set up an endpoint for that agent. - The API address of that endpoint will be "localhost:{free port}". - - Args: - agent_config: agent's config. It should include the following information: - 1. model_name: backbone model of an agent, e.g., gpt-4-1106-preview, meta/Llama-2-70b-chat - 2. agent_name: use to identify an agent in the group chat. - 3. system_message: including persona, task solving instruction, etc. - 4. description: brief description of an agent that help group chat manager to pick the speaker. 
- llm_config: specific configs for LLM (e.g., config_list, seed, temperature, ...). - use_oai_assistant: use OpenAI assistant api instead of self-constructed agent. - world_size: the max size of parallel tensors (in most of the cases, this is identical to the amount of GPUs). - - Returns: - agent: a set-up agent. - """ - model_name_or_hf_repo = agent_config.get("model", []) - model_name_or_hf_repo = ( - model_name_or_hf_repo if isinstance(model_name_or_hf_repo, list) else [model_name_or_hf_repo] - ) - model_tags = agent_config.get("tags", []) - agent_name = agent_config["name"] - system_message = agent_config["system_message"] - description = agent_config["description"] - - # Path to the customize **ConversableAgent** class. - model_path = agent_config.get("model_path", None) - filter_dict = {} - if len(model_name_or_hf_repo) > 0: - filter_dict.update({"model": model_name_or_hf_repo}) - if len(model_tags) > 0: - filter_dict.update({"tags": model_tags}) - config_list = autogen.config_list_from_json( - self.config_file_or_env, file_location=self.config_file_location, filter_dict=filter_dict - ) - if len(config_list) == 0: - raise RuntimeError( - f"Fail to initialize agent {agent_name}: {model_name_or_hf_repo}{model_tags} does not exist in {self.config_file_or_env}.\n" - f'If you would like to change this model, please specify the "agent_model" in the constructor.\n' - f"If you load configs from json, make sure the model in agent_configs is in the {self.config_file_or_env}." - ) - server_id = self.online_server_name - current_config = llm_config.copy() - current_config.update({"config_list": config_list}) - if use_oai_assistant: - from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent - - agent = GPTAssistantAgent( - name=agent_name, - llm_config={**current_config, "assistant_id": None}, - instructions=system_message, - overwrite_instructions=False, - ) - else: - user_proxy_desc = "" - if self.cached_configs["coding"] is True: - user_proxy_desc = ( - "\nThe group also include a Computer_terminal to help you run the python and shell code." - ) - - model_class = autogen.AssistantAgent - if model_path: - module_path, model_class_name = model_path.replace("/", ".").rsplit(".", 1) - module = importlib.import_module(module_path) - model_class = getattr(module, model_class_name) - if not issubclass(model_class, autogen.ConversableAgent): - logger.error(f"{model_class} is not a ConversableAgent. Use AssistantAgent as default") - model_class = autogen.AssistantAgent - - additional_config = { - k: v - for k, v in agent_config.items() - if k not in ["model", "name", "system_message", "description", "model_path", "tags"] - } - agent = model_class( - name=agent_name, llm_config=current_config.copy(), description=description, **additional_config - ) - if system_message == "": - system_message = agent.system_message - else: - system_message = f"{system_message}\n\n{self.CODING_AND_TASK_SKILL_INSTRUCTION}" - - enhanced_sys_msg = self.GROUP_CHAT_DESCRIPTION.format( - name=agent_name, members=member_name, user_proxy_desc=user_proxy_desc, sys_msg=system_message - ) - agent.update_system_message(enhanced_sys_msg) - self.agent_procs_assign[agent_name] = (agent, server_id) - return agent - - def clear_agent(self, agent_name: str, recycle_endpoint: Optional[bool] = True): - """ - Clear a specific agent by name. - - Args: - agent_name: the name of agent. - recycle_endpoint: trigger for recycle the endpoint server. If true, the endpoint will be recycled - when there is no agent depending on. 
- """ - _, server_id = self.agent_procs_assign[agent_name] - del self.agent_procs_assign[agent_name] - if recycle_endpoint: - if server_id == self.online_server_name: - return - else: - for _, iter_sid in self.agent_procs_assign.values(): - if server_id == iter_sid: - return - self.agent_procs[server_id][0].terminate() - self.open_ports.append(server_id.split("_")[-1]) - print(colored(f"Agent {agent_name} has been cleared.", "yellow"), flush=True) - - def clear_all_agents(self, recycle_endpoint: Optional[bool] = True): - """ - Clear all cached agents. - """ - for agent_name in [agent_name for agent_name in self.agent_procs_assign.keys()]: - self.clear_agent(agent_name, recycle_endpoint) - print(colored("All agents have been cleared.", "yellow"), flush=True) - - def build( - self, - building_task: str, - default_llm_config: Dict, - coding: Optional[bool] = None, - code_execution_config: Optional[Dict] = None, - use_oai_assistant: Optional[bool] = False, - user_proxy: Optional[autogen.ConversableAgent] = None, - max_agents: Optional[int] = None, - **kwargs, - ) -> Tuple[List[autogen.ConversableAgent], Dict]: - """ - Auto build agents based on the building task. - - Args: - building_task: instruction that helps build manager (gpt-4) to decide what agent should be built. - coding: use to identify if the user proxy (a code interpreter) should be added. - code_execution_config: specific configs for user proxy (e.g., last_n_messages, work_dir, ...). - default_llm_config: specific configs for LLM (e.g., config_list, seed, temperature, ...). - use_oai_assistant: use OpenAI assistant api instead of self-constructed agent. - user_proxy: user proxy's class that can be used to replace the default user proxy. - - Returns: - agent_list: a list of agents. - cached_configs: cached configs. 
- """ - if code_execution_config is None: - code_execution_config = { - "last_n_messages": 1, - "work_dir": "groupchat", - "use_docker": False, - "timeout": 10, - } - - if max_agents is None: - max_agents = self.max_agents - - agent_configs = [] - self.building_task = building_task - - print(colored("==> Generating agents...", "green"), flush=True) - resp_agent_name = ( - self.builder_model.create( - messages=[ - { - "role": "user", - "content": self.AGENT_NAME_PROMPT.format(task=building_task, max_agents=max_agents), - } - ] - ) - .choices[0] - .message.content - ) - agent_name_list = [agent_name.strip().replace(" ", "_") for agent_name in resp_agent_name.split(",")] - print(f"{agent_name_list} are generated.", flush=True) - - print(colored("==> Generating system message...", "green"), flush=True) - agent_sys_msg_list = [] - for name in agent_name_list: - print(f"Preparing system message for {name}", flush=True) - resp_agent_sys_msg = ( - self.builder_model.create( - messages=[ - { - "role": "user", - "content": self.AGENT_SYS_MSG_PROMPT.format( - task=building_task, - position=name, - default_sys_msg=self.DEFAULT_DESCRIPTION, - ), - } - ] - ) - .choices[0] - .message.content - ) - agent_sys_msg_list.append(resp_agent_sys_msg) - - print(colored("==> Generating description...", "green"), flush=True) - agent_description_list = [] - for name, sys_msg in list(zip(agent_name_list, agent_sys_msg_list)): - print(f"Preparing description for {name}", flush=True) - resp_agent_description = ( - self.builder_model.create( - messages=[ - { - "role": "user", - "content": self.AGENT_DESCRIPTION_PROMPT.format(position=name, sys_msg=sys_msg), - } - ] - ) - .choices[0] - .message.content - ) - agent_description_list.append(resp_agent_description) - - for name, sys_msg, description in list(zip(agent_name_list, agent_sys_msg_list, agent_description_list)): - agent_configs.append( - { - "name": name, - "model": self.agent_model, - "tags": self.agent_model_tags, - "system_message": sys_msg, - "description": description, - } - ) - - if coding is None: - resp = ( - self.builder_model.create( - messages=[{"role": "user", "content": self.CODING_PROMPT.format(task=building_task)}] - ) - .choices[0] - .message.content - ) - coding = True if resp == "YES" else False - - self.cached_configs.update( - { - "building_task": building_task, - "agent_configs": agent_configs, - "coding": coding, - "default_llm_config": default_llm_config, - "code_execution_config": code_execution_config, - } - ) - _config_check(self.cached_configs) - return self._build_agents(use_oai_assistant, user_proxy=user_proxy, **kwargs) - - def build_from_library( - self, - building_task: str, - library_path_or_json: str, - default_llm_config: Dict, - top_k: int = 3, - coding: Optional[bool] = None, - code_execution_config: Optional[Dict] = None, - use_oai_assistant: Optional[bool] = False, - embedding_model: Optional[str] = "all-mpnet-base-v2", - user_proxy: Optional[autogen.ConversableAgent] = None, - **kwargs, - ) -> Tuple[List[autogen.ConversableAgent], Dict]: - """ - Build agents from a library. - The library is a list of agent configs, which contains the name and system_message for each agent. - We use a build manager to decide what agent in that library should be involved to the task. - - Args: - building_task: instruction that helps build manager (gpt-4) to decide what agent should be built. - library_path_or_json: path or JSON string config of agent library. 
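How the removed `AgentBuilder.build` pipeline was typically driven, sketched under the assumption that `OAI_CONFIG_LIST` contains a gpt-4 entry (the building task and round limit are placeholders):

```python
import autogen
from autogen.agentchat.contrib.agent_builder import AgentBuilder

task = "Find a recent paper about GPT-4 on arXiv and discuss its potential applications."

builder = AgentBuilder(config_file_or_env="OAI_CONFIG_LIST", builder_model="gpt-4", agent_model="gpt-4")
agent_list, agent_configs = builder.build(building_task=task, default_llm_config={"temperature": 0})

# Put the generated agents (plus the auto-added Computer_terminal, if coding was enabled)
# into a group chat and start working on the task.
group_chat = autogen.GroupChat(agents=agent_list, messages=[], max_round=12)
manager = autogen.GroupChatManager(
    groupchat=group_chat,
    llm_config={"config_list": autogen.config_list_from_json("OAI_CONFIG_LIST"), "temperature": 0},
)
agent_list[0].initiate_chat(manager, message=task)
```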
- default_llm_config: specific configs for LLM (e.g., config_list, seed, temperature, ...). - coding: use to identify if the user proxy (a code interpreter) should be added. - code_execution_config: specific configs for user proxy (e.g., last_n_messages, work_dir, ...). - use_oai_assistant: use OpenAI assistant api instead of self-constructed agent. - embedding_model: a Sentence-Transformers model use for embedding similarity to select agents from library. - As reference, chromadb use "all-mpnet-base-v2" as default. - user_proxy: user proxy's class that can be used to replace the default user proxy. - - Returns: - agent_list: a list of agents. - cached_configs: cached configs. - """ - import sqlite3 - - # Some system will have an unexcepted sqlite3 version. - # Check if the user has installed pysqlite3. - if int(sqlite3.version.split(".")[0]) < 3: - try: - __import__("pysqlite3") - import sys - - sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") - except Exception as e: - raise e - import chromadb - from chromadb.utils import embedding_functions - - if code_execution_config is None: - code_execution_config = { - "last_n_messages": 1, - "work_dir": "groupchat", - "use_docker": False, - "timeout": 120, - } - - try: - agent_library = json.loads(library_path_or_json) - except json.decoder.JSONDecodeError: - with open(library_path_or_json, "r") as f: - agent_library = json.load(f) - except Exception as e: - raise e - - print(colored("==> Looking for suitable agents in the library...", "green"), flush=True) - skills = building_task.replace(":", " ").split("\n") - # skills = [line.split("-", 1)[1].strip() if line.startswith("-") else line for line in lines] - if len(skills) == 0: - skills = [building_task] - - chroma_client = chromadb.Client() - collection = chroma_client.create_collection( - name="agent_list", - embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(model_name=embedding_model), - ) - collection.add( - documents=[agent["description"] for agent in agent_library], - metadatas=[{"source": "agent_profile"} for _ in range(len(agent_library))], - ids=[f"agent_{i}" for i in range(len(agent_library))], - ) - agent_desc_list = set() - for skill in skills: - recall = set(collection.query(query_texts=[skill], n_results=top_k)["documents"][0]) - agent_desc_list = agent_desc_list.union(recall) - - agent_config_list = [] - for description in list(agent_desc_list): - for agent in agent_library: - if agent["description"] == description: - agent_config_list.append(agent.copy()) - break - chroma_client.delete_collection(collection.name) - - # double recall from the searching result - expert_pool = [f"{agent['name']}: {agent['description']}" for agent in agent_config_list] - while True: - skill_agent_pair_json = ( - self.builder_model.create( - messages=[ - { - "role": "user", - "content": self.AGENT_SELECTION_PROMPT.format( - skills=building_task, expert_pool=expert_pool, max_agents=self.max_agents - ), - } - ] - ) - .choices[0] - .message.content - ) - try: - skill_agent_pair_json = _retrieve_json(skill_agent_pair_json) - skill_agent_pair = json.loads(skill_agent_pair_json) - break - except Exception as e: - print(e, flush=True) - time.sleep(5) - continue - - recalled_agent_config_list = [] - recalled_name_desc = [] - for skill, agent_profile in skill_agent_pair.items(): - # If no suitable agent, generate an agent - if agent_profile == "None": - _, agent_config_temp = self.build( - building_task=skill, - default_llm_config=default_llm_config.copy(), - coding=False, - 
use_oai_assistant=use_oai_assistant, - max_agents=1, - ) - self.clear_agent(agent_config_temp["agent_configs"][0]["name"]) - recalled_agent_config_list.append(agent_config_temp["agent_configs"][0]) - else: - if agent_profile in recalled_name_desc: - # prevent identical agents - continue - recalled_name_desc.append(agent_profile) - name = agent_profile.split(":")[0].strip() - desc = agent_profile.split(":")[1].strip() - for agent in agent_config_list: - if name == agent["name"] and desc == agent["description"]: - recalled_agent_config_list.append(agent.copy()) - - print(f"{[agent['name'] for agent in recalled_agent_config_list]} are selected.", flush=True) - - if coding is None: - resp = ( - self.builder_model.create( - messages=[{"role": "user", "content": self.CODING_PROMPT.format(task=building_task)}] - ) - .choices[0] - .message.content - ) - coding = True if resp == "YES" else False - - self.cached_configs.update( - { - "building_task": building_task, - "agent_configs": recalled_agent_config_list, - "coding": coding, - "default_llm_config": default_llm_config, - "code_execution_config": code_execution_config, - } - ) - _config_check(self.cached_configs) - - return self._build_agents(use_oai_assistant, user_proxy=user_proxy, **kwargs) - - def _build_agents( - self, use_oai_assistant: Optional[bool] = False, user_proxy: Optional[autogen.ConversableAgent] = None, **kwargs - ) -> Tuple[List[autogen.ConversableAgent], Dict]: - """ - Build agents with generated configs. - - Args: - use_oai_assistant: use OpenAI assistant api instead of self-constructed agent. - user_proxy: user proxy's class that can be used to replace the default user proxy. - - Returns: - agent_list: a list of agents. - cached_configs: cached configs. - """ - agent_configs = self.cached_configs["agent_configs"] - default_llm_config = self.cached_configs["default_llm_config"] - coding = self.cached_configs["coding"] - code_execution_config = self.cached_configs["code_execution_config"] - - print(colored("==> Creating agents...", "green"), flush=True) - for config in agent_configs: - print(f"Creating agent {config['name']}...", flush=True) - self._create_agent( - agent_config=config.copy(), - member_name=[agent["name"] for agent in agent_configs], - llm_config=default_llm_config, - use_oai_assistant=use_oai_assistant, - **kwargs, - ) - agent_list = [agent_config[0] for agent_config in self.agent_procs_assign.values()] - - if coding is True: - print("Adding user console proxy...", flush=True) - if user_proxy is None: - user_proxy = autogen.UserProxyAgent( - name="Computer_terminal", - is_termination_msg=lambda x: x == "TERMINATE" or x == "TERMINATE.", - code_execution_config=code_execution_config, - human_input_mode="NEVER", - default_auto_reply=self.DEFAULT_PROXY_AUTO_REPLY, - ) - agent_list = agent_list + [user_proxy] - - return agent_list, self.cached_configs.copy() - - def save(self, filepath: Optional[str] = None) -> str: - """ - Save building configs. If the filepath is not specific, this function will create a filename by encrypt the - building_task string by md5 with "save_config_" prefix, and save config to the local path. - - Args: - filepath: save path. - - Return: - filepath: path save. 
- """ - if filepath is None: - filepath = f'./save_config_{hashlib.md5(self.building_task.encode("utf-8")).hexdigest()}.json' - with open(filepath, "w") as save_file: - json.dump(self.cached_configs, save_file, indent=4) - print(colored(f"Building config saved to {filepath}", "green"), flush=True) - - return filepath - - def load( - self, - filepath: Optional[str] = None, - config_json: Optional[str] = None, - use_oai_assistant: Optional[bool] = False, - **kwargs, - ) -> Tuple[List[autogen.ConversableAgent], Dict]: - """ - Load building configs and call the build function to complete building without calling online LLMs' api. - - Args: - filepath: filepath or JSON string for the save config. - config_json: JSON string for the save config. - use_oai_assistant: use OpenAI assistant api instead of self-constructed agent. - - Returns: - agent_list: a list of agents. - cached_configs: cached configs. - """ - # load json string. - if config_json is not None: - print(colored("Loading config from JSON...", "green"), flush=True) - cached_configs = json.loads(config_json) - - # load from path. - if filepath is not None: - print(colored(f"Loading config from {filepath}", "green"), flush=True) - with open(filepath) as f: - cached_configs = json.load(f) - - _config_check(cached_configs) - - agent_configs = cached_configs["agent_configs"] - default_llm_config = cached_configs["default_llm_config"] - coding = cached_configs["coding"] - - if kwargs.get("code_execution_config", None) is not None: - # for test - self.cached_configs.update( - { - "building_task": cached_configs["building_task"], - "agent_configs": agent_configs, - "coding": coding, - "default_llm_config": default_llm_config, - "code_execution_config": kwargs["code_execution_config"], - } - ) - del kwargs["code_execution_config"] - return self._build_agents(use_oai_assistant, **kwargs) - else: - code_execution_config = cached_configs["code_execution_config"] - self.cached_configs.update( - { - "building_task": cached_configs["building_task"], - "agent_configs": agent_configs, - "coding": coding, - "default_llm_config": default_llm_config, - "code_execution_config": code_execution_config, - } - ) - return self._build_agents(use_oai_assistant, **kwargs) diff --git a/autogen/agentchat/contrib/agent_eval/README.md b/autogen/agentchat/contrib/agent_eval/README.md deleted file mode 100644 index 478f28fd74ec..000000000000 --- a/autogen/agentchat/contrib/agent_eval/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Agents for running the [AgentEval](https://microsoft.github.io/autogen/blog/2023/11/20/AgentEval/) pipeline. - -AgentEval is a process for evaluating a LLM-based system's performance on a given task. - -When given a task to evaluate and a few example runs, the critic and subcritic agents create evaluation criteria for evaluating a system's solution. Once the criteria has been created, the quantifier agent can evaluate subsequent task solutions based on the generated criteria. - -For more information see: [AgentEval Integration Roadmap](https://github.com/microsoft/autogen/issues/2162) - -See our [blog post](https://microsoft.github.io/autogen/blog/2024/06/21/AgentEval) for usage examples and general explanations. 
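Continuing the sketch above, the `save`/`load` pair defined in agent_builder.py lets a generated team be rebuilt later without new builder LLM calls (the file path is illustrative):

```python
# Persist the generated configs, release the agents, then rebuild the same team.
saved_path = builder.save("./saved_team.json")
builder.clear_all_agents()

new_builder = AgentBuilder(config_file_or_env="OAI_CONFIG_LIST", builder_model="gpt-4", agent_model="gpt-4")
agent_list, agent_configs = new_builder.load(saved_path)
```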
diff --git a/autogen/agentchat/contrib/agent_eval/agent_eval.py b/autogen/agentchat/contrib/agent_eval/agent_eval.py deleted file mode 100644 index b48c65a66d23..000000000000 --- a/autogen/agentchat/contrib/agent_eval/agent_eval.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Dict, List, Literal, Optional, Union - -import autogen -from autogen.agentchat.contrib.agent_eval.criterion import Criterion -from autogen.agentchat.contrib.agent_eval.critic_agent import CriticAgent -from autogen.agentchat.contrib.agent_eval.quantifier_agent import QuantifierAgent -from autogen.agentchat.contrib.agent_eval.subcritic_agent import SubCriticAgent -from autogen.agentchat.contrib.agent_eval.task import Task - - -def generate_criteria( - llm_config: Optional[Union[Dict, Literal[False]]] = None, - task: Task = None, - additional_instructions: str = "", - max_round=2, - use_subcritic: bool = False, -): - """ - Creates a list of criteria for evaluating the utility of a given task. - Args: - llm_config (dict or bool): llm inference configuration. - task (Task): The task to evaluate. - additional_instructions (str): Additional instructions for the criteria agent. - max_round (int): The maximum number of rounds to run the conversation. - use_subcritic (bool): Whether to use the subcritic agent to generate subcriteria. - Returns: - list: A list of Criterion objects for evaluating the utility of the given task. - """ - critic = CriticAgent( - system_message=CriticAgent.DEFAULT_SYSTEM_MESSAGE + "\n" + additional_instructions, - llm_config=llm_config, - ) - - critic_user = autogen.UserProxyAgent( - name="critic_user", - max_consecutive_auto_reply=0, # terminate without auto-reply - human_input_mode="NEVER", - code_execution_config={"use_docker": False}, - ) - - agents = [critic_user, critic] - - if use_subcritic: - subcritic = SubCriticAgent( - llm_config=llm_config, - ) - agents.append(subcritic) - - groupchat = autogen.GroupChat( - agents=agents, messages=[], max_round=max_round, speaker_selection_method="round_robin" - ) - critic_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config) - - critic_user.initiate_chat(critic_manager, message=task.get_sys_message()) - criteria = critic_user.last_message() - content = criteria["content"] - # need to strip out any extra code around the returned json - content = content[content.find("[") : content.rfind("]") + 1] - criteria = Criterion.parse_json_str(content) - return criteria - - -def quantify_criteria( - llm_config: Optional[Union[Dict, Literal[False]]] = None, - criteria: List[Criterion] = None, - task: Task = None, - test_case: str = "", - ground_truth: str = "", -): - """ - Quantifies the performance of a system using the provided criteria. - Args: - llm_config (dict or bool): llm inference configuration. - criteria ([Criterion]): A list of criteria for evaluating the utility of a given task. - task (Task): The task to evaluate. - test_case (str): The test case to evaluate. - ground_truth (str): The ground truth for the test case. - Returns: - dict: A dictionary where the keys are the criteria and the values are the assessed performance based on accepted values for each criteria. 
- """ - quantifier = QuantifierAgent( - llm_config=llm_config, - ) - - quantifier_user = autogen.UserProxyAgent( - name="quantifier_user", - max_consecutive_auto_reply=0, # terminate without auto-reply - human_input_mode="NEVER", - code_execution_config={"use_docker": False}, - ) - - quantifier_user.initiate_chat( # noqa: F841 - quantifier, - message=task.get_sys_message() - + "Evaluation dictionary: " - + Criterion.write_json(criteria) - + "actual test case to evaluate: " - + test_case, - ) - quantified_results = quantifier_user.last_message() - return {"actual_success": ground_truth, "estimated_performance": quantified_results["content"]} diff --git a/autogen/agentchat/contrib/agent_eval/criterion.py b/autogen/agentchat/contrib/agent_eval/criterion.py deleted file mode 100644 index 5efd121ec07c..000000000000 --- a/autogen/agentchat/contrib/agent_eval/criterion.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import annotations - -import json -from typing import List - -import pydantic_core -from pydantic import BaseModel -from pydantic.json import pydantic_encoder - - -class Criterion(BaseModel): - """ - A class that represents a criterion for agent evaluation. - """ - - name: str - description: str - accepted_values: List[str] - sub_criteria: List[Criterion] = list() - - @staticmethod - def parse_json_str(criteria: str): - """ - Create a list of Criterion objects from a json string. - Args: - criteria (str): Json string that represents the criteria - returns: - [Criterion]: A list of Criterion objects that represents the json criteria information. - """ - return [Criterion(**crit) for crit in json.loads(criteria)] - - @staticmethod - def write_json(criteria): - """ - Create a json string from a list of Criterion objects. - Args: - criteria ([Criterion]): A list of Criterion objects. - Returns: - str: A json string that represents the list of Criterion objects. - """ - return json.dumps([crit.model_dump() for crit in criteria], indent=2) diff --git a/autogen/agentchat/contrib/agent_eval/critic_agent.py b/autogen/agentchat/contrib/agent_eval/critic_agent.py deleted file mode 100644 index 2f5e5598ba60..000000000000 --- a/autogen/agentchat/contrib/agent_eval/critic_agent.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Optional - -from autogen.agentchat.conversable_agent import ConversableAgent - - -class CriticAgent(ConversableAgent): - """ - An agent for creating list of criteria for evaluating the utility of a given task. - """ - - DEFAULT_SYSTEM_MESSAGE = """You are a helpful assistant. You suggest criteria for evaluating different tasks. They should be distinguishable, quantifiable and not redundant. - Convert the evaluation criteria into a list where each item is a criteria which consists of the following dictionary as follows - {"name": name of the criterion, "description": criteria description , "accepted_values": possible accepted inputs for this key} - Make sure "accepted_values" include the acceptable inputs for each key that are fine-grained and preferably multi-graded levels and "description" includes the criterion description. - Output just the criteria string you have created, no code. - """ - - DEFAULT_DESCRIPTION = "An AI agent for creating list criteria for evaluating the utility of a given task." - - def __init__( - self, - name="critic", - system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE, - description: Optional[str] = DEFAULT_DESCRIPTION, - **kwargs, - ): - """ - Args: - name (str): agent name. 
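A sketch of the removed AgentEval flow end to end, using the `Task` model and the two entry points above (the math task, test case, and ground truth are illustrative):

```python
import json

import autogen
from autogen.agentchat.contrib.agent_eval.agent_eval import generate_criteria, quantify_criteria
from autogen.agentchat.contrib.agent_eval.task import Task

llm_config = {"config_list": autogen.config_list_from_json("OAI_CONFIG_LIST")}

task = Task(
    name="Math problem solving",
    description="Given a math problem, determine the correct answer.",
    successful_response="The answer is 5. TERMINATE",
    failed_response="I am not able to solve this problem.",
)

# Step 1: the critic (and optionally the subcritic) proposes evaluation criteria.
criteria = generate_criteria(llm_config=llm_config, task=task, use_subcritic=False)

# Step 2: the quantifier scores a concrete test case against those criteria.
result = quantify_criteria(
    llm_config=llm_config,
    criteria=criteria,
    task=task,
    test_case=json.dumps({"problem": "What is 2 + 3?", "response": "The answer is 5. TERMINATE"}),
    ground_truth="true",
)
print(result["estimated_performance"])
```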
- system_message (str): system message for the ChatCompletion inference. - Please override this attribute if you want to reprogram the agent. - description (str): The description of the agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../../conversable_agent#__init__). - """ - super().__init__( - name=name, - system_message=system_message, - description=description, - **kwargs, - ) diff --git a/autogen/agentchat/contrib/agent_eval/quantifier_agent.py b/autogen/agentchat/contrib/agent_eval/quantifier_agent.py deleted file mode 100644 index 02a8f650fab4..000000000000 --- a/autogen/agentchat/contrib/agent_eval/quantifier_agent.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Optional - -from autogen.agentchat.conversable_agent import ConversableAgent - - -class QuantifierAgent(ConversableAgent): - """ - An agent for quantifying the performance of a system using the provided criteria. - """ - - DEFAULT_SYSTEM_MESSAGE = """You are a helpful assistant. You quantify the output of different tasks based on the given criteria. - The criteria are given in a json list format where each element is a distinct criterion. - Each element is a dictionary as follows {"name": name of the criterion, "description": criteria description , "accepted_values": possible accepted inputs for this key} - You are going to quantify each of the criteria for a given task based on the task description. - Return a dictionary where the keys are the criteria and the values are the assessed performance based on the accepted values for each criterion. - Return only the dictionary, no code.""" - - DEFAULT_DESCRIPTION = "An AI agent for quantifying the performance of a system using the provided criteria." - - def __init__( - self, - name="quantifier", - system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE, - description: Optional[str] = DEFAULT_DESCRIPTION, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the ChatCompletion inference. - Please override this attribute if you want to reprogram the agent. - description (str): The description of the agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../../conversable_agent#__init__). - """ - super().__init__(name=name, system_message=system_message, description=description, **kwargs) diff --git a/autogen/agentchat/contrib/agent_eval/subcritic_agent.py b/autogen/agentchat/contrib/agent_eval/subcritic_agent.py deleted file mode 100755 index fa994ee7bdaf..000000000000 --- a/autogen/agentchat/contrib/agent_eval/subcritic_agent.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Optional - -from autogen.agentchat.conversable_agent import ConversableAgent - - -class SubCriticAgent(ConversableAgent): - """ - An agent for creating subcriteria from a given list of criteria for evaluating the utility of a given task. - """ - - DEFAULT_SYSTEM_MESSAGE = """You are a helpful assistant to the critic agent. You suggest sub criteria for evaluating different tasks based on the criteria provided by the critic agent (if you feel it is needed). - They should be distinguishable, quantifiable, and related to the overall theme of the critic's provided criteria. - You operate by taking in the description of the criteria. You then create a new key called sub criteria where you provide the sub criteria for the given criteria.
- The value of the sub_criteria is a dictionary where the keys are the subcriteria and each value is as follows {"description": sub criteria description , "accepted_values": possible accepted inputs for this key} - Do this for each criteria provided by the critic (removing the criteria's accepted values). "accepted_values" include the acceptable inputs for each key that are fine-grained and preferably multi-graded levels. "description" includes the criterion description. - Once you have created the sub criteria for the given criteria, you return the json (make sure to include the contents of the critic's dictionary in the final dictionary as well). - Make sure to return a valid json and no code""" - - DEFAULT_DESCRIPTION = "An AI agent for creating subcriteria from a given list of criteria." - - def __init__( - self, - name="subcritic", - system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE, - description: Optional[str] = DEFAULT_DESCRIPTION, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the ChatCompletion inference. - Please override this attribute if you want to reprogram the agent. - description (str): The description of the agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../../conversable_agent#__init__). - """ - super().__init__( - name=name, - system_message=system_message, - description=description, - **kwargs, - ) diff --git a/autogen/agentchat/contrib/agent_eval/task.py b/autogen/agentchat/contrib/agent_eval/task.py deleted file mode 100644 index 9f96fbf79e2f..000000000000 --- a/autogen/agentchat/contrib/agent_eval/task.py +++ /dev/null @@ -1,37 +0,0 @@ -import json - -from pydantic import BaseModel - - -class Task(BaseModel): - """ - Class representing a task for agent completion, includes example agent execution for criteria generation. - """ - - name: str - description: str - successful_response: str - failed_response: str - - def get_sys_message(self): - return f"""Task: {self.name}. - Task description: {self.description} - Task successful example: {self.successful_response} - Task failed example: {self.failed_response} - """ - - @staticmethod - def parse_json_str(task: str): - """ - Create a Task object from a json object. - Args: - json_data (dict): A dictionary that represents the task. - Returns: - Task: A Task object that represents the json task information. - """ - json_data = json.loads(task) - name = json_data.get("name") - description = json_data.get("description") - successful_response = json_data.get("successful_response") - failed_response = json_data.get("failed_response") - return Task(name, description, successful_response, failed_response) diff --git a/autogen/agentchat/contrib/agent_optimizer.py b/autogen/agentchat/contrib/agent_optimizer.py deleted file mode 100644 index af264d4b65f1..000000000000 --- a/autogen/agentchat/contrib/agent_optimizer.py +++ /dev/null @@ -1,444 +0,0 @@ -import copy -import json -from typing import Dict, List, Literal, Optional, Union - -import autogen -from autogen.code_utils import execute_code - -ADD_FUNC = { - "type": "function", - "function": { - "name": "add_function", - "description": "Add a function in the context of the conversation. Necessary Python packages must be declared. 
The name of the function MUST be the same with the function name in the code you generated.", - "parameters": { - "type": "object", - "properties": { - "name": {"type": "string", "description": "The name of the function in the code implementation."}, - "description": {"type": "string", "description": "A short description of the function."}, - "arguments": { - "type": "string", - "description": 'JSON schema of arguments encoded as a string. Please note that the JSON schema only supports specific types including string, integer, object, array, boolean. (do not have float type) For example: { "url": { "type": "string", "description": "The URL", }}. Please avoid the error \'array schema missing items\' when using array type.', - }, - "packages": { - "type": "string", - "description": "A list of package names imported by the function, and that need to be installed with pip prior to invoking the function. This solves ModuleNotFoundError. It should be string, not list.", - }, - "code": { - "type": "string", - "description": "The implementation in Python. Do not include the function declaration.", - }, - }, - "required": ["name", "description", "arguments", "packages", "code"], - }, - }, -} - -REVISE_FUNC = { - "type": "function", - "function": { - "name": "revise_function", - "description": "Revise a function in the context of the conversation. Necessary Python packages must be declared. The name of the function MUST be the same with the function name in the code you generated.", - "parameters": { - "type": "object", - "properties": { - "name": {"type": "string", "description": "The name of the function in the code implementation."}, - "description": {"type": "string", "description": "A short description of the function."}, - "arguments": { - "type": "string", - "description": 'JSON schema of arguments encoded as a string. Please note that the JSON schema only supports specific types including string, integer, object, array, boolean. (do not have float type) For example: { "url": { "type": "string", "description": "The URL", }}. Please avoid the error \'array schema missing items\' when using array type.', - }, - "packages": { - "type": "string", - "description": "A list of package names imported by the function, and that need to be installed with pip prior to invoking the function. This solves ModuleNotFoundError. It should be string, not list.", - }, - "code": { - "type": "string", - "description": "The implementation in Python. Do not include the function declaration.", - }, - }, - "required": ["name", "description", "arguments", "packages", "code"], - }, - }, -} - -REMOVE_FUNC = { - "type": "function", - "function": { - "name": "remove_function", - "description": "Remove one function in the context of the conversation. Once remove one function, the assistant will not use this function in future conversation.", - "parameters": { - "type": "object", - "properties": { - "name": {"type": "string", "description": "The name of the function in the code implementation."} - }, - "required": ["name"], - }, - }, -} - -OPT_PROMPT = """You are a function optimizer. Your task is to maintain a list of functions for the assistant according to the existing function list and conversation history that happens between the assistant and the user. -You can perform one of the following four actions to manipulate the function list using the functions you have: -1. Revise one existing function (using revise_function). -2. Remove one existing function (using remove_function). -3. 
Add one new function (using add_function).
-4. Directly return "TERMINATE" to me if no more actions are needed for the current function list.
-
-Below are the principles that you need to follow for taking these four actions.
-(1) Revise one existing function:
-1. Pay more attention to the failed tasks and corresponding error information, and optimize the function used in these tasks according to the conversation history if needed.
-2. A failed function call can occur due to incorrect input arguments (missing arguments) or an incorrect function code implementation. You should focus more on the function code implementation and make it easier to get a successful function call.
-3. Do not revise the function that you think works well and plays a critical role in solving the problems according to the conversation history. Only make revisions if needed.
-4. Sometimes, a NameError may occur. To fix this error, you can either revise the name of the function in the code implementation or revise the name of the function call to make these two names consistent.
-(2) Remove one existing function:
-1. Only remove the function that you think is not needed anymore in future tasks.
-(3) Add one new function:
-1. The added function should be general enough to be used in future tasks. For instance, if you encounter a problem that this function can solve, or one step of it, you can use the generated function directly instead of starting from scratch.
-2. The newly added function should solve a higher-level question that encompasses the original query and extend the code's functionality to make it more versatile and widely applicable.
-3. Replace specific strings or variable names with general variables to enhance the tool's applicability to various queries. All names used inside the function should be passed in as arguments.
-Below is an example of a function that potentially deserves to be added in solving MATH problems, which can be used to solve a higher-level question:
-{{
-    \"name\": \"evaluate_expression\",
-    \"description\": \"Evaluate arithmetic or mathematical expressions provided as strings.\",
-    \"arguments\": {{
-        \"expression\": {{
-            \"type\": \"string\",
-            \"description\": \"The mathematical expression to evaluate.\"
-        }}
-    }},
-    \"packages\": \"sympy\",
-    \"code\": \"from sympy import sympify, SympifyError\\n\\ndef evaluate_expression(expression):\\n    try:\\n        result = sympify(expression)\\n        if result.is_number:\\n            result = float(result)\\n        else:\\n            result = str(result)\\n        return result\\n    except SympifyError as e:\\n        return str(e)\"
-}}
-(4) Directly return "TERMINATE":
-If you think there is no need to perform any other actions for the current function list, because the current list is already optimal and more actions will harm the performance in future tasks, please directly reply to me with "TERMINATE".
-
-One function signature includes the following five elements:
-1. Function name
-2. Function description
-3. JSON schema of arguments encoded as a string
-4. A list of package names imported by the function ("packages")
-5. The code implementation
-
-Below are the signatures of the current functions:
-List A: {best_functions}.
-The following list contains the function signatures that you have after taking {actions_num} actions to manipulate List A:
-List B: {incumbent_functions}.
-
-{accumulated_experience}
-
-Here are {best_conversations_num} conversation histories of solving {best_conversations_num} tasks using List A.
-History: -{best_conversations_history} - -{statistic_informations} - -According to the information I provide, please take one of four actions to manipulate list B using the functions you know. -Instead of returning TERMINATE directly or taking no action, you should try your best to optimize the function list. Only take no action if you really think the current list is optimal, as more actions will harm performance in future tasks. -Even adding a general function that can substitute the assistant’s repeated suggestions of Python code with the same functionality could also be helpful. -""" - - -def execute_func(name, packages, code, **args): - """ - The wrapper for generated functions. - """ - pip_install = ( - f"""print("Installing package: {packages}")\nsubprocess.run(["pip", "-qq", "install", "{packages}"])""" - if packages - else "" - ) - str = f""" -import subprocess -{pip_install} -print("Result of {name} function execution:") -{code} -args={args} -result={name}(**args) -if result is not None: print(result) -""" - print(f"execute_code:\n{str}") - result = execute_code(str, use_docker="shaokun529/evoagent:v1") - if result[0] != 0: - raise Exception("Error in executing function:" + result[1]) - print(f"Result: {result[1]}") - return result[1] - - -class AgentOptimizer: - """ - Base class for optimizing AutoGen agents. Specifically, it is used to optimize the functions used in the agent. - More information could be found in the following paper: https://arxiv.org/abs/2402.11359. - """ - - def __init__( - self, - max_actions_per_step: int, - llm_config: dict, - optimizer_model: Optional[str] = "gpt-4-1106-preview", - ): - """ - (These APIs are experimental and may change in the future.) - Args: - max_actions_per_step (int): the maximum number of actions that the optimizer can take in one step. - llm_config (dict): llm inference configuration. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. - When using OpenAI or Azure OpenAI endpoints, please specify a non-empty 'model' either in `llm_config` or in each config of 'config_list' in `llm_config`. - optimizer_model: the model used for the optimizer. - """ - self.max_actions_per_step = max_actions_per_step - self._max_trials = 3 - self.optimizer_model = optimizer_model - - self._trial_conversations_history = [] - self._trial_conversations_performance = [] - self._trial_functions = [] - - self._best_conversations_history = [] - self._best_conversations_performance = [] - self._best_functions = [] - - self._failure_functions_performance = [] - self._best_performance = -1 - - assert isinstance(llm_config, dict), "llm_config must be a dict" - llm_config = copy.deepcopy(llm_config) - self.llm_config = llm_config - if self.llm_config in [{}, {"config_list": []}, {"config_list": [{"model": ""}]}]: - raise ValueError( - "When using OpenAI or Azure OpenAI endpoints, specify a non-empty 'model' either in 'llm_config' or in each config of 'config_list'." - ) - self.llm_config["config_list"] = autogen.filter_config( - llm_config["config_list"], {"model": [self.optimizer_model]} - ) - self._client = autogen.OpenAIWrapper(**self.llm_config) - - def record_one_conversation(self, conversation_history: List[Dict], is_satisfied: bool = None): - """ - record one conversation history. - Args: - conversation_history (List[Dict]): the chat messages of the conversation. - is_satisfied (bool): whether the user is satisfied with the solution. If it is none, the user will be asked to input the satisfaction. 
- """ - if is_satisfied is None: - reply = input( - "Please provide whether the user is satisfied with the solution. 1 represents satisfied. 0 represents not satisfied. Press enter to submit. \n" - ) - assert reply in [ - "0", - "1", - ], "The input is invalid. Please input 1 or 0. 1 represents satisfied. 0 represents not satisfied." - is_satisfied = True if reply == "1" else False - self._trial_conversations_history.append( - {"Conversation {i}".format(i=len(self._trial_conversations_history)): conversation_history} - ) - self._trial_conversations_performance.append( - {"Conversation {i}".format(i=len(self._trial_conversations_performance)): 1 if is_satisfied else 0} - ) - - def step(self): - """ - One step of training. It will return register_for_llm and register_for_executor at each iteration, - which are subsequently utilized to update the assistant and executor agents, respectively. - See example: https://github.com/microsoft/autogen/blob/main/notebook/agentchat_agentoptimizer.ipynb - """ - performance = sum(sum(d.values()) for d in self._trial_conversations_performance) / len( - self._trial_conversations_performance - ) - - if performance < self._best_performance: - self._failure_functions_performance.append({"functions": self._trial_functions, "performance": performance}) - self._failure_functions_performance = sorted( - self._failure_functions_performance, key=lambda x: x["performance"] - ) - else: - self._failure_functions_performance = [] - self._best_performance = performance - self._best_functions = copy.deepcopy(self._trial_functions) - self._best_conversations_history = copy.deepcopy(self._trial_conversations_history) - self._best_conversations_performance = copy.deepcopy(self._trial_conversations_performance) - self._trial_conversations_history = [] - self._trial_conversations_performance = [] - - best_functions = copy.deepcopy(self._best_functions) - incumbent_functions = copy.deepcopy(self._best_functions) - failure_experience_prompt, statistic_prompt = self._construct_intermediate_prompt() - - for action_index in range(self.max_actions_per_step): - prompt = OPT_PROMPT.format( - best_conversations_history=self._best_conversations_history, - best_conversations_num=len(self._best_conversations_history), - actions_num=action_index, - best_functions=best_functions, - incumbent_functions=incumbent_functions, - accumulated_experience=failure_experience_prompt, - statistic_informations=statistic_prompt, - ) - messages = [{"role": "user", "content": prompt}] - for _ in range(self._max_trials): - response = self._client.create( - messages=messages, tools=[ADD_FUNC, REVISE_FUNC, REMOVE_FUNC], tool_choice="auto" - ) - actions = response.choices[0].message.tool_calls - if self._validate_actions(actions, incumbent_functions): - break - if actions is not None and self._validate_actions(actions, incumbent_functions): - incumbent_functions = self._update_function_call(incumbent_functions, actions) - - remove_functions = list( - set([key for dictionary in self._trial_functions for key in dictionary.keys()]) - - set([key for dictionary in incumbent_functions for key in dictionary.keys()]) - ) - - register_for_llm = [] - register_for_exector = {} - for name in remove_functions: - register_for_llm.append({"func_sig": {"name": name}, "is_remove": True}) - register_for_exector.update({name: None}) - for func in incumbent_functions: - register_for_llm.append( - { - "func_sig": { - "name": func.get("name"), - "description": func.get("description"), - "parameters": {"type": "object", "properties": 
func.get("arguments")}, - }, - "is_remove": False, - } - ) - register_for_exector.update( - { - func.get("name"): lambda **args: execute_func( - func.get("name"), func.get("packages"), func.get("code"), **args - ) - } - ) - - self._trial_functions = incumbent_functions - return register_for_llm, register_for_exector - - def reset_optimizer(self): - """ - reset the optimizer. - """ - - self._trial_conversations_history = [] - self._trial_conversations_performance = [] - self._trial_functions = [] - - self._best_conversations_history = [] - self._best_conversations_performance = [] - self._best_functions = [] - - self._best_performance = -1 - self._failure_functions_performance = [] - - def _update_function_call(self, incumbent_functions, actions): - """ - update function call. - """ - - formated_actions = [] - for action in actions: - func = json.loads(action.function.arguments.strip('"')) - func["action_name"] = action.function.name - - if func.get("action_name") == "remove_function": - item = { - "action_name": func.get("action_name"), - "name": func.get("name"), - } - else: - item = { - "action_name": func.get("action_name"), - "name": func.get("name"), - "description": func.get("description"), - "arguments": json.loads(func.get("arguments").strip('"')), - "packages": func.get("packages"), - "code": func.get("code"), - } - formated_actions.append(item) - actions = formated_actions - - for action in actions: - name, description, arguments, packages, code, action_name = ( - action.get("name"), - action.get("description"), - action.get("arguments"), - action.get("packages"), - action.get("code"), - action.get("action_name"), - ) - if action_name == "remove_function": - incumbent_functions = [item for item in incumbent_functions if item["name"] != name] - else: - incumbent_functions = [item for item in incumbent_functions if item["name"] != name] - incumbent_functions.append( - { - "name": name, - "description": description, - "arguments": arguments, - "packages": packages, - "code": code, - } - ) - - return incumbent_functions - - def _construct_intermediate_prompt(self): - """ - construct intermediate prompts. - """ - if len(self._failure_functions_performance) != 0: - failure_experience_prompt = "We also provide more examples for different functions and their corresponding performance (0-100).\n The following function signatures are arranged in are arranged in ascending order based on their performance, where higher performance indicate better quality." - failure_experience_prompt += "\n" - for item in self._failure_functions_performance: - failure_experience_prompt += "Function: \n" + str(item["functions"]) + "\n" - failure_experience_prompt += "Performance: \n" + str(item["performance"]) + "\n" - else: - failure_experience_prompt = "\n" - - if len(self._best_conversations_performance) != 0: - statistic_prompt = "The following table shows the statistical information for solving each task in each conversation and indicates, whether the result is satisfied by the users. 1 represents satisfied. 0 represents not satisfied." - statistic_prompt += "\n" - for item in self._best_conversations_performance: - statistic_prompt += str(item) + "\n" - else: - statistic_prompt = "\n" - - return failure_experience_prompt, statistic_prompt - - def _validate_actions(self, actions, incumbent_functions): - """ - validate whether the proposed actions are feasible. 
- """ - if actions is None: - return True - else: - # val json format - for action in actions: - function_args = action.function.arguments - try: - function_args = json.loads(function_args.strip('"')) - if "arguments" in function_args.keys(): - json.loads(function_args.get("arguments").strip('"')) - except Exception as e: - print("JSON is invalid:", e) - return False - # val syntax - for action in actions: - if action.function.name != "remove_function": - function_args = json.loads(action.function.arguments.strip('"')) - code = function_args.get("code") - try: - compile(code, "", "exec") - print("successfully compiled") - except Exception as e: - print("Syntax is invalid:", e) - return False - for action in actions: - action_name = action.function.name - if action_name == "remove_function": - function_args = json.loads(action.function.arguments.strip('"')) - if function_args.get("name") not in [item["name"] for item in incumbent_functions]: - print("The function you want to remove does not exist.") - return False - return True diff --git a/autogen/agentchat/contrib/capabilities/agent_capability.py b/autogen/agentchat/contrib/capabilities/agent_capability.py deleted file mode 100644 index cb2a21f58885..000000000000 --- a/autogen/agentchat/contrib/capabilities/agent_capability.py +++ /dev/null @@ -1,15 +0,0 @@ -from autogen.agentchat.assistant_agent import ConversableAgent - - -class AgentCapability: - """Base class for composable capabilities that can be added to an agent.""" - - def __init__(self): - pass - - def add_to_agent(self, agent: ConversableAgent): - """ - Adds a particular capability to the given agent. Must be implemented by the capability subclass. - An implementation will typically call agent.register_hook() one or more times. See teachability.py as an example. - """ - raise NotImplementedError diff --git a/autogen/agentchat/contrib/capabilities/generate_images.py b/autogen/agentchat/contrib/capabilities/generate_images.py deleted file mode 100644 index e4a8f1195c27..000000000000 --- a/autogen/agentchat/contrib/capabilities/generate_images.py +++ /dev/null @@ -1,291 +0,0 @@ -import re -from typing import Any, Dict, List, Literal, Optional, Protocol, Tuple, Union - -from openai import OpenAI -from PIL.Image import Image - -from autogen import Agent, ConversableAgent, code_utils -from autogen.agentchat.contrib import img_utils -from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability -from autogen.agentchat.contrib.text_analyzer_agent import TextAnalyzerAgent -from autogen.cache import AbstractCache - -SYSTEM_MESSAGE = "You've been given the special ability to generate images." -DESCRIPTION_MESSAGE = "This agent has the ability to generate images." - -PROMPT_INSTRUCTIONS = """In detail, please summarize the provided prompt to generate the image described in the TEXT. -DO NOT include any advice. RESPOND like the following example: -EXAMPLE: Blue background, 3D shapes, ... -""" - - -class ImageGenerator(Protocol): - """This class defines an interface for image generators. - - Concrete implementations of this protocol must provide a `generate_image` method that takes a string prompt as - input and returns a PIL Image object. - - NOTE: Current implementation does not allow you to edit a previously existing image. - """ - - def generate_image(self, prompt: str) -> Image: - """Generates an image based on the provided prompt. - - Args: - prompt: A string describing the desired image. - - Returns: - A PIL Image object representing the generated image. 
- - Raises: - ValueError: If the image generation fails. - """ - ... - - def cache_key(self, prompt: str) -> str: - """Generates a unique cache key for the given prompt. - - This key can be used to store and retrieve generated images based on the prompt. - - Args: - prompt: A string describing the desired image. - - Returns: - A unique string that can be used as a cache key. - """ - ... - - -class DalleImageGenerator: - """Generates images using OpenAI's DALL-E models. - - This class provides a convenient interface for generating images based on textual prompts using OpenAI's DALL-E - models. It allows you to specify the DALL-E model, resolution, quality, and the number of images to generate. - - Note: Current implementation does not allow you to edit a previously existing image. - """ - - def __init__( - self, - llm_config: Dict, - resolution: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] = "1024x1024", - quality: Literal["standard", "hd"] = "standard", - num_images: int = 1, - ): - """ - Args: - llm_config (dict): llm config, must contain a valid dalle model and OpenAI API key in config_list. - resolution (str): The resolution of the image you want to generate. Must be one of "256x256", "512x512", "1024x1024", "1792x1024", "1024x1792". - quality (str): The quality of the image you want to generate. Must be one of "standard", "hd". - num_images (int): The number of images to generate. - """ - config_list = llm_config["config_list"] - _validate_dalle_model(config_list[0]["model"]) - _validate_resolution_format(resolution) - - self._model = config_list[0]["model"] - self._resolution = resolution - self._quality = quality - self._num_images = num_images - self._dalle_client = OpenAI(api_key=config_list[0]["api_key"]) - - def generate_image(self, prompt: str) -> Image: - response = self._dalle_client.images.generate( - model=self._model, - prompt=prompt, - size=self._resolution, - quality=self._quality, - n=self._num_images, - ) - - image_url = response.data[0].url - if image_url is None: - raise ValueError("Failed to generate image.") - - return img_utils.get_pil_image(image_url) - - def cache_key(self, prompt: str) -> str: - keys = (prompt, self._model, self._resolution, self._quality, self._num_images) - return ",".join([str(k) for k in keys]) - - -class ImageGeneration(AgentCapability): - """This capability allows a ConversableAgent to generate images based on the message received from other Agents. - - 1. Utilizes a TextAnalyzerAgent to analyze incoming messages to identify requests for image generation and - extract relevant details. - 2. Leverages the provided ImageGenerator (e.g., DalleImageGenerator) to create the image. - 3. Optionally caches generated images for faster retrieval in future conversations. - - NOTE: This capability increases the token usage of the agent, as it uses TextAnalyzerAgent to analyze every - message received by the agent. - - Example: - ```python - import autogen - from autogen.agentchat.contrib.capabilities.image_generation import ImageGeneration - - # Assuming you have llm configs configured for the LLMs you want to use and Dalle. 
- # Create the agent - agent = autogen.ConversableAgent( - name="dalle", llm_config={...}, max_consecutive_auto_reply=3, human_input_mode="NEVER" - ) - - # Create an ImageGenerator with desired settings - dalle_gen = generate_images.DalleImageGenerator(llm_config={...}) - - # Add the ImageGeneration capability to the agent - agent.add_capability(ImageGeneration(image_generator=dalle_gen)) - ``` - """ - - def __init__( - self, - image_generator: ImageGenerator, - cache: Optional[AbstractCache] = None, - text_analyzer_llm_config: Optional[Dict] = None, - text_analyzer_instructions: str = PROMPT_INSTRUCTIONS, - verbosity: int = 0, - register_reply_position: int = 2, - ): - """ - Args: - image_generator (ImageGenerator): The image generator you would like to use to generate images. - cache (None or AbstractCache): The cache client to use to store and retrieve generated images. If None, - no caching will be used. - text_analyzer_llm_config (Dict or None): The LLM config for the text analyzer. If None, the LLM config will - be retrieved from the agent you're adding the ability to. - text_analyzer_instructions (str): Instructions provided to the TextAnalyzerAgent used to analyze - incoming messages and extract the prompt for image generation. The default instructions focus on - summarizing the prompt. You can customize the instructions to achieve more granular control over prompt - extraction. - Example: 'Extract specific details from the message, like desired objects, styles, or backgrounds.' - verbosity (int): The verbosity level. Defaults to 0 and must be greater than or equal to 0. The text - analyzer llm calls will be silent if verbosity is less than 2. - register_reply_position (int): The position of the reply function in the agent's list of reply functions. - This capability registers a new reply function to handle messages with image generation requests. - Defaults to 2 to place it after the check termination and human reply for a ConversableAgent. - """ - self._image_generator = image_generator - self._cache = cache - self._text_analyzer_llm_config = text_analyzer_llm_config - self._text_analyzer_instructions = text_analyzer_instructions - self._verbosity = verbosity - self._register_reply_position = register_reply_position - - self._agent: Optional[ConversableAgent] = None - self._text_analyzer: Optional[TextAnalyzerAgent] = None - - def add_to_agent(self, agent: ConversableAgent): - """Adds the Image Generation capability to the specified ConversableAgent. - - This function performs the following modifications to the agent: - - 1. Registers a reply function: A new reply function is registered with the agent to handle messages that - potentially request image generation. This function analyzes the message and triggers image generation if - necessary. - 2. Creates an Agent (TextAnalyzerAgent): This is used to analyze messages for image generation requirements. - 3. Updates System Message: The agent's system message is updated to include a message indicating the - capability to generate images has been added. - 4. Updates Description: The agent's description is updated to reflect the addition of the Image Generation - capability. This might be helpful in certain use cases, like group chats. - - Args: - agent (ConversableAgent): The ConversableAgent to add the capability to. 
- """ - self._agent = agent - - agent.register_reply([Agent, None], self._image_gen_reply, position=self._register_reply_position) - - self._text_analyzer_llm_config = self._text_analyzer_llm_config or agent.llm_config - self._text_analyzer = TextAnalyzerAgent(llm_config=self._text_analyzer_llm_config) - - agent.update_system_message(agent.system_message + "\n" + SYSTEM_MESSAGE) - agent.description += "\n" + DESCRIPTION_MESSAGE - - def _image_gen_reply( - self, - recipient: ConversableAgent, - messages: Optional[List[Dict]], - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - if messages is None: - return False, None - - last_message = code_utils.content_str(messages[-1]["content"]) - - if not last_message: - return False, None - - if self._should_generate_image(last_message): - prompt = self._extract_prompt(last_message) - - image = self._cache_get(prompt) - if image is None: - image = self._image_generator.generate_image(prompt) - self._cache_set(prompt, image) - - return True, self._generate_content_message(prompt, image) - - else: - return False, None - - def _should_generate_image(self, message: str) -> bool: - assert self._text_analyzer is not None - - instructions = """ - Does any part of the TEXT ask the agent to generate an image? - The TEXT must explicitly mention that the image must be generated. - Answer with just one word, yes or no. - """ - analysis = self._text_analyzer.analyze_text(message, instructions) - - return "yes" in self._extract_analysis(analysis).lower() - - def _extract_prompt(self, last_message) -> str: - assert self._text_analyzer is not None - - analysis = self._text_analyzer.analyze_text(last_message, self._text_analyzer_instructions) - return self._extract_analysis(analysis) - - def _cache_get(self, prompt: str) -> Optional[Image]: - if self._cache: - key = self._image_generator.cache_key(prompt) - cached_value = self._cache.get(key) - - if cached_value: - return img_utils.get_pil_image(cached_value) - - def _cache_set(self, prompt: str, image: Image): - if self._cache: - key = self._image_generator.cache_key(prompt) - self._cache.set(key, img_utils.pil_to_data_uri(image)) - - def _extract_analysis(self, analysis: Union[str, Dict, None]) -> str: - if isinstance(analysis, Dict): - return code_utils.content_str(analysis["content"]) - else: - return code_utils.content_str(analysis) - - def _generate_content_message(self, prompt: str, image: Image) -> Dict[str, Any]: - return { - "content": [ - {"type": "text", "text": f"I generated an image with the prompt: {prompt}"}, - {"type": "image_url", "image_url": {"url": img_utils.pil_to_data_uri(image)}}, - ] - } - - -### Helpers -def _validate_resolution_format(resolution: str): - """Checks if a string is in a valid resolution format (e.g., "1024x768").""" - pattern = r"^\d+x\d+$" # Matches a pattern of digits, "x", and digits - matched_resolution = re.match(pattern, resolution) - if matched_resolution is None: - raise ValueError(f"Invalid resolution format: {resolution}") - - -def _validate_dalle_model(model: str): - if model not in ["dall-e-3", "dall-e-2"]: - raise ValueError(f"Invalid DALL-E model: {model}. 
Must be 'dall-e-3' or 'dall-e-2'") diff --git a/autogen/agentchat/contrib/capabilities/teachability.py b/autogen/agentchat/contrib/capabilities/teachability.py deleted file mode 100644 index 596e449ce341..000000000000 --- a/autogen/agentchat/contrib/capabilities/teachability.py +++ /dev/null @@ -1,400 +0,0 @@ -import os -import pickle -from typing import Dict, Optional, Union - -import chromadb -from chromadb.config import Settings - -from autogen.agentchat.assistant_agent import ConversableAgent -from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability -from autogen.agentchat.contrib.text_analyzer_agent import TextAnalyzerAgent - -from ....formatting_utils import colored - - -class Teachability(AgentCapability): - """ - Teachability uses a vector database to give an agent the ability to remember user teachings, - where the user is any caller (human or not) sending messages to the teachable agent. - Teachability is designed to be composable with other agent capabilities. - To make any conversable agent teachable, instantiate both the agent and the Teachability class, - then pass the agent to teachability.add_to_agent(agent). - Note that teachable agents in a group chat must be given unique path_to_db_dir values. - - When adding Teachability to an agent, the following are modified: - - The agent's system message is appended with a note about the agent's new ability. - - A hook is added to the agent's `process_last_received_message` hookable method, - and the hook potentially modifies the last of the received messages to include earlier teachings related to the message. - Added teachings do not propagate into the stored message history. - If new user teachings are detected, they are added to new memos in the vector database. - """ - - def __init__( - self, - verbosity: Optional[int] = 0, - reset_db: Optional[bool] = False, - path_to_db_dir: Optional[str] = "./tmp/teachable_agent_db", - recall_threshold: Optional[float] = 1.5, - max_num_retrievals: Optional[int] = 10, - llm_config: Optional[Union[Dict, bool]] = None, - ): - """ - Args: - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. - reset_db (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where this particular agent's DB is stored. Default "./tmp/teachable_agent_db" - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. - llm_config (dict or False): llm inference configuration passed to TextAnalyzerAgent. - If None, TextAnalyzerAgent uses llm_config from the teachable agent. - """ - self.verbosity = verbosity - self.path_to_db_dir = path_to_db_dir - self.recall_threshold = recall_threshold - self.max_num_retrievals = max_num_retrievals - self.llm_config = llm_config - - self.analyzer = None - self.teachable_agent = None - - # Create the memo store. - self.memo_store = MemoStore(self.verbosity, reset_db, self.path_to_db_dir) - - def add_to_agent(self, agent: ConversableAgent): - """Adds teachability to the given agent.""" - self.teachable_agent = agent - - # Register a hook for processing the last message. 
- agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message) - - # Was an llm_config passed to the constructor? - if self.llm_config is None: - # No. Use the agent's llm_config. - self.llm_config = agent.llm_config - assert self.llm_config, "Teachability requires a valid llm_config." - - # Create the analyzer agent. - self.analyzer = TextAnalyzerAgent(llm_config=self.llm_config) - - # Append extra info to the system message. - agent.update_system_message( - agent.system_message - + "\nYou've been given the special ability to remember user teachings from prior conversations." - ) - - def prepopulate_db(self): - """Adds a few arbitrary memos to the DB.""" - self.memo_store.prepopulate() - - def process_last_received_message(self, text: Union[Dict, str]): - """ - Appends any relevant memos to the message text, and stores any apparent teachings in new memos. - Uses TextAnalyzerAgent to make decisions about memo storage and retrieval. - """ - - # Try to retrieve relevant memos from the DB. - expanded_text = text - if self.memo_store.last_memo_id > 0: - expanded_text = self._consider_memo_retrieval(text) - - # Try to store any user teachings in new memos to be used in the future. - self._consider_memo_storage(text) - - # Return the (possibly) expanded message text. - return expanded_text - - def _consider_memo_storage(self, comment: Union[Dict, str]): - """Decides whether to store something from one user comment in the DB.""" - memo_added = False - - # Check for a problem-solution pair. - response = self._analyze( - comment, - "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.", - ) - if "yes" in response.lower(): - # Can we extract advice? - advice = self._analyze( - comment, - "Briefly copy any advice from the TEXT that may be useful for a similar but different task in the future. But if no advice is present, just respond with 'none'.", - ) - if "none" not in advice.lower(): - # Yes. Extract the task. - task = self._analyze( - comment, - "Briefly copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.", - ) - # Generalize the task. - general_task = self._analyze( - task, - "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", - ) - # Add the task-advice (problem-solution) pair to the vector DB. - if self.verbosity >= 1: - print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", "light_yellow")) - self.memo_store.add_input_output_pair(general_task, advice) - memo_added = True - - # Check for information to be learned. - response = self._analyze( - comment, - "Does the TEXT contain information that could be committed to memory? Answer with just one word, yes or no.", - ) - if "yes" in response.lower(): - # Yes. What question would this information answer? - question = self._analyze( - comment, - "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.", - ) - # Extract the information. - answer = self._analyze( - comment, "Copy the information from the TEXT that should be committed to memory. Add no explanation." - ) - # Add the question-answer pair to the vector DB. - if self.verbosity >= 1: - print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", "light_yellow")) - self.memo_store.add_input_output_pair(question, answer) - memo_added = True - - # Were any memos added? 
- if memo_added: - # Yes. Save them to disk. - self.memo_store._save_memos() - - def _consider_memo_retrieval(self, comment: Union[Dict, str]): - """Decides whether to retrieve memos from the DB, and add them to the chat context.""" - - # First, use the comment directly as the lookup key. - if self.verbosity >= 1: - print(colored("\nLOOK FOR RELEVANT MEMOS, AS QUESTION-ANSWER PAIRS", "light_yellow")) - memo_list = self._retrieve_relevant_memos(comment) - - # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key. - response = self._analyze( - comment, - "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.", - ) - if "yes" in response.lower(): - if self.verbosity >= 1: - print(colored("\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS", "light_yellow")) - # Extract the task. - task = self._analyze( - comment, "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice." - ) - # Generalize the task. - general_task = self._analyze( - task, - "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", - ) - # Append any relevant memos. - memo_list.extend(self._retrieve_relevant_memos(general_task)) - - # De-duplicate the memo list. - memo_list = list(set(memo_list)) - - # Append the memos to the text of the last message. - return comment + self._concatenate_memo_texts(memo_list) - - def _retrieve_relevant_memos(self, input_text: str) -> list: - """Returns semantically related memos from the DB.""" - memo_list = self.memo_store.get_related_memos( - input_text, n_results=self.max_num_retrievals, threshold=self.recall_threshold - ) - - if self.verbosity >= 1: - # Was anything retrieved? - if len(memo_list) == 0: - # No. Look at the closest memo. - print(colored("\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD:", "light_yellow")) - self.memo_store.get_nearest_memo(input_text) - print() # Print a blank line. The memo details were printed by get_nearest_memo(). - - # Create a list of just the memo output_text strings. - memo_list = [memo[1] for memo in memo_list] - return memo_list - - def _concatenate_memo_texts(self, memo_list: list) -> str: - """Concatenates the memo texts into a single string for inclusion in the chat context.""" - memo_texts = "" - if len(memo_list) > 0: - info = "\n# Memories that might help\n" - for memo in memo_list: - info = info + "- " + memo + "\n" - if self.verbosity >= 1: - print(colored("\nMEMOS APPENDED TO LAST MESSAGE...\n" + info + "\n", "light_yellow")) - memo_texts = memo_texts + "\n" + info - return memo_texts - - def _analyze(self, text_to_analyze: Union[Dict, str], analysis_instructions: Union[Dict, str]): - """Asks TextAnalyzerAgent to analyze the given text according to specific instructions.""" - self.analyzer.reset() # Clear the analyzer's list of messages. - self.teachable_agent.send( - recipient=self.analyzer, message=text_to_analyze, request_reply=False, silent=(self.verbosity < 2) - ) # Put the message in the analyzer's list. - self.teachable_agent.send( - recipient=self.analyzer, message=analysis_instructions, request_reply=True, silent=(self.verbosity < 2) - ) # Request the reply. - return self.teachable_agent.last_message(self.analyzer)["content"] - - -class MemoStore: - """ - Provides memory storage and retrieval for a teachable agent, using a vector database. 
- Each DB entry (called a memo) is a pair of strings: an input text and an output text. - The input text might be a question, or a task to perform. - The output text might be an answer to the question, or advice on how to perform the task. - Vector embeddings are currently supplied by Chroma's default Sentence Transformers. - """ - - def __init__( - self, - verbosity: Optional[int] = 0, - reset: Optional[bool] = False, - path_to_db_dir: Optional[str] = "./tmp/teachable_agent_db", - ): - """ - Args: - - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. - - reset (Optional, bool): True to clear the DB before starting. Default False. - - path_to_db_dir (Optional, str): path to the directory where the DB is stored. - """ - self.verbosity = verbosity - self.path_to_db_dir = path_to_db_dir - - # Load or create the vector DB on disk. - settings = Settings( - anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir - ) - self.db_client = chromadb.Client(settings) - self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. - - # Load or create the associated memo dict on disk. - self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") - self.uid_text_dict = {} - self.last_memo_id = 0 - if (not reset) and os.path.exists(self.path_to_dict): - print(colored("\nLOADING MEMORY FROM DISK", "light_green")) - print(colored(" Location = {}".format(self.path_to_dict), "light_green")) - with open(self.path_to_dict, "rb") as f: - self.uid_text_dict = pickle.load(f) - self.last_memo_id = len(self.uid_text_dict) - if self.verbosity >= 3: - self.list_memos() - - # Clear the DB if requested. - if reset: - self.reset_db() - - def list_memos(self): - """Prints the contents of MemoStore.""" - print(colored("LIST OF MEMOS", "light_green")) - for uid, text in self.uid_text_dict.items(): - input_text, output_text = text - print( - colored( - " ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text), - "light_green", - ) - ) - - def _save_memos(self): - """Saves self.uid_text_dict to disk.""" - with open(self.path_to_dict, "wb") as file: - pickle.dump(self.uid_text_dict, file) - - def reset_db(self): - """Forces immediate deletion of the DB's contents, in memory and on disk.""" - print(colored("\nCLEARING MEMORY", "light_green")) - self.db_client.delete_collection("memos") - self.vec_db = self.db_client.create_collection("memos") - self.uid_text_dict = {} - self._save_memos() - - def add_input_output_pair(self, input_text: str, output_text: str): - """Adds an input-output pair to the vector DB.""" - self.last_memo_id += 1 - self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) - self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text - if self.verbosity >= 1: - print( - colored( - "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( - self.last_memo_id, input_text, output_text - ), - "light_yellow", - ) - ) - if self.verbosity >= 3: - self.list_memos() - - def get_nearest_memo(self, query_text: str): - """Retrieves the nearest memo to the given query text.""" - results = self.vec_db.query(query_texts=[query_text], n_results=1) - uid, input_text, distance = results["ids"][0][0], results["documents"][0][0], results["distances"][0][0] - input_text_2, output_text = self.uid_text_dict[uid] - assert input_text == input_text_2 - if self.verbosity >= 1: - print( - colored( - "\nINPUT-OUTPUT 
PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance - ), - "light_yellow", - ) - ) - return input_text, output_text, distance - - def get_related_memos(self, query_text: str, n_results: int, threshold: Union[int, float]): - """Retrieves memos that are related to the given query text within the specified distance threshold.""" - if n_results > len(self.uid_text_dict): - n_results = len(self.uid_text_dict) - results = self.vec_db.query(query_texts=[query_text], n_results=n_results) - memos = [] - num_results = len(results["ids"][0]) - for i in range(num_results): - uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i] - if distance < threshold: - input_text_2, output_text = self.uid_text_dict[uid] - assert input_text == input_text_2 - if self.verbosity >= 1: - print( - colored( - "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance - ), - "light_yellow", - ) - ) - memos.append((input_text, output_text, distance)) - return memos - - def prepopulate(self): - """Adds a few arbitrary examples to the vector DB, just to make retrieval less trivial.""" - if self.verbosity >= 1: - print(colored("\nPREPOPULATING MEMORY", "light_green")) - examples = [] - examples.append({"text": "When I say papers I mean research papers, which are typically pdfs.", "label": "yes"}) - examples.append({"text": "Please verify that each paper you listed actually uses langchain.", "label": "no"}) - examples.append({"text": "Tell gpt the output should still be latex code.", "label": "no"}) - examples.append({"text": "Hint: convert pdfs to text and then answer questions based on them.", "label": "yes"}) - examples.append( - {"text": "To create a good PPT, include enough content to make it interesting.", "label": "yes"} - ) - examples.append( - { - "text": "No, for this case the columns should be aspects and the rows should be frameworks.", - "label": "no", - } - ) - examples.append({"text": "When writing code, remember to include any libraries that are used.", "label": "yes"}) - examples.append({"text": "Please summarize the papers by Eric Horvitz on bounded rationality.", "label": "no"}) - examples.append({"text": "Compare the h-index of Daniel Weld and Oren Etzioni.", "label": "no"}) - examples.append( - { - "text": "Double check to be sure that the columns in a table correspond to what was asked for.", - "label": "yes", - } - ) - for example in examples: - self.add_input_output_pair(example["text"], example["label"]) - self._save_memos() diff --git a/autogen/agentchat/contrib/capabilities/text_compressors.py b/autogen/agentchat/contrib/capabilities/text_compressors.py deleted file mode 100644 index 78554bdc9357..000000000000 --- a/autogen/agentchat/contrib/capabilities/text_compressors.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Any, Dict, Optional, Protocol - -IMPORT_ERROR: Optional[Exception] = None -try: - import llmlingua -except ImportError: - IMPORT_ERROR = ImportError( - "LLMLingua is not installed. 
Please install it with `pip install pyautogen[long-context]`" - ) - PromptCompressor = object -else: - from llmlingua import PromptCompressor - - -class TextCompressor(Protocol): - """Defines a protocol for text compression to optimize agent interactions.""" - - def compress_text(self, text: str, **compression_params) -> Dict[str, Any]: - """This method takes a string as input and returns a dictionary containing the compressed text and other - relevant information. The compressed text should be stored under the 'compressed_text' key in the dictionary. - To calculate the number of saved tokens, the dictionary should include 'origin_tokens' and 'compressed_tokens' keys. - """ - ... - - -class LLMLingua: - """Compresses text messages using LLMLingua for improved efficiency in processing and response generation. - - NOTE: The effectiveness of compression and the resultant token savings can vary based on the content of the messages - and the specific configurations used for the PromptCompressor. - """ - - def __init__( - self, - prompt_compressor_kwargs: Dict = dict( - model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank", - use_llmlingua2=True, - device_map="cpu", - ), - structured_compression: bool = False, - ) -> None: - """ - Args: - prompt_compressor_kwargs (dict): A dictionary of keyword arguments for the PromptCompressor. Defaults to a - dictionary with model_name set to "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank", - use_llmlingua2 set to True, and device_map set to "cpu". - structured_compression (bool): A flag indicating whether to use structured compression. If True, the - structured_compress_prompt method of the PromptCompressor is used. Otherwise, the compress_prompt method - is used. Defaults to False. - dictionary. - - Raises: - ImportError: If the llmlingua library is not installed. - """ - if IMPORT_ERROR: - raise IMPORT_ERROR - - self._prompt_compressor = PromptCompressor(**prompt_compressor_kwargs) - - assert isinstance(self._prompt_compressor, llmlingua.PromptCompressor) - self._compression_method = ( - self._prompt_compressor.structured_compress_prompt - if structured_compression - else self._prompt_compressor.compress_prompt - ) - - def compress_text(self, text: str, **compression_params) -> Dict[str, Any]: - return self._compression_method([text], **compression_params) diff --git a/autogen/agentchat/contrib/capabilities/transform_messages.py b/autogen/agentchat/contrib/capabilities/transform_messages.py deleted file mode 100644 index 1ce219bdadfa..000000000000 --- a/autogen/agentchat/contrib/capabilities/transform_messages.py +++ /dev/null @@ -1,86 +0,0 @@ -import copy -from typing import Dict, List - -from ....formatting_utils import colored -from ...conversable_agent import ConversableAgent -from .transforms import MessageTransform - - -class TransformMessages: - """Agent capability for transforming messages before reply generation. - - This capability allows you to apply a series of message transformations to - a ConversableAgent's incoming messages before they are processed for response - generation. This is useful for tasks such as: - - - Limiting the number of messages considered for context. - - Truncating messages to meet token limits. - - Filtering sensitive information. - - Customizing message formatting. - - To use `TransformMessages`: - - 1. Create message transformations (e.g., `MessageHistoryLimiter`, `MessageTokenLimiter`). - 2. Instantiate `TransformMessages` with a list of these transformations. - 3. 
Add the `TransformMessages` instance to your `ConversableAgent` using `add_to_agent`. - - NOTE: Order of message transformations is important. You could get different results based on - the order of transformations. - - Example: - ```python - from agentchat import ConversableAgent - from agentchat.contrib.capabilities import TransformMessages, MessageHistoryLimiter, MessageTokenLimiter - - max_messages = MessageHistoryLimiter(max_messages=2) - truncate_messages = MessageTokenLimiter(max_tokens=500) - transform_messages = TransformMessages(transforms=[max_messages, truncate_messages]) - - agent = ConversableAgent(...) - transform_messages.add_to_agent(agent) - ``` - """ - - def __init__(self, *, transforms: List[MessageTransform] = [], verbose: bool = True): - """ - Args: - transforms: A list of message transformations to apply. - verbose: Whether to print logs of each transformation or not. - """ - self._transforms = transforms - self._verbose = verbose - - def add_to_agent(self, agent: ConversableAgent): - """Adds the message transformations capability to the specified ConversableAgent. - - This function performs the following modifications to the agent: - - 1. Registers a hook that automatically transforms all messages before they are processed for - response generation. - """ - agent.register_hook(hookable_method="process_all_messages_before_reply", hook=self._transform_messages) - - def _transform_messages(self, messages: List[Dict]) -> List[Dict]: - post_transform_messages = copy.deepcopy(messages) - system_message = None - - if messages[0]["role"] == "system": - system_message = copy.deepcopy(messages[0]) - post_transform_messages.pop(0) - - for transform in self._transforms: - # deepcopy in case pre_transform_messages will later be used for logs printing - pre_transform_messages = ( - copy.deepcopy(post_transform_messages) if self._verbose else post_transform_messages - ) - post_transform_messages = transform.apply_transform(pre_transform_messages) - - if self._verbose: - logs_str, had_effect = transform.get_logs(pre_transform_messages, post_transform_messages) - if had_effect: - print(colored(logs_str, "yellow")) - - if system_message: - post_transform_messages.insert(0, system_message) - - return post_transform_messages diff --git a/autogen/agentchat/contrib/capabilities/transforms.py b/autogen/agentchat/contrib/capabilities/transforms.py deleted file mode 100644 index d9ad365b91b3..000000000000 --- a/autogen/agentchat/contrib/capabilities/transforms.py +++ /dev/null @@ -1,539 +0,0 @@ -import copy -import sys -from typing import Any, Dict, List, Optional, Protocol, Tuple, Union - -import tiktoken -from termcolor import colored - -from autogen import token_count_utils -from autogen.cache import AbstractCache, Cache -from autogen.types import MessageContentType - -from . import transforms_util -from .text_compressors import LLMLingua, TextCompressor - - -class MessageTransform(Protocol): - """Defines a contract for message transformation. - - Classes implementing this protocol should provide an `apply_transform` method - that takes a list of messages and returns the transformed list. - """ - - def apply_transform(self, messages: List[Dict]) -> List[Dict]: - """Applies a transformation to a list of messages. - - Args: - messages: A list of dictionaries representing messages. - - Returns: - A new list of dictionaries containing the transformed messages. - """ - ... 
- - def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: - """Creates the string including the logs of the transformation - - Alongside the string, it returns a boolean indicating whether the transformation had an effect or not. - - Args: - pre_transform_messages: A list of dictionaries representing messages before the transformation. - post_transform_messages: A list of dictionaries representig messages after the transformation. - - Returns: - A tuple with a string with the logs and a flag indicating whether the transformation had an effect or not. - """ - ... - - -class MessageHistoryLimiter: - """Limits the number of messages considered by an agent for response generation. - - This transform keeps only the most recent messages up to the specified maximum number of messages (max_messages). - It trims the conversation history by removing older messages, retaining only the most recent messages. - """ - - def __init__(self, max_messages: Optional[int] = None, keep_first_message: bool = False): - """ - Args: - max_messages Optional[int]: Maximum number of messages to keep in the context. Must be greater than 0 if not None. - keep_first_message bool: Whether to keep the original first message in the conversation history. - Defaults to False. - """ - self._validate_max_messages(max_messages) - self._max_messages = max_messages - self._keep_first_message = keep_first_message - - def apply_transform(self, messages: List[Dict]) -> List[Dict]: - """Truncates the conversation history to the specified maximum number of messages. - - This method returns a new list containing the most recent messages up to the specified - maximum number of messages (max_messages). If max_messages is None, it returns the - original list of messages unmodified. - - Args: - messages (List[Dict]): The list of messages representing the conversation history. - - Returns: - List[Dict]: A new list containing the most recent messages up to the specified maximum. - """ - - if self._max_messages is None or len(messages) <= self._max_messages: - return messages - - truncated_messages = [] - remaining_count = self._max_messages - - # Start with the first message if we need to keep it - if self._keep_first_message: - truncated_messages = [messages[0]] - remaining_count -= 1 - - # Loop through messages in reverse - for i in range(len(messages) - 1, 0, -1): - if remaining_count > 1: - truncated_messages.insert(1 if self._keep_first_message else 0, messages[i]) - if remaining_count == 1: - # If there's only 1 slot left and it's a 'tools' message, ignore it. - if messages[i].get("role") != "tool": - truncated_messages.insert(1, messages[i]) - - remaining_count -= 1 - if remaining_count == 0: - break - - return truncated_messages - - def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: - pre_transform_messages_len = len(pre_transform_messages) - post_transform_messages_len = len(post_transform_messages) - - if post_transform_messages_len < pre_transform_messages_len: - logs_str = ( - f"Removed {pre_transform_messages_len - post_transform_messages_len} messages. " - f"Number of messages reduced from {pre_transform_messages_len} to {post_transform_messages_len}." 
- ) - return logs_str, True - return "No messages were removed.", False - - def _validate_max_messages(self, max_messages: Optional[int]): - if max_messages is not None and max_messages < 1: - raise ValueError("max_messages must be None or greater than 1") - - -class MessageTokenLimiter: - """Truncates messages to meet token limits for efficient processing and response generation. - - This transformation applies two levels of truncation to the conversation history: - - 1. Truncates each individual message to the maximum number of tokens specified by max_tokens_per_message. - 2. Truncates the overall conversation history to the maximum number of tokens specified by max_tokens. - - NOTE: Tokens are counted using the encoder for the specified model. Different models may yield different token - counts for the same text. - - NOTE: For multimodal LLMs, the token count may be inaccurate as it does not account for the non-text input - (e.g images). - - The truncation process follows these steps in order: - - 1. The minimum tokens threshold (`min_tokens`) is checked (0 by default). If the total number of tokens in messages - are less than this threshold, then the messages are returned as is. In other case, the following process is applied. - 2. Messages are processed in reverse order (newest to oldest). - 3. Individual messages are truncated based on max_tokens_per_message. For multimodal messages containing both text - and other types of content, only the text content is truncated. - 4. The overall conversation history is truncated based on the max_tokens limit. Once the accumulated token count - exceeds this limit, the current message being processed get truncated to meet the total token count and any - remaining messages get discarded. - 5. The truncated conversation history is reconstructed by prepending the messages to a new list to preserve the - original message order. - """ - - def __init__( - self, - max_tokens_per_message: Optional[int] = None, - max_tokens: Optional[int] = None, - min_tokens: Optional[int] = None, - model: str = "gpt-3.5-turbo-0613", - filter_dict: Optional[Dict] = None, - exclude_filter: bool = True, - ): - """ - Args: - max_tokens_per_message (None or int): Maximum number of tokens to keep in each message. - Must be greater than or equal to 0 if not None. - max_tokens (Optional[int]): Maximum number of tokens to keep in the chat history. - Must be greater than or equal to 0 if not None. - min_tokens (Optional[int]): Minimum number of tokens in messages to apply the transformation. - Must be greater than or equal to 0 if not None. - model (str): The target OpenAI model for tokenization alignment. - filter_dict (None or dict): A dictionary to filter out messages that you want/don't want to compress. - If None, no filters will be applied. - exclude_filter (bool): If exclude filter is True (the default value), messages that match the filter will be - excluded from token truncation. If False, messages that match the filter will be truncated. - """ - self._model = model - self._max_tokens_per_message = self._validate_max_tokens(max_tokens_per_message) - self._max_tokens = self._validate_max_tokens(max_tokens) - self._min_tokens = self._validate_min_tokens(min_tokens, max_tokens) - self._filter_dict = filter_dict - self._exclude_filter = exclude_filter - - def apply_transform(self, messages: List[Dict]) -> List[Dict]: - """Applies token truncation to the conversation history. - - Args: - messages (List[Dict]): The list of messages representing the conversation history. 
- - Returns: - List[Dict]: A new list containing the truncated messages up to the specified token limits. - """ - assert self._max_tokens_per_message is not None - assert self._max_tokens is not None - assert self._min_tokens is not None - - # if the total number of tokens in the messages is less than the min_tokens, return the messages as is - if not transforms_util.min_tokens_reached(messages, self._min_tokens): - return messages - - temp_messages = copy.deepcopy(messages) - processed_messages = [] - processed_messages_tokens = 0 - - for msg in reversed(temp_messages): - # Some messages may not have content. - if not transforms_util.is_content_right_type(msg.get("content")): - processed_messages.insert(0, msg) - continue - - if not transforms_util.should_transform_message(msg, self._filter_dict, self._exclude_filter): - processed_messages.insert(0, msg) - processed_messages_tokens += transforms_util.count_text_tokens(msg["content"]) - continue - - expected_tokens_remained = self._max_tokens - processed_messages_tokens - self._max_tokens_per_message - - # If adding this message would exceed the token limit, truncate the last message to meet the total token - # limit and discard all remaining messages - if expected_tokens_remained < 0: - msg["content"] = self._truncate_str_to_tokens( - msg["content"], self._max_tokens - processed_messages_tokens - ) - processed_messages.insert(0, msg) - break - - msg["content"] = self._truncate_str_to_tokens(msg["content"], self._max_tokens_per_message) - msg_tokens = transforms_util.count_text_tokens(msg["content"]) - - # prepend the message to the list to preserve order - processed_messages_tokens += msg_tokens - processed_messages.insert(0, msg) - - return processed_messages - - def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: - pre_transform_messages_tokens = sum( - transforms_util.count_text_tokens(msg["content"]) for msg in pre_transform_messages if "content" in msg - ) - post_transform_messages_tokens = sum( - transforms_util.count_text_tokens(msg["content"]) for msg in post_transform_messages if "content" in msg - ) - - if post_transform_messages_tokens < pre_transform_messages_tokens: - logs_str = ( - f"Truncated {pre_transform_messages_tokens - post_transform_messages_tokens} tokens. " - f"Number of tokens reduced from {pre_transform_messages_tokens} to {post_transform_messages_tokens}" - ) - return logs_str, True - return "No tokens were truncated.", False - - def _truncate_str_to_tokens(self, contents: Union[str, List], n_tokens: int) -> Union[str, List]: - if isinstance(contents, str): - return self._truncate_tokens(contents, n_tokens) - elif isinstance(contents, list): - return self._truncate_multimodal_text(contents, n_tokens) - else: - raise ValueError(f"Contents must be a string or a list of dictionaries. 
Received type: {type(contents)}") - - def _truncate_multimodal_text(self, contents: List[Dict[str, Any]], n_tokens: int) -> List[Dict[str, Any]]: - """Truncates text content within a list of multimodal elements, preserving the overall structure.""" - tmp_contents = [] - for content in contents: - if content["type"] == "text": - truncated_text = self._truncate_tokens(content["text"], n_tokens) - tmp_contents.append({"type": "text", "text": truncated_text}) - else: - tmp_contents.append(content) - return tmp_contents - - def _truncate_tokens(self, text: str, n_tokens: int) -> str: - encoding = tiktoken.encoding_for_model(self._model) # Get the appropriate tokenizer - - encoded_tokens = encoding.encode(text) - truncated_tokens = encoded_tokens[:n_tokens] - truncated_text = encoding.decode(truncated_tokens) # Decode back to text - - return truncated_text - - def _validate_max_tokens(self, max_tokens: Optional[int] = None) -> Optional[int]: - if max_tokens is not None and max_tokens < 0: - raise ValueError("max_tokens and max_tokens_per_message must be None or greater than or equal to 0") - - try: - allowed_tokens = token_count_utils.get_max_token_limit(self._model) - except Exception: - print(colored(f"Model {self._model} not found in token_count_utils.", "yellow")) - allowed_tokens = None - - if max_tokens is not None and allowed_tokens is not None: - if max_tokens > allowed_tokens: - print( - colored( - f"Max token was set to {max_tokens}, but {self._model} can only accept {allowed_tokens} tokens. Capping it to {allowed_tokens}.", - "yellow", - ) - ) - return allowed_tokens - - return max_tokens if max_tokens is not None else sys.maxsize - - def _validate_min_tokens(self, min_tokens: Optional[int], max_tokens: Optional[int]) -> int: - if min_tokens is None: - return 0 - if min_tokens < 0: - raise ValueError("min_tokens must be None or greater than or equal to 0.") - if max_tokens is not None and min_tokens > max_tokens: - raise ValueError("min_tokens must not be more than max_tokens.") - return min_tokens - - -class TextMessageCompressor: - """A transform for compressing text messages in a conversation history. - - It uses a specified text compression method to reduce the token count of messages, which can lead to more efficient - processing and response generation by downstream models. - """ - - def __init__( - self, - text_compressor: Optional[TextCompressor] = None, - min_tokens: Optional[int] = None, - compression_params: Dict = dict(), - cache: Optional[AbstractCache] = Cache.disk(), - filter_dict: Optional[Dict] = None, - exclude_filter: bool = True, - ): - """ - Args: - text_compressor (TextCompressor or None): An instance of a class that implements the TextCompressor - protocol. If None, it defaults to LLMLingua. - min_tokens (int or None): Minimum number of tokens in messages to apply the transformation. Must be greater - than or equal to 0 if not None. If None, no threshold-based compression is applied. - compression_args (dict): A dictionary of arguments for the compression method. Defaults to an empty - dictionary. - cache (None or AbstractCache): The cache client to use to store and retrieve previously compressed messages. - If None, no caching will be used. - filter_dict (None or dict): A dictionary to filter out messages that you want/don't want to compress. - If None, no filters will be applied. - exclude_filter (bool): If exclude filter is True (the default value), messages that match the filter will be - excluded from compression. 
If False, messages that match the filter will be compressed. - """ - - if text_compressor is None: - text_compressor = LLMLingua() - - self._validate_min_tokens(min_tokens) - - self._text_compressor = text_compressor - self._min_tokens = min_tokens - self._compression_args = compression_params - self._filter_dict = filter_dict - self._exclude_filter = exclude_filter - self._cache = cache - - # Optimizing savings calculations to optimize log generation - self._recent_tokens_savings = 0 - - def apply_transform(self, messages: List[Dict]) -> List[Dict]: - """Applies compression to messages in a conversation history based on the specified configuration. - - The function processes each message according to the `compression_args` and `min_tokens` settings, applying - the specified compression configuration and returning a new list of messages with reduced token counts - where possible. - - Args: - messages (List[Dict]): A list of message dictionaries to be compressed. - - Returns: - List[Dict]: A list of dictionaries with the message content compressed according to the configured - method and scope. - """ - # Make sure there is at least one message - if not messages: - return messages - - # if the total number of tokens in the messages is less than the min_tokens, return the messages as is - if not transforms_util.min_tokens_reached(messages, self._min_tokens): - return messages - - total_savings = 0 - processed_messages = messages.copy() - for message in processed_messages: - # Some messages may not have content. - if not transforms_util.is_content_right_type(message.get("content")): - continue - - if not transforms_util.should_transform_message(message, self._filter_dict, self._exclude_filter): - continue - - if transforms_util.is_content_text_empty(message["content"]): - continue - - cache_key = transforms_util.cache_key(message["content"], self._min_tokens) - cached_content = transforms_util.cache_content_get(self._cache, cache_key) - if cached_content is not None: - message["content"], savings = cached_content - else: - message["content"], savings = self._compress(message["content"]) - - transforms_util.cache_content_set(self._cache, cache_key, message["content"], savings) - - assert isinstance(savings, int) - total_savings += savings - - self._recent_tokens_savings = total_savings - return processed_messages - - def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: - if self._recent_tokens_savings > 0: - return f"{self._recent_tokens_savings} tokens saved with text compression.", True - else: - return "No tokens saved with text compression.", False - - def _compress(self, content: MessageContentType) -> Tuple[MessageContentType, int]: - """Compresses the given text or multimodal content using the specified compression method.""" - if isinstance(content, str): - return self._compress_text(content) - elif isinstance(content, list): - return self._compress_multimodal(content) - else: - return content, 0 - - def _compress_multimodal(self, content: MessageContentType) -> Tuple[MessageContentType, int]: - tokens_saved = 0 - for item in content: - if isinstance(item, dict) and "text" in item: - item["text"], savings = self._compress_text(item["text"]) - tokens_saved += savings - - elif isinstance(item, str): - item, savings = self._compress_text(item) - tokens_saved += savings - - return content, tokens_saved - - def _compress_text(self, text: str) -> Tuple[str, int]: - """Compresses the given text using the specified compression 
method.""" - compressed_text = self._text_compressor.compress_text(text, **self._compression_args) - - savings = 0 - if "origin_tokens" in compressed_text and "compressed_tokens" in compressed_text: - savings = compressed_text["origin_tokens"] - compressed_text["compressed_tokens"] - - return compressed_text["compressed_prompt"], savings - - def _validate_min_tokens(self, min_tokens: Optional[int]): - if min_tokens is not None and min_tokens <= 0: - raise ValueError("min_tokens must be greater than 0 or None") - - -class TextMessageContentName: - """A transform for including the agent's name in the content of a message.""" - - def __init__( - self, - position: str = "start", - format_string: str = "{name}:\n", - deduplicate: bool = True, - filter_dict: Optional[Dict] = None, - exclude_filter: bool = True, - ): - """ - Args: - position (str): The position to add the name to the content. The possible options are 'start' or 'end'. Defaults to 'start'. - format_string (str): The f-string to format the message name with. Use '{name}' as a placeholder for the agent's name. Defaults to '{name}:\n' and must contain '{name}'. - deduplicate (bool): Whether to deduplicate the formatted string so it doesn't appear twice (sometimes the LLM will add it to new messages itself). Defaults to True. - filter_dict (None or dict): A dictionary to filter out messages that you want/don't want to compress. - If None, no filters will be applied. - exclude_filter (bool): If exclude filter is True (the default value), messages that match the filter will be - excluded from compression. If False, messages that match the filter will be compressed. - """ - - assert isinstance(position, str) and position is not None - assert position in ["start", "end"] - assert isinstance(format_string, str) and format_string is not None - assert "{name}" in format_string - assert isinstance(deduplicate, bool) and deduplicate is not None - - self._position = position - self._format_string = format_string - self._deduplicate = deduplicate - self._filter_dict = filter_dict - self._exclude_filter = exclude_filter - - # Track the number of messages changed for logging - self._messages_changed = 0 - - def apply_transform(self, messages: List[Dict]) -> List[Dict]: - """Applies the name change to the message based on the position and format string. - - Args: - messages (List[Dict]): A list of message dictionaries. - - Returns: - List[Dict]: A list of dictionaries with the message content updated with names. - """ - # Make sure there is at least one message - if not messages: - return messages - - messages_changed = 0 - processed_messages = copy.deepcopy(messages) - for message in processed_messages: - # Some messages may not have content. 
- if not transforms_util.is_content_right_type( - message.get("content") - ) or not transforms_util.is_content_right_type(message.get("name")): - continue - - if not transforms_util.should_transform_message(message, self._filter_dict, self._exclude_filter): - continue - - if transforms_util.is_content_text_empty(message["content"]) or transforms_util.is_content_text_empty( - message["name"] - ): - continue - - # Get and format the name in the content - content = message["content"] - formatted_name = self._format_string.format(name=message["name"]) - - if self._position == "start": - if not self._deduplicate or not content.startswith(formatted_name): - message["content"] = f"{formatted_name}{content}" - - messages_changed += 1 - else: - if not self._deduplicate or not content.endswith(formatted_name): - message["content"] = f"{content}{formatted_name}" - - messages_changed += 1 - - self._messages_changed = messages_changed - return processed_messages - - def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]: - if self._messages_changed > 0: - return f"{self._messages_changed} message(s) changed to incorporate name.", True - else: - return "No messages changed to incorporate name.", False diff --git a/autogen/agentchat/contrib/capabilities/transforms_util.py b/autogen/agentchat/contrib/capabilities/transforms_util.py deleted file mode 100644 index 8678dec654c4..000000000000 --- a/autogen/agentchat/contrib/capabilities/transforms_util.py +++ /dev/null @@ -1,114 +0,0 @@ -from typing import Any, Dict, Hashable, List, Optional, Tuple - -from autogen import token_count_utils -from autogen.cache.abstract_cache_base import AbstractCache -from autogen.oai.openai_utils import filter_config -from autogen.types import MessageContentType - - -def cache_key(content: MessageContentType, *args: Hashable) -> str: - """Calculates the cache key for the given message content and any other hashable args. - - Args: - content (MessageContentType): The message content to calculate the cache key for. - *args: Any additional hashable args to include in the cache key. - """ - str_keys = [str(key) for key in (content, *args)] - return "".join(str_keys) - - -def cache_content_get(cache: Optional[AbstractCache], key: str) -> Optional[Tuple[MessageContentType, ...]]: - """Retrieves cachedd content from the cache. - - Args: - cache (None or AbstractCache): The cache to retrieve the content from. If None, the cache is ignored. - key (str): The key to retrieve the content from. - """ - if cache: - cached_value = cache.get(key) - if cached_value: - return cached_value - - -def cache_content_set(cache: Optional[AbstractCache], key: str, content: MessageContentType, *extra_values): - """Sets content into the cache. - - Args: - cache (None or AbstractCache): The cache to set the content into. If None, the cache is ignored. - key (str): The key to set the content into. - content (MessageContentType): The message content to set into the cache. - *extra_values: Additional values to be passed to the cache. - """ - if cache: - cache_value = (content, *extra_values) - cache.set(key, cache_value) - - -def min_tokens_reached(messages: List[Dict], min_tokens: Optional[int]) -> bool: - """Returns True if the total number of tokens in the messages is greater than or equal to the specified value. - - Args: - messages (List[Dict]): A list of messages to check. 
- """ - if not min_tokens: - return True - - messages_tokens = sum(count_text_tokens(msg["content"]) for msg in messages if "content" in msg) - return messages_tokens >= min_tokens - - -def count_text_tokens(content: MessageContentType) -> int: - """Calculates the number of text tokens in the given message content. - - Args: - content (MessageContentType): The message content to calculate the number of text tokens for. - """ - token_count = 0 - if isinstance(content, str): - token_count = token_count_utils.count_token(content) - elif isinstance(content, list): - for item in content: - if isinstance(item, str): - token_count += token_count_utils.count_token(item) - else: - token_count += count_text_tokens(item.get("text", "")) - return token_count - - -def is_content_right_type(content: Any) -> bool: - """A helper function to check if the passed in content is of the right type.""" - return isinstance(content, (str, list)) - - -def is_content_text_empty(content: MessageContentType) -> bool: - """Checks if the content of the message does not contain any text. - - Args: - content (MessageContentType): The message content to check. - """ - if isinstance(content, str): - return content == "" - elif isinstance(content, list): - texts = [] - for item in content: - if isinstance(item, str): - texts.append(item) - elif isinstance(item, dict): - texts.append(item.get("text", "")) - return not any(texts) - else: - return True - - -def should_transform_message(message: Dict[str, Any], filter_dict: Optional[Dict[str, Any]], exclude: bool) -> bool: - """Validates whether the transform should be applied according to the filter dictionary. - - Args: - message (Dict[str, Any]): The message to validate. - filter_dict (None or Dict[str, Any]): The filter dictionary to validate against. If None, the transform is always applied. - exclude (bool): Whether to exclude messages that match the filter dictionary. - """ - if not filter_dict: - return True - - return len(filter_config([message], filter_dict, exclude)) > 0 diff --git a/autogen/agentchat/contrib/capabilities/vision_capability.py b/autogen/agentchat/contrib/capabilities/vision_capability.py deleted file mode 100644 index acfb9c8f6d82..000000000000 --- a/autogen/agentchat/contrib/capabilities/vision_capability.py +++ /dev/null @@ -1,211 +0,0 @@ -import copy -from typing import Callable, Dict, List, Optional, Union - -from autogen.agentchat.assistant_agent import ConversableAgent -from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability -from autogen.agentchat.contrib.img_utils import ( - convert_base64_to_data_uri, - get_image_data, - get_pil_image, - gpt4v_formatter, - message_formatter_pil_to_b64, -) -from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent -from autogen.agentchat.conversable_agent import colored -from autogen.code_utils import content_str -from autogen.oai.client import OpenAIWrapper - -DEFAULT_DESCRIPTION_PROMPT = ( - "Write a detailed caption for this image. " - "Pay special attention to any details that might be useful or relevant " - "to the ongoing conversation." -) - - -class VisionCapability(AgentCapability): - """We can add vision capability to regular ConversableAgent, even if the agent does not have the multimodal capability, - such as GPT-3.5-turbo agent, Llama, Orca, or Mistral agents. This vision capability will invoke a LMM client to describe - the image (captioning) before sending the information to the agent's actual client. 
- - The vision capability will hook to the ConversableAgent's `process_last_received_message`. - - Some technical details: - When the agent (who has the vision capability) received an message, it will: - 1. _process_received_message: - a. _append_oai_message - 2. generate_reply: if the agent is a MultimodalAgent, it will also use the image tag. - a. hook process_last_received_message (NOTE: this is where the vision capability will be hooked to.) - b. hook process_all_messages_before_reply - 3. send: - a. hook process_message_before_send - b. _append_oai_message - """ - - def __init__( - self, - lmm_config: Dict, - description_prompt: Optional[str] = DEFAULT_DESCRIPTION_PROMPT, - custom_caption_func: Callable = None, - ) -> None: - """ - Initializes a new instance, setting up the configuration for interacting with - a Language Multimodal (LMM) client and specifying optional parameters for image - description and captioning. - - Args: - lmm_config (Dict): Configuration for the LMM client, which is used to call - the LMM service for describing the image. This must be a dictionary containing - the necessary configuration parameters. If `lmm_config` is False or an empty dictionary, - it is considered invalid, and initialization will assert. - description_prompt (Optional[str], optional): The prompt to use for generating - descriptions of the image. This parameter allows customization of the - prompt passed to the LMM service. Defaults to `DEFAULT_DESCRIPTION_PROMPT` if not provided. - custom_caption_func (Callable, optional): A callable that, if provided, will be used - to generate captions for images. This allows for custom captioning logic outside - of the standard LMM service interaction. - The callable should take three parameters as input: - 1. an image URL (or local location) - 2. image_data (a PIL image) - 3. lmm_client (to call remote LMM) - and then return a description (as string). - If not provided, captioning will rely on the LMM client configured via `lmm_config`. - If provided, we will not run the default self._get_image_caption method. - - Raises: - AssertionError: If neither a valid `lmm_config` nor a `custom_caption_func` is provided, - an AssertionError is raised to indicate that the Vision Capability requires - one of these to be valid for operation. - """ - self._lmm_config = lmm_config - self._description_prompt = description_prompt - self._parent_agent = None - - if lmm_config: - self._lmm_client = OpenAIWrapper(**lmm_config) - else: - self._lmm_client = None - - self._custom_caption_func = custom_caption_func - assert ( - self._lmm_config or custom_caption_func - ), "Vision Capability requires a valid lmm_config or custom_caption_func." - - def add_to_agent(self, agent: ConversableAgent) -> None: - self._parent_agent = agent - - # Append extra info to the system message. - agent.update_system_message(agent.system_message + "\nYou've been given the ability to interpret images.") - - # Register a hook for processing the last message. - agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message) - - def process_last_received_message(self, content: Union[str, List[dict]]) -> str: - """ - Processes the last received message content by normalizing and augmenting it - with descriptions of any included images. The function supports input content - as either a string or a list of dictionaries, where each dictionary represents - a content item (e.g., text, image). 
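As a usage illustration of the constructor and `add_to_agent` shown above, here is a hedged sketch that attaches vision to a text-only agent via `custom_caption_func` instead of an LMM client, assuming a version of the library that still ships this module. The placeholder captioner and the agent configuration are assumptions for the sketch, not part of the deleted code.

```python
from autogen import ConversableAgent
from autogen.agentchat.contrib.capabilities.vision_capability import VisionCapability


def caption_with_local_model(image_url, image_data, lmm_client) -> str:
    # image_data arrives as a PIL image; a real implementation would run a local
    # captioning model here. A fixed string keeps the sketch self-contained.
    width, height = image_data.size
    return f"an image of size {width}x{height} (placeholder caption)"


agent = ConversableAgent(name="describer", llm_config=False, human_input_mode="NEVER")

# Per the assertion in __init__, an empty lmm_config passes when a custom
# captioning function is supplied.
VisionCapability(lmm_config={}, custom_caption_func=caption_with_local_model).add_to_agent(agent)
# From here on, images in messages received by `agent` are captioned and the
# captions appended to the text before the agent generates its (text-only) reply.
```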
If the content contains image URLs, it - fetches the image data, generates a caption for each image, and inserts the - caption into the augmented content. - - The function aims to transform the content into a format compatible with GPT-4V - multimodal inputs, specifically by formatting strings into PIL-compatible - images if needed and appending text descriptions for images. This allows for - a more accessible presentation of the content, especially in contexts where - images cannot be displayed directly. - - Args: - content (Union[str, List[dict]]): The last received message content, which - can be a plain text string or a list of dictionaries representing - different types of content items (e.g., text, image_url). - - Returns: - str: The augmented message content - - Raises: - AssertionError: If an item in the content list is not a dictionary. - - Examples: - Assuming `self._get_image_caption(img_data)` returns - "A beautiful sunset over the mountains" for the image. - - - Input as String: - content = "Check out this cool photo!" - Output: "Check out this cool photo!" - (Content is a string without an image, remains unchanged.) - - - Input as String, with image location: - content = "What's weather in this cool photo: " - Output: "What's weather in this cool photo: in case you can not see, the caption of this image is: - A beautiful sunset over the mountains\n" - (Caption added after the image) - - - Input as List with Text Only: - content = [{"type": "text", "text": "Here's an interesting fact."}] - Output: "Here's an interesting fact." - (No images in the content, it remains unchanged.) - - - Input as List with Image URL: - content = [ - {"type": "text", "text": "What's weather in this cool photo:"}, - {"type": "image_url", "image_url": {"url": "http://example.com/photo.jpg"}} - ] - Output: "What's weather in this cool photo: in case you can not see, the caption of this image is: - A beautiful sunset over the mountains\n" - (Caption added after the image) - """ - copy.deepcopy(content) - # normalize the content into the gpt-4v format for multimodal - # we want to keep the URL format to keep it concise. - if isinstance(content, str): - content = gpt4v_formatter(content, img_format="url") - - aug_content: str = "" - for item in content: - assert isinstance(item, dict) - if item["type"] == "text": - aug_content += item["text"] - elif item["type"] == "image_url": - img_url = item["image_url"]["url"] - img_caption = "" - - if self._custom_caption_func: - img_caption = self._custom_caption_func(img_url, get_pil_image(img_url), self._lmm_client) - elif self._lmm_client: - img_data = get_image_data(img_url) - img_caption = self._get_image_caption(img_data) - else: - img_caption = "" - - aug_content += f" in case you can not see, the caption of this image is: {img_caption}\n" - else: - print(f"Warning: the input type should either be `test` or `image_url`. Skip {item['type']} here.") - - return aug_content - - def _get_image_caption(self, img_data: str) -> str: - """ - Args: - img_data (str): base64 encoded image data. - Returns: - str: caption for the given image. 
- """ - response = self._lmm_client.create( - context=None, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": self._description_prompt}, - { - "type": "image_url", - "image_url": { - "url": convert_base64_to_data_uri(img_data), - }, - }, - ], - } - ], - ) - description = response.choices[0].message.content - return content_str(description) diff --git a/autogen/agentchat/contrib/gpt_assistant_agent.py b/autogen/agentchat/contrib/gpt_assistant_agent.py deleted file mode 100644 index 244f5ed81894..000000000000 --- a/autogen/agentchat/contrib/gpt_assistant_agent.py +++ /dev/null @@ -1,539 +0,0 @@ -import copy -import json -import logging -import time -from collections import defaultdict -from typing import Any, Dict, List, Optional, Tuple, Union - -from autogen import OpenAIWrapper -from autogen.agentchat.agent import Agent -from autogen.agentchat.assistant_agent import AssistantAgent, ConversableAgent -from autogen.oai.openai_utils import create_gpt_assistant, retrieve_assistants_by_name, update_gpt_assistant -from autogen.runtime_logging import log_new_agent, logging_enabled - -logger = logging.getLogger(__name__) - - -class GPTAssistantAgent(ConversableAgent): - """ - An experimental AutoGen agent class that leverages the OpenAI Assistant API for conversational capabilities. - This agent is unique in its reliance on the OpenAI Assistant for state management, differing from other agents like ConversableAgent. - """ - - DEFAULT_MODEL_NAME = "gpt-4-0125-preview" - - def __init__( - self, - name="GPT Assistant", - instructions: Optional[str] = None, - llm_config: Optional[Union[Dict, bool]] = None, - assistant_config: Optional[Dict] = None, - overwrite_instructions: bool = False, - overwrite_tools: bool = False, - **kwargs, - ): - """ - Args: - name (str): name of the agent. It will be used to find the existing assistant by name. Please remember to delete an old assistant with the same name if you intend to create a new assistant with the same name. - instructions (str): instructions for the OpenAI assistant configuration. - When instructions is not None, the system message of the agent will be - set to the provided instructions and used in the assistant run, irrespective - of the overwrite_instructions flag. But when instructions is None, - and the assistant does not exist, the system message will be set to - AssistantAgent.DEFAULT_SYSTEM_MESSAGE. If the assistant exists, the - system message will be set to the existing assistant instructions. - llm_config (dict or False): llm inference configuration. - - model: Model to use for the assistant (gpt-4-1106-preview, gpt-3.5-turbo-1106). - assistant_config - - assistant_id: ID of the assistant to use. If None, a new assistant will be created. - - check_every_ms: check thread run status interval - - tools: Give Assistants access to OpenAI-hosted tools like Code Interpreter and Knowledge Retrieval, - or build your own tools using Function calling. ref https://platform.openai.com/docs/assistants/tools - - file_ids: (Deprecated) files used by retrieval in run. It is Deprecated, use tool_resources instead. https://platform.openai.com/docs/assistants/migration/what-has-changed. - - tool_resources: A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. - overwrite_instructions (bool): whether to overwrite the instructions of an existing assistant. This parameter is in effect only when assistant_id is specified in llm_config. 
- overwrite_tools (bool): whether to overwrite the tools of an existing assistant. This parameter is in effect only when assistant_id is specified in llm_config. - kwargs (dict): Additional configuration options for the agent. - - verbose (bool): If set to True, enables more detailed output from the assistant thread. - - Other kwargs: Except verbose, others are passed directly to ConversableAgent. - """ - - self._verbose = kwargs.pop("verbose", False) - openai_client_cfg, openai_assistant_cfg = self._process_assistant_config(llm_config, assistant_config) - - super().__init__( - name=name, system_message=instructions, human_input_mode="NEVER", llm_config=openai_client_cfg, **kwargs - ) - if logging_enabled(): - log_new_agent(self, locals()) - - # GPTAssistantAgent's azure_deployment param may cause NotFoundError (404) in client.beta.assistants.list() - # See: https://github.com/microsoft/autogen/pull/1721 - model_name = self.DEFAULT_MODEL_NAME - if openai_client_cfg.get("config_list") is not None and len(openai_client_cfg["config_list"]) > 0: - model_name = openai_client_cfg["config_list"][0].pop("model", self.DEFAULT_MODEL_NAME) - else: - model_name = openai_client_cfg.pop("model", self.DEFAULT_MODEL_NAME) - - logger.warning("OpenAI client config of GPTAssistantAgent(%s) - model: %s", name, model_name) - - oai_wrapper = OpenAIWrapper(**openai_client_cfg) - if len(oai_wrapper._clients) > 1: - logger.warning("GPT Assistant only supports one OpenAI client. Using the first client in the list.") - - self._openai_client = oai_wrapper._clients[0]._oai_client - openai_assistant_id = openai_assistant_cfg.get("assistant_id", None) - if openai_assistant_id is None: - # try to find assistant by name first - candidate_assistants = retrieve_assistants_by_name(self._openai_client, name) - if len(candidate_assistants) > 0: - # Filter out candidates with the same name but different instructions, file IDs, and function names. - candidate_assistants = self.find_matching_assistant( - candidate_assistants, - instructions, - openai_assistant_cfg.get("tools", []), - ) - - if len(candidate_assistants) == 0: - logger.warning("No matching assistant found, creating a new assistant") - # create a new assistant - if instructions is None: - logger.warning( - "No instructions were provided for new assistant. Using default instructions from AssistantAgent.DEFAULT_SYSTEM_MESSAGE." - ) - instructions = AssistantAgent.DEFAULT_SYSTEM_MESSAGE - self._openai_assistant = create_gpt_assistant( - self._openai_client, - name=name, - instructions=instructions, - model=model_name, - assistant_config=openai_assistant_cfg, - ) - else: - logger.warning( - "Matching assistant found, using the first matching assistant: %s", - candidate_assistants[0].__dict__, - ) - self._openai_assistant = candidate_assistants[0] - else: - # retrieve an existing assistant - self._openai_assistant = self._openai_client.beta.assistants.retrieve(openai_assistant_id) - # if no instructions are provided, set the instructions to the existing instructions - if instructions is None: - logger.warning( - "No instructions were provided for given assistant. Using existing instructions from assistant API." - ) - instructions = self.get_assistant_instructions() - elif overwrite_instructions is True: - logger.warning( - "overwrite_instructions is True. 
Provided instructions will be used and will modify the assistant in the API" - ) - self._openai_assistant = update_gpt_assistant( - self._openai_client, - assistant_id=openai_assistant_id, - assistant_config={ - "instructions": instructions, - }, - ) - else: - logger.warning( - "overwrite_instructions is False. Provided instructions will be used without permanently modifying the assistant in the API." - ) - - # Check if tools are specified in assistant_config - specified_tools = openai_assistant_cfg.get("tools", None) - - if specified_tools is None: - # Check if the current assistant has tools defined - if self._openai_assistant.tools: - logger.warning( - "No tools were provided for given assistant. Using existing tools from assistant API." - ) - else: - logger.info( - "No tools were provided for the assistant, and the assistant currently has no tools set." - ) - elif overwrite_tools is True: - # Tools are specified and overwrite_tools is True; update the assistant's tools - logger.warning( - "overwrite_tools is True. Provided tools will be used and will modify the assistant in the API" - ) - self._openai_assistant = update_gpt_assistant( - self._openai_client, - assistant_id=openai_assistant_id, - assistant_config={ - "tools": specified_tools, - "tool_resources": openai_assistant_cfg.get("tool_resources", None), - }, - ) - else: - # Tools are specified but overwrite_tools is False; do not update the assistant's tools - logger.warning("overwrite_tools is False. Using existing tools from assistant API.") - - self.update_system_message(self._openai_assistant.instructions) - # lazily create threads - self._openai_threads = {} - self._unread_index = defaultdict(int) - self.register_reply([Agent, None], GPTAssistantAgent._invoke_assistant, position=2) - - def _invoke_assistant( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """ - Invokes the OpenAI assistant to generate a reply based on the given messages. - - Args: - messages: A list of messages in the conversation history with the sender. - sender: The agent instance that sent the message. - config: Optional configuration for message processing. - - Returns: - A tuple containing a boolean indicating success and the assistant's reply. 
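To make the constructor options above concrete, here is a hedged usage sketch of `GPTAssistantAgent`. The model name, API-key placeholder, and the chat task are assumptions about the caller's environment; `code_interpreter` is one of the OpenAI-hosted tools referenced in the `tools` docstring above.

```python
from autogen import UserProxyAgent
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent

llm_config = {"config_list": [{"model": "gpt-4-0125-preview", "api_key": "YOUR_OPENAI_KEY"}]}

coder = GPTAssistantAgent(
    name="coder_assistant",
    instructions="You are a helpful Python programmer.",
    llm_config=llm_config,
    assistant_config={"tools": [{"type": "code_interpreter"}]},
    verbose=True,
)

user = UserProxyAgent(name="user", human_input_mode="NEVER", code_execution_config=False)
user.initiate_chat(coder, message="Write a one-line Python expression that computes 2**10.")
```

Re-running a script like this should reuse the existing assistant, since the lookup above matches candidates by name, instructions, and tools before creating a new one.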
- """ - - if messages is None: - messages = self._oai_messages[sender] - unread_index = self._unread_index[sender] or 0 - pending_messages = messages[unread_index:] - - # Check and initiate a new thread if necessary - if self._openai_threads.get(sender, None) is None: - self._openai_threads[sender] = self._openai_client.beta.threads.create( - messages=[], - ) - assistant_thread = self._openai_threads[sender] - # Process each unread message - for message in pending_messages: - if message["content"].strip() == "": - continue - # Convert message roles to 'user' or 'assistant', by calling _map_role_for_api, to comply with OpenAI API spec - api_role = self._map_role_for_api(message["role"]) - self._openai_client.beta.threads.messages.create( - thread_id=assistant_thread.id, - content=message["content"], - role=api_role, - ) - - # Create a new run to get responses from the assistant - run = self._openai_client.beta.threads.runs.create( - thread_id=assistant_thread.id, - assistant_id=self._openai_assistant.id, - # pass the latest system message as instructions - instructions=self.system_message, - ) - - run_response_messages = self._get_run_response(assistant_thread, run) - assert len(run_response_messages) > 0, "No response from the assistant." - - response = { - "role": run_response_messages[-1]["role"], - "content": "", - } - for message in run_response_messages: - # just logging or do something with the intermediate messages? - # if current response is not empty and there is more, append new lines - if len(response["content"]) > 0: - response["content"] += "\n\n" - response["content"] += message["content"] - - self._unread_index[sender] = len(self._oai_messages[sender]) + 1 - return True, response - - def _map_role_for_api(self, role: str) -> str: - """ - Maps internal message roles to the roles expected by the OpenAI Assistant API. - - Args: - role (str): The role from the internal message. - - Returns: - str: The mapped role suitable for the API. - """ - if role in ["function", "tool"]: - return "assistant" - elif role == "system": - return "system" - elif role == "user": - return "user" - elif role == "assistant": - return "assistant" - else: - # Default to 'assistant' for any other roles not recognized by the API - return "assistant" - - def _get_run_response(self, thread, run): - """ - Waits for and processes the response of a run from the OpenAI assistant. - - Args: - run: The run object initiated with the OpenAI assistant. - - Returns: - Updated run object, status of the run, and response messages. 
- """ - while True: - run = self._wait_for_run(run.id, thread.id) - if run.status == "completed": - response_messages = self._openai_client.beta.threads.messages.list(thread.id, order="asc") - - new_messages = [] - for msg in response_messages: - if msg.run_id == run.id: - for content in msg.content: - if content.type == "text": - new_messages.append( - {"role": msg.role, "content": self._format_assistant_message(content.text)} - ) - elif content.type == "image_file": - new_messages.append( - { - "role": msg.role, - "content": f"Received file id={content.image_file.file_id}", - } - ) - return new_messages - elif run.status == "requires_action": - actions = [] - for tool_call in run.required_action.submit_tool_outputs.tool_calls: - function = tool_call.function - is_exec_success, tool_response = self.execute_function(function.dict(), self._verbose) - tool_response["metadata"] = { - "tool_call_id": tool_call.id, - "run_id": run.id, - "thread_id": thread.id, - } - - logger.info( - "Intermediate executing(%s, Success: %s) : %s", - tool_response["name"], - is_exec_success, - tool_response["content"], - ) - actions.append(tool_response) - - submit_tool_outputs = { - "tool_outputs": [ - {"output": action["content"], "tool_call_id": action["metadata"]["tool_call_id"]} - for action in actions - ], - "run_id": run.id, - "thread_id": thread.id, - } - - run = self._openai_client.beta.threads.runs.submit_tool_outputs(**submit_tool_outputs) - else: - run_info = json.dumps(run.dict(), indent=2) - raise ValueError(f"Unexpected run status: {run.status}. Full run info:\n\n{run_info})") - - def _wait_for_run(self, run_id: str, thread_id: str) -> Any: - """ - Waits for a run to complete or reach a final state. - - Args: - run_id: The ID of the run. - thread_id: The ID of the thread associated with the run. - - Returns: - The updated run object after completion or reaching a final state. - """ - in_progress = True - while in_progress: - run = self._openai_client.beta.threads.runs.retrieve(run_id, thread_id=thread_id) - in_progress = run.status in ("in_progress", "queued") - if in_progress: - time.sleep(self.llm_config.get("check_every_ms", 1000) / 1000) - return run - - def _format_assistant_message(self, message_content): - """ - Formats the assistant's message to include annotations and citations. 
- """ - - annotations = message_content.annotations - citations = [] - - # Iterate over the annotations and add footnotes - for index, annotation in enumerate(annotations): - # Replace the text with a footnote - message_content.value = message_content.value.replace(annotation.text, f" [{index}]") - - # Gather citations based on annotation attributes - if file_citation := getattr(annotation, "file_citation", None): - try: - cited_file = self._openai_client.files.retrieve(file_citation.file_id) - citations.append(f"[{index}] {cited_file.filename}: {file_citation.quote}") - except Exception as e: - logger.error(f"Error retrieving file citation: {e}") - elif file_path := getattr(annotation, "file_path", None): - try: - cited_file = self._openai_client.files.retrieve(file_path.file_id) - citations.append(f"[{index}] Click to download {cited_file.filename}") - except Exception as e: - logger.error(f"Error retrieving file citation: {e}") - # Note: File download functionality not implemented above for brevity - - # Add footnotes to the end of the message before displaying to user - message_content.value += "\n" + "\n".join(citations) - return message_content.value - - def can_execute_function(self, name: str) -> bool: - """Whether the agent can execute the function.""" - return False - - def reset(self): - """ - Resets the agent, clearing any existing conversation thread and unread message indices. - """ - super().reset() - for thread in self._openai_threads.values(): - # Delete the existing thread to start fresh in the next conversation - self._openai_client.beta.threads.delete(thread.id) - self._openai_threads = {} - # Clear the record of unread messages - self._unread_index.clear() - - def clear_history(self, agent: Optional[Agent] = None): - """Clear the chat history of the agent. - - Args: - agent: the agent with whom the chat history to clear. If None, clear the chat history with all agents. 
- """ - super().clear_history(agent) - if self._openai_threads.get(agent, None) is not None: - # Delete the existing thread to start fresh in the next conversation - thread = self._openai_threads[agent] - logger.info("Clearing thread %s", thread.id) - self._openai_client.beta.threads.delete(thread.id) - self._openai_threads.pop(agent) - self._unread_index[agent] = 0 - - def pretty_print_thread(self, thread): - """Pretty print the thread.""" - if thread is None: - print("No thread to print") - return - # NOTE: that list may not be in order, sorting by created_at is important - messages = self._openai_client.beta.threads.messages.list( - thread_id=thread.id, - ) - messages = sorted(messages.data, key=lambda x: x.created_at) - print("~~~~~~~THREAD CONTENTS~~~~~~~") - for message in messages: - content_types = [content.type for content in message.content] - print(f"[{message.created_at}]", message.role, ": [", ", ".join(content_types), "]") - for content in message.content: - content_type = content.type - if content_type == "text": - print(content.type, ": ", content.text.value) - elif content_type == "image_file": - print(content.type, ": ", content.image_file.file_id) - else: - print(content.type, ": ", content) - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - - @property - def oai_threads(self) -> Dict[Agent, Any]: - """Return the threads of the agent.""" - return self._openai_threads - - @property - def assistant_id(self): - """Return the assistant id""" - return self._openai_assistant.id - - @property - def openai_client(self): - return self._openai_client - - @property - def openai_assistant(self): - return self._openai_assistant - - def get_assistant_instructions(self): - """Return the assistant instructions from OAI assistant API""" - return self._openai_assistant.instructions - - def delete_assistant(self): - """Delete the assistant from OAI assistant API""" - logger.warning("Permanently deleting assistant...") - self._openai_client.beta.assistants.delete(self.assistant_id) - - def find_matching_assistant(self, candidate_assistants, instructions, tools): - """ - Find the matching assistant from a list of candidate assistants. - Filter out candidates with the same name but different instructions, and function names. 
- """ - matching_assistants = [] - - # Preprocess the required tools for faster comparison - required_tool_types = set( - "file_search" if tool.get("type") in ["retrieval", "file_search"] else tool.get("type") for tool in tools - ) - - required_function_names = set( - tool.get("function", {}).get("name") - for tool in tools - if tool.get("type") not in ["code_interpreter", "retrieval", "file_search"] - ) - - for assistant in candidate_assistants: - # Check if instructions are similar - if instructions and instructions != getattr(assistant, "instructions", None): - logger.warning( - "instructions not match, skip assistant(%s): %s", - assistant.id, - getattr(assistant, "instructions", None), - ) - continue - - # Preprocess the assistant's tools - assistant_tool_types = set( - "file_search" if tool.type in ["retrieval", "file_search"] else tool.type for tool in assistant.tools - ) - assistant_function_names = set(tool.function.name for tool in assistant.tools if hasattr(tool, "function")) - - # Check if the tool types, function names match - if required_tool_types != assistant_tool_types or required_function_names != assistant_function_names: - logger.warning( - "tools not match, skip assistant(%s): tools %s, functions %s", - assistant.id, - assistant_tool_types, - assistant_function_names, - ) - continue - - # Append assistant to matching list if all conditions are met - matching_assistants.append(assistant) - - return matching_assistants - - def _process_assistant_config(self, llm_config, assistant_config): - """ - Process the llm_config and assistant_config to extract the model name and assistant related configurations. - """ - - if llm_config is False: - raise ValueError("llm_config=False is not supported for GPTAssistantAgent.") - - if llm_config is None: - openai_client_cfg = {} - else: - openai_client_cfg = copy.deepcopy(llm_config) - - if assistant_config is None: - openai_assistant_cfg = {} - else: - openai_assistant_cfg = copy.deepcopy(assistant_config) - - # Move the assistant related configurations to assistant_config - # It's important to keep forward compatibility - assistant_config_items = ["assistant_id", "tools", "file_ids", "tool_resources", "check_every_ms"] - for item in assistant_config_items: - if openai_client_cfg.get(item) is not None and openai_assistant_cfg.get(item) is None: - openai_assistant_cfg[item] = openai_client_cfg[item] - openai_client_cfg.pop(item, None) - - return openai_client_cfg, openai_assistant_cfg diff --git a/autogen/agentchat/contrib/graph_rag/document.py b/autogen/agentchat/contrib/graph_rag/document.py deleted file mode 100644 index 9730269c7ab6..000000000000 --- a/autogen/agentchat/contrib/graph_rag/document.py +++ /dev/null @@ -1,24 +0,0 @@ -from dataclasses import dataclass -from enum import Enum, auto -from typing import Optional - - -class DocumentType(Enum): - """ - Enum for supporting document type. - """ - - TEXT = auto() - HTML = auto() - PDF = auto() - - -@dataclass -class Document: - """ - A wrapper of graph store query results. 
- """ - - doctype: DocumentType - data: Optional[object] = None - path_or_url: Optional[str] = "" diff --git a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py deleted file mode 100644 index 28ef6ede84a6..000000000000 --- a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py +++ /dev/null @@ -1,51 +0,0 @@ -from dataclasses import dataclass, field -from typing import List, Optional, Protocol - -from .document import Document - - -@dataclass -class GraphStoreQueryResult: - """ - A wrapper of graph store query results. - - answer: human readable answer to question/query. - results: intermediate results to question/query, e.g. node entities. - """ - - answer: Optional[str] = None - results: list = field(default_factory=list) - - -class GraphQueryEngine(Protocol): - """An abstract base class that represents a graph query engine on top of a underlying graph database. - - This interface defines the basic methods for graph rag. - """ - - def init_db(self, input_doc: List[Document] | None = None): - """ - This method initializes graph database with the input documents or records. - Usually, it takes the following steps, - 1. connecting to a graph database. - 2. extract graph nodes, edges based on input data, graph schema and etc. - 3. build indexes etc. - - Args: - input_doc: a list of input documents that are used to build the graph in database. - - Returns: GraphStore - """ - pass - - def add_records(self, new_records: List) -> bool: - """ - Add new records to the underlying database and add to the graph if required. - """ - pass - - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: - """ - This method transform a string format question into database query and return the result. - """ - pass diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py b/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py deleted file mode 100644 index b6412305e069..000000000000 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +++ /dev/null @@ -1,56 +0,0 @@ -from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability -from autogen.agentchat.conversable_agent import ConversableAgent - -from .graph_query_engine import GraphQueryEngine - - -class GraphRagCapability(AgentCapability): - """ - A graph rag capability uses a graph query engine to give a conversable agent the graph rag ability. - - An agent class with graph rag capability could - 1. create a graph in the underlying database with input documents. - 2. retrieved relevant information based on messages received by the agent. - 3. generate answers from retrieved information and send messages back. - - For example, - graph_query_engine = GraphQueryEngine(...) - graph_query_engine.init_db([Document(doc1), Document(doc2), ...]) - - graph_rag_agent = ConversableAgent( - name="graph_rag_agent", - max_consecutive_auto_reply=3, - ... 
- ) - graph_rag_capability = GraphRagCapbility(graph_query_engine) - graph_rag_capability.add_to_agent(graph_rag_agent) - - user_proxy = UserProxyAgent( - name="user_proxy", - code_execution_config=False, - is_termination_msg=lambda msg: "TERMINATE" in msg["content"], - human_input_mode="ALWAYS", - ) - user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'") - - # ChatResult( - # chat_id=None, - # chat_history=[ - # {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'}, - # {'content': 'A few actors who have played in The Matrix are: - # - Keanu Reeves - # - Laurence Fishburne - # - Carrie-Anne Moss - # - Hugo Weaving', - # 'role': 'user_proxy'}, - # ...) - - """ - - def __init__(self, query_engine: GraphQueryEngine): - """ - initialize graph rag capability with a graph query engine - """ - ... - - def add_to_agent(self, agent: ConversableAgent): ... diff --git a/autogen/agentchat/contrib/img_utils.py b/autogen/agentchat/contrib/img_utils.py deleted file mode 100644 index a389c74b064d..000000000000 --- a/autogen/agentchat/contrib/img_utils.py +++ /dev/null @@ -1,300 +0,0 @@ -import base64 -import copy -import os -import re -from io import BytesIO -from typing import Dict, List, Tuple, Union - -import requests -from PIL import Image - -from autogen.agentchat import utils - - -def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image: - """ - Loads an image from a file and returns a PIL Image object. - - Parameters: - image_file (str, or Image): The filename, URL, URI, or base64 string of the image file. - - Returns: - Image.Image: The PIL Image object. - """ - if isinstance(image_file, Image.Image): - # Already a PIL Image object - return image_file - - # Remove quotes if existed - if image_file.startswith('"') and image_file.endswith('"'): - image_file = image_file[1:-1] - if image_file.startswith("'") and image_file.endswith("'"): - image_file = image_file[1:-1] - - if image_file.startswith("http://") or image_file.startswith("https://"): - # A URL file - response = requests.get(image_file) - content = BytesIO(response.content) - image = Image.open(content) - elif re.match(r"data:image/(?:png|jpeg);base64,", image_file): - # A URI. Remove the prefix and decode the base64 string. - base64_data = re.sub(r"data:image/(?:png|jpeg);base64,", "", image_file) - image = _to_pil(base64_data) - elif os.path.exists(image_file): - # A local file - image = Image.open(image_file) - else: - # base64 encoded string - image = _to_pil(image_file) - - return image.convert("RGB") - - -def get_image_data(image_file: Union[str, Image.Image], use_b64=True) -> bytes: - """ - Loads an image and returns its data either as raw bytes or in base64-encoded format. - - This function first loads an image from the specified file, URL, or base64 string using - the `get_pil_image` function. It then saves this image in memory in PNG format and - retrieves its binary content. Depending on the `use_b64` flag, this binary content is - either returned directly or as a base64-encoded string. - - Parameters: - image_file (str, or Image): The path to the image file, a URL to an image, or a base64-encoded - string of the image. - use_b64 (bool): If True, the function returns a base64-encoded string of the image data. - If False, it returns the raw byte data of the image. Defaults to True. - - Returns: - bytes: The image data in raw bytes if `use_b64` is False, or a base64-encoded string - if `use_b64` is True. 
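To make the `GraphQueryEngine` protocol shown above concrete, here is a small, self-contained sketch of a class that satisfies it with a plain in-memory keyword lookup standing in for a real graph database. It assumes a release where the `graph_rag` modules removed in this diff are still importable; the class and its canned data are illustrative only.

```python
# Illustrative sketch (not part of this diff): structural conformance to the GraphQueryEngine
# protocol shown above, with keyword matching standing in for a graph database.
from typing import List, Optional

from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType
from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphStoreQueryResult


class InMemoryGraphQueryEngine:
    """Satisfies GraphQueryEngine (a Protocol) without inheriting from it."""

    def init_db(self, input_doc: Optional[List[Document]] = None):
        self._docs = list(input_doc or [])

    def add_records(self, new_records: List) -> bool:
        self._docs.extend(new_records)
        return True

    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
        hits = [d for d in self._docs if d.data and question.lower() in str(d.data).lower()]
        answer = str(hits[0].data) if hits else None
        return GraphStoreQueryResult(answer=answer, results=hits[:n_results])


engine = InMemoryGraphQueryEngine()
engine.init_db([Document(doctype=DocumentType.TEXT, data="Keanu Reeves starred in The Matrix.")])
print(engine.query("The Matrix").answer)
```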
- """ - image = get_pil_image(image_file) - - buffered = BytesIO() - image.save(buffered, format="PNG") - content = buffered.getvalue() - - if use_b64: - return base64.b64encode(content).decode("utf-8") - else: - return content - - -def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]: - """ - Formats the input prompt by replacing image tags and returns the new prompt along with image locations. - - Parameters: - - prompt (str): The input string that may contain image tags like . - - order_image_tokens (bool, optional): Whether to order the image tokens with numbers. - It will be useful for GPT-4V. Defaults to False. - - Returns: - - Tuple[str, List[str]]: A tuple containing the formatted string and a list of images (loaded in b64 format). - """ - - # Initialize variables - new_prompt = prompt - image_locations = [] - images = [] - image_count = 0 - - # Regular expression pattern for matching tags - img_tag_pattern = re.compile(r"]+)>") - - # Find all image tags - for match in img_tag_pattern.finditer(prompt): - image_location = match.group(1) - - try: - img_data = get_image_data(image_location) - except Exception as e: - # Remove the token - print(f"Warning! Unable to load image from {image_location}, because of {e}") - new_prompt = new_prompt.replace(match.group(0), "", 1) - continue - - image_locations.append(image_location) - images.append(img_data) - - # Increment the image count and replace the tag in the prompt - new_token = f"" if order_image_tokens else "" - - new_prompt = new_prompt.replace(match.group(0), new_token, 1) - image_count += 1 - - return new_prompt, images - - -def pil_to_data_uri(image: Image.Image) -> str: - """ - Converts a PIL Image object to a data URI. - - Parameters: - image (Image.Image): The PIL Image object. - - Returns: - str: The data URI string. - """ - buffered = BytesIO() - image.save(buffered, format="PNG") - content = buffered.getvalue() - return convert_base64_to_data_uri(base64.b64encode(content).decode("utf-8")) - - -def convert_base64_to_data_uri(base64_image): - def _get_mime_type_from_data_uri(base64_image): - # Decode the base64 string - image_data = base64.b64decode(base64_image) - # Check the first few bytes for known signatures - if image_data.startswith(b"\xff\xd8\xff"): - return "image/jpeg" - elif image_data.startswith(b"\x89PNG\r\n\x1a\n"): - return "image/png" - elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"): - return "image/gif" - elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP": - return "image/webp" - return "image/jpeg" # use jpeg for unknown formats, best guess. - - mime_type = _get_mime_type_from_data_uri(base64_image) - data_uri = f"data:{mime_type};base64,{base64_image}" - return data_uri - - -def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dict]]: - """ - Formats the input prompt by replacing image tags and returns a list of text and images. - - Args: - - prompt (str): The input string that may contain image tags like . - - img_format (str): what image format should be used. One of "uri", "url", "pil". - - Returns: - - List[Union[str, dict]]: A list of alternating text and image dictionary items. 
- """ - assert img_format in ["uri", "url", "pil"] - - output = [] - last_index = 0 - image_count = 0 - - # Find all image tags - for parsed_tag in utils.parse_tags_from_content("img", prompt): - image_location = parsed_tag["attr"]["src"] - try: - if img_format == "pil": - img_data = get_pil_image(image_location) - elif img_format == "uri": - img_data = get_image_data(image_location) - img_data = convert_base64_to_data_uri(img_data) - elif img_format == "url": - img_data = image_location - else: - raise ValueError(f"Unknown image format {img_format}") - except Exception as e: - # Warning and skip this token - print(f"Warning! Unable to load image from {image_location}, because {e}") - continue - - # Add text before this image tag to output list - output.append({"type": "text", "text": prompt[last_index : parsed_tag["match"].start()]}) - - # Add image data to output list - output.append({"type": "image_url", "image_url": {"url": img_data}}) - - last_index = parsed_tag["match"].end() - image_count += 1 - - # Add remaining text to output list - output.append({"type": "text", "text": prompt[last_index:]}) - return output - - -def extract_img_paths(paragraph: str) -> list: - """ - Extract image paths (URLs or local paths) from a text paragraph. - - Parameters: - paragraph (str): The input text paragraph. - - Returns: - list: A list of extracted image paths. - """ - # Regular expression to match image URLs and file paths - img_path_pattern = re.compile( - r"\b(?:http[s]?://\S+\.(?:jpg|jpeg|png|gif|bmp)|\S+\.(?:jpg|jpeg|png|gif|bmp))\b", re.IGNORECASE - ) - - # Find all matches in the paragraph - img_paths = re.findall(img_path_pattern, paragraph) - return img_paths - - -def _to_pil(data: str) -> Image.Image: - """ - Converts a base64 encoded image data string to a PIL Image object. - - This function first decodes the base64 encoded string to bytes, then creates a BytesIO object from the bytes, - and finally creates and returns a PIL Image object from the BytesIO object. - - Parameters: - data (str): The encoded image data string. - - Returns: - Image.Image: The PIL Image object created from the input data. - """ - return Image.open(BytesIO(base64.b64decode(data))) - - -def message_formatter_pil_to_b64(messages: List[Dict]) -> List[Dict]: - """ - Converts the PIL image URLs in the messages to base64 encoded data URIs. - - This function iterates over a list of message dictionaries. For each message, - if it contains a 'content' key with a list of items, it looks for items - with an 'image_url' key. The function then converts the PIL image URL - (pointed to by 'image_url') to a base64 encoded data URI. - - Parameters: - messages (List[Dict]): A list of message dictionaries. Each dictionary - may contain a 'content' key with a list of items, - some of which might be image URLs. - - Returns: - List[Dict]: A new list of message dictionaries with PIL image URLs in the - 'image_url' key converted to base64 encoded data URIs. - - Example Input: - [ - {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'}, - {'content': [ - {'type': 'text', 'text': "What's the breed of this dog here? \n"}, - {'type': 'image_url', 'image_url': {'url': a PIL.Image.Image}}, - {'type': 'text', 'text': '.'}], - 'role': 'user'} - ] - - Example Output: - [ - {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'}, - {'content': [ - {'type': 'text', 'text': "What's the breed of this dog here? 
\n"}, - {'type': 'image_url', 'image_url': {'url': a B64 Image}}, - {'type': 'text', 'text': '.'}], - 'role': 'user'} - ] - """ - new_messages = [] - for message in messages: - # Handle the new GPT messages format. - if isinstance(message, dict) and "content" in message and isinstance(message["content"], list): - message = copy.deepcopy(message) - for item in message["content"]: - if isinstance(item, dict) and "image_url" in item: - item["image_url"]["url"] = pil_to_data_uri(item["image_url"]["url"]) - - new_messages.append(message) - - return new_messages diff --git a/autogen/agentchat/contrib/llamaindex_conversable_agent.py b/autogen/agentchat/contrib/llamaindex_conversable_agent.py deleted file mode 100644 index dbf6f274ae87..000000000000 --- a/autogen/agentchat/contrib/llamaindex_conversable_agent.py +++ /dev/null @@ -1,108 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -from autogen import OpenAIWrapper -from autogen.agentchat import Agent, ConversableAgent -from autogen.agentchat.contrib.vectordb.utils import get_logger - -logger = get_logger(__name__) - -try: - from llama_index.core.agent.runner.base import AgentRunner - from llama_index.core.base.llms.types import ChatMessage - from llama_index.core.chat_engine.types import AgentChatResponse -except ImportError as e: - logger.fatal("Failed to import llama-index. Try running 'pip install llama-index'") - raise e - - -class LLamaIndexConversableAgent(ConversableAgent): - def __init__( - self, - name: str, - llama_index_agent: AgentRunner, - description: Optional[str] = None, - **kwargs, - ): - """ - Args: - name (str): agent name. - llama_index_agent (AgentRunner): llama index agent. - Please override this attribute if you want to reprogram the agent. - description (str): a short description of the agent. This description is used by other agents - (e.g. the GroupChatManager) to decide when to call upon this agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../conversable_agent#__init__). 
- """ - - if llama_index_agent is None: - raise ValueError("llama_index_agent must be provided") - - if description is None or description.isspace(): - raise ValueError("description must be provided") - - super().__init__( - name, - description=description, - **kwargs, - ) - - self._llama_index_agent = llama_index_agent - - # Override the `generate_oai_reply` - self.replace_reply_func(ConversableAgent.generate_oai_reply, LLamaIndexConversableAgent._generate_oai_reply) - - self.replace_reply_func(ConversableAgent.a_generate_oai_reply, LLamaIndexConversableAgent._a_generate_oai_reply) - - def _generate_oai_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[OpenAIWrapper] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Generate a reply using autogen.oai.""" - user_message, history = self._extract_message_and_history(messages=messages, sender=sender) - - chatResponse: AgentChatResponse = self._llama_index_agent.chat(message=user_message, chat_history=history) - - extracted_response = chatResponse.response - - return (True, extracted_response) - - async def _a_generate_oai_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[OpenAIWrapper] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Generate a reply using autogen.oai.""" - user_message, history = self._extract_message_and_history(messages=messages, sender=sender) - - chatResponse: AgentChatResponse = await self._llama_index_agent.achat( - message=user_message, chat_history=history - ) - - extracted_response = chatResponse.response - - return (True, extracted_response) - - def _extract_message_and_history( - self, messages: Optional[List[Dict]] = None, sender: Optional[Agent] = None - ) -> Tuple[str, List[ChatMessage]]: - """Extract the message and history from the messages.""" - if not messages: - messages = self._oai_messages[sender] - - if not messages: - return "", [] - - message = messages[-1].get("content", "") - - history = messages[:-1] - history_messages: List[ChatMessage] = [] - for history_message in history: - content = history_message.get("content", "") - role = history_message.get("role", "user") - if role: - if role == "user" or role == "assistant": - history_messages.append(ChatMessage(content=content, role=role, additional_kwargs={})) - return message, history_messages diff --git a/autogen/agentchat/contrib/llava_agent.py b/autogen/agentchat/contrib/llava_agent.py deleted file mode 100644 index 063b256d3cdf..000000000000 --- a/autogen/agentchat/contrib/llava_agent.py +++ /dev/null @@ -1,170 +0,0 @@ -import json -import logging -from typing import List, Optional, Tuple - -import replicate -import requests - -from autogen.agentchat.agent import Agent -from autogen.agentchat.contrib.img_utils import get_image_data, llava_formatter -from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent -from autogen.code_utils import content_str - -from ...formatting_utils import colored - -logger = logging.getLogger(__name__) - -# we will override the following variables later. -SEP = "###" - -DEFAULT_LLAVA_SYS_MSG = "You are an AI agent and you can view images." - - -class LLaVAAgent(MultimodalConversableAgent): - def __init__( - self, - name: str, - system_message: Optional[Tuple[str, List]] = DEFAULT_LLAVA_SYS_MSG, - *args, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the ChatCompletion inference. 
- Please override this attribute if you want to reprogram the agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../conversable_agent#__init__). - """ - super().__init__( - name, - system_message=system_message, - *args, - **kwargs, - ) - - assert self.llm_config is not None, "llm_config must be provided." - self.register_reply([Agent, None], reply_func=LLaVAAgent._image_reply, position=2) - - def _image_reply(self, messages=None, sender=None, config=None): - # Note: we did not use "llm_config" yet. - - if all((messages is None, sender is None)): - error_msg = f"Either {messages=} or {sender=} must be provided." - logger.error(error_msg) - raise AssertionError(error_msg) - - if messages is None: - messages = self._oai_messages[sender] - - # The formats for LLaVA and GPT are different. So, we manually handle them here. - images = [] - prompt = content_str(self.system_message) + "\n" - for msg in messages: - role = "Human" if msg["role"] == "user" else "Assistant" - # pdb.set_trace() - images += [d["image_url"]["url"] for d in msg["content"] if d["type"] == "image_url"] - content_prompt = content_str(msg["content"]) - prompt += f"{SEP}{role}: {content_prompt}\n" - prompt += "\n" + SEP + "Assistant: " - - # TODO: PIL to base64 - images = [get_image_data(im) for im in images] - print(colored(prompt, "blue")) - - out = "" - retry = 10 - while len(out) == 0 and retry > 0: - # image names will be inferred automatically from llava_call - out = llava_call_binary( - prompt=prompt, - images=images, - config_list=self.llm_config["config_list"], - temperature=self.llm_config.get("temperature", 0.5), - max_new_tokens=self.llm_config.get("max_new_tokens", 2000), - ) - retry -= 1 - - assert out != "", "Empty response from LLaVA." - - return True, out - - -def _llava_call_binary_with_config( - prompt: str, images: list, config: dict, max_new_tokens: int = 1000, temperature: float = 0.5, seed: int = 1 -): - if config["base_url"].find("0.0.0.0") >= 0 or config["base_url"].find("localhost") >= 0: - llava_mode = "local" - else: - llava_mode = "remote" - - if llava_mode == "local": - headers = {"User-Agent": "LLaVA Client"} - pload = { - "model": config["model"], - "prompt": prompt, - "max_new_tokens": max_new_tokens, - "temperature": temperature, - "stop": SEP, - "images": images, - } - - response = requests.post( - config["base_url"].rstrip("/") + "/worker_generate_stream", headers=headers, json=pload, stream=False - ) - - for chunk in response.iter_lines(chunk_size=8192, decode_unicode=False, delimiter=b"\0"): - if chunk: - data = json.loads(chunk.decode("utf-8")) - output = data["text"].split(SEP)[-1] - elif llava_mode == "remote": - # The Replicate version of the model only support 1 image for now. - img = "data:image/jpeg;base64," + images[0] - response = replicate.run( - config["base_url"], input={"image": img, "prompt": prompt.replace("", " "), "seed": seed} - ) - # The yorickvp/llava-13b model can stream output as it's running. - # The predict method returns an iterator, and you can iterate over that output. - output = "" - for item in response: - # https://replicate.com/yorickvp/llava-13b/versions/2facb4a474a0462c15041b78b1ad70952ea46b5ec6ad29583c0b29dbd4249591/api#output-schema - output += item - - # Remove the prompt and the space. 
- output = output.replace(prompt, "").strip().rstrip() - return output - - -def llava_call_binary( - prompt: str, images: list, config_list: list, max_new_tokens: int = 1000, temperature: float = 0.5, seed: int = 1 -): - # TODO 1: add caching around the LLaVA call to save compute and cost - # TODO 2: add `seed` to ensure reproducibility. The seed is not working now. - - for config in config_list: - try: - return _llava_call_binary_with_config(prompt, images, config, max_new_tokens, temperature, seed) - except Exception as e: - print(f"Error: {e}") - continue - - -def llava_call(prompt: str, llm_config: dict) -> str: - """ - Makes a call to the LLaVA service to generate text based on a given prompt - """ - - prompt, images = llava_formatter(prompt, order_image_tokens=False) - - for im in images: - if len(im) == 0: - raise RuntimeError("An image is empty!") - - return llava_call_binary( - prompt, - images, - config_list=llm_config["config_list"], - max_new_tokens=llm_config.get("max_new_tokens", 2000), - temperature=llm_config.get("temperature", 0.5), - seed=llm_config.get("seed", None), - ) diff --git a/autogen/agentchat/contrib/math_user_proxy_agent.py b/autogen/agentchat/contrib/math_user_proxy_agent.py deleted file mode 100644 index 699caeb85b3b..000000000000 --- a/autogen/agentchat/contrib/math_user_proxy_agent.py +++ /dev/null @@ -1,465 +0,0 @@ -import os -import re -from time import sleep -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union - -from pydantic import BaseModel, Extra, root_validator - -from autogen._pydantic import PYDANTIC_V1 -from autogen.agentchat import Agent, UserProxyAgent -from autogen.code_utils import UNKNOWN, execute_code, extract_code, infer_lang -from autogen.math_utils import get_answer - -PROMPTS = { - # default - "default": """Let's use Python to solve a math problem. - -Query requirements: -You should always use the 'print' function for the output and use fractions/radical forms instead of decimals. -You can use packages like sympy to help you. -You must follow the formats below to write your code: -```python -# your code -``` - -First state the key idea to solve the problem. You may choose from three ways to solve the problem: -Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed. -Case 2: If the problem is mostly reasoning, you can solve it by yourself directly. -Case 3: If the problem cannot be handled in the above two ways, please follow this process: -1. Solve the problem step by step (do not over-divide the steps). -2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated). -3. Wait for me to give the results. -4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning. - -After all the queries are run and you get the answer, put the answer in \\boxed{}. - -Problem: -""", - # select python or wolfram - "two_tools": """Let's use two tools (Python and Wolfram alpha) to solve a math problem. - -Query requirements: -You must follow the formats below to write your query: -For Wolfram Alpha: -```wolfram -# one wolfram query -``` -For Python: -```python -# your code -``` -When using Python, you should always use the 'print' function for the output and use fractions/radical forms instead of decimals. You can use packages like sympy to help you. -When using wolfram, give one query in each code block. 
- -Please follow this process: -1. Solve the problem step by step (do not over-divide the steps). -2. Take out any queries that can be asked through Python or Wolfram Alpha, select the most suitable tool to be used (for example, any calculations or equations that can be calculated). -3. Wait for me to give the results. -4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning. - -After all the queries are run and you get the answer, put the final answer in \\boxed{}. - -Problem: """, - # use python step by step - "python": """Let's use Python to solve a math problem. - -Query requirements: -You should always use the 'print' function for the output and use fractions/radical forms instead of decimals. -You can use packages like sympy to help you. -You must follow the formats below to write your code: -```python -# your code -``` - -Please follow this process: -1. Solve the problem step by step (do not over-divide the steps). -2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated). -3. Wait for me to give the results. -4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning. - -After all the queries are run and you get the answer, put the answer in \\boxed{}. - -Problem: """, -} - - -def _is_termination_msg_mathchat(message): - """Check if a message is a termination message.""" - if isinstance(message, dict): - message = message.get("content") - if message is None: - return False - cb = extract_code(message) - contain_code = False - for c in cb: - if c[0] == "python" or c[0] == "wolfram": - contain_code = True - break - return not contain_code and get_answer(message) is not None and get_answer(message) != "" - - -def _add_print_to_last_line(code): - """Add print() to the last line of a string.""" - # 1. check if there is already a print statement - if "print(" in code: - return code - # 2. extract the last line, enclose it in print() and return the new string - lines = code.splitlines() - last_line = lines[-1] - if "\t" in last_line or "=" in last_line: - return code - if "=" in last_line: - last_line = "print(" + last_line.split(" = ")[0] + ")" - lines.append(last_line) - else: - lines[-1] = "print(" + last_line + ")" - # 3. join the lines back together - return "\n".join(lines) - - -def _remove_print(code): - """remove all print statements from a string.""" - lines = code.splitlines() - lines = [line for line in lines if not line.startswith("print(")] - return "\n".join(lines) - - -class MathUserProxyAgent(UserProxyAgent): - """(Experimental) A MathChat agent that can handle math problems.""" - - MAX_CONSECUTIVE_AUTO_REPLY = 15 # maximum number of consecutive auto replies (subject to future change) - DEFAULT_REPLY = "Continue. Please keep solving the problem until you need to query. 
(If you get to the answer, put it in \\boxed{}.)" - - def __init__( - self, - name: Optional[str] = "MathChatAgent", # default set to MathChatAgent - is_termination_msg: Optional[ - Callable[[Dict], bool] - ] = _is_termination_msg_mathchat, # terminate if \boxed{} in message - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", # Fully automated - default_auto_reply: Optional[Union[str, Dict, None]] = DEFAULT_REPLY, - max_invalid_q_per_step=3, # a parameter needed in MathChat - **kwargs, - ): - """ - Args: - name (str): name of the agent - is_termination_msg (function): a function that takes a message in the form of a dictionary and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - human_input_mode (str): whether to ask for human inputs every time a message is received. - Possible values are "ALWAYS", "TERMINATE", "NEVER". - (1) When "ALWAYS", the agent prompts for human input every time a message is received. - Under this mode, the conversation stops when the human input is "exit", - or when is_termination_msg is True and there is no human input. - (2) When "TERMINATE", the agent only prompts for human input only when a termination message is received or - the number of auto reply reaches the max_consecutive_auto_reply. - (3) (Default) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops - when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True. - default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated. - max_invalid_q_per_step (int): (ADDED) the maximum number of invalid queries per step. - **kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__). - """ - super().__init__( - name=name, - is_termination_msg=is_termination_msg, - human_input_mode=human_input_mode, - default_auto_reply=default_auto_reply, - **kwargs, - ) - self.register_reply([Agent, None], MathUserProxyAgent._generate_math_reply, position=2) - # fixed var - self._max_invalid_q_per_step = max_invalid_q_per_step - - # mutable - self._valid_q_count = 0 - self._total_q_count = 0 - self._accum_invalid_q_per_step = 0 - self._previous_code = "" - self.last_reply = None - - @staticmethod - def message_generator(sender, recipient, context): - """Generate a prompt for the assistant agent with the given problem and prompt. - - Args: - sender (Agent): the sender of the message. - recipient (Agent): the recipient of the message. - context (dict): a dictionary with the following fields: - problem (str): the problem to be solved. - prompt_type (str, Optional): the type of the prompt. Possible values are "default", "python", "wolfram". - (1) "default": the prompt that allows the agent to choose between 3 ways to solve a problem: - 1. write a python program to solve it directly. - 2. solve it directly without python. - 3. solve it step by step with python. - (2) "python": - a simplified prompt from the third way of the "default" prompt, that asks the assistant - to solve the problem step by step with python. - (3) "two_tools": - a simplified prompt similar to the "python" prompt, but allows the model to choose between - Python and Wolfram Alpha to solve the problem. - customized_prompt (str, Optional): a customized prompt to be used. If it is not None, the prompt_type will be ignored. 
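The prompt types listed above are easiest to see in a short end-to-end sketch; `config_list` is a placeholder and the problem statement is arbitrary.

```python
# Illustrative sketch (not part of this diff): message_generator builds the prompt from the
# problem and prompt_type passed as context to initiate_chat. Credentials are placeholders.
from autogen import AssistantAgent
from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent

config_list = [{"model": "gpt-4", "api_key": "YOUR_OPENAI_API_KEY"}]

assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
mathproxyagent = MathUserProxyAgent(
    "mathproxyagent",
    human_input_mode="NEVER",
    code_execution_config={"use_docker": False},
)

mathproxyagent.initiate_chat(
    assistant,
    message=mathproxyagent.message_generator,   # called with the context below
    problem="Find all x that satisfy x**2 - 4*x + 3 = 0.",
    prompt_type="python",                       # "default", "python", or "two_tools"
)
```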
- - Returns: - str: the generated prompt ready to be sent to the assistant agent. - """ - sender._reset() - problem = context.get("problem") - prompt_type = context.get("prompt_type", "default") - customized_prompt = context.get("customized_prompt", None) - if customized_prompt is not None: - return customized_prompt + problem - return PROMPTS[prompt_type] + problem - - def _reset(self): - # super().reset() - self._valid_q_count = 0 - self._total_q_count = 0 - self._accum_invalid_q_per_step = 0 - self._previous_code = "" - self.last_reply = None - - def execute_one_python_code(self, pycode): - """Execute python code blocks. - - Previous python code will be saved and executed together with the new code. - the "print" function will also be added to the last line of the code if needed - """ - # Need to replace all "; " with "\n" to avoid syntax error when adding `print` to the last line - pycode = pycode.replace("; ", "\n").replace(";", "\n") - pycode = self._previous_code + _add_print_to_last_line(pycode) - - return_code, output, _ = execute_code(pycode, **self._code_execution_config, timeout=5) - is_success = return_code == 0 - - if not is_success: - # Remove the file information from the error string - pattern = r'File "/[^"]+\.py", line \d+, in .+\n' - if isinstance(output, str): - output = re.sub(pattern, "", output) - output = "Error: " + output - elif output == "": - # Check if there is any print statement - if "print" not in pycode: - output = "No output found. Make sure you print the results." - is_success = False - else: - output = "No output found." - is_success = True - - if len(output) > 2000: - output = "Your requested query response is too long. You might have made a mistake. Please revise your reasoning and query." - is_success = False - - if is_success: - # remove print and check if it still works - tmp = self._previous_code + "\n" + _remove_print(pycode) + "\n" - rcode, _, _ = execute_code(tmp, **self._code_execution_config) - else: - # only add imports and check if it works - tmp = self._previous_code + "\n" - for line in pycode.split("\n"): - if "import" in line: - tmp += line + "\n" - rcode, _, _ = execute_code(tmp, **self._code_execution_config) - - if rcode == 0: - self._previous_code = tmp - return output, is_success - - def execute_one_wolfram_query(self, query: str): - """Run one wolfram query and return the output. - - Args: - query: string of the query. - - Returns: - output: string with the output of the query. - is_success: boolean indicating whether the query was successful. - """ - # wolfram query handler - wolfram = WolframAlphaAPIWrapper() - output, is_success = wolfram.run(query) - if output == "": - output = "Error: The wolfram query is invalid." 
- is_success = False - return output, is_success - - def _generate_math_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ): - """Generate an auto reply.""" - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - message = message.get("content", "") - code_blocks = extract_code(message) - - if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN: - # no code block is found, lang should be `UNKNOWN`` - return True, self._default_auto_reply - is_success, all_success = True, True - reply = "" - for code_block in code_blocks: - lang, code = code_block - if not lang: - lang = infer_lang(code) - if lang == "python": - output, is_success = self.execute_one_python_code(code) - elif lang == "wolfram": - output, is_success = self.execute_one_wolfram_query(code) - else: - output = "Error: Unknown language." - is_success = False - - reply += output + "\n" - if not is_success: - all_success = False - self._valid_q_count -= 1 # count invalid queries - - reply = reply.strip() - - if self.last_reply == reply: - return True, reply + "\nYour query or result is same from the last, please try a new approach." - self.last_reply = reply - - if not all_success: - self._accum_invalid_q_per_step += 1 - if self._accum_invalid_q_per_step > self._max_invalid_q_per_step: - self._accum_invalid_q_per_step = 0 - reply = "Please revisit the problem statement and your reasoning. If you think this step is correct, solve it yourself and continue the next step. Otherwise, correct this step." - - return True, reply - - -# Modified based on langchain. Langchain is licensed under MIT License: -# The MIT License - -# Copyright (c) Harrison Chase - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - - -def get_from_dict_or_env(data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None) -> str: - """Get a value from a dictionary or an environment variable.""" - if key in data and data[key]: - return data[key] - elif env_key in os.environ and os.environ[env_key]: - return os.environ[env_key] - elif default is not None: - return default - else: - raise ValueError( - f"Did not find {key}, please add an environment variable" - f" `{env_key}` which contains it, or pass" - f" `{key}` as a named parameter." - ) - - -class WolframAlphaAPIWrapper(BaseModel): - """Wrapper for Wolfram Alpha. - - Docs for using: - - 1. Go to wolfram alpha and sign up for a developer account - 2. Create an app and get your APP ID - 3. 
Save your APP ID into WOLFRAM_ALPHA_APPID env variable - 4. pip install wolframalpha - - """ - - wolfram_client: Any #: :meta private: - wolfram_alpha_appid: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - if PYDANTIC_V1: - extra = Extra.forbid - - @root_validator(skip_on_failure=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - wolfram_alpha_appid = get_from_dict_or_env(values, "wolfram_alpha_appid", "WOLFRAM_ALPHA_APPID") - values["wolfram_alpha_appid"] = wolfram_alpha_appid - - try: - import wolframalpha - - except ImportError as e: - raise ImportError("wolframalpha is not installed. Please install it with `pip install wolframalpha`") from e - client = wolframalpha.Client(wolfram_alpha_appid) - values["wolfram_client"] = client - - return values - - def run(self, query: str) -> Tuple[str, bool]: - """Run query through WolframAlpha and parse result.""" - from urllib.error import HTTPError - - is_success = False # added - res = None - for _ in range(20): - try: - res = self.wolfram_client.query(query) - break - except HTTPError: - sleep(1) - except Exception: - return ( - "Wolfram Alpha wasn't able to answer it. Please try a new query for wolfram or use python.", - is_success, - ) - if res is None: - return ( - "Wolfram Alpha wasn't able to answer it (may due to web error), you can try again or use python.", - is_success, - ) - - try: - if not res["@success"]: - return ( - "Your Wolfram query is invalid. Please try a new query for wolfram or use python.", - is_success, - ) - assumption = next(res.pods).text - answer = "" - for result in res["pod"]: - if result["@title"] == "Solution": - answer = result["subpod"]["plaintext"] - if result["@title"] == "Results" or result["@title"] == "Solutions": - for i, sub in enumerate(result["subpod"]): - answer += f"ans {i}: " + sub["plaintext"] + "\n" - break - if answer == "": - answer = next(res.results).text - - except Exception: - return ( - "Wolfram Alpha wasn't able to answer it. Please try a new query for wolfram or use python.", - is_success, - ) - - if answer is None or answer == "": - # We don't want to return the assumption alone if answer is empty - return "No good Wolfram Alpha Result was found", is_success - is_success = True - return f"Assumption: {assumption} \nAnswer: {answer}", is_success diff --git a/autogen/agentchat/contrib/multimodal_conversable_agent.py b/autogen/agentchat/contrib/multimodal_conversable_agent.py deleted file mode 100644 index edeb88cd5317..000000000000 --- a/autogen/agentchat/contrib/multimodal_conversable_agent.py +++ /dev/null @@ -1,120 +0,0 @@ -import copy -from typing import Dict, List, Optional, Tuple, Union - -from autogen import OpenAIWrapper -from autogen.agentchat import Agent, ConversableAgent -from autogen.agentchat.contrib.img_utils import ( - gpt4v_formatter, - message_formatter_pil_to_b64, -) -from autogen.code_utils import content_str - -from ..._pydantic import model_dump - -DEFAULT_LMM_SYS_MSG = """You are a helpful AI assistant.""" -DEFAULT_MODEL = "gpt-4-vision-preview" - - -class MultimodalConversableAgent(ConversableAgent): - DEFAULT_CONFIG = { - "model": DEFAULT_MODEL, - } - - def __init__( - self, - name: str, - system_message: Optional[Union[str, List]] = DEFAULT_LMM_SYS_MSG, - is_termination_msg: str = None, - *args, - **kwargs, - ): - """ - Args: - name (str): agent name. - system_message (str): system message for the OpenAIWrapper inference. 
- Please override this attribute if you want to reprogram the agent. - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](../conversable_agent#__init__). - """ - super().__init__( - name, - system_message, - is_termination_msg=is_termination_msg, - *args, - **kwargs, - ) - # call the setter to handle special format. - self.update_system_message(system_message) - self._is_termination_msg = ( - is_termination_msg - if is_termination_msg is not None - else (lambda x: content_str(x.get("content")) == "TERMINATE") - ) - - # Override the `generate_oai_reply` - self.replace_reply_func(ConversableAgent.generate_oai_reply, MultimodalConversableAgent.generate_oai_reply) - self.replace_reply_func( - ConversableAgent.a_generate_oai_reply, - MultimodalConversableAgent.a_generate_oai_reply, - ) - - def update_system_message(self, system_message: Union[Dict, List, str]): - """Update the system message. - - Args: - system_message (str): system message for the OpenAIWrapper inference. - """ - self._oai_system_message[0]["content"] = self._message_to_dict(system_message)["content"] - self._oai_system_message[0]["role"] = "system" - - @staticmethod - def _message_to_dict(message: Union[Dict, List, str]) -> Dict: - """Convert a message to a dictionary. This implementation - handles the GPT-4V formatting for easier prompts. - - The message can be a string, a dictionary, or a list of dictionaries: - - If it's a string, it will be cast into a list and placed in the 'content' field. - - If it's a list, it will be directly placed in the 'content' field. - - If it's a dictionary, it is already in message dict format. The 'content' field of this dictionary - will be processed using the gpt4v_formatter. - """ - if isinstance(message, str): - return {"content": gpt4v_formatter(message, img_format="pil")} - if isinstance(message, list): - return {"content": message} - if isinstance(message, dict): - assert "content" in message, "The message dict must have a `content` field" - if isinstance(message["content"], str): - message = copy.deepcopy(message) - message["content"] = gpt4v_formatter(message["content"], img_format="pil") - try: - content_str(message["content"]) - except (TypeError, ValueError) as e: - print("The `content` field should be compatible with the content_str function!") - raise e - return message - raise ValueError(f"Unsupported message type: {type(message)}") - - def generate_oai_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[OpenAIWrapper] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Generate a reply using autogen.oai.""" - client = self.client if config is None else config - if client is None: - return False, None - if messages is None: - messages = self._oai_messages[sender] - - messages_with_b64_img = message_formatter_pil_to_b64(self._oai_system_message + messages) - - # TODO: #1143 handle token limit exceeded error - response = client.create(context=messages[-1].pop("context", None), messages=messages_with_b64_img) - - # TODO: line 301, line 271 is converting messages to dict. Can be removed after ChatCompletionMessage_to_dict is merged. 
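A short sketch of driving the agent above with an image message. The `<img ...>` tag format is inferred from the `gpt4v_formatter` parsing shown earlier, and the URL, model, and credentials are placeholders.

```python
# Illustrative sketch (not part of this diff): the <img ...> tag in the message is parsed by
# gpt4v_formatter and converted into an image_url content item. Placeholders throughout.
from autogen import UserProxyAgent
from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent

config_list = [{"model": "gpt-4-vision-preview", "api_key": "YOUR_OPENAI_API_KEY"}]

image_agent = MultimodalConversableAgent(
    "image-explainer",
    llm_config={"config_list": config_list, "temperature": 0.5, "max_tokens": 300},
)
user_proxy = UserProxyAgent("user_proxy", human_input_mode="NEVER", code_execution_config=False)

user_proxy.initiate_chat(
    image_agent,
    message="What breed is the dog in this picture? <img https://example.com/dog.jpg>",
    max_turns=1,
)
```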
- extracted_response = client.extract_text_or_completion_object(response)[0] - if not isinstance(extracted_response, str): - extracted_response = model_dump(extracted_response) - return True, extracted_response diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py deleted file mode 100644 index f1cc6947d50e..000000000000 --- a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +++ /dev/null @@ -1,319 +0,0 @@ -import warnings -from typing import Callable, Dict, List, Literal, Optional - -from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent -from autogen.agentchat.contrib.vectordb.utils import ( - chroma_results_to_query_results, - filter_results_by_distance, - get_logger, -) -from autogen.retrieve_utils import TEXT_FORMATS, get_files_from_dir, split_files_to_chunks - -logger = get_logger(__name__) - -try: - import fastembed - from qdrant_client import QdrantClient, models - from qdrant_client.fastembed_common import QueryResponse -except ImportError as e: - logger.fatal("Failed to import qdrant_client with fastembed. Try running 'pip install qdrant_client[fastembed]'") - raise e - - -class QdrantRetrieveUserProxyAgent(RetrieveUserProxyAgent): - def __init__( - self, - name="RetrieveChatAgent", # default set to RetrieveChatAgent - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "ALWAYS", - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - retrieve_config: Optional[Dict] = None, # config for the retrieve agent - **kwargs, - ): - """ - Args: - name (str): name of the agent. - human_input_mode (str): whether to ask for human inputs every time a message is received. - Possible values are "ALWAYS", "TERMINATE", "NEVER". - 1. When "ALWAYS", the agent prompts for human input every time a message is received. - Under this mode, the conversation stops when the human input is "exit", - or when is_termination_msg is True and there is no human input. - 2. When "TERMINATE", the agent only prompts for human input only when a termination message is received or - the number of auto reply reaches the max_consecutive_auto_reply. - 3. When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops - when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True. - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - retrieve_config (dict or None): config for the retrieve agent. - To use default config, set to None. Otherwise, set to a dictionary with the following keys: - - task (Optional, str): the task of the retrieve chat. Possible values are "code", "qa" and "default". System - prompt will be different for different tasks. The default value is `default`, which supports both code and qa. - - client (Optional, qdrant_client.QdrantClient(":memory:")): A QdrantClient instance. If not provided, an in-memory instance will be assigned. Not recommended for production. - will be used. If you want to use other vector db, extend this class and override the `retrieve_docs` function. - - docs_path (Optional, Union[str, List[str]]): the path to the docs directory. It can also be the path to a single file, - the url to a single file or a list of directories, files and urls. 
Default is None, which works only if the collection is already created. - - extra_docs (Optional, bool): when true, allows adding documents with unique IDs without overwriting existing ones; when false, it replaces existing documents using default IDs, risking collection overwrite., - when set to true it enables the system to assign unique IDs starting from "length+i" for new document chunks, preventing the replacement of existing documents and facilitating the addition of more content to the collection.. - By default, "extra_docs" is set to false, starting document IDs from zero. This poses a risk as new documents might overwrite existing ones, potentially causing unintended loss or alteration of data in the collection. - - collection_name (Optional, str): the name of the collection. - If key not provided, a default name `autogen-docs` will be used. - - model (Optional, str): the model to use for the retrieve chat. - If key not provided, a default model `gpt-4` will be used. - - chunk_token_size (Optional, int): the chunk token size for the retrieve chat. - If key not provided, a default size `max_tokens * 0.4` will be used. - - context_max_tokens (Optional, int): the context max token size for the retrieve chat. - If key not provided, a default size `max_tokens * 0.8` will be used. - - chunk_mode (Optional, str): the chunk mode for the retrieve chat. Possible values are - "multi_lines" and "one_line". If key not provided, a default mode `multi_lines` will be used. - - must_break_at_empty_line (Optional, bool): chunk will only break at empty line if True. Default is True. - If chunk_mode is "one_line", this parameter will be ignored. - - embedding_model (Optional, str): the embedding model to use for the retrieve chat. - If key not provided, a default model `BAAI/bge-small-en-v1.5` will be used. All available models - can be found at `https://qdrant.github.io/fastembed/examples/Supported_Models/`. - - customized_prompt (Optional, str): the customized prompt for the retrieve chat. Default is None. - - customized_answer_prefix (Optional, str): the customized answer prefix for the retrieve chat. Default is "". - If not "" and the customized_answer_prefix is not in the answer, `Update Context` will be triggered. - - update_context (Optional, bool): if False, will not apply `Update Context` for interactive retrieval. Default is True. - - custom_token_count_function (Optional, Callable): a custom function to count the number of tokens in a string. - The function should take a string as input and return three integers (token_count, tokens_per_message, tokens_per_name). - Default is None, tiktoken will be used and may not be accurate for non-OpenAI models. - - custom_text_split_function (Optional, Callable): a custom function to split a string into a list of strings. - Default is None, will use the default function in `autogen.retrieve_utils.split_text_to_chunks`. - - custom_text_types (Optional, List[str]): a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`. - This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types. - - recursive (Optional, bool): whether to search documents recursively in the docs_path. Default is True. - - parallel (Optional, int): How many parallel workers to use for embedding. Defaults to the number of CPU cores. - - on_disk (Optional, bool): Whether to store the collection on disk. Default is False. - - quantization_config: Quantization configuration. 
If None, quantization will be disabled. - - hnsw_config: HNSW configuration. If None, default configuration will be used. - You can find more info about the hnsw configuration options at https://qdrant.tech/documentation/concepts/indexing/#vector-index. - API Reference: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection - - payload_indexing: Whether to create a payload index for the document field. Default is False. - You can find more info about the payload indexing options at https://qdrant.tech/documentation/concepts/indexing/#payload-index - API Reference: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_field_index - **kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__). - - """ - warnings.warn( - "The QdrantRetrieveUserProxyAgent is deprecated. Please use the RetrieveUserProxyAgent instead, set `vector_db` to `qdrant`.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(name, human_input_mode, is_termination_msg, retrieve_config, **kwargs) - self._client = self._retrieve_config.get("client", QdrantClient(":memory:")) - self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-small-en-v1.5") - # Uses all available CPU cores to encode data when set to 0 - self._parallel = self._retrieve_config.get("parallel", 0) - self._on_disk = self._retrieve_config.get("on_disk", False) - self._quantization_config = self._retrieve_config.get("quantization_config", None) - self._hnsw_config = self._retrieve_config.get("hnsw_config", None) - self._payload_indexing = self._retrieve_config.get("payload_indexing", False) - - def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = ""): - """ - Args: - problem (str): the problem to be solved. - n_results (int): the number of results to be retrieved. Default is 20. - search_string (str): only docs that contain an exact match of this string will be retrieved. Default is "". 
- """ - if not self._collection: - print("Trying to create collection.") - create_qdrant_from_dir( - dir_path=self._docs_path, - max_tokens=self._chunk_token_size, - client=self._client, - collection_name=self._collection_name, - chunk_mode=self._chunk_mode, - must_break_at_empty_line=self._must_break_at_empty_line, - embedding_model=self._embedding_model, - custom_text_split_function=self.custom_text_split_function, - custom_text_types=self._custom_text_types, - recursive=self._recursive, - extra_docs=self._extra_docs, - parallel=self._parallel, - on_disk=self._on_disk, - quantization_config=self._quantization_config, - hnsw_config=self._hnsw_config, - payload_indexing=self._payload_indexing, - ) - self._collection = True - - results = query_qdrant( - query_texts=problem, - n_results=n_results, - search_string=search_string, - client=self._client, - collection_name=self._collection_name, - embedding_model=self._embedding_model, - ) - results["contents"] = results.pop("documents") - results = chroma_results_to_query_results(results, "distances") - results = filter_results_by_distance(results, self._distance_threshold) - - self._search_string = search_string - self._results = results - - -def create_qdrant_from_dir( - dir_path: str, - max_tokens: int = 4000, - client: QdrantClient = None, - collection_name: str = "all-my-documents", - chunk_mode: str = "multi_lines", - must_break_at_empty_line: bool = True, - embedding_model: str = "BAAI/bge-small-en-v1.5", - custom_text_split_function: Callable = None, - custom_text_types: List[str] = TEXT_FORMATS, - recursive: bool = True, - extra_docs: bool = False, - parallel: int = 0, - on_disk: bool = False, - quantization_config: Optional[models.QuantizationConfig] = None, - hnsw_config: Optional[models.HnswConfigDiff] = None, - payload_indexing: bool = False, - qdrant_client_options: Optional[Dict] = {}, -): - """Create a Qdrant collection from all the files in a given directory, the directory can also be a single file or a - url to a single file. - - Args: - dir_path (str): the path to the directory, file or url. - max_tokens (Optional, int): the maximum number of tokens per chunk. Default is 4000. - client (Optional, QdrantClient): the QdrantClient instance. Default is None. - collection_name (Optional, str): the name of the collection. Default is "all-my-documents". - chunk_mode (Optional, str): the chunk mode. Default is "multi_lines". - must_break_at_empty_line (Optional, bool): Whether to break at empty line. Default is True. - embedding_model (Optional, str): the embedding model to use. Default is "BAAI/bge-small-en-v1.5". - The list of all the available models can be at https://qdrant.github.io/fastembed/examples/Supported_Models/. - custom_text_split_function (Optional, Callable): a custom function to split a string into a list of strings. - Default is None, will use the default function in `autogen.retrieve_utils.split_text_to_chunks`. - custom_text_types (Optional, List[str]): a list of file types to be processed. Default is TEXT_FORMATS. - recursive (Optional, bool): whether to search documents recursively in the dir_path. Default is True. - extra_docs (Optional, bool): whether to add more documents in the collection. Default is False - parallel (Optional, int): How many parallel workers to use for embedding. Defaults to the number of CPU cores - on_disk (Optional, bool): Whether to store the collection on disk. Default is False. - quantization_config: Quantization configuration. If None, quantization will be disabled. 
- Ref: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection - hnsw_config: HNSW configuration. If None, default configuration will be used. - Ref: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection - payload_indexing: Whether to create a payload index for the document field. Default is False. - qdrant_client_options: (Optional, dict): the options for instantiating the qdrant client. - Ref: https://github.com/qdrant/qdrant-client/blob/master/qdrant_client/qdrant_client.py#L36-L58. - """ - if client is None: - client = QdrantClient(**qdrant_client_options) - client.set_model(embedding_model) - - if custom_text_split_function is not None: - chunks, sources = split_files_to_chunks( - get_files_from_dir(dir_path, custom_text_types, recursive), - custom_text_split_function=custom_text_split_function, - ) - else: - chunks, sources = split_files_to_chunks( - get_files_from_dir(dir_path, custom_text_types, recursive), max_tokens, chunk_mode, must_break_at_empty_line - ) - logger.info(f"Found {len(chunks)} chunks.") - - collection = None - # Check if collection by same name exists, if not, create it with custom options - try: - collection = client.get_collection(collection_name=collection_name) - except Exception: - client.create_collection( - collection_name=collection_name, - vectors_config=client.get_fastembed_vector_params( - on_disk=on_disk, quantization_config=quantization_config, hnsw_config=hnsw_config - ), - ) - collection = client.get_collection(collection_name=collection_name) - - length = 0 - if extra_docs: - length = len(collection.get()["ids"]) - - # Upsert in batch of 100 or less if the total number of chunks is less than 100 - for i in range(0, len(chunks), min(100, len(chunks))): - end_idx = i + min(100, len(chunks) - i) - client.add( - collection_name, - documents=chunks[i:end_idx], - ids=[(j + length) for j in range(i, end_idx)], - parallel=parallel, - ) - - # Create a payload index for the document field - # Enables highly efficient payload filtering. Reference: https://qdrant.tech/documentation/concepts/indexing/#indexing - # Creating an index requires additional computational resources and memory. - # If filtering performance is critical, we can consider creating an index. - if payload_indexing: - client.create_payload_index( - collection_name=collection_name, - field_name="document", - field_schema=models.TextIndexParams( - type="text", - tokenizer=models.TokenizerType.WORD, - min_token_len=2, - max_token_len=15, - ), - ) - - -def query_qdrant( - query_texts: List[str], - n_results: int = 10, - client: QdrantClient = None, - collection_name: str = "all-my-documents", - search_string: str = "", - embedding_model: str = "BAAI/bge-small-en-v1.5", - qdrant_client_options: Optional[Dict] = {}, -) -> List[List[QueryResponse]]: - """Perform a similarity search with filters on a Qdrant collection - - Args: - query_texts (List[str]): the query texts. - n_results (Optional, int): the number of results to return. Default is 10. - client (Optional, API): the QdrantClient instance. A default in-memory client will be instantiated if None. - collection_name (Optional, str): the name of the collection. Default is "all-my-documents". - search_string (Optional, str): the search string. Default is "". - embedding_model (Optional, str): the embedding model to use. Default is "all-MiniLM-L6-v2". Will be ignored if embedding_function is not None. 
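To show how the two module-level helpers above relate, here is a compact sketch that indexes a folder and then queries it. `./docs` is a placeholder path, and `qdrant_client[fastembed]` is assumed to be installed, as noted in the import guard at the top of this file.

```python
# Illustrative sketch (not part of this diff): build an in-memory Qdrant collection from a
# docs folder with create_qdrant_from_dir, then query it with query_qdrant.
from qdrant_client import QdrantClient

from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import (
    create_qdrant_from_dir,
    query_qdrant,
)

client = QdrantClient(":memory:")
create_qdrant_from_dir(
    dir_path="./docs",            # placeholder: directory, single file, or URL
    max_tokens=2000,
    client=client,
    collection_name="my-docs",
)

results = query_qdrant(
    ["How do I enable payload indexing?"],
    n_results=3,
    client=client,
    collection_name="my-docs",
)
print(results["documents"][0])    # top documents for the first query text
```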
- qdrant_client_options: (Optional, dict): the options for instantiating the qdrant client. Reference: https://github.com/qdrant/qdrant-client/blob/master/qdrant_client/qdrant_client.py#L36-L58. - - Returns: - List[List[QueryResponse]]: the query result. The format is: - class QueryResponse(BaseModel, extra="forbid"): # type: ignore - id: Union[str, int] - embedding: Optional[List[float]] - metadata: Dict[str, Any] - document: str - score: float - """ - if client is None: - client = QdrantClient(**qdrant_client_options) - client.set_model(embedding_model) - - results = client.query_batch( - collection_name, - query_texts, - limit=n_results, - query_filter=( - models.Filter( - must=[ - models.FieldCondition( - key="document", - match=models.MatchText(text=search_string), - ) - ] - ) - if search_string - else None - ), - ) - - data = { - "ids": [[result.id for result in sublist] for sublist in results], - "documents": [[result.document for result in sublist] for sublist in results], - "distances": [[result.score for result in sublist] for sublist in results], - "metadatas": [[result.metadata for result in sublist] for sublist in results], - } - return data diff --git a/autogen/agentchat/contrib/retrieve_assistant_agent.py b/autogen/agentchat/contrib/retrieve_assistant_agent.py deleted file mode 100644 index 173bc4432e78..000000000000 --- a/autogen/agentchat/contrib/retrieve_assistant_agent.py +++ /dev/null @@ -1,50 +0,0 @@ -import warnings -from typing import Any, Dict, List, Optional, Tuple, Union - -from autogen.agentchat.agent import Agent -from autogen.agentchat.assistant_agent import AssistantAgent - - -class RetrieveAssistantAgent(AssistantAgent): - """(Experimental) Retrieve Assistant agent, designed to solve a task with LLM. - - RetrieveAssistantAgent is a subclass of AssistantAgent configured with a default system message. - The default system message is designed to solve a task with LLM, - including suggesting python code blocks and debugging. - `human_input_mode` is default to "NEVER" - and `code_execution_config` is default to False. - This agent doesn't execute code by default, and expects the user to execute the code. - """ - - def __init__(self, *args, **kwargs): - warnings.warn( - "The RetrieveAssistantAgent is deprecated. Please use the AssistantAgent instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - self.register_reply(Agent, RetrieveAssistantAgent._generate_retrieve_assistant_reply) - - def _generate_retrieve_assistant_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - if "exitcode: 0 (execution succeeded)" in message.get("content", ""): - # Terminate the conversation when the code execution succeeds. Although sometimes even when the - # code execution succeeds, the task is not solved, but it's hard to tell. If the human_input_mode - # of RetrieveUserProxyAgent is "TERMINATE" or "ALWAYS", user can still continue the conversation. 
- return True, "TERMINATE" - elif ( - "UPDATE CONTEXT" in message.get("content", "")[-20:].upper() - or "UPDATE CONTEXT" in message.get("content", "")[:20].upper() - ): - return True, "UPDATE CONTEXT" - else: - return False, None diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py deleted file mode 100644 index b247d7a158f6..000000000000 --- a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py +++ /dev/null @@ -1,695 +0,0 @@ -import hashlib -import os -import re -import uuid -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union - -from IPython import get_ipython - -try: - import chromadb -except ImportError as e: - raise ImportError(f"{e}. You can try `pip install pyautogen[retrievechat]`, or install `chromadb` manually.") -from autogen.agentchat import UserProxyAgent -from autogen.agentchat.agent import Agent -from autogen.agentchat.contrib.vectordb.base import Document, QueryResults, VectorDB, VectorDBFactory -from autogen.agentchat.contrib.vectordb.utils import ( - chroma_results_to_query_results, - filter_results_by_distance, - get_logger, -) -from autogen.code_utils import extract_code -from autogen.retrieve_utils import ( - TEXT_FORMATS, - create_vector_db_from_dir, - get_files_from_dir, - query_vector_db, - split_files_to_chunks, -) -from autogen.token_count_utils import count_token - -from ...formatting_utils import colored - -logger = get_logger(__name__) - -PROMPT_DEFAULT = """You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the -context provided by the user. You should follow the following steps to answer a question: -Step 1, you estimate the user's intent based on the question and context. The intent can be a code generation task or -a question answering task. -Step 2, you reply based on the intent. -If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`. -If user's intent is code generation, you must obey the following rules: -Rule 1. You MUST NOT install any packages because all the packages needed are already installed. -Rule 2. You must follow the formats below to write your code: -```language -# your code -``` - -If user's intent is question answering, you must give as short an answer as possible. - -User's question is: {input_question} - -Context is: {input_context} - -The source of the context is: {input_sources} - -If you can answer the question, in the end of your answer, add the source of the context in the format of `Sources: source1, source2, ...`. -""" - -PROMPT_CODE = """You're a retrieve augmented coding assistant. You answer user's questions based on your own knowledge and the -context provided by the user. -If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`. -For code generation, you must obey the following rules: -Rule 1. You MUST NOT install any packages because all the packages needed are already installed. -Rule 2. You must follow the formats below to write your code: -```language -# your code -``` - -User's question is: {input_question} - -Context is: {input_context} -""" - -PROMPT_QA = """You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the -context provided by the user. -If you can't answer the question with or without the current context, you should reply exactly `UPDATE CONTEXT`. -You must give as short an answer as possible. 
- -User's question is: {input_question} - -Context is: {input_context} -""" - -HASH_LENGTH = int(os.environ.get("HASH_LENGTH", 8)) -UPDATE_CONTEXT_IN_PROMPT = "you should reply exactly `UPDATE CONTEXT`" - - -class RetrieveUserProxyAgent(UserProxyAgent): - """(In preview) The Retrieval-Augmented User Proxy retrieves document chunks based on the embedding - similarity, and sends them along with the question to the Retrieval-Augmented Assistant - """ - - def __init__( - self, - name="RetrieveChatAgent", # default set to RetrieveChatAgent - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "ALWAYS", - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - retrieve_config: Optional[Dict] = None, # config for the retrieve agent - **kwargs, - ): - r""" - Args: - name (str): name of the agent. - - human_input_mode (str): whether to ask for human inputs every time a message is received. - Possible values are "ALWAYS", "TERMINATE", "NEVER". - 1. When "ALWAYS", the agent prompts for human input every time a message is received. - Under this mode, the conversation stops when the human input is "exit", - or when is_termination_msg is True and there is no human input. - 2. When "TERMINATE", the agent only prompts for human input only when a termination - message is received or the number of auto reply reaches - the max_consecutive_auto_reply. - 3. When "NEVER", the agent will never prompt for human input. Under this mode, the - conversation stops when the number of auto reply reaches the - max_consecutive_auto_reply or when is_termination_msg is True. - - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - - retrieve_config (dict or None): config for the retrieve agent. - - To use default config, set to None. Otherwise, set to a dictionary with the - following keys: - - `task` (Optional, str) - the task of the retrieve chat. Possible values are - "code", "qa" and "default". System prompt will be different for different tasks. - The default value is `default`, which supports both code and qa, and provides - source information in the end of the response. - - `vector_db` (Optional, Union[str, VectorDB]) - the vector db for the retrieve chat. - If it's a string, it should be the type of the vector db, such as "chroma"; otherwise, - it should be an instance of the VectorDB protocol. Default is "chroma". - Set `None` to use the deprecated `client`. - - `db_config` (Optional, Dict) - the config for the vector db. Default is `{}`. Please make - sure you understand the config for the vector db you are using, otherwise, leave it as `{}`. - Only valid when `vector_db` is a string. - - `client` (Optional, chromadb.Client) - the chromadb client. If key not provided, a - default client `chromadb.Client()` will be used. If you want to use other - vector db, extend this class and override the `retrieve_docs` function. - *[Deprecated]* use `vector_db` instead. - - `docs_path` (Optional, Union[str, List[str]]) - the path to the docs directory. It - can also be the path to a single file, the url to a single file or a list - of directories, files and urls. Default is None, which works only if the - collection is already created. 
- - `extra_docs` (Optional, bool) - whether to add more documents to an existing collection.
- When set to true, the system assigns unique IDs starting from "length+i" to new
- document chunks, preventing the replacement of existing documents and facilitating
- the addition of more content to the collection.
- By default, "extra_docs" is set to false, starting document IDs from zero.
- This poses a risk as new documents might overwrite existing ones, potentially
- causing unintended loss or alteration of data in the collection.
- *[Deprecated]* use `new_docs` when using `vector_db` instead of `client`.
- - `new_docs` (Optional, bool) - when True, only adds new documents to the collection;
- when False, updates existing documents and adds new ones. Default is True.
- Document id is used to determine if a document is new or existing. By default, the
- id is the hash value of the content.
- - `model` (Optional, str) - the model to use for the retrieve chat.
- If key not provided, a default model `gpt-4` will be used.
- - `chunk_token_size` (Optional, int) - the chunk token size for the retrieve chat.
- If key not provided, a default size `max_tokens * 0.4` will be used.
- - `context_max_tokens` (Optional, int) - the context max token size for the
- retrieve chat.
- If key not provided, a default size `max_tokens * 0.8` will be used.
- - `chunk_mode` (Optional, str) - the chunk mode for the retrieve chat. Possible values
- are "multi_lines" and "one_line". If key not provided, a default mode
- `multi_lines` will be used.
- - `must_break_at_empty_line` (Optional, bool) - chunks will only break at empty lines
- if True. Default is True.
- If chunk_mode is "one_line", this parameter will be ignored.
- - `embedding_model` (Optional, str) - the embedding model to use for the retrieve chat.
- If key not provided, a default model `all-MiniLM-L6-v2` will be used. All available
- models can be found at `https://www.sbert.net/docs/pretrained_models.html`.
- The default model is a fast model. If you want to use a high-performance model,
- `all-mpnet-base-v2` is recommended.
- *[Deprecated]* not needed when using `vector_db` instead of `client`.
- - `embedding_function` (Optional, Callable) - the embedding function for creating the
- vector db. Default is None, SentenceTransformer with the given `embedding_model`
- will be used. If you want to use OpenAI, Cohere, HuggingFace or other embedding
- functions, you can pass it here,
- following the examples in `https://docs.trychroma.com/guides/embeddings`.
- - `customized_prompt` (Optional, str) - the customized prompt for the retrieve chat.
- Default is None.
- - `customized_answer_prefix` (Optional, str) - the customized answer prefix for the
- retrieve chat. Default is "".
- If not "" and the customized_answer_prefix is not in the answer,
- `Update Context` will be triggered.
- - `update_context` (Optional, bool) - if False, will not apply `Update Context` for
- interactive retrieval. Default is True.
- - `collection_name` (Optional, str) - the name of the collection.
- If key not provided, a default name `autogen-docs` will be used.
- - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is False.
- - `overwrite` (Optional, bool) - Whether to overwrite the collection if it exists. Default is False.
- Case 1. If the collection does not exist, create the collection.
- Case 2. 
the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raise a ValueError. - - `custom_token_count_function` (Optional, Callable) - a custom function to count the - number of tokens in a string. - The function should take (text:str, model:str) as input and return the - token_count(int). the retrieve_config["model"] will be passed in the function. - Default is autogen.token_count_utils.count_token that uses tiktoken, which may - not be accurate for non-OpenAI models. - - `custom_text_split_function` (Optional, Callable) - a custom function to split a - string into a list of strings. - Default is None, will use the default function in - `autogen.retrieve_utils.split_text_to_chunks`. - - `custom_text_types` (Optional, List[str]) - a list of file types to be processed. - Default is `autogen.retrieve_utils.TEXT_FORMATS`. - This only applies to files under the directories in `docs_path`. Explicitly - included files and urls will be chunked regardless of their types. - - `recursive` (Optional, bool) - whether to search documents recursively in the - docs_path. Default is True. - - `distance_threshold` (Optional, float) - the threshold for the distance score, only - distance smaller than it will be returned. Will be ignored if < 0. Default is -1. - - `**kwargs` (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__). - - Example: - - Example of overriding retrieve_docs - If you have set up a customized vector db, and it's - not compatible with chromadb, you can easily plug in it with below code. - *[Deprecated]* use `vector_db` instead. You can extend VectorDB and pass it to the agent. - ```python - class MyRetrieveUserProxyAgent(RetrieveUserProxyAgent): - def query_vector_db( - self, - query_texts: List[str], - n_results: int = 10, - search_string: str = "", - **kwargs, - ) -> Dict[str, Union[List[str], List[List[str]]]]: - # define your own query function here - pass - - def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = "", **kwargs): - results = self.query_vector_db( - query_texts=[problem], - n_results=n_results, - search_string=search_string, - **kwargs, - ) - - self._results = results - print("doc_ids: ", results["ids"]) - ``` - """ - super().__init__( - name=name, - human_input_mode=human_input_mode, - **kwargs, - ) - - self._retrieve_config = {} if retrieve_config is None else retrieve_config - self._task = self._retrieve_config.get("task", "default") - self._vector_db = self._retrieve_config.get("vector_db", "chroma") - self._db_config = self._retrieve_config.get("db_config", {}) - self._client = self._retrieve_config.get("client", None) - if self._client is None: - self._client = chromadb.Client() - self._docs_path = self._retrieve_config.get("docs_path", None) - self._extra_docs = self._retrieve_config.get("extra_docs", False) - self._new_docs = self._retrieve_config.get("new_docs", True) - self._collection_name = self._retrieve_config.get("collection_name", "autogen-docs") - if "docs_path" not in self._retrieve_config: - logger.warning( - "docs_path is not provided in retrieve_config. " - f"Will raise ValueError if the collection `{self._collection_name}` doesn't exist. " - "Set docs_path to None to suppress this warning." 
- ) - self._model = self._retrieve_config.get("model", "gpt-4") - self._max_tokens = self.get_max_tokens(self._model) - self._chunk_token_size = int(self._retrieve_config.get("chunk_token_size", self._max_tokens * 0.4)) - self._chunk_mode = self._retrieve_config.get("chunk_mode", "multi_lines") - self._must_break_at_empty_line = self._retrieve_config.get("must_break_at_empty_line", True) - self._embedding_model = self._retrieve_config.get("embedding_model", "all-MiniLM-L6-v2") - self._embedding_function = self._retrieve_config.get("embedding_function", None) - self.customized_prompt = self._retrieve_config.get("customized_prompt", None) - self.customized_answer_prefix = self._retrieve_config.get("customized_answer_prefix", "").upper() - self.update_context = self._retrieve_config.get("update_context", True) - self._get_or_create = self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else True - self._overwrite = self._retrieve_config.get("overwrite", False) - self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token) - self.custom_text_split_function = self._retrieve_config.get("custom_text_split_function", None) - self._custom_text_types = self._retrieve_config.get("custom_text_types", TEXT_FORMATS) - self._recursive = self._retrieve_config.get("recursive", True) - self._context_max_tokens = self._retrieve_config.get("context_max_tokens", self._max_tokens * 0.8) - self._collection = True if self._docs_path is None else False # whether the collection is created - self._ipython = get_ipython() - self._doc_idx = -1 # the index of the current used doc - self._results = [] # the results of the current query - self._intermediate_answers = set() # the intermediate answers - self._doc_contents = [] # the contents of the current used doc - self._doc_ids = [] # the ids of the current used doc - self._current_docs_in_context = [] # the ids of the current context sources - self._search_string = "" # the search string used in the current query - self._distance_threshold = self._retrieve_config.get("distance_threshold", -1) - # update the termination message function - self._is_termination_msg = ( - self._is_termination_msg_retrievechat if is_termination_msg is None else is_termination_msg - ) - if isinstance(self._vector_db, str): - if not isinstance(self._db_config, dict): - raise ValueError("`db_config` should be a dictionary.") - if "embedding_function" in self._retrieve_config: - self._db_config["embedding_function"] = self._embedding_function - self._vector_db = VectorDBFactory.create_vector_db(db_type=self._vector_db, **self._db_config) - self.register_reply(Agent, RetrieveUserProxyAgent._generate_retrieve_user_reply, position=2) - self.register_hook( - hookable_method="process_message_before_send", - hook=self._check_update_context_before_send, - ) - - def _init_db(self): - if not self._vector_db: - return - - IS_TO_CHUNK = False # whether to chunk the raw files - if self._new_docs: - IS_TO_CHUNK = True - if not self._docs_path: - try: - self._vector_db.get_collection(self._collection_name) - logger.warning(f"`docs_path` is not provided. Use the existing collection `{self._collection_name}`.") - self._overwrite = False - self._get_or_create = True - IS_TO_CHUNK = False - except ValueError: - raise ValueError( - "`docs_path` is not provided. " - f"The collection `{self._collection_name}` doesn't exist either. " - "Please provide `docs_path` or create the collection first." 
- ) - elif self._get_or_create and not self._overwrite: - try: - self._vector_db.get_collection(self._collection_name) - logger.info(f"Use the existing collection `{self._collection_name}`.", color="green") - except ValueError: - IS_TO_CHUNK = True - else: - IS_TO_CHUNK = True - - self._vector_db.active_collection = self._vector_db.create_collection( - self._collection_name, overwrite=self._overwrite, get_or_create=self._get_or_create - ) - - docs = None - if IS_TO_CHUNK: - if self.custom_text_split_function is not None: - chunks, sources = split_files_to_chunks( - get_files_from_dir(self._docs_path, self._custom_text_types, self._recursive), - custom_text_split_function=self.custom_text_split_function, - ) - else: - chunks, sources = split_files_to_chunks( - get_files_from_dir(self._docs_path, self._custom_text_types, self._recursive), - self._chunk_token_size, - self._chunk_mode, - self._must_break_at_empty_line, - ) - logger.info(f"Found {len(chunks)} chunks.") - - if self._new_docs: - all_docs_ids = set( - [ - doc["id"] - for doc in self._vector_db.get_docs_by_ids(ids=None, collection_name=self._collection_name) - ] - ) - else: - all_docs_ids = set() - - chunk_ids = ( - [hashlib.blake2b(chunk.encode("utf-8")).hexdigest()[:HASH_LENGTH] for chunk in chunks] - if not self._vector_db.type == "qdrant" - else [str(uuid.UUID(hex=hashlib.md5(chunk.encode("utf-8")).hexdigest())) for chunk in chunks] - ) - chunk_ids_set = set(chunk_ids) - chunk_ids_set_idx = [chunk_ids.index(hash_value) for hash_value in chunk_ids_set] - docs = [ - Document(id=chunk_ids[idx], content=chunks[idx], metadata=sources[idx]) - for idx in chunk_ids_set_idx - if chunk_ids[idx] not in all_docs_ids - ] - - self._vector_db.insert_docs(docs=docs, collection_name=self._collection_name, upsert=True) - - def _is_termination_msg_retrievechat(self, message): - """Check if a message is a termination message. - For code generation, terminate when no code block is detected. Currently only detect python code blocks. - For question answering, terminate when don't update context, i.e., answer is given. - """ - if isinstance(message, dict): - message = message.get("content") - if message is None: - return False - cb = extract_code(message) - contain_code = False - for c in cb: - # todo: support more languages - if c[0] == "python": - contain_code = True - break - update_context_case1, update_context_case2 = self._check_update_context(message) - return not (contain_code or update_context_case1 or update_context_case2) - - def _check_update_context_before_send(self, sender, message, recipient, silent): - if not isinstance(message, (str, dict)): - return message - elif isinstance(message, dict): - msg_text = message.get("content", message) - else: - msg_text = message - - if "UPDATE CONTEXT" == msg_text.strip().upper(): - doc_contents = self._get_context(self._results) - - # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the - # next similar docs in the retrieved doc results. 
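The retry loop that follows widens the retrieval window on every attempt rather than reusing the original `n_results`. A quick illustration of the arithmetic, assuming the default of 20 results:

```python
n_results = 20  # default used by retrieve_docs
for _tmp_retrieve_count in range(1, 5):
    print(_tmp_retrieve_count, n_results * (2 * _tmp_retrieve_count + 1))
# 1 60
# 2 100
# 3 140
# 4 180
```

The loop stops early once some context fits, or once the requested window already covers everything that was retrieved.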
- if not doc_contents: - for _tmp_retrieve_count in range(1, 5): - self._reset(intermediate=True) - self.retrieve_docs( - self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string - ) - doc_contents = self._get_context(self._results) - if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]): - break - msg_text = self._generate_message(doc_contents, task=self._task) - - if isinstance(message, dict): - message["content"] = msg_text - return message - - @staticmethod - def get_max_tokens(model="gpt-3.5-turbo"): - if "32k" in model: - return 32000 - elif "16k" in model: - return 16000 - elif "gpt-4" in model: - return 8000 - else: - return 4000 - - def _reset(self, intermediate=False): - self._doc_idx = -1 # the index of the current used doc - self._results = [] # the results of the current query - if not intermediate: - self._intermediate_answers = set() # the intermediate answers - self._doc_contents = [] # the contents of the current used doc - self._doc_ids = [] # the ids of the current used doc - - def _get_context(self, results: QueryResults): - doc_contents = "" - self._current_docs_in_context = [] - current_tokens = 0 - _doc_idx = self._doc_idx - _tmp_retrieve_count = 0 - for idx, doc in enumerate(results[0]): - doc = doc[0] - if idx <= _doc_idx: - continue - if doc["id"] in self._doc_ids: - continue - _doc_tokens = self.custom_token_count_function(doc["content"], self._model) - if _doc_tokens > self._context_max_tokens: - func_print = f"Skip doc_id {doc['id']} as it is too long to fit in the context." - print(colored(func_print, "green"), flush=True) - self._doc_idx = idx - continue - if current_tokens + _doc_tokens > self._context_max_tokens: - break - func_print = f"Adding content of doc {doc['id']} to context." 
- print(colored(func_print, "green"), flush=True) - current_tokens += _doc_tokens - doc_contents += doc["content"] + "\n" - _metadata = doc.get("metadata") - if isinstance(_metadata, dict): - self._current_docs_in_context.append(_metadata.get("source", "")) - self._doc_idx = idx - self._doc_ids.append(doc["id"]) - self._doc_contents.append(doc["content"]) - _tmp_retrieve_count += 1 - if _tmp_retrieve_count >= self.n_results: - break - return doc_contents - - def _generate_message(self, doc_contents, task="default"): - if not doc_contents: - print(colored("No more context, will terminate.", "green"), flush=True) - return "TERMINATE" - if self.customized_prompt: - message = self.customized_prompt.format(input_question=self.problem, input_context=doc_contents) - elif task.upper() == "CODE": - message = PROMPT_CODE.format(input_question=self.problem, input_context=doc_contents) - elif task.upper() == "QA": - message = PROMPT_QA.format(input_question=self.problem, input_context=doc_contents) - elif task.upper() == "DEFAULT": - message = PROMPT_DEFAULT.format( - input_question=self.problem, input_context=doc_contents, input_sources=self._current_docs_in_context - ) - else: - raise NotImplementedError(f"task {task} is not implemented.") - return message - - def _check_update_context(self, message): - if isinstance(message, dict): - message = message.get("content", "") - elif not isinstance(message, str): - message = "" - update_context_case1 = "UPDATE CONTEXT" in message.upper() and UPDATE_CONTEXT_IN_PROMPT not in message - update_context_case2 = self.customized_answer_prefix and self.customized_answer_prefix not in message.upper() - return update_context_case1, update_context_case2 - - def _generate_retrieve_user_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """In this function, we will update the context and reset the conversation based on different conditions. - We'll update the context and reset the conversation if update_context is True and either of the following: - (1) the last message contains "UPDATE CONTEXT", - (2) the last message doesn't contain "UPDATE CONTEXT" and the customized_answer_prefix is not in the message. - """ - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - update_context_case1, update_context_case2 = self._check_update_context(message) - if (update_context_case1 or update_context_case2) and self.update_context: - print(colored("Updating context and resetting conversation.", "green"), flush=True) - # extract the first sentence in the response as the intermediate answer - _message = message.get("content", "").split("\n")[0].strip() - _intermediate_info = re.split(r"(?<=[.!?])\s+", _message) - self._intermediate_answers.add(_intermediate_info[0]) - - if update_context_case1: - # try to get more context from the current retrieved doc results because the results may be too long to fit - # in the LLM context. - doc_contents = self._get_context(self._results) - - # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the - # next similar docs in the retrieved doc results. 
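For readers tracing the reply logic below, the two update-context conditions checked by `_check_update_context` can be restated standalone; the prefix value here is illustrative:

```python
UPDATE_CONTEXT_IN_PROMPT = "you should reply exactly `UPDATE CONTEXT`"
customized_answer_prefix = "THE ANSWER IS"  # hypothetical prefix configured on the agent

def check_update_context(message: str) -> tuple:
    # Mirrors the two conditions used by _check_update_context above.
    case1 = "UPDATE CONTEXT" in message.upper() and UPDATE_CONTEXT_IN_PROMPT not in message
    case2 = bool(customized_answer_prefix) and customized_answer_prefix not in message.upper()
    return case1, case2

print(check_update_context("UPDATE CONTEXT"))     # (True, True)  -> refresh the context
print(check_update_context("The answer is 42."))  # (False, False) -> accept the answer
print(check_update_context("I cannot find it."))  # (False, True)  -> retry with a new query
```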
- if not doc_contents: - for _tmp_retrieve_count in range(1, 5): - self._reset(intermediate=True) - self.retrieve_docs( - self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string - ) - doc_contents = self._get_context(self._results) - if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]): - break - elif update_context_case2: - # Use the current intermediate info as the query text to retrieve docs, and each time we append the top similar - # docs in the retrieved doc results to the context. - for _tmp_retrieve_count in range(5): - self._reset(intermediate=True) - self.retrieve_docs( - _intermediate_info[0], self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string - ) - self._get_context(self._results) - doc_contents = "\n".join(self._doc_contents) # + "\n" + "\n".join(self._intermediate_answers) - if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]): - break - - self.clear_history() - sender.clear_history() - return True, self._generate_message(doc_contents, task=self._task) - else: - return False, None - - def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = ""): - """Retrieve docs based on the given problem and assign the results to the class property `_results`. - The retrieved docs should be type of `QueryResults` which is a list of tuples containing the document and - the distance. - - Args: - problem (str): the problem to be solved. - n_results (int): the number of results to be retrieved. Default is 20. - search_string (str): only docs that contain an exact match of this string will be retrieved. Default is "". - Not used if the vector_db doesn't support it. - - Returns: - None. - """ - if isinstance(self._vector_db, VectorDB): - if not self._collection or not self._get_or_create: - print("Trying to create collection.") - self._init_db() - self._collection = True - self._get_or_create = True - - kwargs = {} - if hasattr(self._vector_db, "type") and self._vector_db.type == "chroma": - kwargs["where_document"] = {"$contains": search_string} if search_string else None - results = self._vector_db.retrieve_docs( - queries=[problem], - n_results=n_results, - collection_name=self._collection_name, - distance_threshold=self._distance_threshold, - **kwargs, - ) - self._search_string = search_string - self._results = results - print("VectorDB returns doc_ids: ", [[r[0]["id"] for r in rr] for rr in results]) - return - - if not self._collection or not self._get_or_create: - print("Trying to create collection.") - self._client = create_vector_db_from_dir( - dir_path=self._docs_path, - max_tokens=self._chunk_token_size, - client=self._client, - collection_name=self._collection_name, - chunk_mode=self._chunk_mode, - must_break_at_empty_line=self._must_break_at_empty_line, - embedding_model=self._embedding_model, - get_or_create=self._get_or_create, - embedding_function=self._embedding_function, - custom_text_split_function=self.custom_text_split_function, - custom_text_types=self._custom_text_types, - recursive=self._recursive, - extra_docs=self._extra_docs, - ) - self._collection = True - self._get_or_create = True - - results = query_vector_db( - query_texts=[problem], - n_results=n_results, - search_string=search_string, - client=self._client, - collection_name=self._collection_name, - embedding_model=self._embedding_model, - embedding_function=self._embedding_function, - ) - results["contents"] = results.pop("documents") - results = 
chroma_results_to_query_results(results, "distances") - results = filter_results_by_distance(results, self._distance_threshold) - - self._search_string = search_string - self._results = results - print("doc_ids: ", [[r[0]["id"] for r in rr] for rr in results]) - - @staticmethod - def message_generator(sender, recipient, context): - """ - Generate an initial message with the given context for the RetrieveUserProxyAgent. - Args: - sender (Agent): the sender agent. It should be the instance of RetrieveUserProxyAgent. - recipient (Agent): the recipient agent. Usually it's the assistant agent. - context (dict): the context for the message generation. It should contain the following keys: - - `problem` (str) - the problem to be solved. - - `n_results` (int) - the number of results to be retrieved. Default is 20. - - `search_string` (str) - only docs that contain an exact match of this string will be retrieved. Default is "". - Returns: - str: the generated message ready to be sent to the recipient agent. - """ - sender._reset() - - problem = context.get("problem", "") - n_results = context.get("n_results", 20) - search_string = context.get("search_string", "") - - sender.retrieve_docs(problem, n_results, search_string) - sender.problem = problem - sender.n_results = n_results - doc_contents = sender._get_context(sender._results) - message = sender._generate_message(doc_contents, sender._task) - return message - - def run_code(self, code, **kwargs): - lang = kwargs.get("lang", None) - if code.startswith("!") or code.startswith("pip") or lang in ["bash", "shell", "sh"]: - return ( - 0, - "You MUST NOT install any packages because all the packages needed are already installed.", - None, - ) - if self._ipython is None or lang != "python": - return super().run_code(code, **kwargs) - else: - result = self._ipython.run_cell(code) - log = str(result.result) - exitcode = 0 if result.success else 1 - if result.error_before_exec is not None: - log += f"\n{result.error_before_exec}" - exitcode = 1 - if result.error_in_exec is not None: - log += f"\n{result.error_in_exec}" - exitcode = 1 - return exitcode, log, None diff --git a/autogen/agentchat/contrib/society_of_mind_agent.py b/autogen/agentchat/contrib/society_of_mind_agent.py deleted file mode 100644 index e76768187c9f..000000000000 --- a/autogen/agentchat/contrib/society_of_mind_agent.py +++ /dev/null @@ -1,197 +0,0 @@ -# ruff: noqa: E722 -import copy -import traceback -from typing import Callable, Dict, List, Literal, Optional, Tuple, Union - -from autogen import Agent, ConversableAgent, GroupChat, GroupChatManager, OpenAIWrapper - - -class SocietyOfMindAgent(ConversableAgent): - """(In preview) A single agent that runs a Group Chat as an inner monologue. - At the end of the conversation (termination for any reason), the SocietyOfMindAgent - applies the response_preparer method on the entire inner monologue message history to - extract a final answer for the reply. - - Most arguments are inherited from ConversableAgent. New arguments are: - chat_manager (GroupChatManager): the group chat manager that will be running the inner monologue - response_preparer (Optional, Callable or String): If response_preparer is a callable function, then - it should have the signature: - f( self: SocietyOfMindAgent, messages: List[Dict]) - where `self` is this SocietyOfMindAgent, and `messages` is a list of inner-monologue messages. - The function should return a string representing the final response (extracted or prepared) - from that history. 
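Here is a sketch of how the retrieval agent above (`RetrieveUserProxyAgent`) was typically driven, combining `retrieve_config` with `message_generator`. It assumes a populated `config_list` and a local `./docs` folder, both of which are placeholders:

```python
import autogen
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent

assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={"config_list": config_list},  # config_list is assumed to be defined elsewhere
)
ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    retrieve_config={
        "task": "qa",
        "docs_path": "./docs",  # hypothetical path
        "chunk_token_size": 2000,
        "collection_name": "autogen-docs",
    },
)
# message_generator retrieves context for `problem` and builds the opening prompt.
ragproxyagent.initiate_chat(
    assistant,
    message=ragproxyagent.message_generator,
    problem="How is the context window budgeted?",
    n_results=5,
)
```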
- If response_preparer is a string, then it should be the LLM prompt used to extract the final - message from the inner chat transcript. - The default response_preparer depends on if an llm_config is provided. If llm_config is False, - then the response_preparer deterministically returns the last message in the inner-monolgue. If - llm_config is set to anything else, then a default LLM prompt is used. - """ - - def __init__( - self, - name: str, - chat_manager: GroupChatManager, - response_preparer: Optional[Union[str, Callable]] = None, - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "TERMINATE", - function_map: Optional[Dict[str, Callable]] = None, - code_execution_config: Union[Dict, Literal[False]] = False, - llm_config: Optional[Union[Dict, Literal[False]]] = False, - default_auto_reply: Optional[Union[str, Dict, None]] = "", - **kwargs, - ): - super().__init__( - name=name, - system_message="", - is_termination_msg=is_termination_msg, - max_consecutive_auto_reply=max_consecutive_auto_reply, - human_input_mode=human_input_mode, - function_map=function_map, - code_execution_config=code_execution_config, - llm_config=llm_config, - default_auto_reply=default_auto_reply, - **kwargs, - ) - - self.update_chat_manager(chat_manager) - - # response_preparer default depends on if the llm_config is set, and if a client was created - if response_preparer is None: - if self.client is not None: - response_preparer = "Output a standalone response to the original request, without mentioning any of the intermediate discussion." - else: - - def response_preparer(agent, messages): - return messages[-1]["content"].replace("TERMINATE", "").strip() - - # Create the response_preparer callable, if given only a prompt string - if isinstance(response_preparer, str): - self.response_preparer = lambda agent, messages: agent._llm_response_preparer(response_preparer, messages) - else: - self.response_preparer = response_preparer - - # NOTE: Async reply functions are not yet supported with this contrib agent - self._reply_func_list = [] - self.register_reply([Agent, None], SocietyOfMindAgent.generate_inner_monologue_reply) - self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply) - self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply) - self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply) - - def _llm_response_preparer(self, prompt, messages): - """Default response_preparer when provided with a string prompt, rather than a callable. - - Args: - prompt (str): The prompt used to extract the final response from the transcript. - messages (list): The messages generated as part of the inner monologue group chat. - """ - - _messages = [ - { - "role": "system", - "content": """Earlier you were asked to fulfill a request. You and your team worked diligently to address that request. 
Here is a transcript of that conversation:""",
- }
- ]
-
- for message in messages:
- message = copy.deepcopy(message)
- message["role"] = "user"
-
- # Convert tool and function calls to basic messages to avoid an error on the LLM call
- if "content" not in message:
- message["content"] = ""
-
- if "tool_calls" in message:
- del message["tool_calls"]
- if "tool_responses" in message:
- del message["tool_responses"]
- if "function_call" in message:
- if message["content"] == "":
- try:
- message["content"] = (
- message["function_call"]["name"] + "(" + message["function_call"]["arguments"] + ")"
- )
- except KeyError:
- pass
- del message["function_call"]
-
- # Add the modified message to the transcript
- _messages.append(message)
-
- _messages.append(
- {
- "role": "system",
- "content": prompt,
- }
- )
-
- response = self.client.create(context=None, messages=_messages, cache=self.client_cache)
- extracted_response = self.client.extract_text_or_completion_object(response)[0]
- if not isinstance(extracted_response, str):
- return str(extracted_response.model_dump(mode="dict"))
- else:
- return extracted_response
-
- @property
- def chat_manager(self) -> Union[GroupChatManager, None]:
- """Return the group chat manager."""
- return self._chat_manager
-
- def update_chat_manager(self, chat_manager: Union[GroupChatManager, None]):
- """Update the chat manager.
-
- Args:
- chat_manager (GroupChatManager): the group chat manager
- """
- self._chat_manager = chat_manager
-
- # Awkward, but due to object cloning, there's no better way to do this
- # Read the GroupChat object from the callback
- self._group_chat = None
- if self._chat_manager is not None:
- for item in self._chat_manager._reply_func_list:
- if isinstance(item["config"], GroupChat):
- self._group_chat = item["config"]
- break
-
- def generate_inner_monologue_reply(
- self,
- messages: Optional[List[Dict]] = None,
- sender: Optional[Agent] = None,
- config: Optional[OpenAIWrapper] = None,
- ) -> Tuple[bool, Union[str, Dict, None]]:
- """Generate a reply by running the group chat"""
- if self.chat_manager is None:
- return False, None
- if messages is None:
- messages = self._oai_messages[sender]
-
- # We want to clear the inner monologue, keeping only the external chat for context. 
- # Reset all the counters and histories, then populate agents with necessary context from the external chat - self.chat_manager.reset() - self.update_chat_manager(self.chat_manager) - - external_history = [] - if len(messages) > 1: - external_history = messages[0 : len(messages) - 1] # All but the current message - - for agent in self._group_chat.agents: - agent.reset() - for message in external_history: - # Assign each message a name - attributed_message = message.copy() - if "name" not in attributed_message: - if attributed_message["role"] == "assistant": - attributed_message["name"] = self.name - else: - attributed_message["name"] = sender.name - - self.chat_manager.send(attributed_message, agent, request_reply=False, silent=True) - - try: - self.initiate_chat(self.chat_manager, message=messages[-1], clear_history=False) - except: - traceback.print_exc() - - response_preparer = self.response_preparer - return True, response_preparer(self, self._group_chat.messages) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py deleted file mode 100644 index 62345156a53a..000000000000 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Any, Dict, List, Literal, Optional, Tuple, Union - -from autogen.agentchat.agent import Agent -from autogen.agentchat.assistant_agent import ConversableAgent - -system_message = """You are an expert in text analysis. -The user will give you TEXT to analyze. -The user will give you analysis INSTRUCTIONS copied twice, at both the beginning and the end. -You will follow these INSTRUCTIONS in analyzing the TEXT, then give the results of your expert analysis in the format requested.""" - - -class TextAnalyzerAgent(ConversableAgent): - """(Experimental) Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed.""" - - def __init__( - self, - name="analyzer", - system_message: Optional[str] = system_message, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", - llm_config: Optional[Union[Dict, bool]] = None, - **kwargs, - ): - """ - Args: - name (str): name of the agent. - system_message (str): system message for the ChatCompletion inference. - human_input_mode (str): This agent should NEVER prompt the human for input. - llm_config (dict or False): llm inference configuration. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) - for available options. - To disable llm-based auto reply, set to False. - **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). - """ - super().__init__( - name=name, - system_message=system_message, - human_input_mode=human_input_mode, - llm_config=llm_config, - **kwargs, - ) - self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply, position=2) - - def _analyze_in_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Analyzes the given text as instructed, and returns the analysis as a message. - Assumes exactly two messages containing the text to analyze and the analysis instructions. - See Teachability.analyze for an example of how to use this method.""" - if self.llm_config is False: - raise ValueError("TextAnalyzerAgent requires self.llm_config to be set in its base class.") - if messages is None: - messages = self._oai_messages[sender] # In case of a direct call. 
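For completeness, a sketch of how the `SocietyOfMindAgent` above was usually wired to an inner group chat; the agent names, `config_list`, and the task message are illustrative:

```python
import autogen
from autogen.agentchat.contrib.society_of_mind_agent import SocietyOfMindAgent

llm_config = {"config_list": config_list}  # config_list is assumed to be defined elsewhere

writer = autogen.AssistantAgent("writer", llm_config=llm_config)
critic = autogen.AssistantAgent("critic", llm_config=llm_config)
groupchat = autogen.GroupChat(agents=[writer, critic], messages=[], max_round=6)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

# The group chat runs as an inner monologue; only the prepared final answer is returned.
society = SocietyOfMindAgent("society_of_mind", chat_manager=manager, llm_config=llm_config)

user = autogen.UserProxyAgent(
    "user", human_input_mode="NEVER", code_execution_config=False, max_consecutive_auto_reply=0
)
user.initiate_chat(society, message="Summarize the trade-offs of the design.")
```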
- assert len(messages) == 2 - - # Delegate to the analysis method. - return True, self.analyze_text(messages[0]["content"], messages[1]["content"]) - - def analyze_text(self, text_to_analyze, analysis_instructions): - """Analyzes the given text as instructed, and returns the analysis.""" - # Assemble the message. - text_to_analyze = "# TEXT\n" + text_to_analyze + "\n" - analysis_instructions = "# INSTRUCTIONS\n" + analysis_instructions + "\n" - msg_text = "\n".join( - [analysis_instructions, text_to_analyze, analysis_instructions] - ) # Repeat the instructions. - # Generate and return the analysis string. - return self.generate_oai_reply([{"role": "user", "content": msg_text}], None, None)[1] diff --git a/autogen/agentchat/contrib/vectordb/base.py b/autogen/agentchat/contrib/vectordb/base.py deleted file mode 100644 index c0519b5717f1..000000000000 --- a/autogen/agentchat/contrib/vectordb/base.py +++ /dev/null @@ -1,241 +0,0 @@ -from typing import ( - Any, - Callable, - List, - Mapping, - Optional, - Protocol, - Sequence, - Tuple, - TypedDict, - Union, - runtime_checkable, -) - -Metadata = Union[Mapping[str, Any], None] -Vector = Union[Sequence[float], Sequence[int]] -ItemID = Union[str, int] # chromadb doesn't support int ids, VikingDB does - - -class Document(TypedDict): - """A Document is a record in the vector database. - - id: ItemID | the unique identifier of the document. - content: str | the text content of the chunk. - metadata: Metadata, Optional | contains additional information about the document such as source, date, etc. - embedding: Vector, Optional | the vector representation of the content. - """ - - id: ItemID - content: str - metadata: Optional[Metadata] - embedding: Optional[Vector] - - -"""QueryResults is the response from the vector database for a query/queries. -A query is a list containing one string while queries is a list containing multiple strings. -The response is a list of query results, each query result is a list of tuples containing the document and the distance. -""" -QueryResults = List[List[Tuple[Document, float]]] - - -@runtime_checkable -class VectorDB(Protocol): - """ - Abstract class for vector database. A vector database is responsible for storing and retrieving documents. - - Attributes: - active_collection: Any | The active collection in the vector database. Make get_collection faster. Default is None. - type: str | The type of the vector database, chroma, pgvector, etc. Default is "". - - Methods: - create_collection: Callable[[str, bool, bool], Any] | Create a collection in the vector database. - get_collection: Callable[[str], Any] | Get the collection from the vector database. - delete_collection: Callable[[str], Any] | Delete the collection from the vector database. - insert_docs: Callable[[List[Document], str, bool], None] | Insert documents into the collection of the vector database. - update_docs: Callable[[List[Document], str], None] | Update documents in the collection of the vector database. - delete_docs: Callable[[List[ItemID], str], None] | Delete documents from the collection of the vector database. - retrieve_docs: Callable[[List[str], str, int, float], QueryResults] | Retrieve documents from the collection of the vector database based on the queries. - get_docs_by_ids: Callable[[List[ItemID], str], List[Document]] | Retrieve documents from the collection of the vector database based on the ids. 
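To make these type aliases concrete, here is a minimal sketch of a `Document` and of the `QueryResults` shape a `retrieve_docs` call returns; the values are made up:

```python
from autogen.agentchat.contrib.vectordb.base import Document, QueryResults

doc: Document = {
    "id": "doc-1",
    "content": "AutoGen supports retrieval-augmented chat.",
    "metadata": {"source": "overview.md"},
    "embedding": None,
}

# One inner list per query; each hit is a (Document, distance) tuple.
results: QueryResults = [
    [(doc, 0.12)],
]
```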
- """ - - active_collection: Any = None - type: str = "" - embedding_function: Optional[Callable[[List[str]], List[List[float]]]] = ( - None # embeddings = embedding_function(sentences) - ) - - def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> Any: - """ - Create a collection in the vector database. - Case 1. if the collection does not exist, create the collection. - Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raise a ValueError. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get the collection if it exists. Default is True. - - Returns: - Any | The collection object. - """ - ... - - def get_collection(self, collection_name: str = None) -> Any: - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. Default is None. If None, return the - current active collection. - - Returns: - Any | The collection object. - """ - ... - - def delete_collection(self, collection_name: str) -> Any: - """ - Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. - - Returns: - Any - """ - ... - - def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False, **kwargs) -> None: - """ - Insert documents into the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. - collection_name: str | The name of the collection. Default is None. - upsert: bool | Whether to update the document if it exists. Default is False. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - ... - - def update_docs(self, docs: List[Document], collection_name: str = None, **kwargs) -> None: - """ - Update documents in the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. - collection_name: str | The name of the collection. Default is None. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - ... - - def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None: - """ - Delete documents from the collection of the vector database. - - Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. - collection_name: str | The name of the collection. Default is None. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - ... - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = -1, - **kwargs, - ) -> QueryResults: - """ - Retrieve documents from the collection of the vector database based on the queries. - - Args: - queries: List[str] | A list of queries. Each query is a string. - collection_name: str | The name of the collection. Default is None. - n_results: int | The number of relevant documents to return. Default is 10. - distance_threshold: float | The threshold for the distance score, only distance smaller than it will be - returned. Don't filter with it if < 0. Default is -1. - kwargs: Dict | Additional keyword arguments. - - Returns: - QueryResults | The query results. 
Each query result is a list of list of tuples containing the document and - the distance. - """ - ... - - def get_docs_by_ids( - self, ids: List[ItemID] = None, collection_name: str = None, include=None, **kwargs - ) -> List[Document]: - """ - Retrieve documents from the collection of the vector database based on the ids. - - Args: - ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. - collection_name: str | The name of the collection. Default is None. - include: List[str] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"], ids will always be included. This may differ - depending on the implementation. - kwargs: dict | Additional keyword arguments. - - Returns: - List[Document] | The results. - """ - ... - - -class VectorDBFactory: - """ - Factory class for creating vector databases. - """ - - PREDEFINED_VECTOR_DB = ["chroma", "pgvector", "mongodb", "qdrant", "couchbase"] - - @staticmethod - def create_vector_db(db_type: str, **kwargs) -> VectorDB: - """ - Create a vector database. - - Args: - db_type: str | The type of the vector database. - kwargs: Dict | The keyword arguments for initializing the vector database. - - Returns: - VectorDB | The vector database. - """ - if db_type.lower() in ["chroma", "chromadb"]: - from .chromadb import ChromaVectorDB - - return ChromaVectorDB(**kwargs) - if db_type.lower() in ["pgvector", "pgvectordb"]: - from .pgvectordb import PGVectorDB - - return PGVectorDB(**kwargs) - if db_type.lower() in ["mdb", "mongodb", "atlas"]: - from .mongodb import MongoDBAtlasVectorDB - - return MongoDBAtlasVectorDB(**kwargs) - if db_type.lower() in ["qdrant", "qdrantdb"]: - from .qdrant import QdrantVectorDB - - return QdrantVectorDB(**kwargs) - if db_type.lower() in ["couchbase", "couchbasedb", "capella"]: - from .couchbase import CouchbaseVectorDB - - return CouchbaseVectorDB(**kwargs) - else: - raise ValueError( - f"Unsupported vector database type: {db_type}. Valid types are {VectorDBFactory.PREDEFINED_VECTOR_DB}." - ) diff --git a/autogen/agentchat/contrib/vectordb/chromadb.py b/autogen/agentchat/contrib/vectordb/chromadb.py deleted file mode 100644 index bef4a1090219..000000000000 --- a/autogen/agentchat/contrib/vectordb/chromadb.py +++ /dev/null @@ -1,325 +0,0 @@ -import os -from typing import Callable, List - -from .base import Document, ItemID, QueryResults, VectorDB -from .utils import chroma_results_to_query_results, filter_results_by_distance, get_logger - -try: - import chromadb - - if chromadb.__version__ < "0.4.15": - raise ImportError("Please upgrade chromadb to version 0.4.15 or later.") - import chromadb.utils.embedding_functions as ef - from chromadb.api.models.Collection import Collection -except ImportError: - raise ImportError("Please install chromadb: `pip install chromadb`") - -try: - from chromadb.errors import ChromaError -except ImportError: - ChromaError = Exception - -CHROMADB_MAX_BATCH_SIZE = os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000) -logger = get_logger(__name__) - - -class ChromaVectorDB(VectorDB): - """ - A vector database that uses ChromaDB as the backend. - """ - - def __init__( - self, *, client=None, path: str = "tmp/db", embedding_function: Callable = None, metadata: dict = None, **kwargs - ) -> None: - """ - Initialize the vector database. - - Args: - client: chromadb.Client | The client object of the vector database. Default is None. - If provided, it will use the client object directly and ignore other arguments. 
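A short sketch of the factory above in use, assuming the `chroma` backend and its optional dependencies are installed; the path is illustrative:

```python
from autogen.agentchat.contrib.vectordb.base import VectorDBFactory

# "chroma" resolves to ChromaVectorDB; extra kwargs are forwarded to its constructor.
db = VectorDBFactory.create_vector_db("chroma", path="tmp/db")
db.create_collection("autogen-docs", overwrite=False, get_or_create=True)
```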
- path: str | The path to the vector database. Default is `tmp/db`. The default was `None` for version <=0.2.24. - embedding_function: Callable | The embedding function used to generate the vector representation - of the documents. Default is None, SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") will be used. - metadata: dict | The metadata of the vector database. Default is None. If None, it will use this - setting: {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32}. For more details of - the metadata, please refer to [distances](https://github.com/nmslib/hnswlib#supported-distances), - [hnsw](https://github.com/chroma-core/chroma/blob/566bc80f6c8ee29f7d99b6322654f32183c368c4/chromadb/segment/impl/vector/local_hnsw.py#L184), - and [ALGO_PARAMS](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md). - kwargs: dict | Additional keyword arguments. - - Returns: - None - """ - self.client = client - self.path = path - self.embedding_function = ( - ef.SentenceTransformerEmbeddingFunction("all-MiniLM-L6-v2") - if embedding_function is None - else embedding_function - ) - self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 32} - if not self.client: - if self.path is not None: - self.client = chromadb.PersistentClient(path=self.path, **kwargs) - else: - self.client = chromadb.Client(**kwargs) - self.active_collection = None - self.type = "chroma" - - def create_collection( - self, collection_name: str, overwrite: bool = False, get_or_create: bool = True - ) -> Collection: - """ - Create a collection in the vector database. - Case 1. if the collection does not exist, create the collection. - Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raise a ValueError. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get the collection if it exists. Default is True. - - Returns: - Collection | The collection object. - """ - try: - if self.active_collection and self.active_collection.name == collection_name: - collection = self.active_collection - else: - collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function) - except (ValueError, ChromaError): - collection = None - if collection is None: - return self.client.create_collection( - collection_name, - embedding_function=self.embedding_function, - get_or_create=get_or_create, - metadata=self.metadata, - ) - elif overwrite: - self.client.delete_collection(collection_name) - return self.client.create_collection( - collection_name, - embedding_function=self.embedding_function, - get_or_create=get_or_create, - metadata=self.metadata, - ) - elif get_or_create: - return collection - else: - raise ValueError(f"Collection {collection_name} already exists.") - - def get_collection(self, collection_name: str = None) -> Collection: - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. Default is None. If None, return the - current active collection. - - Returns: - Collection | The collection object. - """ - if collection_name is None: - if self.active_collection is None: - raise ValueError("No collection is specified.") - else: - logger.info( - f"No collection is specified. 
Using current active collection {self.active_collection.name}." - ) - else: - if not (self.active_collection and self.active_collection.name == collection_name): - self.active_collection = self.client.get_collection( - collection_name, embedding_function=self.embedding_function - ) - return self.active_collection - - def delete_collection(self, collection_name: str) -> None: - """ - Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. - - Returns: - None - """ - self.client.delete_collection(collection_name) - if self.active_collection and self.active_collection.name == collection_name: - self.active_collection = None - - def _batch_insert( - self, collection: Collection, embeddings=None, ids=None, metadatas=None, documents=None, upsert=False - ) -> None: - batch_size = int(CHROMADB_MAX_BATCH_SIZE) - for i in range(0, len(documents), min(batch_size, len(documents))): - end_idx = i + min(batch_size, len(documents) - i) - collection_kwargs = { - "documents": documents[i:end_idx], - "ids": ids[i:end_idx], - "metadatas": metadatas[i:end_idx] if metadatas else None, - "embeddings": embeddings[i:end_idx] if embeddings else None, - } - if upsert: - collection.upsert(**collection_kwargs) - else: - collection.add(**collection_kwargs) - - def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None: - """ - Insert documents into the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. - collection_name: str | The name of the collection. Default is None. - upsert: bool | Whether to update the document if it exists. Default is False. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - if not docs: - return - if docs[0].get("content") is None: - raise ValueError("The document content is required.") - if docs[0].get("id") is None: - raise ValueError("The document id is required.") - documents = [doc.get("content") for doc in docs] - ids = [doc.get("id") for doc in docs] - collection = self.get_collection(collection_name) - if docs[0].get("embedding") is None: - logger.info( - "No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding." - ) - embeddings = None - else: - embeddings = [doc.get("embedding") for doc in docs] - if docs[0].get("metadata") is None: - metadatas = None - else: - metadatas = [doc.get("metadata") for doc in docs] - self._batch_insert(collection, embeddings, ids, metadatas, documents, upsert) - - def update_docs(self, docs: List[Document], collection_name: str = None) -> None: - """ - Update documents in the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. - collection_name: str | The name of the collection. Default is None. - - Returns: - None - """ - self.insert_docs(docs, collection_name, upsert=True) - - def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None: - """ - Delete documents from the collection of the vector database. - - Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. - collection_name: str | The name of the collection. Default is None. - kwargs: Dict | Additional keyword arguments. 
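Continuing that sketch, inserting documents into the collection created above; each entry follows the `Document` TypedDict shape (`id`, `content`, optional `metadata`, optional `embedding`), and embeddings are computed by the collection's embedding function when none are supplied:

# Sketch only: ids, contents, and metadata are illustrative.
docs = [
    {"id": "1", "content": "AutoGen enables multi-agent workflows.", "metadata": {"tag": "agents"}},
    {"id": "2", "content": "Vector databases store embeddings.", "metadata": {"tag": "db"}},
]
db.insert_docs(docs, collection_name="example_docs", upsert=False)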
- - Returns: - None - """ - collection = self.get_collection(collection_name) - collection.delete(ids, **kwargs) - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = -1, - **kwargs, - ) -> QueryResults: - """ - Retrieve documents from the collection of the vector database based on the queries. - - Args: - queries: List[str] | A list of queries. Each query is a string. - collection_name: str | The name of the collection. Default is None. - n_results: int | The number of relevant documents to return. Default is 10. - distance_threshold: float | The threshold for the distance score, only distance smaller than it will be - returned. Don't filter with it if < 0. Default is -1. - kwargs: Dict | Additional keyword arguments. - - Returns: - QueryResults | The query results. Each query result is a list of list of tuples containing the document and - the distance. - """ - collection = self.get_collection(collection_name) - if isinstance(queries, str): - queries = [queries] - results = collection.query( - query_texts=queries, - n_results=n_results, - **kwargs, - ) - results["contents"] = results.pop("documents") - results = chroma_results_to_query_results(results) - results = filter_results_by_distance(results, distance_threshold) - return results - - @staticmethod - def _chroma_get_results_to_list_documents(data_dict) -> List[Document]: - """Converts a dictionary with list values to a list of Document. - - Args: - data_dict: A dictionary where keys map to lists or None. - - Returns: - List[Document] | The list of Document. - - Example: - data_dict = { - "key1s": [1, 2, 3], - "key2s": ["a", "b", "c"], - "key3s": None, - "key4s": ["x", "y", "z"], - } - - results = [ - {"key1": 1, "key2": "a", "key4": "x"}, - {"key1": 2, "key2": "b", "key4": "y"}, - {"key1": 3, "key2": "c", "key4": "z"}, - ] - """ - - results = [] - keys = [key for key in data_dict if data_dict[key] is not None] - - for i in range(len(data_dict[keys[0]])): - sub_dict = {} - for key in data_dict.keys(): - if data_dict[key] is not None and len(data_dict[key]) > i: - sub_dict[key[:-1]] = data_dict[key][i] - results.append(sub_dict) - return results - - def get_docs_by_ids( - self, ids: List[ItemID] = None, collection_name: str = None, include=None, **kwargs - ) -> List[Document]: - """ - Retrieve documents from the collection of the vector database based on the ids. - - Args: - ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. - collection_name: str | The name of the collection. Default is None. - include: List[str] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"], ids will always be included. - kwargs: dict | Additional keyword arguments. - - Returns: - List[Document] | The results. 
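And querying it back; `retrieve_docs` returns, per query, a list of `(document, distance)` tuples, and entries are filtered out when a non-negative `distance_threshold` is given (the threshold value below is illustrative):

# Sketch only, continuing the Chroma example above.
results = db.retrieve_docs(
    queries=["agent workflows"],
    collection_name="example_docs",
    n_results=2,
    distance_threshold=0.8,
)
for doc, distance in results[0]:
    print(doc["id"], round(distance, 3))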
- """ - collection = self.get_collection(collection_name) - include = include if include else ["metadatas", "documents"] - results = collection.get(ids, include=include, **kwargs) - results = self._chroma_get_results_to_list_documents(results) - return results diff --git a/autogen/agentchat/contrib/vectordb/couchbase.py b/autogen/agentchat/contrib/vectordb/couchbase.py deleted file mode 100644 index 66691fa2f2b7..000000000000 --- a/autogen/agentchat/contrib/vectordb/couchbase.py +++ /dev/null @@ -1,396 +0,0 @@ -import json -import time -from datetime import timedelta -from typing import Any, Callable, Dict, List, Literal, Tuple, Union - -import numpy as np -from couchbase import search -from couchbase.auth import PasswordAuthenticator -from couchbase.cluster import Cluster, ClusterOptions -from couchbase.collection import Collection -from couchbase.management.search import SearchIndex -from couchbase.options import SearchOptions -from couchbase.vector_search import VectorQuery, VectorSearch -from sentence_transformers import SentenceTransformer - -from .base import Document, ItemID, QueryResults, VectorDB -from .utils import get_logger - -logger = get_logger(__name__) - -DEFAULT_BATCH_SIZE = 1000 -_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."] -TEXT_KEY = "content" -EMBEDDING_KEY = "embedding" - - -class CouchbaseVectorDB(VectorDB): - """ - A vector database implementation that uses Couchbase as the backend. - """ - - def __init__( - self, - connection_string: str = "couchbase://localhost", - username: str = "Administrator", - password: str = "password", - bucket_name: str = "vector_db", - embedding_function: Callable = SentenceTransformer("all-MiniLM-L6-v2").encode, - scope_name: str = "_default", - collection_name: str = "_default", - index_name: str = None, - ): - """ - Initialize the vector database. - - Args: - connection_string (str): The Couchbase connection string to connect to. Default is 'couchbase://localhost'. - username (str): The username for Couchbase authentication. Default is 'Administrator'. - password (str): The password for Couchbase authentication. Default is 'password'. - bucket_name (str): The name of the bucket. Default is 'vector_db'. - embedding_function (Callable): The embedding function used to generate the vector representation. Default is SentenceTransformer("all-MiniLM-L6-v2").encode. - scope_name (str): The name of the scope. Default is '_default'. - collection_name (str): The name of the collection to create for this vector database. Default is '_default'. - index_name (str): Index name for the vector database. Default is None. - overwrite (bool): Whether to overwrite existing data. Default is False. - wait_until_index_ready (float | None): Blocking call to wait until the database indexes are ready. None means no wait. Default is None. - wait_until_document_ready (float | None): Blocking call to wait until the database documents are ready. None means no wait. Default is None. 
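A hedged connection sketch for the Couchbase backend above; the credentials, bucket, and index name are placeholders for an existing cluster, and `couchbase` plus `sentence-transformers` must be installed:

from autogen.agentchat.contrib.vectordb.couchbase import CouchbaseVectorDB

# Sketch only: all connection details are placeholders.
cb_db = CouchbaseVectorDB(
    connection_string="couchbase://localhost",
    username="Administrator",
    password="password",
    bucket_name="vector_db",
    scope_name="_default",
    collection_name="_default",
    index_name="vector_index",
)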
- """ - print( - "CouchbaseVectorDB", - connection_string, - username, - password, - bucket_name, - scope_name, - collection_name, - index_name, - ) - self.embedding_function = embedding_function - self.index_name = index_name - - # This will get the model dimension size by computing the embeddings dimensions - self.dimensions = self._get_embedding_size() - - try: - auth = PasswordAuthenticator(username, password) - cluster = Cluster(connection_string, ClusterOptions(auth)) - cluster.wait_until_ready(timedelta(seconds=5)) - self.cluster = cluster - - self.bucket = cluster.bucket(bucket_name) - self.scope = self.bucket.scope(scope_name) - self.collection = self.scope.collection(collection_name) - self.active_collection = self.collection - - logger.debug("Successfully connected to Couchbase") - except Exception as err: - raise ConnectionError("Could not connect to Couchbase server") from err - - def search_index_exists(self, index_name: str): - """Check if the specified index is ready""" - try: - search_index_mgr = self.scope.search_indexes() - index = search_index_mgr.get_index(index_name) - return index.is_valid() - except Exception: - return False - - def _get_embedding_size(self): - return len(self.embedding_function(_SAMPLE_SENTENCE)[0]) - - def create_collection( - self, - collection_name: str, - overwrite: bool = False, - get_or_create: bool = True, - ) -> Collection: - """ - Create a collection in the vector database and create a vector search index in the collection. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get or create the collection. Default is True - """ - if overwrite: - self.delete_collection(collection_name) - - try: - collection_mgr = self.bucket.collections() - collection_mgr.create_collection(self.scope.name, collection_name) - - except Exception: - if not get_or_create: - raise ValueError(f"Collection {collection_name} already exists.") - else: - logger.debug(f"Collection {collection_name} already exists. Getting the collection.") - - collection = self.scope.collection(collection_name) - self.create_index_if_not_exists(index_name=self.index_name, collection=collection) - return collection - - def create_index_if_not_exists(self, index_name: str = "vector_index", collection=None) -> None: - """ - Creates a vector search index on the specified collection in Couchbase. - - Args: - index_name (str, optional): The name of the vector search index to create. Defaults to "vector_search_index". - collection (Collection, optional): The Couchbase collection to create the index on. Defaults to None. - """ - if not self.search_index_exists(index_name): - self.create_vector_search_index(collection, index_name) - - def get_collection(self, collection_name: str = None) -> Collection: - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. Default is None. If None, return the - current active collection. - - Returns: - Collection | The collection object. - """ - if collection_name is None: - if self.active_collection is None: - raise ValueError("No collection is specified.") - else: - logger.debug( - f"No collection is specified. Using current active collection {self.active_collection.name}." 
- ) - else: - self.active_collection = self.scope.collection(collection_name) - - return self.active_collection - - def delete_collection(self, collection_name: str) -> None: - """ - Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. - """ - try: - collection_mgr = self.bucket.collections() - collection_mgr.drop_collection(self.scope.name, collection_name) - except Exception as e: - logger.error(f"Error deleting collection: {e}") - - def create_vector_search_index( - self, - collection, - index_name: Union[str, None] = "vector_index", - similarity: Literal["l2_norm", "dot_product"] = "dot_product", - ) -> None: - """Create a vector search index in the collection.""" - search_index_mgr = self.scope.search_indexes() - dims = self._get_embedding_size() - index_definition = { - "type": "fulltext-index", - "name": index_name, - "sourceType": "couchbase", - "sourceName": self.bucket.name, - "planParams": {"maxPartitionsPerPIndex": 1024, "indexPartitions": 1}, - "params": { - "doc_config": { - "docid_prefix_delim": "", - "docid_regexp": "", - "mode": "scope.collection.type_field", - "type_field": "type", - }, - "mapping": { - "analysis": {}, - "default_analyzer": "standard", - "default_datetime_parser": "dateTimeOptional", - "default_field": "_all", - "default_mapping": {"dynamic": True, "enabled": False}, - "default_type": "_default", - "docvalues_dynamic": False, - "index_dynamic": True, - "store_dynamic": True, - "type_field": "_type", - "types": { - f"{self.scope.name}.{collection.name}": { - "dynamic": False, - "enabled": True, - "properties": { - "embedding": { - "dynamic": False, - "enabled": True, - "fields": [ - { - "dims": dims, - "index": True, - "name": "embedding", - "similarity": similarity, - "type": "vector", - "vector_index_optimized_for": "recall", - } - ], - }, - "metadata": {"dynamic": True, "enabled": True}, - "content": { - "dynamic": False, - "enabled": True, - "fields": [ - { - "include_in_all": True, - "index": True, - "name": "content", - "store": True, - "type": "text", - } - ], - }, - }, - } - }, - }, - "store": {"indexType": "scorch", "segmentVersion": 16}, - }, - "sourceParams": {}, - } - - search_index_def = SearchIndex.from_json(json.dumps(index_definition)) - max_attempts = 10 - attempt = 0 - while attempt < max_attempts: - try: - search_index_mgr.upsert_index(search_index_def) - break - except Exception as e: - logger.debug(f"Attempt {attempt + 1}/{max_attempts}: Error creating search index: {e}") - time.sleep(3) - attempt += 1 - - if attempt == max_attempts: - logger.error(f"Error creating search index after {max_attempts} attempts.") - raise RuntimeError(f"Error creating search index after {max_attempts} attempts.") - - logger.info(f"Search index {index_name} created successfully.") - - def upsert_docs( - self, docs: List[Document], collection: Collection, batch_size=DEFAULT_BATCH_SIZE, **kwargs: Any - ) -> None: - if docs[0].get("content") is None: - raise ValueError("The document content is required.") - if docs[0].get("id") is None: - raise ValueError("The document id is required.") - - for i in range(0, len(docs), batch_size): - batch = docs[i : i + batch_size] - docs_to_upsert = dict() - for doc in batch: - doc_id = doc["id"] - embedding = self.embedding_function( - [doc["content"]] - ).tolist() # Gets new embedding even in case of document update - - doc_content = {TEXT_KEY: doc["content"], "metadata": doc.get("metadata", {}), EMBEDDING_KEY: embedding} - docs_to_upsert[doc_id] = doc_content - 
collection.upsert_multi(docs_to_upsert) - - def insert_docs( - self, - docs: List[Document], - collection_name: str = None, - upsert: bool = False, - batch_size=DEFAULT_BATCH_SIZE, - **kwargs, - ) -> None: - """Insert Documents and Vector Embeddings into the collection of the vector database. Documents are upserted in all cases.""" - if not docs: - logger.info("No documents to insert.") - return - - collection = self.get_collection(collection_name) - self.upsert_docs(docs, collection, batch_size=batch_size) - - def update_docs( - self, docs: List[Document], collection_name: str = None, batch_size=DEFAULT_BATCH_SIZE, **kwargs: Any - ) -> None: - """Update documents, including their embeddings, in the Collection.""" - collection = self.get_collection(collection_name) - self.upsert_docs(docs, collection, batch_size) - - def delete_docs(self, ids: List[ItemID], collection_name: str = None, batch_size=DEFAULT_BATCH_SIZE, **kwargs): - """Delete documents from the collection of the vector database.""" - collection = self.get_collection(collection_name) - # based on batch size, delete the documents - for i in range(0, len(ids), batch_size): - batch = ids[i : i + batch_size] - collection.remove_multi(batch) - - def get_docs_by_ids( - self, ids: List[ItemID] | None = None, collection_name: str = None, include: List[str] | None = None, **kwargs - ) -> List[Document]: - """Retrieve documents from the collection of the vector database based on the ids.""" - if include is None: - include = [TEXT_KEY, "metadata", "id"] - elif "id" not in include: - include.append("id") - - collection = self.get_collection(collection_name) - if ids is not None: - docs = [collection.get(doc_id) for doc_id in ids] - else: - # Get all documents using couchbase query - include_str = ", ".join(include) - query = f"SELECT {include_str} FROM {self.bucket.name}.{self.scope.name}.{collection.name}" - result = self.cluster.query(query) - docs = [] - for row in result: - docs.append(row) - - return [{k: v for k, v in doc.items() if k in include or k == "id"} for doc in docs] - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = -1, - **kwargs, - ) -> QueryResults: - """Retrieve documents from the collection of the vector database based on the queries. - Note: Distance threshold is not supported in Couchbase FTS. 
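A short retrieval sketch for the Couchbase store, assuming the collection above has been created and populated; each query yields `(document, score)` tuples where the score is the raw FTS relevance score, and, as noted in the docstring, `distance_threshold` is not applied for this backend:

# Sketch only, continuing the Couchbase example above.
results = cb_db.retrieve_docs(queries=["multi-agent workflows"], n_results=3)
for doc, score in results[0]:
    print(doc["id"], round(score, 3))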
- """ - - results: QueryResults = [] - for query_text in queries: - query_vector = np.array(self.embedding_function([query_text])).tolist()[0] - query_result = self._vector_search( - query_vector, - n_results, - **kwargs, - ) - results.append(query_result) - return results - - def _vector_search(self, embedding_vector: List[float], n_results: int = 10, **kwargs) -> List[Tuple[Dict, float]]: - """Core vector search using Couchbase FTS.""" - - search_req = search.SearchRequest.create( - VectorSearch.from_vector_query( - VectorQuery( - EMBEDDING_KEY, - embedding_vector, - n_results, - ) - ) - ) - - search_options = SearchOptions(limit=n_results, fields=["*"]) - result = self.scope.search(self.index_name, search_req, search_options) - - docs_with_score = [] - - for row in result.rows(): - doc = row.fields - doc["id"] = row.id - score = row.score - - docs_with_score.append((doc, score)) - - return docs_with_score diff --git a/autogen/agentchat/contrib/vectordb/mongodb.py b/autogen/agentchat/contrib/vectordb/mongodb.py deleted file mode 100644 index 2e0580fe826b..000000000000 --- a/autogen/agentchat/contrib/vectordb/mongodb.py +++ /dev/null @@ -1,553 +0,0 @@ -from copy import deepcopy -from time import monotonic, sleep -from typing import Any, Callable, Dict, Iterable, List, Literal, Mapping, Set, Tuple, Union - -import numpy as np -from pymongo import MongoClient, UpdateOne, errors -from pymongo.collection import Collection -from pymongo.driver_info import DriverInfo -from pymongo.operations import SearchIndexModel -from sentence_transformers import SentenceTransformer - -from .base import Document, ItemID, QueryResults, VectorDB -from .utils import get_logger - -logger = get_logger(__name__) - -DEFAULT_INSERT_BATCH_SIZE = 100_000 -_SAMPLE_SENTENCE = ["The weather is lovely today in paradise."] -_DELAY = 0.5 - - -def with_id_rename(docs: Iterable) -> List[Dict[str, Any]]: - """Utility changes _id field from Collection into id for Document.""" - return [{**{k: v for k, v in d.items() if k != "_id"}, "id": d["_id"]} for d in docs] - - -class MongoDBAtlasVectorDB(VectorDB): - """ - A Collection object for MongoDB. - """ - - def __init__( - self, - connection_string: str = "", - database_name: str = "vector_db", - embedding_function: Callable = SentenceTransformer("all-MiniLM-L6-v2").encode, - collection_name: str = None, - index_name: str = "vector_index", - overwrite: bool = False, - wait_until_index_ready: float = None, - wait_until_document_ready: float = None, - ): - """ - Initialize the vector database. - - Args: - connection_string: str | The MongoDB connection string to connect to. Default is ''. - database_name: str | The name of the database. Default is 'vector_db'. - embedding_function: Callable | The embedding function used to generate the vector representation. - collection_name: str | The name of the collection to create for this vector database - Defaults to None - index_name: str | Index name for the vector database, defaults to 'vector_index' - overwrite: bool = False - wait_until_index_ready: float | None | Blocking call to wait until the - database indexes are ready. None, the default, means no wait. - wait_until_document_ready: float | None | Blocking call to wait until the - database indexes are ready. None, the default, means no wait. 
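A hedged connection sketch for the MongoDB Atlas backend above; the URI is a placeholder, and (per the error message further down) vector search indexes require an Atlas cluster rather than a self-hosted deployment:

from autogen.agentchat.contrib.vectordb.mongodb import MongoDBAtlasVectorDB

# Sketch only: the connection string is a placeholder for a real Atlas URI.
mongo_db = MongoDBAtlasVectorDB(
    connection_string="mongodb+srv://<user>:<password>@<cluster>.mongodb.net/",
    database_name="vector_db",
    collection_name="example_docs",
    index_name="vector_index",
    wait_until_index_ready=120.0,
    wait_until_document_ready=120.0,
)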
- """ - self.embedding_function = embedding_function - self.index_name = index_name - self._wait_until_index_ready = wait_until_index_ready - self._wait_until_document_ready = wait_until_document_ready - - # This will get the model dimension size by computing the embeddings dimensions - self.dimensions = self._get_embedding_size() - - try: - self.client = MongoClient(connection_string, driver=DriverInfo(name="autogen")) - self.client.admin.command("ping") - logger.debug("Successfully created MongoClient") - except errors.ServerSelectionTimeoutError as err: - raise ConnectionError("Could not connect to MongoDB server") from err - - self.db = self.client[database_name] - logger.debug(f"Atlas Database name: {self.db.name}") - if collection_name: - self.active_collection = self.create_collection(collection_name, overwrite) - else: - self.active_collection = None - - def _is_index_ready(self, collection: Collection, index_name: str): - """Check for the index name in the list of available search indexes to see if the - specified index is of status READY - - Args: - collection (Collection): MongoDB Collection to for the search indexes - index_name (str): Vector Search Index name - - Returns: - bool : True if the index is present and READY false otherwise - """ - for index in collection.list_search_indexes(index_name): - if index["type"] == "vectorSearch" and index["status"] == "READY": - return True - return False - - def _wait_for_index(self, collection: Collection, index_name: str, action: str = "create"): - """Waits for the index action to be completed. Otherwise throws a TimeoutError. - - Timeout set on instantiation. - action: "create" or "delete" - """ - assert action in ["create", "delete"], f"{action=} must be create or delete." - start = monotonic() - while monotonic() - start < self._wait_until_index_ready: - if action == "create" and self._is_index_ready(collection, index_name): - return - elif action == "delete" and len(list(collection.list_search_indexes())) == 0: - return - sleep(_DELAY) - - raise TimeoutError(f"Index {self.index_name} is not ready!") - - def _wait_for_document(self, collection: Collection, index_name: str, doc: Document): - start = monotonic() - while monotonic() - start < self._wait_until_document_ready: - query_result = _vector_search( - embedding_vector=np.array(self.embedding_function(doc["content"])).tolist(), - n_results=1, - collection=collection, - index_name=index_name, - ) - if query_result and query_result[0][0]["_id"] == doc["id"]: - return - sleep(_DELAY) - - raise TimeoutError(f"Document {self.index_name} is not ready!") - - def _get_embedding_size(self): - return len(self.embedding_function(_SAMPLE_SENTENCE)[0]) - - def list_collections(self): - """ - List the collections in the vector database. - - Returns: - List[str] | The list of collections. - """ - return self.db.list_collection_names() - - def create_collection( - self, - collection_name: str, - overwrite: bool = False, - get_or_create: bool = True, - ) -> Collection: - """ - Create a collection in the vector database and create a vector search index in the collection. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get or create the collection. 
Default is True - """ - if overwrite: - self.delete_collection(collection_name) - - if collection_name not in self.db.list_collection_names(): - # Create a new collection - coll = self.db.create_collection(collection_name) - self.create_index_if_not_exists(index_name=self.index_name, collection=coll) - return coll - - if get_or_create: - # The collection already exists, return it. - coll = self.db[collection_name] - self.create_index_if_not_exists(index_name=self.index_name, collection=coll) - return coll - else: - # get_or_create is False and the collection already exists, raise an error. - raise ValueError(f"Collection {collection_name} already exists.") - - def create_index_if_not_exists(self, index_name: str = "vector_index", collection: Collection = None) -> None: - """ - Creates a vector search index on the specified collection in MongoDB. - - Args: - MONGODB_INDEX (str, optional): The name of the vector search index to create. Defaults to "vector_search_index". - collection (Collection, optional): The MongoDB collection to create the index on. Defaults to None. - """ - if not self._is_index_ready(collection, index_name): - self.create_vector_search_index(collection, index_name) - - def get_collection(self, collection_name: str = None) -> Collection: - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. Default is None. If None, return the - current active collection. - - Returns: - Collection | The collection object. - """ - if collection_name is None: - if self.active_collection is None: - raise ValueError("No collection is specified.") - else: - logger.debug( - f"No collection is specified. Using current active collection {self.active_collection.name}." - ) - else: - self.active_collection = self.db[collection_name] - - return self.active_collection - - def delete_collection(self, collection_name: str) -> None: - """ - Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. - """ - for index in self.db[collection_name].list_search_indexes(): - self.db[collection_name].drop_search_index(index["name"]) - if self._wait_until_index_ready: - self._wait_for_index(self.db[collection_name], index["name"], "delete") - return self.db[collection_name].drop() - - def create_vector_search_index( - self, - collection: Collection, - index_name: Union[str, None] = "vector_index", - similarity: Literal["euclidean", "cosine", "dotProduct"] = "cosine", - ) -> None: - """Create a vector search index in the collection. - - Args: - collection: An existing Collection in the Atlas Database. - index_name: Vector Search Index name. - similarity: Algorithm used for measuring vector similarity. - kwargs: Additional keyword arguments. - - Returns: - None - """ - search_index_model = SearchIndexModel( - definition={ - "fields": [ - { - "type": "vector", - "numDimensions": self.dimensions, - "path": "embedding", - "similarity": similarity, - }, - ] - }, - name=index_name, - type="vectorSearch", - ) - # Create the search index - try: - collection.create_search_index(model=search_index_model) - if self._wait_until_index_ready: - self._wait_for_index(collection, index_name, "create") - logger.debug(f"Search index {index_name} created successfully.") - except Exception as e: - logger.error( - f"Error creating search index: {e}. \n" - f"Your client must be connected to an Atlas cluster. " - f"You may have to manually create a Collection and Search Index " - f"if you are on a free/shared cluster." 
- ) - raise e - - def insert_docs( - self, - docs: List[Document], - collection_name: str = None, - upsert: bool = False, - batch_size=DEFAULT_INSERT_BATCH_SIZE, - **kwargs, - ) -> None: - """Insert Documents and Vector Embeddings into the collection of the vector database. - - For large numbers of Documents, insertion is performed in batches. - - Args: - docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. - collection_name: str | The name of the collection. Default is None. - upsert: bool | Whether to update the document if it exists. Default is False. - batch_size: Number of documents to be inserted in each batch - """ - if not docs: - logger.info("No documents to insert.") - return - - collection = self.get_collection(collection_name) - if upsert: - self.update_docs(docs, collection.name, upsert=True) - else: - # Sanity checking the first document - if docs[0].get("content") is None: - raise ValueError("The document content is required.") - if docs[0].get("id") is None: - raise ValueError("The document id is required.") - - input_ids = set() - result_ids = set() - id_batch = [] - text_batch = [] - metadata_batch = [] - size = 0 - i = 0 - for doc in docs: - id = doc["id"] - text = doc["content"] - metadata = doc.get("metadata", {}) - id_batch.append(id) - text_batch.append(text) - metadata_batch.append(metadata) - id_size = 1 if isinstance(id, int) else len(id) - size += len(text) + len(metadata) + id_size - if (i + 1) % batch_size == 0 or size >= 47_000_000: - result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch)) - input_ids.update(id_batch) - id_batch = [] - text_batch = [] - metadata_batch = [] - size = 0 - i += 1 - if text_batch: - result_ids.update(self._insert_batch(collection, text_batch, metadata_batch, id_batch)) # type: ignore - input_ids.update(id_batch) - - if result_ids != input_ids: - logger.warning( - "Possible data corruption. " - "input_ids not in result_ids: {in_diff}.\n" - "result_ids not in input_ids: {out_diff}".format( - in_diff=input_ids.difference(result_ids), out_diff=result_ids.difference(input_ids) - ) - ) - if self._wait_until_document_ready and docs: - self._wait_for_document(collection, self.index_name, docs[-1]) - - def _insert_batch( - self, collection: Collection, texts: List[str], metadatas: List[Mapping[str, Any]], ids: List[ItemID] - ) -> Set[ItemID]: - """Compute embeddings for and insert a batch of Documents into the Collection. - - For performance reasons, we chose to call self.embedding_function just once, - with the hopefully small tradeoff of having recreating Document dicts. - - Args: - collection: MongoDB Collection - texts: List of the main contents of each document - metadatas: List of metadata mappings - ids: List of ids. Note that these are stored as _id in Collection. - - Returns: - List of ids inserted. - """ - n_texts = len(texts) - if n_texts == 0: - return [] - # Embed and create the documents - embeddings = self.embedding_function(texts).tolist() - assert ( - len(embeddings) == n_texts - ), f"The number of embeddings produced by self.embedding_function ({len(embeddings)} does not match the number of texts provided to it ({n_texts})." - to_insert = [ - {"_id": i, "content": t, "metadata": m, "embedding": e} - for i, t, m, e in zip(ids, texts, metadatas, embeddings) - ] - # insert the documents in MongoDB Atlas - insert_result = collection.insert_many(to_insert) # type: ignore - return insert_result.inserted_ids # TODO Remove this. 
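Continuing that sketch, inserting documents; as `_insert_batch` above shows, each `Document` is stored as a MongoDB document of the form `{"_id", "content", "metadata", "embedding"}`, with embeddings computed in a single batched call to the embedding function:

# Sketch only: ids and contents are illustrative.
docs = [
    {"id": "1", "content": "AutoGen enables multi-agent workflows.", "metadata": {"tag": "agents"}},
    {"id": "2", "content": "Vector databases store embeddings.", "metadata": {"tag": "db"}},
]
mongo_db.insert_docs(docs, collection_name="example_docs")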
Replace by log like update_docs - - def update_docs(self, docs: List[Document], collection_name: str = None, **kwargs: Any) -> None: - """Update documents, including their embeddings, in the Collection. - - Optionally allow upsert as kwarg. - - Uses deepcopy to avoid changing docs. - - Args: - docs: List[Document] | A list of documents. - collection_name: str | The name of the collection. Default is None. - kwargs: Any | Use upsert=True` to insert documents whose ids are not present in collection. - """ - - n_docs = len(docs) - logger.info(f"Preparing to embed and update {n_docs=}") - # Compute the embeddings - embeddings: list[list[float]] = self.embedding_function([doc["content"] for doc in docs]).tolist() - # Prepare the updates - all_updates = [] - for i in range(n_docs): - doc = deepcopy(docs[i]) - doc["embedding"] = embeddings[i] - doc["_id"] = doc.pop("id") - - all_updates.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=kwargs.get("upsert", False))) - # Perform update in bulk - collection = self.get_collection(collection_name) - result = collection.bulk_write(all_updates) - - if self._wait_until_document_ready and docs: - self._wait_for_document(collection, self.index_name, docs[-1]) - - # Log a result summary - logger.info( - "Matched: %s, Modified: %s, Upserted: %s", - result.matched_count, - result.modified_count, - result.upserted_count, - ) - - def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs): - """ - Delete documents from the collection of the vector database. - - Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. - collection_name: str | The name of the collection. Default is None. - """ - collection = self.get_collection(collection_name) - return collection.delete_many({"_id": {"$in": ids}}) - - def get_docs_by_ids( - self, ids: List[ItemID] = None, collection_name: str = None, include: List[str] = None, **kwargs - ) -> List[Document]: - """ - Retrieve documents from the collection of the vector database based on the ids. - - Args: - ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. - collection_name: str | The name of the collection. Default is None. - include: List[str] | The fields to include. - If None, will include ["metadata", "content"], ids will always be included. - Basically, use include to choose whether to include embedding and metadata - kwargs: dict | Additional keyword arguments. - - Returns: - List[Document] | The results. - """ - if include is None: - include_fields = {"_id": 1, "content": 1, "metadata": 1} - else: - include_fields = {k: 1 for k in set(include).union({"_id"})} - collection = self.get_collection(collection_name) - if ids is not None: - docs = collection.find({"_id": {"$in": ids}}, include_fields) - # Return with _id field from Collection into id for Document - return with_id_rename(docs) - else: - docs = collection.find({}, include_fields) - # Return with _id field from Collection into id for Document - return with_id_rename(docs) - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = -1, - **kwargs, - ) -> QueryResults: - """ - Retrieve documents from the collection of the vector database based on the queries. - - Args: - queries: List[str] | A list of queries. Each query is a string. - collection_name: str | The name of the collection. Default is None. - n_results: int | The number of relevant documents to return. Default is 10. 
- distance_threshold: float | The threshold for the distance score, only distance smaller than it will be - returned. Don't filter with it if < 0. Default is -1. - kwargs: Dict | Additional keyword arguments. Ones of importance follow: - oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm. - It determines the number of nearest neighbor candidates to consider during the search phase. - A higher value leads to more accuracy, but is slower. Default is 10 - - Returns: - QueryResults | For each query string, a list of nearest documents and their scores. - """ - collection = self.get_collection(collection_name) - # Trivial case of an empty collection - if collection.count_documents({}) == 0: - return [] - - logger.debug(f"Using index: {self.index_name}") - results = [] - for query_text in queries: - # Compute embedding vector from semantic query - logger.debug(f"Query: {query_text}") - query_vector = np.array(self.embedding_function([query_text])).tolist()[0] - # Find documents with similar vectors using the specified index - query_result = _vector_search( - query_vector, - n_results, - collection, - self.index_name, - distance_threshold, - **kwargs, - oversampling_factor=kwargs.get("oversampling_factor", 10), - ) - # Change each _id key to id. with_id_rename, but with (doc, score) tuples - results.append( - [({**{k: v for k, v in d[0].items() if k != "_id"}, "id": d[0]["_id"]}, d[1]) for d in query_result] - ) - return results - - -def _vector_search( - embedding_vector: List[float], - n_results: int, - collection: Collection, - index_name: str, - distance_threshold: float = -1.0, - oversampling_factor=10, - include_embedding=False, -) -> List[Tuple[Dict, float]]: - """Core $vectorSearch Aggregation pipeline. - - Args: - embedding_vector: Embedding vector of semantic query - n_results: Number of documents to return. Defaults to 4. - collection: MongoDB Collection with vector index - index_name: Name of the vector index - distance_threshold: Only distance measures smaller than this will be returned. - Don't filter with it if 1 < x < 0. Default is -1. - oversampling_factor: int | This times n_results is 'ef' in the HNSW algorithm. - It determines the number of nearest neighbor candidates to consider during the search phase. - A higher value leads to more accuracy, but is slower. Default = 10 - - Returns: - List of tuples of length n_results from Collection. - Each tuple contains a document dict and a score. 
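A retrieval sketch for the Atlas store; per the docstring above, each query returns `(document, score)` tuples, and only documents whose distance is below a non-negative `distance_threshold` are kept (the value below is illustrative):

# Sketch only, continuing the Atlas example above.
results = mongo_db.retrieve_docs(queries=["agent workflows"], n_results=3, distance_threshold=0.5)
for doc, score in results[0]:
    print(doc["id"], round(score, 3))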
- """ - - pipeline = [ - { - "$vectorSearch": { - "index": index_name, - "limit": n_results, - "numCandidates": n_results * oversampling_factor, - "queryVector": embedding_vector, - "path": "embedding", - } - }, - {"$set": {"score": {"$meta": "vectorSearchScore"}}}, - ] - if distance_threshold >= 0.0: - similarity_threshold = 1.0 - distance_threshold - pipeline.append({"$match": {"score": {"$gte": similarity_threshold}}}) - - if not include_embedding: - pipeline.append({"$project": {"embedding": 0}}) - - logger.debug("pipeline: %s", pipeline) - agg = collection.aggregate(pipeline) - return [(doc, doc.pop("score")) for doc in agg] diff --git a/autogen/agentchat/contrib/vectordb/pgvectordb.py b/autogen/agentchat/contrib/vectordb/pgvectordb.py deleted file mode 100644 index 6fce4a6db803..000000000000 --- a/autogen/agentchat/contrib/vectordb/pgvectordb.py +++ /dev/null @@ -1,953 +0,0 @@ -import os -import re -import urllib.parse -from typing import Callable, List, Optional, Union - -import numpy as np -from sentence_transformers import SentenceTransformer - -from .base import Document, ItemID, QueryResults, VectorDB -from .utils import get_logger - -try: - import pgvector - from pgvector.psycopg import register_vector -except ImportError: - raise ImportError("Please install pgvector: `pip install pgvector`") - -try: - import psycopg -except ImportError: - raise ImportError("Please install pgvector: `pip install psycopg`") - -PGVECTOR_MAX_BATCH_SIZE = os.environ.get("PGVECTOR_MAX_BATCH_SIZE", 40000) -logger = get_logger(__name__) - - -class Collection: - """ - A Collection object for PGVector. - - Attributes: - client: The PGVector client. - collection_name (str): The name of the collection. Default is "documents". - embedding_function (Callable): The embedding function used to generate the vector representation. - Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None. - Models can be chosen from: - https://huggingface.co/models?library=sentence-transformers - metadata (Optional[dict]): The metadata of the collection. - get_or_create (Optional): The flag indicating whether to get or create the collection. - """ - - def __init__( - self, - client=None, - collection_name: str = "autogen-docs", - embedding_function: Callable = None, - metadata=None, - get_or_create=None, - ): - """ - Initialize the Collection object. - - Args: - client: The PostgreSQL client. - collection_name: The name of the collection. Default is "documents". - embedding_function: The embedding function used to generate the vector representation. - metadata: The metadata of the collection. - get_or_create: The flag indicating whether to get or create the collection. 
- Returns: - None - """ - self.client = client - self.name = self.set_collection_name(collection_name) - self.require_embeddings_or_documents = False - self.ids = [] - if embedding_function: - self.embedding_function = embedding_function - else: - self.embedding_function = SentenceTransformer("all-MiniLM-L6-v2").encode - self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16} - self.documents = "" - self.get_or_create = get_or_create - # This will get the model dimension size by computing the embeddings dimensions - sentences = [ - "The weather is lovely today in paradise.", - ] - embeddings = self.embedding_function(sentences) - self.dimension = len(embeddings[0]) - - def set_collection_name(self, collection_name) -> str: - name = re.sub("-", "_", collection_name) - self.name = name - return self.name - - def add(self, ids: List[ItemID], documents: List, embeddings: List = None, metadatas: List = None) -> None: - """ - Add documents to the collection. - - Args: - ids (List[ItemID]): A list of document IDs. - embeddings (List): A list of document embeddings. Optional - metadatas (List): A list of document metadatas. Optional - documents (List): A list of documents. - - Returns: - None - """ - cursor = self.client.cursor() - sql_values = [] - if embeddings is not None and metadatas is not None: - for doc_id, embedding, metadata, document in zip(ids, embeddings, metadatas, documents): - metadata = re.sub("'", '"', str(metadata)) - sql_values.append((doc_id, embedding, metadata, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, embedding, metadatas, documents)\n" f"VALUES (%s, %s, %s, %s);\n" - ) - elif embeddings is not None: - for doc_id, embedding, document in zip(ids, embeddings, documents): - sql_values.append((doc_id, embedding, document)) - sql_string = f"INSERT INTO {self.name} (id, embedding, documents) " f"VALUES (%s, %s, %s);\n" - elif metadatas is not None: - for doc_id, metadata, document in zip(ids, metadatas, documents): - metadata = re.sub("'", '"', str(metadata)) - embedding = self.embedding_function(document) - sql_values.append((doc_id, metadata, embedding, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, metadatas, embedding, documents)\n" f"VALUES (%s, %s, %s, %s);\n" - ) - else: - for doc_id, document in zip(ids, documents): - embedding = self.embedding_function(document) - sql_values.append((doc_id, document, embedding)) - sql_string = f"INSERT INTO {self.name} (id, documents, embedding)\n" f"VALUES (%s, %s, %s);\n" - logger.debug(f"Add SQL String:\n{sql_string}\n{sql_values}") - cursor.executemany(sql_string, sql_values) - cursor.close() - - def upsert(self, ids: List[ItemID], documents: List, embeddings: List = None, metadatas: List = None) -> None: - """ - Upsert documents into the collection. - - Args: - ids (List[ItemID]): A list of document IDs. - documents (List): A list of documents. - embeddings (List): A list of document embeddings. - metadatas (List): A list of document metadatas. 
- - Returns: - None - """ - cursor = self.client.cursor() - sql_values = [] - if embeddings is not None and metadatas is not None: - for doc_id, embedding, metadata, document in zip(ids, embeddings, metadatas, documents): - metadata = re.sub("'", '"', str(metadata)) - sql_values.append((doc_id, embedding, metadata, document, embedding, metadata, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, embedding, metadatas, documents)\n" - f"VALUES (%s, %s, %s, %s)\n" - f"ON CONFLICT (id)\n" - f"DO UPDATE SET embedding = %s,\n" - f"metadatas = %s, documents = %s;\n" - ) - elif embeddings is not None: - for doc_id, embedding, document in zip(ids, embeddings, documents): - sql_values.append((doc_id, embedding, document, embedding, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, embedding, documents) " - f"VALUES (%s, %s, %s) ON CONFLICT (id)\n" - f"DO UPDATE SET embedding = %s, documents = %s;\n" - ) - elif metadatas is not None: - for doc_id, metadata, document in zip(ids, metadatas, documents): - metadata = re.sub("'", '"', str(metadata)) - embedding = self.embedding_function(document) - sql_values.append((doc_id, metadata, embedding, document, metadata, document, embedding)) - sql_string = ( - f"INSERT INTO {self.name} (id, metadatas, embedding, documents)\n" - f"VALUES (%s, %s, %s, %s)\n" - f"ON CONFLICT (id)\n" - f"DO UPDATE SET metadatas = %s, documents = %s, embedding = %s;\n" - ) - else: - for doc_id, document in zip(ids, documents): - embedding = self.embedding_function(document) - sql_values.append((doc_id, document, embedding, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, documents, embedding)\n" - f"VALUES (%s, %s, %s)\n" - f"ON CONFLICT (id)\n" - f"DO UPDATE SET documents = %s;\n" - ) - logger.debug(f"Upsert SQL String:\n{sql_string}\n{sql_values}") - cursor.executemany(sql_string, sql_values) - cursor.close() - - def count(self) -> int: - """ - Get the total number of documents in the collection. - - Returns: - int: The total number of documents. - """ - cursor = self.client.cursor() - query = f"SELECT COUNT(*) FROM {self.name}" - cursor.execute(query) - total = cursor.fetchone()[0] - cursor.close() - try: - total = int(total) - except (TypeError, ValueError): - total = None - return total - - def table_exists(self, table_name: str) -> bool: - """ - Check if a table exists in the PostgreSQL database. - - Args: - table_name (str): The name of the table to check. - - Returns: - bool: True if the table exists, False otherwise. - """ - - cursor = self.client.cursor() - cursor.execute( - """ - SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_name = %s - ) - """, - (table_name,), - ) - exists = cursor.fetchone()[0] - return exists - - def get( - self, - ids: Optional[str] = None, - include: Optional[str] = None, - where: Optional[str] = None, - limit: Optional[Union[int, str]] = None, - offset: Optional[Union[int, str]] = None, - ) -> List[Document]: - """ - Retrieve documents from the collection. - - Args: - ids (Optional[List]): A list of document IDs. - include (Optional): The fields to include. - where (Optional): Additional filtering criteria. - limit (Optional): The maximum number of documents to retrieve. - offset (Optional): The offset for pagination. - - Returns: - List: The retrieved documents. 
- """ - cursor = self.client.cursor() - - # Initialize variables for query components - select_clause = "SELECT id, metadatas, documents, embedding" - from_clause = f"FROM {self.name}" - where_clause = "" - limit_clause = "" - offset_clause = "" - - # Handle include clause - if include: - select_clause = f"SELECT id, {', '.join(include)}, embedding" - - # Handle where clause - if ids: - where_clause = f"WHERE id IN ({', '.join(['%s' for _ in ids])})" - elif where: - where_clause = f"WHERE {where}" - - # Handle limit and offset clauses - if limit: - limit_clause = "LIMIT %s" - if offset: - offset_clause = "OFFSET %s" - - # Construct the full query - query = f"{select_clause} {from_clause} {where_clause} {limit_clause} {offset_clause}" - retrieved_documents = [] - try: - # Execute the query with the appropriate values - if ids is not None: - cursor.execute(query, ids) - else: - query_params = [] - if limit: - query_params.append(limit) - if offset: - query_params.append(offset) - cursor.execute(query, query_params) - - retrieval = cursor.fetchall() - for retrieved_document in retrieval: - retrieved_documents.append( - Document( - id=retrieved_document[0].strip(), - metadata=retrieved_document[1], - content=retrieved_document[2], - embedding=retrieved_document[3], - ) - ) - except (psycopg.errors.UndefinedTable, psycopg.errors.UndefinedColumn) as e: - logger.info(f"Error executing select on non-existent table: {self.name}. Creating it instead. Error: {e}") - self.create_collection(collection_name=self.name, dimension=self.dimension) - logger.info(f"Created table {self.name}") - - cursor.close() - return retrieved_documents - - def update(self, ids: List, embeddings: List, metadatas: List, documents: List) -> None: - """ - Update documents in the collection. - - Args: - ids (List): A list of document IDs. - embeddings (List): A list of document embeddings. - metadatas (List): A list of document metadatas. - documents (List): A list of documents. - - Returns: - None - """ - cursor = self.client.cursor() - sql_values = [] - for doc_id, embedding, metadata, document in zip(ids, embeddings, metadatas, documents): - sql_values.append((doc_id, embedding, metadata, document, doc_id, embedding, metadata, document)) - sql_string = ( - f"INSERT INTO {self.name} (id, embedding, metadata, document) " - f"VALUES (%s, %s, %s, %s) " - f"ON CONFLICT (id) " - f"DO UPDATE SET id = %s, embedding = %s, " - f"metadata = %s, document = %s;\n" - ) - logger.debug(f"Upsert SQL String:\n{sql_string}\n") - cursor.executemany(sql_string, sql_values) - cursor.close() - - @staticmethod - def euclidean_distance(arr1: List[float], arr2: List[float]) -> float: - """ - Calculate the Euclidean distance between two vectors. - - Parameters: - - arr1 (List[float]): The first vector. - - arr2 (List[float]): The second vector. - - Returns: - - float: The Euclidean distance between arr1 and arr2. - """ - dist = np.linalg.norm(arr1 - arr2) - return dist - - @staticmethod - def cosine_distance(arr1: List[float], arr2: List[float]) -> float: - """ - Calculate the cosine distance between two vectors. - - Parameters: - - arr1 (List[float]): The first vector. - - arr2 (List[float]): The second vector. - - Returns: - - float: The cosine distance between arr1 and arr2. - """ - dist = np.dot(arr1, arr2) / (np.linalg.norm(arr1) * np.linalg.norm(arr2)) - return dist - - @staticmethod - def inner_product_distance(arr1: List[float], arr2: List[float]) -> float: - """ - Calculate the Euclidean distance between two vectors. 
- - Parameters: - - arr1 (List[float]): The first vector. - - arr2 (List[float]): The second vector. - - Returns: - - float: The Euclidean distance between arr1 and arr2. - """ - dist = np.linalg.norm(arr1 - arr2) - return dist - - def query( - self, - query_texts: List[str], - collection_name: Optional[str] = None, - n_results: Optional[int] = 10, - distance_type: Optional[str] = "euclidean", - distance_threshold: Optional[float] = -1, - include_embedding: Optional[bool] = False, - ) -> QueryResults: - """ - Query documents in the collection. - - Args: - query_texts (List[str]): A list of query texts. - collection_name (Optional[str]): The name of the collection. - n_results (int): The maximum number of results to return. - distance_type (Optional[str]): Distance search type - euclidean or cosine - distance_threshold (Optional[float]): Distance threshold to limit searches - include_embedding (Optional[bool]): Include embedding values in QueryResults - Returns: - QueryResults: The query results. - """ - if collection_name: - self.name = collection_name - - clause = "ORDER BY" - if distance_threshold == -1: - distance_threshold = "" - clause = "ORDER BY" - elif distance_threshold > 0: - distance_threshold = f"< {distance_threshold}" - clause = "WHERE" - - cursor = self.client.cursor() - results = [] - for query_text in query_texts: - vector = self.embedding_function(query_text) - - if distance_type.lower() == "cosine": - index_function = "<=>" - elif distance_type.lower() == "euclidean": - index_function = "<->" - elif distance_type.lower() == "inner-product": - index_function = "<#>" - else: - index_function = "<->" - query = ( - f"SELECT id, documents, embedding, metadatas " - f"FROM {self.name} " - f"{clause} embedding {index_function} '{str(vector)}' {distance_threshold} " - f"LIMIT {n_results}" - ) - cursor.execute(query) - result = [] - for row in cursor.fetchall(): - fetched_document = Document(id=row[0].strip(), content=row[1], embedding=row[2], metadata=row[3]) - fetched_document_array = self.convert_string_to_array(array_string=fetched_document.get("embedding")) - if distance_type.lower() == "cosine": - distance = self.cosine_distance(fetched_document_array, vector) - elif distance_type.lower() == "euclidean": - distance = self.euclidean_distance(fetched_document_array, vector) - elif distance_type.lower() == "inner-product": - distance = self.inner_product_distance(fetched_document_array, vector) - else: - distance = self.euclidean_distance(fetched_document_array, vector) - if not include_embedding: - fetched_document = Document(id=row[0].strip(), content=row[1], metadata=row[3]) - result.append((fetched_document, distance)) - results.append(result) - cursor.close() - logger.debug(f"Query Results: {results}") - return results - - @staticmethod - def convert_string_to_array(array_string: str) -> List[float]: - """ - Convert a string representation of an array to a list of floats. - - Parameters: - - array_string (str): The string representation of the array. - - Returns: - - list: A list of floats parsed from the input string. If the input is - not a string, it returns the input itself. - """ - if not isinstance(array_string, str): - return array_string - array_string = array_string.strip("[]") - array = [float(num) for num in array_string.split()] - return array - - def modify(self, metadata, collection_name: Optional[str] = None) -> None: - """ - Modify metadata for the collection. - - Args: - collection_name: The name of the collection. - metadata: The new metadata. 
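As a hedged aside on the distance helpers and the `query` operators above: pgvector's `<->`, `<=>`, and `<#>` operators order by L2 distance, cosine distance, and negative inner product respectively. A reference sketch of those three conventional forms (not the module's own helpers above, which return cosine similarity and reuse the Euclidean formula for the inner-product case) might look like:

import numpy as np

# Reference sketch only: conventional definitions matching pgvector's
# "<->" (L2 distance), "<=>" (cosine distance) and "<#>" (negative
# inner product) ordering operators.
def l2_distance(a, b):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(np.linalg.norm(a - b))

def cosine_distance(a, b):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(1.0 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def negative_inner_product(a, b):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(-np.dot(a, b))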
- - Returns: - None - """ - if collection_name: - self.name = collection_name - cursor = self.client.cursor() - cursor.execute( - "UPDATE collections" "SET metadata = '%s'" "WHERE collection_name = '%s';", (metadata, self.name) - ) - cursor.close() - - def delete(self, ids: List[ItemID], collection_name: Optional[str] = None) -> None: - """ - Delete documents from the collection. - - Args: - ids (List[ItemID]): A list of document IDs to delete. - collection_name (str): The name of the collection to delete. - - Returns: - None - """ - if collection_name: - self.name = collection_name - cursor = self.client.cursor() - id_placeholders = ", ".join(["%s" for _ in ids]) - cursor.execute(f"DELETE FROM {self.name} WHERE id IN ({id_placeholders});", ids) - cursor.close() - - def delete_collection(self, collection_name: Optional[str] = None) -> None: - """ - Delete the entire collection. - - Args: - collection_name (Optional[str]): The name of the collection to delete. - - Returns: - None - """ - if collection_name: - self.name = collection_name - cursor = self.client.cursor() - cursor.execute(f"DROP TABLE IF EXISTS {self.name}") - cursor.close() - - def create_collection( - self, collection_name: Optional[str] = None, dimension: Optional[Union[str, int]] = None - ) -> None: - """ - Create a new collection. - - Args: - collection_name (Optional[str]): The name of the new collection. - dimension (Optional[Union[str, int]]): The dimension size of the sentence embedding model - - Returns: - None - """ - if collection_name: - self.name = collection_name - - if dimension: - self.dimension = dimension - elif self.dimension is None: - self.dimension = 384 - - cursor = self.client.cursor() - cursor.execute( - f"CREATE TABLE {self.name} (" - f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector({self.dimension}));" - f"CREATE INDEX " - f'ON {self.name} USING hnsw (embedding vector_l2_ops) WITH (m = {self.metadata["hnsw:M"]}, ' - f'ef_construction = {self.metadata["hnsw:construction_ef"]});' - f"CREATE INDEX " - f'ON {self.name} USING hnsw (embedding vector_cosine_ops) WITH (m = {self.metadata["hnsw:M"]}, ' - f'ef_construction = {self.metadata["hnsw:construction_ef"]});' - f"CREATE INDEX " - f'ON {self.name} USING hnsw (embedding vector_ip_ops) WITH (m = {self.metadata["hnsw:M"]}, ' - f'ef_construction = {self.metadata["hnsw:construction_ef"]});' - ) - cursor.close() - - -class PGVectorDB(VectorDB): - """ - A vector database that uses PGVector as the backend. - """ - - def __init__( - self, - *, - conn: Optional[psycopg.Connection] = None, - connection_string: Optional[str] = None, - host: Optional[str] = None, - port: Optional[Union[int, str]] = None, - dbname: Optional[str] = None, - username: Optional[str] = None, - password: Optional[str] = None, - connect_timeout: Optional[int] = 10, - embedding_function: Callable = None, - metadata: Optional[dict] = None, - ) -> None: - """ - Initialize the vector database. - - Note: connection_string or host + port + dbname must be specified - - Args: - conn: psycopg.Connection | A customer connection object to connect to the database. - A connection object may include additional key/values: - https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING - connection_string: "postgresql://username:password@hostname:port/database" | The PGVector connection string. Default is None. - host: str | The host to connect to. Default is None. - port: int | The port to connect to. Default is None. 
- dbname: str | The database name to connect to. Default is None. - username: str | The database username to use. Default is None. - password: str | The database user password to use. Default is None. - connect_timeout: int | The timeout to set for the connection. Default is 10. - embedding_function: Callable | The embedding function used to generate the vector representation. - Default is None. SentenceTransformer("all-MiniLM-L6-v2").encode will be used when None. - Models can be chosen from: - https://huggingface.co/models?library=sentence-transformers - metadata: dict | The metadata of the vector database. Default is None. If None, it will use this - setting: {"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 16}. Creates Index on table - using hnsw (embedding vector_l2_ops) WITH (m = hnsw:M) ef_construction = "hnsw:construction_ef". - For more info: https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw - Returns: - None - """ - self.client = self.establish_connection( - conn=conn, - connection_string=connection_string, - host=host, - port=port, - dbname=dbname, - username=username, - password=password, - connect_timeout=connect_timeout, - ) - if embedding_function: - self.embedding_function = embedding_function - else: - self.embedding_function = lambda s: SentenceTransformer("all-MiniLM-L6-v2").encode(s).tolist() - self.metadata = metadata - register_vector(self.client) - self.active_collection = None - - def establish_connection( - self, - conn: Optional[psycopg.Connection] = None, - connection_string: Optional[str] = None, - host: Optional[str] = None, - port: Optional[Union[int, str]] = None, - dbname: Optional[str] = None, - username: Optional[str] = None, - password: Optional[str] = None, - connect_timeout: Optional[int] = 10, - ) -> psycopg.Connection: - """ - Establishes a connection to a PostgreSQL database using psycopg. - - Args: - conn: An existing psycopg connection object. If provided, this connection will be used. - connection_string: A string containing the connection information. If provided, a new connection will be established using this string. - host: The hostname of the PostgreSQL server. Used if connection_string is not provided. - port: The port number to connect to at the server host. Used if connection_string is not provided. - dbname: The database name. Used if connection_string is not provided. - username: The username to connect as. Used if connection_string is not provided. - password: The user's password. Used if connection_string is not provided. - connect_timeout: Maximum wait for connection, in seconds. The default is 10 seconds. - - Returns: - A psycopg.Connection object representing the established connection. - - Raises: - PermissionError if no credentials are supplied - psycopg.Error: If an error occurs while trying to connect to the database. 
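# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Hypothetical construction of the PGVectorDB defined in this file, showing the two
# documented connection styles and the default-style HNSW metadata. Host, database
# and credentials are placeholders; a running PostgreSQL server with the pgvector
# extension is assumed.
db = PGVectorDB(
    connection_string="postgresql://postgres:password@localhost:5432/vectordb",
    metadata={"hnsw:space": "ip", "hnsw:construction_ef": 30, "hnsw:M": 16},
)

# Equivalent keyword form, used when no connection string or conn object is given:
db = PGVectorDB(
    host="localhost",
    port=5432,
    dbname="vectordb",
    username="postgres",
    password="password",
    connect_timeout=10,
)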
- """ - try: - if conn: - self.client = conn - elif connection_string: - parsed_connection = urllib.parse.urlparse(connection_string) - encoded_username = urllib.parse.quote(parsed_connection.username, safe="") - encoded_password = urllib.parse.quote(parsed_connection.password, safe="") - encoded_password = f":{encoded_password}@" - encoded_host = urllib.parse.quote(parsed_connection.hostname, safe="") - encoded_port = f":{parsed_connection.port}" - encoded_database = urllib.parse.quote(parsed_connection.path[1:], safe="") - connection_string_encoded = ( - f"{parsed_connection.scheme}://{encoded_username}{encoded_password}" - f"{encoded_host}{encoded_port}/{encoded_database}" - ) - self.client = psycopg.connect(conninfo=connection_string_encoded, autocommit=True) - elif host: - connection_string = "" - if host: - encoded_host = urllib.parse.quote(host, safe="") - connection_string += f"host={encoded_host} " - if port: - connection_string += f"port={port} " - if dbname: - encoded_database = urllib.parse.quote(dbname, safe="") - connection_string += f"dbname={encoded_database} " - if username: - encoded_username = urllib.parse.quote(username, safe="") - connection_string += f"user={encoded_username} " - if password: - encoded_password = urllib.parse.quote(password, safe="") - connection_string += f"password={encoded_password} " - - self.client = psycopg.connect( - conninfo=connection_string, - connect_timeout=connect_timeout, - autocommit=True, - ) - else: - logger.error("Credentials were not supplied...") - raise PermissionError - self.client.execute("CREATE EXTENSION IF NOT EXISTS vector") - except psycopg.Error as e: - logger.error("Error connecting to the database: ", e) - raise e - return self.client - - def create_collection( - self, collection_name: str, overwrite: bool = False, get_or_create: bool = True - ) -> Collection: - """ - Create a collection in the vector database. - Case 1. if the collection does not exist, create the collection. - Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raise a ValueError. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get the collection if it exists. Default is True. - - Returns: - Collection | The collection object. 
- """ - try: - if self.active_collection and self.active_collection.name == collection_name: - collection = self.active_collection - else: - collection = self.get_collection(collection_name) - except ValueError: - collection = None - if collection is None: - collection = Collection( - client=self.client, - collection_name=collection_name, - embedding_function=self.embedding_function, - get_or_create=get_or_create, - metadata=self.metadata, - ) - collection.set_collection_name(collection_name=collection_name) - collection.create_collection(collection_name=collection_name) - return collection - elif overwrite: - self.delete_collection(collection_name) - collection = Collection( - client=self.client, - collection_name=collection_name, - embedding_function=self.embedding_function, - get_or_create=get_or_create, - metadata=self.metadata, - ) - collection.set_collection_name(collection_name=collection_name) - collection.create_collection(collection_name=collection_name) - return collection - elif get_or_create: - return collection - elif not collection.table_exists(table_name=collection_name): - collection = Collection( - client=self.client, - collection_name=collection_name, - embedding_function=self.embedding_function, - get_or_create=get_or_create, - metadata=self.metadata, - ) - collection.set_collection_name(collection_name=collection_name) - collection.create_collection(collection_name=collection_name) - return collection - else: - raise ValueError(f"Collection {collection_name} already exists.") - - def get_collection(self, collection_name: str = None) -> Collection: - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. Default is None. If None, return the - current active collection. - - Returns: - Collection | The collection object. - """ - if collection_name is None: - if self.active_collection is None: - raise ValueError("No collection is specified.") - else: - logger.debug( - f"No collection is specified. Using current active collection {self.active_collection.name}." - ) - else: - if not (self.active_collection and self.active_collection.name == collection_name): - self.active_collection = Collection( - client=self.client, - collection_name=collection_name, - embedding_function=self.embedding_function, - ) - return self.active_collection - - def delete_collection(self, collection_name: str) -> None: - """ - Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. 
- - Returns: - None - """ - if self.active_collection: - self.active_collection.delete_collection(collection_name) - else: - collection = self.get_collection(collection_name) - collection.delete_collection(collection_name) - if self.active_collection and self.active_collection.name == collection_name: - self.active_collection = None - - def _batch_insert( - self, collection: Collection, embeddings=None, ids=None, metadatas=None, documents=None, upsert=False - ) -> None: - batch_size = int(PGVECTOR_MAX_BATCH_SIZE) - default_metadata = {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16} - default_metadatas = [default_metadata] * min(batch_size, len(documents)) - for i in range(0, len(documents), min(batch_size, len(documents))): - end_idx = i + min(batch_size, len(documents) - i) - collection_kwargs = { - "documents": documents[i:end_idx], - "ids": ids[i:end_idx], - "metadatas": metadatas[i:end_idx] if metadatas else default_metadatas, - "embeddings": embeddings[i:end_idx] if embeddings else None, - } - if upsert: - collection.upsert(**collection_kwargs) - else: - collection.add(**collection_kwargs) - - def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None: - """ - Insert documents into the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. - collection_name: str | The name of the collection. Default is None. - upsert: bool | Whether to update the document if it exists. Default is False. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - if not docs: - return - if docs[0].get("content") is None: - raise ValueError("The document content is required.") - if docs[0].get("id") is None: - raise ValueError("The document id is required.") - documents = [doc.get("content") for doc in docs] - ids = [doc.get("id") for doc in docs] - - collection = self.get_collection(collection_name) - if docs[0].get("embedding") is None: - logger.debug( - "No content embedding is provided. " - "Will use the VectorDB's embedding function to generate the content embedding." - ) - embeddings = None - else: - embeddings = [doc.get("embedding") for doc in docs] - if docs[0].get("metadata") is None: - metadatas = None - else: - metadatas = [doc.get("metadata") for doc in docs] - - self._batch_insert(collection, embeddings, ids, metadatas, documents, upsert) - - def update_docs(self, docs: List[Document], collection_name: str = None) -> None: - """ - Update documents in the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. - collection_name: str | The name of the collection. Default is None. - - Returns: - None - """ - self.insert_docs(docs, collection_name, upsert=True) - - def delete_docs(self, ids: List[ItemID], collection_name: str = None) -> None: - """ - Delete documents from the collection of the vector database. - - Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. - collection_name: str | The name of the collection. Default is None. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - collection = self.get_collection(collection_name) - collection.delete(ids=ids, collection_name=collection_name) - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = -1, - ) -> QueryResults: - """ - Retrieve documents from the collection of the vector database based on the queries. 
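# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Standalone rendering of the slicing pattern used by _batch_insert() above:
# documents are written in chunks of at most PGVECTOR_MAX_BATCH_SIZE per call.
# The helper name is hypothetical.
from typing import Iterator, List, Sequence


def iter_batches(items: Sequence, batch_size: int) -> Iterator[List]:
    step = min(batch_size, len(items)) or 1
    for start in range(0, len(items), step):
        yield list(items[start:start + step])


# e.g. list(iter_batches(range(10), 4)) -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]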
- - Args: - queries: List[str] | A list of queries. Each query is a string. - collection_name: str | The name of the collection. Default is None. - n_results: int | The number of relevant documents to return. Default is 10. - distance_threshold: float | The threshold for the distance score, only distance smaller than it will be - returned. Don't filter with it if < 0. Default is -1. - kwargs: Dict | Additional keyword arguments. - - Returns: - QueryResults | The query results. Each query result is a list of list of tuples containing the document and - the distance. - """ - collection = self.get_collection(collection_name) - if isinstance(queries, str): - queries = [queries] - results = collection.query( - query_texts=queries, - n_results=n_results, - distance_threshold=distance_threshold, - ) - logger.debug(f"Retrieve Docs Results:\n{results}") - return results - - def get_docs_by_ids( - self, ids: List[ItemID] = None, collection_name: str = None, include=None, **kwargs - ) -> List[Document]: - """ - Retrieve documents from the collection of the vector database based on the ids. - - Args: - ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. - collection_name: str | The name of the collection. Default is None. - include: List[str] | The fields to include. Default is None. - If None, will include ["metadatas", "documents"], ids will always be included. - kwargs: dict | Additional keyword arguments. - - Returns: - List[Document] | The results. - """ - collection = self.get_collection(collection_name) - include = include if include else ["metadatas", "documents"] - results = collection.get(ids, include=include, **kwargs) - logger.debug(f"Retrieve Documents by ID Results:\n{results}") - return results diff --git a/autogen/agentchat/contrib/vectordb/qdrant.py b/autogen/agentchat/contrib/vectordb/qdrant.py deleted file mode 100644 index 2c5194a9f73f..000000000000 --- a/autogen/agentchat/contrib/vectordb/qdrant.py +++ /dev/null @@ -1,328 +0,0 @@ -import abc -import logging -import os -from typing import Callable, List, Optional, Sequence, Tuple, Union - -from .base import Document, ItemID, QueryResults, VectorDB -from .utils import get_logger - -try: - from qdrant_client import QdrantClient, models -except ImportError: - raise ImportError("Please install qdrant-client: `pip install qdrant-client`") - -logger = get_logger(__name__) - -Embeddings = Union[Sequence[float], Sequence[int]] - - -class EmbeddingFunction(abc.ABC): - @abc.abstractmethod - def __call__(self, inputs: List[str]) -> List[Embeddings]: - raise NotImplementedError - - -class FastEmbedEmbeddingFunction(EmbeddingFunction): - """Embedding function implementation using FastEmbed - https://qdrant.github.io/fastembed.""" - - def __init__( - self, - model_name: str = "BAAI/bge-small-en-v1.5", - batch_size: int = 256, - cache_dir: Optional[str] = None, - threads: Optional[int] = None, - parallel: Optional[int] = None, - **kwargs, - ): - """Initialize fastembed.TextEmbedding. - - Args: - model_name (str): The name of the model to use. Defaults to `"BAAI/bge-small-en-v1.5"`. - batch_size (int): Batch size for encoding. Higher values will use more memory, but be faster.\ - Defaults to 256. - cache_dir (str, optional): The path to the model cache directory.\ - Can also be set using the `FASTEMBED_CACHE_PATH` env variable. - threads (int, optional): The number of threads single onnxruntime session can use. 
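# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Hypothetical call to the PGVectorDB.retrieve_docs() defined above: each inner
# list of the returned QueryResults pairs a Document with its distance to the query.
# `db` is assumed to be a PGVectorDB instance with an "autogen_docs" collection.
results = db.retrieve_docs(
    queries=["What is AutoGen?"],
    collection_name="autogen_docs",
    n_results=5,
    distance_threshold=0.8,  # only distances smaller than 0.8 are kept
)
for document, distance in results[0]:
    print(document["id"], round(distance, 3))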
- parallel (int, optional): If `>1`, data-parallel encoding will be used, recommended for large datasets.\ - If `0`, use all available cores.\ - If `None`, don't use data-parallel processing, use default onnxruntime threading.\ - Defaults to None. - **kwargs: Additional options to pass to fastembed.TextEmbedding - Raises: - ValueError: If the model_name is not in the format / e.g. BAAI/bge-small-en-v1.5. - """ - try: - from fastembed import TextEmbedding - except ImportError as e: - raise ValueError( - "The 'fastembed' package is not installed. Please install it with `pip install fastembed`", - ) from e - self._batch_size = batch_size - self._parallel = parallel - self._model = TextEmbedding(model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs) - - def __call__(self, inputs: List[str]) -> List[Embeddings]: - embeddings = self._model.embed(inputs, batch_size=self._batch_size, parallel=self._parallel) - - return [embedding.tolist() for embedding in embeddings] - - -class QdrantVectorDB(VectorDB): - """ - A vector database implementation that uses Qdrant as the backend. - """ - - def __init__( - self, - *, - client=None, - embedding_function: EmbeddingFunction = None, - content_payload_key: str = "_content", - metadata_payload_key: str = "_metadata", - collection_options: dict = {}, - **kwargs, - ) -> None: - """ - Initialize the vector database. - - Args: - client: qdrant_client.QdrantClient | An instance of QdrantClient. - embedding_function: Callable | The embedding function used to generate the vector representation - of the documents. Defaults to FastEmbedEmbeddingFunction. - collection_options: dict | The options for creating the collection. - kwargs: dict | Additional keyword arguments. - """ - self.client: QdrantClient = client or QdrantClient(location=":memory:") - self.embedding_function = embedding_function or FastEmbedEmbeddingFunction() - self.collection_options = collection_options - self.content_payload_key = content_payload_key - self.metadata_payload_key = metadata_payload_key - self.type = "qdrant" - - def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> None: - """ - Create a collection in the vector database. - Case 1. if the collection does not exist, create the collection. - Case 2. the collection exists, if overwrite is True, it will overwrite the collection. - Case 3. the collection exists and overwrite is False, if get_or_create is True, it will get the collection, - otherwise it raise a ValueError. - - Args: - collection_name: str | The name of the collection. - overwrite: bool | Whether to overwrite the collection if it exists. Default is False. - get_or_create: bool | Whether to get the collection if it exists. Default is True. - - Returns: - Any | The collection object. - """ - embeddings_size = len(self.embedding_function(["test"])[0]) - - if self.client.collection_exists(collection_name) and overwrite: - self.client.delete_collection(collection_name) - - if not self.client.collection_exists(collection_name): - self.client.create_collection( - collection_name, - vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE), - **self.collection_options, - ) - elif not get_or_create: - raise ValueError(f"Collection {collection_name} already exists.") - - def get_collection(self, collection_name: Optional[str] = None): - """ - Get the collection from the vector database. - - Args: - collection_name: str | The name of the collection. 
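# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Hypothetical use of the QdrantVectorDB defined above (module path as it existed
# before this removal). With no client supplied it falls back to an in-memory
# QdrantClient, and create_collection() embeds the probe string "test" once to
# determine the vector size.
from autogen.agentchat.contrib.vectordb.qdrant import QdrantVectorDB

qdb = QdrantVectorDB()                                  # in-memory Qdrant, FastEmbed embeddings
qdb.create_collection("autogen_docs")                   # cosine distance, size from the embedder
qdb.create_collection("autogen_docs", overwrite=True)   # drop and recreate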
- - Returns: - Any | The collection object. - """ - if collection_name is None: - raise ValueError("The collection name is required.") - - return self.client.get_collection(collection_name) - - def delete_collection(self, collection_name: str) -> None: - """Delete the collection from the vector database. - - Args: - collection_name: str | The name of the collection. - - Returns: - Any - """ - return self.client.delete_collection(collection_name) - - def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None: - """ - Insert documents into the collection of the vector database. - - Args: - docs: List[Document] | A list of documents. Each document is a TypedDict `Document`. - collection_name: str | The name of the collection. Default is None. - upsert: bool | Whether to update the document if it exists. Default is False. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - if not docs: - return - if any(doc.get("content") is None for doc in docs): - raise ValueError("The document content is required.") - if any(doc.get("id") is None for doc in docs): - raise ValueError("The document id is required.") - - if not upsert and not self._validate_upsert_ids(collection_name, [doc["id"] for doc in docs]): - logger.log("Some IDs already exist. Skipping insert", level=logging.WARN) - - self.client.upsert(collection_name, points=self._documents_to_points(docs)) - - def update_docs(self, docs: List[Document], collection_name: str = None) -> None: - if not docs: - return - if any(doc.get("id") is None for doc in docs): - raise ValueError("The document id is required.") - if any(doc.get("content") is None for doc in docs): - raise ValueError("The document content is required.") - if self._validate_update_ids(collection_name, [doc["id"] for doc in docs]): - return self.client.upsert(collection_name, points=self._documents_to_points(docs)) - - raise ValueError("Some IDs do not exist. Skipping update") - - def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None: - """ - Delete documents from the collection of the vector database. - - Args: - ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`. - collection_name: str | The name of the collection. Default is None. - kwargs: Dict | Additional keyword arguments. - - Returns: - None - """ - self.client.delete(collection_name, ids) - - def retrieve_docs( - self, - queries: List[str], - collection_name: str = None, - n_results: int = 10, - distance_threshold: float = 0, - **kwargs, - ) -> QueryResults: - """ - Retrieve documents from the collection of the vector database based on the queries. - - Args: - queries: List[str] | A list of queries. Each query is a string. - collection_name: str | The name of the collection. Default is None. - n_results: int | The number of relevant documents to return. Default is 10. - distance_threshold: float | The threshold for the distance score, only distance smaller than it will be - returned. Don't filter with it if < 0. Default is 0. - kwargs: Dict | Additional keyword arguments. - - Returns: - QueryResults | The query results. Each query result is a list of list of tuples containing the document and - the distance. 
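# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Hypothetical insert/query round trip against the Qdrant backend above, reusing
# the `qdb` instance from the previous sketch. Qdrant point ids must be unsigned
# integers or UUID strings, so integer ids are used here.
docs = [
    {"id": 1, "content": "AutoGen enables multi-agent conversations."},
    {"id": 2, "content": "pgvector adds vector search to PostgreSQL."},
]
qdb.insert_docs(docs, collection_name="autogen_docs")
hits = qdb.retrieve_docs(["multi-agent framework"], collection_name="autogen_docs", n_results=2)
for document, score in hits[0]:
    print(document["id"], document["content"], score)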
- """ - embeddings = self.embedding_function(queries) - requests = [ - models.QueryRequest( - query=embedding, - limit=n_results, - score_threshold=distance_threshold, - with_payload=True, - with_vector=False, - ) - for embedding in embeddings - ] - - batch_results = self.client.query_batch_points(collection_name, requests) - return [self._scored_points_to_documents(results.points) for results in batch_results] - - def get_docs_by_ids( - self, ids: List[ItemID] = None, collection_name: str = None, include=True, **kwargs - ) -> List[Document]: - """ - Retrieve documents from the collection of the vector database based on the ids. - - Args: - ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None. - collection_name: str | The name of the collection. Default is None. - include: List[str] | The fields to include. Default is True. - If None, will include ["metadatas", "documents"], ids will always be included. - kwargs: dict | Additional keyword arguments. - - Returns: - List[Document] | The results. - """ - if ids is None: - results = self.client.scroll(collection_name=collection_name, with_payload=include, with_vectors=True)[0] - else: - results = self.client.retrieve(collection_name, ids=ids, with_payload=include, with_vectors=True) - return [self._point_to_document(result) for result in results] - - def _point_to_document(self, point) -> Document: - return { - "id": point.id, - "content": point.payload.get(self.content_payload_key, ""), - "metadata": point.payload.get(self.metadata_payload_key, {}), - "embedding": point.vector, - } - - def _points_to_documents(self, points) -> List[Document]: - return [self._point_to_document(point) for point in points] - - def _scored_point_to_document(self, scored_point: models.ScoredPoint) -> Tuple[Document, float]: - return self._point_to_document(scored_point), scored_point.score - - def _documents_to_points(self, documents: List[Document]): - contents = [document["content"] for document in documents] - embeddings = self.embedding_function(contents) - points = [ - models.PointStruct( - id=documents[i]["id"], - vector=embeddings[i], - payload={ - self.content_payload_key: documents[i].get("content"), - self.metadata_payload_key: documents[i].get("metadata"), - }, - ) - for i in range(len(documents)) - ] - return points - - def _scored_points_to_documents(self, scored_points: List[models.ScoredPoint]) -> List[Tuple[Document, float]]: - return [self._scored_point_to_document(scored_point) for scored_point in scored_points] - - def _validate_update_ids(self, collection_name: str, ids: List[str]) -> bool: - """ - Validates all the IDs exist in the collection - """ - retrieved_ids = [ - point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False) - ] - - if missing_ids := set(ids) - set(retrieved_ids): - logger.log(f"Missing IDs: {missing_ids}. 
Skipping update", level=logging.WARN) - return False - - return True - - def _validate_upsert_ids(self, collection_name: str, ids: List[str]) -> bool: - """ - Validate none of the IDs exist in the collection - """ - retrieved_ids = [ - point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False) - ] - - if existing_ids := set(ids) & set(retrieved_ids): - logger.log(f"Existing IDs: {existing_ids}.", level=logging.WARN) - return False - - return True diff --git a/autogen/agentchat/contrib/vectordb/utils.py b/autogen/agentchat/contrib/vectordb/utils.py deleted file mode 100644 index 7812f2186541..000000000000 --- a/autogen/agentchat/contrib/vectordb/utils.py +++ /dev/null @@ -1,120 +0,0 @@ -import logging -from typing import Any, Dict, List - -from termcolor import colored - -from .base import QueryResults - - -class ColoredLogger(logging.Logger): - def __init__(self, name, level=logging.NOTSET): - super().__init__(name, level) - - def debug(self, msg, *args, color=None, **kwargs): - super().debug(colored(msg, color), *args, **kwargs) - - def info(self, msg, *args, color=None, **kwargs): - super().info(colored(msg, color), *args, **kwargs) - - def warning(self, msg, *args, color="yellow", **kwargs): - super().warning(colored(msg, color), *args, **kwargs) - - def error(self, msg, *args, color="light_red", **kwargs): - super().error(colored(msg, color), *args, **kwargs) - - def critical(self, msg, *args, color="red", **kwargs): - super().critical(colored(msg, color), *args, **kwargs) - - def fatal(self, msg, *args, color="red", **kwargs): - super().fatal(colored(msg, color), *args, **kwargs) - - -def get_logger(name: str, level: int = logging.INFO) -> ColoredLogger: - logger = ColoredLogger(name, level) - console_handler = logging.StreamHandler() - logger.addHandler(console_handler) - formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - logger.handlers[0].setFormatter(formatter) - return logger - - -logger = get_logger(__name__) - - -def filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults: - """Filters results based on a distance threshold. - - Args: - results: QueryResults | The query results. List[List[Tuple[Document, float]]] - distance_threshold: The maximum distance allowed for results. - - Returns: - QueryResults | A filtered results containing only distances smaller than the threshold. - """ - - if distance_threshold > 0: - results = [[(key, value) for key, value in data if value < distance_threshold] for data in results] - - return results - - -def chroma_results_to_query_results(data_dict: Dict[str, List[List[Any]]], special_key="distances") -> QueryResults: - """Converts a dictionary with list-of-list values to a list of tuples. - - Args: - data_dict: A dictionary where keys map to lists of lists or None. - special_key: The key in the dictionary containing the special values - for each tuple. - - Returns: - A list of tuples, where each tuple contains a sub-dictionary with - some keys from the original dictionary and the value from the - special_key. 
- - Example: - data_dict = { - "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]], - "key3s": None, - "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]], - "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], - } - - results = [ - [ - ({"key1": 1, "key2": "a", "key4": "x"}, 0.1), - ({"key1": 2, "key2": "b", "key4": "y"}, 0.2), - ({"key1": 3, "key2": "c", "key4": "z"}, 0.3), - ], - [ - ({"key1": 4, "key2": "c", "key4": "1"}, 0.4), - ({"key1": 5, "key2": "d", "key4": "2"}, 0.5), - ({"key1": 6, "key2": "e", "key4": "3"}, 0.6), - ], - [ - ({"key1": 7, "key2": "e", "key4": "4"}, 0.7), - ({"key1": 8, "key2": "f", "key4": "5"}, 0.8), - ({"key1": 9, "key2": "g", "key4": "6"}, 0.9), - ], - ] - """ - - keys = [ - key - for key in data_dict - if key != special_key and data_dict[key] is not None and isinstance(data_dict[key][0], list) - ] - result = [] - data_special_key = data_dict[special_key] - - for i in range(len(data_special_key)): - sub_result = [] - for j, distance in enumerate(data_special_key[i]): - sub_dict = {} - for key in keys: - if len(data_dict[key]) > i: - sub_dict[key[:-1]] = data_dict[key][i][j] # remove 's' in the end from key - sub_result.append((sub_dict, distance)) - result.append(sub_result) - - return result diff --git a/autogen/agentchat/contrib/web_surfer.py b/autogen/agentchat/contrib/web_surfer.py deleted file mode 100644 index 1a2dd2e3236b..000000000000 --- a/autogen/agentchat/contrib/web_surfer.py +++ /dev/null @@ -1,370 +0,0 @@ -import copy -import logging -import re -from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union - -from typing_extensions import Annotated - -from ... import Agent, AssistantAgent, ConversableAgent, GroupChat, GroupChatManager, OpenAIWrapper, UserProxyAgent -from ...browser_utils import AbstractMarkdownBrowser, BingMarkdownSearch, RequestsMarkdownBrowser -from ...code_utils import content_str -from ...oai.openai_utils import filter_config -from ...token_count_utils import count_token, get_max_token_limit - -logger = logging.getLogger(__name__) - - -class WebSurferAgent(ConversableAgent): - """(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages.""" - - DEFAULT_PROMPT = "You are a helpful AI assistant with access to a web browser (via the provided functions). In fact, YOU ARE THE ONLY MEMBER OF YOUR PARTY WITH ACCESS TO A WEB BROWSER, so please help out where you can by performing web searches, navigating pages, and reporting what you find." - - DEFAULT_DESCRIPTION = "A helpful assistant with access to a web browser. Ask them to perform web searches, open pages, navigate to Wikipedia, download files, etc. Once on a desired page, ask them to answer questions by reading the page, generate summaries, find specific words or phrases on the page (ctrl+f), or even just scroll up or down in the viewport." 
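# --- Editor's note: illustrative sketch, not part of the original file. ----------
# Hypothetical construction of the WebSurferAgent defined above, paired with a
# UserProxyAgent driving it. The model name and API key are placeholders, and a
# browser with default settings is assumed to be sufficient.
from autogen import UserProxyAgent
from autogen.agentchat.contrib.web_surfer import WebSurferAgent
from autogen.browser_utils import BingMarkdownSearch, RequestsMarkdownBrowser

llm_config = {"config_list": [{"model": "gpt-4", "api_key": "sk-placeholder"}]}
surfer = WebSurferAgent(
    "web_surfer",
    llm_config=llm_config,
    summarizer_llm_config=llm_config,
    browser=RequestsMarkdownBrowser(search_engine=BingMarkdownSearch()),
)
user = UserProxyAgent("user_proxy", human_input_mode="NEVER", code_execution_config=False)
user.initiate_chat(surfer, message="Search the web for the latest AutoGen release notes.")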
- - def __init__( - self, - name: str, - system_message: Optional[Union[str, List[str]]] = DEFAULT_PROMPT, - description: Optional[str] = DEFAULT_DESCRIPTION, - is_termination_msg: Optional[Callable[[Dict[str, Any]], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "TERMINATE", - function_map: Optional[Dict[str, Callable]] = None, - code_execution_config: Union[Dict, Literal[False]] = False, - llm_config: Optional[Union[Dict, Literal[False]]] = None, - summarizer_llm_config: Optional[Union[Dict, Literal[False]]] = None, - default_auto_reply: Optional[Union[str, Dict, None]] = "", - browser_config: Optional[Union[Dict, None]] = None, # Deprecated - browser: Optional[Union[AbstractMarkdownBrowser, None]] = None, - **kwargs, - ): - super().__init__( - name=name, - system_message=system_message, - description=description, - is_termination_msg=is_termination_msg, - max_consecutive_auto_reply=max_consecutive_auto_reply, - human_input_mode=human_input_mode, - function_map=function_map, - code_execution_config=code_execution_config, - llm_config=llm_config, - default_auto_reply=default_auto_reply, - **kwargs, - ) - - self._create_summarizer_client(summarizer_llm_config, llm_config) - - # Create the browser - if browser_config is not None: - if browser is not None: - raise ValueError( - "WebSurferAgent cannot accept both a 'browser_config' (deprecated) parameter and 'browser' parameter at the same time. Use only one or the other." - ) - - # Print a warning - logger.warning( - "Warning: the parameter 'browser_config' in WebSurferAgent.__init__() is deprecated. Use 'browser' instead." - ) - - # Update the settings to the new format - _bconfig = {} - _bconfig.update(browser_config) - - if "bing_api_key" in _bconfig: - _bconfig["search_engine"] = BingMarkdownSearch( - bing_api_key=_bconfig["bing_api_key"], interleave_results=False - ) - del _bconfig["bing_api_key"] - else: - _bconfig["search_engine"] = BingMarkdownSearch() - - if "request_kwargs" in _bconfig: - _bconfig["requests_get_kwargs"] = _bconfig["request_kwargs"] - del _bconfig["request_kwargs"] - - self.browser = RequestsMarkdownBrowser(**_bconfig) - else: - self.browser = browser - - # Set up the inner monologue - inner_llm_config = copy.deepcopy(llm_config) - self._assistant = AssistantAgent( - self.name + "_inner_assistant", - system_message=system_message, # type: ignore[arg-type] - llm_config=inner_llm_config, - is_termination_msg=lambda m: False, - ) - - self._user_proxy = UserProxyAgent( - self.name + "_inner_user_proxy", - human_input_mode="NEVER", - code_execution_config=False, - default_auto_reply="", - is_termination_msg=lambda m: False, - ) - - if inner_llm_config not in [None, False]: - self._register_functions() - - self.register_reply([Agent, None], WebSurferAgent.generate_surfer_reply, remove_other_reply_funcs=True) - self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply) - self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply) - self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply) - - def _create_summarizer_client(self, summarizer_llm_config: Dict[str, Any], llm_config: Dict[str, Any]) -> None: - # If the summarizer_llm_config is None, we copy it from the llm_config - if summarizer_llm_config is None: - if llm_config is None: # Nothing to copy - self.summarizer_llm_config = None - elif llm_config is False: # LLMs disabled - self.summarizer_llm_config = False - 
else: # Create a suitable config - self.summarizer_llm_config = copy.deepcopy(llm_config) # type: ignore[assignment] - if "config_list" in self.summarizer_llm_config: # type: ignore[operator] - preferred_models = filter_config( # type: ignore[no-untyped-call] - self.summarizer_llm_config["config_list"], # type: ignore[index] - {"model": ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]}, - ) - if len(preferred_models) == 0: - logger.warning( - "The summarizer did not find the preferred model (gpt-3.5-turbo-16k) in the config list. " - "Semantic operations on webpages (summarization or Q&A) might be costly or ineffective." - ) - else: - self.summarizer_llm_config["config_list"] = preferred_models # type: ignore[index] - else: - self.summarizer_llm_config = summarizer_llm_config # type: ignore[assignment] - - # Create the summarizer client - self.summarization_client = ( - None if self.summarizer_llm_config is False else OpenAIWrapper(**self.summarizer_llm_config) - ) # type: ignore[arg-type] - - def _register_functions(self) -> None: - """Register the functions for the inner assistant and user proxy.""" - - # Helper functions - def _browser_state() -> Tuple[str, str]: - header = f"Address: {self.browser.address}\n" - if self.browser.page_title is not None: - header += f"Title: {self.browser.page_title}\n" - - current_page = self.browser.viewport_current_page - total_pages = len(self.browser.viewport_pages) - - header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n" - - return (header, self.browser.viewport) - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="informational_web_search", - description="Perform an INFORMATIONAL web search query then return the search results.", - ) - def _informational_search(query: Annotated[str, "The informational web search query to perform."]) -> str: - self.browser.visit_page(f"search: {query}") - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="navigational_web_search", - description="Perform a NAVIGATIONAL web search query then immediately navigate to the top result. Useful, for example, to navigate to a particular Wikipedia article or other known destination. Equivalent to Google's \"I'm Feeling Lucky\" button.", - ) - def _navigational_search(query: Annotated[str, "The navigational web search query to perform."]) -> str: - self.browser.visit_page(f"search: {query}") - - # Extract the first link - m = re.search(r"\[.*?\]\((http.*?)\)", self.browser.page_content) - if m: - self.browser.visit_page(m.group(1)) - - # Return where we ended up - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="visit_page", description="Visit a webpage at a given URL and return its text." - ) - def _visit_page(url: Annotated[str, "The relative or absolute url of the webapge to visit."]) -> str: - self.browser.visit_page(url) - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="download_file", description="Download a file at a given URL and, if possible, return its text." 
- ) - def _download_file(url: Annotated[str, "The relative or absolute url of the file to be downloaded."]) -> str: - self.browser.visit_page(url) - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="page_up", - description="Scroll the viewport UP one page-length in the current webpage and return the new viewport content.", - ) - def _page_up() -> str: - self.browser.page_up() - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="page_down", - description="Scroll the viewport DOWN one page-length in the current webpage and return the new viewport content.", - ) - def _page_down() -> str: - self.browser.page_down() - header, content = _browser_state() - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="find_on_page_ctrl_f", - description="Scroll the viewport to the first occurrence of the search string. This is equivalent to Ctrl+F.", - ) - def _find_on_page_ctrl_f( - search_string: Annotated[ - str, "The string to search for on the page. This search string supports wildcards like '*'" - ] - ) -> str: - find_result = self.browser.find_on_page(search_string) - header, content = _browser_state() - - if find_result is None: - return ( - header.strip() - + "\n=======================\nThe search string '" - + search_string - + "' was not found on this page." - ) - else: - return header.strip() + "\n=======================\n" + content - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="find_next", - description="Scroll the viewport to next occurrence of the search string.", - ) - def _find_next() -> str: - find_result = self.browser.find_next() - header, content = _browser_state() - - if find_result is None: - return header.strip() + "\n=======================\nThe search string was not found on this page." - else: - return header.strip() + "\n=======================\n" + content - - if self.summarization_client is not None: - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="read_page_and_answer", - description="Uses AI to read the page and directly answer a given question based on the content.", - ) - def _read_page_and_answer( - question: Annotated[Optional[str], "The question to directly answer."], - url: Annotated[Optional[str], "[Optional] The url of the page. (Defaults to the current page)"] = None, - ) -> str: - if url is not None and url != self.browser.address: - self.browser.visit_page(url) - - # We are likely going to need to fix this later, but summarize only as many tokens that fit in the buffer - limit = 4096 - try: - limit = get_max_token_limit(self.summarizer_llm_config["config_list"][0]["model"]) # type: ignore[index] - except ValueError: - pass # limit is unknown - except TypeError: - pass # limit is unknown - - if limit < 16000: - logger.warning( - f"The token limit ({limit}) of the WebSurferAgent.summarizer_llm_config, is below the recommended 16k." 
- ) - - buffer = "" - for line in re.split(r"([\r\n]+)", self.browser.page_content): - tokens = count_token(buffer + line) - if tokens + 1024 > limit: # Leave room for our summary - break - buffer += line - - buffer = buffer.strip() - if len(buffer) == 0: - return "Nothing to summarize." - - messages = [ - { - "role": "system", - "content": "You are a helpful assistant that can summarize long documents to answer question.", - } - ] - - prompt = f"Please summarize the following into one or two paragraph:\n\n{buffer}" - if question is not None: - prompt = f"Please summarize the following into one or two paragraphs with respect to '{question}':\n\n{buffer}" - - messages.append( - {"role": "user", "content": prompt}, - ) - - response = self.summarization_client.create(context=None, messages=messages) # type: ignore[union-attr] - extracted_response = self.summarization_client.extract_text_or_completion_object(response)[0] # type: ignore[union-attr] - return str(extracted_response) - - @self._user_proxy.register_for_execution() - @self._assistant.register_for_llm( - name="summarize_page", - description="Uses AI to summarize the content found at a given url. If the url is not provided, the current page is summarized.", - ) - def _summarize_page( - url: Annotated[ - Optional[str], "[Optional] The url of the page to summarize. (Defaults to current page)" - ] = None, - ) -> str: - return _read_page_and_answer(url=url, question=None) - - def generate_surfer_reply( - self, - messages: Optional[List[Dict[str, str]]] = None, - sender: Optional[Agent] = None, - config: Optional[OpenAIWrapper] = None, - ) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]: - """Generate a reply using autogen.oai.""" - if messages is None: - messages = self._oai_messages[sender] - - self._user_proxy.reset() # type: ignore[no-untyped-call] - self._assistant.reset() # type: ignore[no-untyped-call] - - # Clone the messages to give context - self._assistant.chat_messages[self._user_proxy] = list() - history = messages[0 : len(messages) - 1] - for message in history: - self._assistant.chat_messages[self._user_proxy].append(message) - - # Remind the agent where it is - self._user_proxy.send( - f"Your browser is currently open to the page '{self.browser.page_title}' at the address '{self.browser.address}'.", - self._assistant, - request_reply=False, - silent=True, - ) - - self._user_proxy.send(messages[-1]["content"], self._assistant, request_reply=True, silent=True) - agent_reply = self._user_proxy.chat_messages[self._assistant][-1] - # print("Agent Reply: " + str(agent_reply)) - proxy_reply = self._user_proxy.generate_reply( - messages=self._user_proxy.chat_messages[self._assistant], sender=self._assistant - ) - # print("Proxy Reply: " + str(proxy_reply)) - - if proxy_reply == "": # Was the default reply - return True, None if agent_reply is None else agent_reply["content"] - else: - return True, None if proxy_reply is None else proxy_reply["content"] # type: ignore[index] diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py deleted file mode 100644 index e19cbd56de2b..000000000000 --- a/autogen/agentchat/conversable_agent.py +++ /dev/null @@ -1,2898 +0,0 @@ -import asyncio -import copy -import functools -import inspect -import json -import logging -import re -import warnings -from collections import defaultdict -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union - -from openai import BadRequestError - -from autogen.agentchat.chat import 
_post_process_carryover_item -from autogen.exception_utils import InvalidCarryOverType, SenderRequired - -from .._pydantic import model_dump -from ..cache.cache import AbstractCache -from ..code_utils import ( - PYTHON_VARIANTS, - UNKNOWN, - check_can_use_docker_or_throw, - content_str, - decide_use_docker, - execute_code, - extract_code, - infer_lang, -) -from ..coding.base import CodeExecutor -from ..coding.factory import CodeExecutorFactory -from ..formatting_utils import colored -from ..function_utils import get_function_schema, load_basemodels_if_needed, serialize_to_str -from ..io.base import IOStream -from ..oai.client import ModelClient, OpenAIWrapper -from ..runtime_logging import log_event, log_function_use, log_new_agent, logging_enabled -from .agent import Agent, LLMAgent -from .chat import ChatResult, a_initiate_chats, initiate_chats -from .utils import consolidate_chat_info, gather_usage_summary - -__all__ = ("ConversableAgent",) - -logger = logging.getLogger(__name__) - -F = TypeVar("F", bound=Callable[..., Any]) - - -class ConversableAgent(LLMAgent): - """(In preview) A class for generic conversable agents which can be configured as assistant or user proxy. - - After receiving each message, the agent will send a reply to the sender unless the msg is a termination msg. - For example, AssistantAgent and UserProxyAgent are subclasses of this class, - configured with different default settings. - - To modify auto reply, override `generate_reply` method. - To disable/enable human response in every turn, set `human_input_mode` to "NEVER" or "ALWAYS". - To modify the way to get human input, override `get_human_input` method. - To modify the way to execute code blocks, single code block, or function call, override `execute_code_blocks`, - `run_code`, and `execute_function` methods respectively. - """ - - DEFAULT_CONFIG = False # False or dict, the default config for llm inference - MAX_CONSECUTIVE_AUTO_REPLY = 100 # maximum number of consecutive auto replies (subject to future change) - - DEFAULT_SUMMARY_PROMPT = "Summarize the takeaway from the conversation. Do not add any introductory phrases." - DEFAULT_SUMMARY_METHOD = "last_msg" - llm_config: Union[Dict, Literal[False]] - - def __init__( - self, - name: str, - system_message: Optional[Union[str, List]] = "You are a helpful AI Assistant.", - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "TERMINATE", - function_map: Optional[Dict[str, Callable]] = None, - code_execution_config: Union[Dict, Literal[False]] = False, - llm_config: Optional[Union[Dict, Literal[False]]] = None, - default_auto_reply: Union[str, Dict] = "", - description: Optional[str] = None, - chat_messages: Optional[Dict[Agent, List[Dict]]] = None, - silent: Optional[bool] = None, - ): - """ - Args: - name (str): name of the agent. - system_message (str or list): system message for the ChatCompletion inference. - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - max_consecutive_auto_reply (int): the maximum number of consecutive auto replies. - default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case). - When set to 0, no auto reply will be generated. 
- human_input_mode (str): whether to ask for human inputs every time a message is received. - Possible values are "ALWAYS", "TERMINATE", "NEVER". - (1) When "ALWAYS", the agent prompts for human input every time a message is received. - Under this mode, the conversation stops when the human input is "exit", - or when is_termination_msg is True and there is no human input. - (2) When "TERMINATE", the agent only prompts for human input only when a termination message is received or - the number of auto reply reaches the max_consecutive_auto_reply. - (3) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops - when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True. - function_map (dict[str, callable]): Mapping function names (passed to openai) to callable functions, also used for tool calls. - code_execution_config (dict or False): config for the code execution. - To disable code execution, set to False. Otherwise, set to a dictionary with the following keys: - - work_dir (Optional, str): The working directory for the code execution. - If None, a default working directory will be used. - The default working directory is the "extensions" directory under - "path_to_autogen". - - use_docker (Optional, list, str or bool): The docker image to use for code execution. - Default is True, which means the code will be executed in a docker container. A default list of images will be used. - If a list or a str of image name(s) is provided, the code will be executed in a docker container - with the first image successfully pulled. - If False, the code will be executed in the current environment. - We strongly recommend using docker for code execution. - - timeout (Optional, int): The maximum execution time in seconds. - - last_n_messages (Experimental, int or str): The number of messages to look back for code execution. - If set to 'auto', it will scan backwards through all messages arriving since the agent last spoke, which is typically the last time execution was attempted. (Default: auto) - llm_config (dict or False or None): llm inference configuration. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) - for available options. - When using OpenAI or Azure OpenAI endpoints, please specify a non-empty 'model' either in `llm_config` or in each config of 'config_list' in `llm_config`. - To disable llm-based auto reply, set to False. - When set to None, will use self.DEFAULT_CONFIG, which defaults to False. - default_auto_reply (str or dict): default auto reply when no code execution or llm-based reply is generated. - description (str): a short description of the agent. This description is used by other agents - (e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message) - chat_messages (dict or None): the previous chat messages that this agent had in the past with other agents. - Can be used to give the agent a memory by providing the chat history. This will allow the agent to - resume previous had conversations. Defaults to an empty chat history. - silent (bool or None): (Experimental) whether to print the message sent. If None, will use the value of - silent in each function. 
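# --- Editor's note: illustrative sketch, not part of the original file. ----------
# A hypothetical pair of ConversableAgent instances wired up with the parameters
# documented above; the model name, API key and working directory are placeholders.
assistant = ConversableAgent(
    "assistant",
    system_message="You are a helpful AI Assistant.",
    llm_config={"config_list": [{"model": "gpt-4", "api_key": "sk-placeholder"}]},
    human_input_mode="NEVER",
)
executor = ConversableAgent(
    "executor",
    llm_config=False,  # no LLM: replies come from code execution or the default auto reply
    code_execution_config={"work_dir": "coding", "use_docker": True, "timeout": 60},
    human_input_mode="NEVER",
    is_termination_msg=lambda msg: "TERMINATE" in (msg.get("content") or ""),
    max_consecutive_auto_reply=5,
)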
- """ - # we change code_execution_config below and we have to make sure we don't change the input - # in case of UserProxyAgent, without this we could even change the default value {} - code_execution_config = ( - code_execution_config.copy() if hasattr(code_execution_config, "copy") else code_execution_config - ) - - self._name = name - # a dictionary of conversations, default value is list - if chat_messages is None: - self._oai_messages = defaultdict(list) - else: - self._oai_messages = chat_messages - - self._oai_system_message = [{"content": system_message, "role": "system"}] - self._description = description if description is not None else system_message - self._is_termination_msg = ( - is_termination_msg - if is_termination_msg is not None - else (lambda x: content_str(x.get("content")) == "TERMINATE") - ) - self.silent = silent - # Take a copy to avoid modifying the given dict - if isinstance(llm_config, dict): - try: - llm_config = copy.deepcopy(llm_config) - except TypeError as e: - raise TypeError( - "Please implement __deepcopy__ method for each value class in llm_config to support deepcopy." - " Refer to the docs for more details: https://microsoft.github.io/autogen/docs/topics/llm_configuration#adding-http-client-in-llm_config-for-proxy" - ) from e - - self._validate_llm_config(llm_config) - - if logging_enabled(): - log_new_agent(self, locals()) - - # Initialize standalone client cache object. - self.client_cache = None - - self.human_input_mode = human_input_mode - self._max_consecutive_auto_reply = ( - max_consecutive_auto_reply if max_consecutive_auto_reply is not None else self.MAX_CONSECUTIVE_AUTO_REPLY - ) - self._consecutive_auto_reply_counter = defaultdict(int) - self._max_consecutive_auto_reply_dict = defaultdict(self.max_consecutive_auto_reply) - self._function_map = ( - {} - if function_map is None - else {name: callable for name, callable in function_map.items() if self._assert_valid_name(name)} - ) - self._default_auto_reply = default_auto_reply - self._reply_func_list = [] - self._human_input = [] - self.reply_at_receive = defaultdict(bool) - self.register_reply([Agent, None], ConversableAgent.generate_oai_reply) - self.register_reply([Agent, None], ConversableAgent.a_generate_oai_reply, ignore_async_in_sync_chat=True) - - # Setting up code execution. - # Do not register code execution reply if code execution is disabled. - if code_execution_config is not False: - # If code_execution_config is None, set it to an empty dict. - if code_execution_config is None: - warnings.warn( - "Using None to signal a default code_execution_config is deprecated. " - "Use {} to use default or False to disable code execution.", - stacklevel=2, - ) - code_execution_config = {} - if not isinstance(code_execution_config, dict): - raise ValueError("code_execution_config must be a dict or False.") - - # We have got a valid code_execution_config. - self._code_execution_config = code_execution_config - - if self._code_execution_config.get("executor") is not None: - if "use_docker" in self._code_execution_config: - raise ValueError( - "'use_docker' in code_execution_config is not valid when 'executor' is set. Use the appropriate arg in the chosen executor instead." - ) - - if "work_dir" in self._code_execution_config: - raise ValueError( - "'work_dir' in code_execution_config is not valid when 'executor' is set. Use the appropriate arg in the chosen executor instead." 
- ) - - if "timeout" in self._code_execution_config: - raise ValueError( - "'timeout' in code_execution_config is not valid when 'executor' is set. Use the appropriate arg in the chosen executor instead." - ) - - # Use the new code executor. - self._code_executor = CodeExecutorFactory.create(self._code_execution_config) - self.register_reply([Agent, None], ConversableAgent._generate_code_execution_reply_using_executor) - else: - # Legacy code execution using code_utils. - use_docker = self._code_execution_config.get("use_docker", None) - use_docker = decide_use_docker(use_docker) - check_can_use_docker_or_throw(use_docker) - self._code_execution_config["use_docker"] = use_docker - self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply) - else: - # Code execution is disabled. - self._code_execution_config = False - - self.register_reply([Agent, None], ConversableAgent.generate_tool_calls_reply) - self.register_reply([Agent, None], ConversableAgent.a_generate_tool_calls_reply, ignore_async_in_sync_chat=True) - self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply) - self.register_reply( - [Agent, None], ConversableAgent.a_generate_function_call_reply, ignore_async_in_sync_chat=True - ) - self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply) - self.register_reply( - [Agent, None], ConversableAgent.a_check_termination_and_human_reply, ignore_async_in_sync_chat=True - ) - - # Registered hooks are kept in lists, indexed by hookable method, to be called in their order of registration. - # New hookable methods should be added to this list as required to support new agent capabilities. - self.hook_lists: Dict[str, List[Callable]] = { - "process_last_received_message": [], - "process_all_messages_before_reply": [], - "process_message_before_send": [], - } - - def _validate_llm_config(self, llm_config): - assert llm_config in (None, False) or isinstance( - llm_config, dict - ), "llm_config must be a dict or False or None." - if llm_config is None: - llm_config = self.DEFAULT_CONFIG - self.llm_config = self.DEFAULT_CONFIG if llm_config is None else llm_config - # TODO: more complete validity check - if self.llm_config in [{}, {"config_list": []}, {"config_list": [{"model": ""}]}]: - raise ValueError( - "When using OpenAI or Azure OpenAI endpoints, specify a non-empty 'model' either in 'llm_config' or in each config of 'config_list'." - ) - self.client = None if self.llm_config is False else OpenAIWrapper(**self.llm_config) - - @staticmethod - def _is_silent(agent: Agent, silent: Optional[bool] = False) -> bool: - return agent.silent if agent.silent is not None else silent - - @property - def name(self) -> str: - """Get the name of the agent.""" - return self._name - - @property - def description(self) -> str: - """Get the description of the agent.""" - return self._description - - @description.setter - def description(self, description: str): - """Set the description of the agent.""" - self._description = description - - @property - def code_executor(self) -> Optional[CodeExecutor]: - """The code executor used by this agent. 
Returns None if code execution is disabled.""" - if not hasattr(self, "_code_executor"): - return None - return self._code_executor - - def register_reply( - self, - trigger: Union[Type[Agent], str, Agent, Callable[[Agent], bool], List], - reply_func: Callable, - position: int = 0, - config: Optional[Any] = None, - reset_config: Optional[Callable] = None, - *, - ignore_async_in_sync_chat: bool = False, - remove_other_reply_funcs: bool = False, - ): - """Register a reply function. - - The reply function will be called when the trigger matches the sender. - The function registered later will be checked earlier by default. - To change the order, set the position to a positive integer. - - Both sync and async reply functions can be registered. The sync reply function will be triggered - from both sync and async chats. However, an async reply function will only be triggered from async - chats (initiated with `ConversableAgent.a_initiate_chat`). If an `async` reply function is registered - and a chat is initialized with a sync function, `ignore_async_in_sync_chat` determines the behaviour as follows: - if `ignore_async_in_sync_chat` is set to `False` (default value), an exception will be raised, and - if `ignore_async_in_sync_chat` is set to `True`, the reply function will be ignored. - - Args: - trigger (Agent class, str, Agent instance, callable, or list): the trigger. - If a class is provided, the reply function will be called when the sender is an instance of the class. - If a string is provided, the reply function will be called when the sender's name matches the string. - If an agent instance is provided, the reply function will be called when the sender is the agent instance. - If a callable is provided, the reply function will be called when the callable returns True. - If a list is provided, the reply function will be called when any of the triggers in the list is activated. - If None is provided, the reply function will be called only when the sender is None. - Note: Be sure to register `None` as a trigger if you would like to trigger an auto-reply function with non-empty messages and `sender=None`. - reply_func (Callable): the reply function. - The function takes a recipient agent, a list of messages, a sender agent and a config as input and returns a reply message. - - ```python - def reply_func( - recipient: ConversableAgent, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - ``` - position (int): the position of the reply function in the reply function list. - The function registered later will be checked earlier by default. - To change the order, set the position to a positive integer. - config (Any): the config to be passed to the reply function. - When an agent is reset, the config will be reset to the original value. - reset_config (Callable): the function to reset the config. - The function returns None. Signature: ```def reset_config(config: Any)``` - ignore_async_in_sync_chat (bool): whether to ignore the async reply function in sync chats. If `False`, an exception - will be raised if an async reply function is registered and a chat is initialized with a sync - function. - remove_other_reply_funcs (bool): whether to remove other reply functions when registering this reply function. 
- """ - if not isinstance(trigger, (type, str, Agent, Callable, list)): - raise ValueError("trigger must be a class, a string, an agent, a callable or a list.") - if remove_other_reply_funcs: - self._reply_func_list.clear() - self._reply_func_list.insert( - position, - { - "trigger": trigger, - "reply_func": reply_func, - "config": copy.copy(config), - "init_config": config, - "reset_config": reset_config, - "ignore_async_in_sync_chat": ignore_async_in_sync_chat and inspect.iscoroutinefunction(reply_func), - }, - ) - - def replace_reply_func(self, old_reply_func: Callable, new_reply_func: Callable): - """Replace a registered reply function with a new one. - - Args: - old_reply_func (Callable): the old reply function to be replaced. - new_reply_func (Callable): the new reply function to replace the old one. - """ - for f in self._reply_func_list: - if f["reply_func"] == old_reply_func: - f["reply_func"] = new_reply_func - - @staticmethod - def _get_chats_to_run( - chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any - ) -> List[Dict[str, Any]]: - """A simple chat reply function. - This function initiate one or a sequence of chats between the "recipient" and the agents in the - chat_queue. - - It extracts and returns a summary from the nested chat based on the "summary_method" in each chat in chat_queue. - - Returns: - Tuple[bool, str]: A tuple where the first element indicates the completion of the chat, and the second element contains the summary of the last chat if any chats were initiated. - """ - last_msg = messages[-1].get("content") - chat_to_run = [] - for i, c in enumerate(chat_queue): - current_c = c.copy() - if current_c.get("sender") is None: - current_c["sender"] = recipient - message = current_c.get("message") - # If message is not provided in chat_queue, we by default use the last message from the original chat history as the first message in this nested chat (for the first chat in the chat queue). - # NOTE: This setting is prone to change. - if message is None and i == 0: - message = last_msg - if callable(message): - message = message(recipient, messages, sender, config) - # We only run chat that has a valid message. NOTE: This is prone to change dependin on applications. - if message: - current_c["message"] = message - chat_to_run.append(current_c) - return chat_to_run - - @staticmethod - def _summary_from_nested_chats( - chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any - ) -> Tuple[bool, Union[str, None]]: - """A simple chat reply function. - This function initiate one or a sequence of chats between the "recipient" and the agents in the - chat_queue. - - It extracts and returns a summary from the nested chat based on the "summary_method" in each chat in chat_queue. - - Returns: - Tuple[bool, str]: A tuple where the first element indicates the completion of the chat, and the second element contains the summary of the last chat if any chats were initiated. - """ - chat_to_run = ConversableAgent._get_chats_to_run(chat_queue, recipient, messages, sender, config) - if not chat_to_run: - return True, None - res = initiate_chats(chat_to_run) - return True, res[-1].summary - - @staticmethod - async def _a_summary_from_nested_chats( - chat_queue: List[Dict[str, Any]], recipient: Agent, messages: Union[str, Callable], sender: Agent, config: Any - ) -> Tuple[bool, Union[str, None]]: - """A simple chat reply function. 
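As a usage sketch for `register_reply` above: a custom synchronous reply function registered at the default position 0 is checked before the built-in replies, and returning `(True, reply)` finalizes the answer. The agent names and the canned echo reply are made up, and `llm_config=False` keeps the example offline:

```python
from typing import Any, Dict, List, Optional, Tuple, Union

from autogen import Agent, ConversableAgent


def echo_reply(
    recipient: ConversableAgent,
    messages: Optional[List[Dict]] = None,
    sender: Optional[Agent] = None,
    config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None]]:
    # (True, reply) finalizes the reply; (False, None) would fall through to the
    # next registered reply function.
    last = messages[-1]["content"] if messages else ""
    return True, f"echo: {last}"


bob = ConversableAgent("bob", llm_config=False, human_input_mode="NEVER")
alice = ConversableAgent("alice", llm_config=False, human_input_mode="NEVER")

# Functions registered later are checked earlier (position 0 by default).
bob.register_reply([Agent, None], echo_reply)

result = alice.initiate_chat(bob, message="hello", max_turns=1, summary_method="last_msg")
print(result.summary)  # expected: "echo: hello"
```

Because the function returns `final=True`, none of the later reply functions (tool calls, OAI reply, and so on) are consulted for that turn.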
- This function initiate one or a sequence of chats between the "recipient" and the agents in the - chat_queue. - - It extracts and returns a summary from the nested chat based on the "summary_method" in each chat in chat_queue. - - Returns: - Tuple[bool, str]: A tuple where the first element indicates the completion of the chat, and the second element contains the summary of the last chat if any chats were initiated. - """ - chat_to_run = ConversableAgent._get_chats_to_run(chat_queue, recipient, messages, sender, config) - if not chat_to_run: - return True, None - res = await a_initiate_chats(chat_to_run) - index_of_last_chat = chat_to_run[-1]["chat_id"] - return True, res[index_of_last_chat].summary - - def register_nested_chats( - self, - chat_queue: List[Dict[str, Any]], - trigger: Union[Type[Agent], str, Agent, Callable[[Agent], bool], List], - reply_func_from_nested_chats: Union[str, Callable] = "summary_from_nested_chats", - position: int = 2, - use_async: Union[bool, None] = None, - **kwargs, - ) -> None: - """Register a nested chat reply function. - Args: - chat_queue (list): a list of chat objects to be initiated. If use_async is used, then all messages in chat_queue must have a chat-id associated with them. - trigger (Agent class, str, Agent instance, callable, or list): refer to `register_reply` for details. - reply_func_from_nested_chats (Callable, str): the reply function for the nested chat. - The function takes a chat_queue for nested chat, recipient agent, a list of messages, a sender agent and a config as input and returns a reply message. - Default to "summary_from_nested_chats", which corresponds to a built-in reply function that get summary from the nested chat_queue. - ```python - def reply_func_from_nested_chats( - chat_queue: List[Dict], - recipient: ConversableAgent, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - ``` - position (int): Ref to `register_reply` for details. Default to 2. It means we first check the termination and human reply, then check the registered nested chat reply. - use_async: Uses a_initiate_chats internally to start nested chats. If the original chat is initiated with a_initiate_chats, you may set this to true so nested chats do not run in sync. - kwargs: Ref to `register_reply` for details. 
- """ - if use_async: - for chat in chat_queue: - if chat.get("chat_id") is None: - raise ValueError("chat_id is required for async nested chats") - - if use_async: - if reply_func_from_nested_chats == "summary_from_nested_chats": - reply_func_from_nested_chats = self._a_summary_from_nested_chats - if not callable(reply_func_from_nested_chats) or not inspect.iscoroutinefunction( - reply_func_from_nested_chats - ): - raise ValueError("reply_func_from_nested_chats must be a callable and a coroutine") - - async def wrapped_reply_func(recipient, messages=None, sender=None, config=None): - return await reply_func_from_nested_chats(chat_queue, recipient, messages, sender, config) - - else: - if reply_func_from_nested_chats == "summary_from_nested_chats": - reply_func_from_nested_chats = self._summary_from_nested_chats - if not callable(reply_func_from_nested_chats): - raise ValueError("reply_func_from_nested_chats must be a callable") - - def wrapped_reply_func(recipient, messages=None, sender=None, config=None): - return reply_func_from_nested_chats(chat_queue, recipient, messages, sender, config) - - functools.update_wrapper(wrapped_reply_func, reply_func_from_nested_chats) - - self.register_reply( - trigger, - wrapped_reply_func, - position, - kwargs.get("config"), - kwargs.get("reset_config"), - ignore_async_in_sync_chat=( - not use_async if use_async is not None else kwargs.get("ignore_async_in_sync_chat") - ), - ) - - @property - def system_message(self) -> str: - """Return the system message.""" - return self._oai_system_message[0]["content"] - - def update_system_message(self, system_message: str) -> None: - """Update the system message. - - Args: - system_message (str): system message for the ChatCompletion inference. - """ - self._oai_system_message[0]["content"] = system_message - - def update_max_consecutive_auto_reply(self, value: int, sender: Optional[Agent] = None): - """Update the maximum number of consecutive auto replies. - - Args: - value (int): the maximum number of consecutive auto replies. - sender (Agent): when the sender is provided, only update the max_consecutive_auto_reply for that sender. - """ - if sender is None: - self._max_consecutive_auto_reply = value - for k in self._max_consecutive_auto_reply_dict: - self._max_consecutive_auto_reply_dict[k] = value - else: - self._max_consecutive_auto_reply_dict[sender] = value - - def max_consecutive_auto_reply(self, sender: Optional[Agent] = None) -> int: - """The maximum number of consecutive auto replies.""" - return self._max_consecutive_auto_reply if sender is None else self._max_consecutive_auto_reply_dict[sender] - - @property - def chat_messages(self) -> Dict[Agent, List[Dict]]: - """A dictionary of conversations from agent to list of messages.""" - return self._oai_messages - - def chat_messages_for_summary(self, agent: Agent) -> List[Dict]: - """A list of messages as a conversation to summarize.""" - return self._oai_messages[agent] - - def last_message(self, agent: Optional[Agent] = None) -> Optional[Dict]: - """The last message exchanged with the agent. - - Args: - agent (Agent): The agent in the conversation. - If None and more than one agent's conversations are found, an error will be raised. - If None and only one conversation is found, the last message of the only conversation will be returned. - - Returns: - The last message exchanged with the agent. 
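To make `register_nested_chats` above concrete, here is a rough offline sketch: whenever the writer is triggered by the user, it first runs a one-turn nested chat with a critic and returns that chat's summary as its own reply. All names, canned replies, and the use of `llm_config=False` are illustrative:

```python
from autogen import ConversableAgent

user = ConversableAgent("user", llm_config=False, human_input_mode="NEVER",
                        default_auto_reply="Looks good, thanks!")
writer = ConversableAgent("writer", llm_config=False, human_input_mode="NEVER",
                          default_auto_reply="Here is a draft blog post.")
critic = ConversableAgent("critic", llm_config=False, human_input_mode="NEVER",
                          default_auto_reply="Please shorten the introduction.")

# Whenever `writer` is triggered by `user`, first run a one-turn nested chat with
# `critic`; that chat's "last_msg" summary becomes writer's reply to user.
writer.register_nested_chats(
    chat_queue=[{"recipient": critic, "max_turns": 1, "summary_method": "last_msg"}],
    trigger=user,
)

result = user.initiate_chat(writer, message="Write a blog post about agents.", max_turns=2)
print(result.summary)  # expected: the critic's last remark
```

With the default `position=2`, the nested chat is only tried after the termination/human-reply check has passed.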
- """ - if agent is None: - n_conversations = len(self._oai_messages) - if n_conversations == 0: - return None - if n_conversations == 1: - for conversation in self._oai_messages.values(): - return conversation[-1] - raise ValueError("More than one conversation is found. Please specify the sender to get the last message.") - if agent not in self._oai_messages.keys(): - raise KeyError( - f"The agent '{agent.name}' is not present in any conversation. No history available for this agent." - ) - return self._oai_messages[agent][-1] - - @property - def use_docker(self) -> Union[bool, str, None]: - """Bool value of whether to use docker to execute the code, - or str value of the docker image name to use, or None when code execution is disabled. - """ - return None if self._code_execution_config is False else self._code_execution_config.get("use_docker") - - @staticmethod - def _message_to_dict(message: Union[Dict, str]) -> Dict: - """Convert a message to a dictionary. - - The message can be a string or a dictionary. The string will be put in the "content" field of the new dictionary. - """ - if isinstance(message, str): - return {"content": message} - elif isinstance(message, dict): - return message - else: - return dict(message) - - @staticmethod - def _normalize_name(name): - """ - LLMs sometimes ask functions while ignoring their own format requirements, this function should be used to replace invalid characters with "_". - - Prefer _assert_valid_name for validating user configuration or input - """ - return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64] - - @staticmethod - def _assert_valid_name(name): - """ - Ensure that configured names are valid, raises ValueError if not. - - For munging LLM responses use _normalize_name to ensure LLM specified names don't break the API. - """ - if not re.match(r"^[a-zA-Z0-9_-]+$", name): - raise ValueError(f"Invalid name: {name}. Only letters, numbers, '_' and '-' are allowed.") - if len(name) > 64: - raise ValueError(f"Invalid name: {name}. Name must be less than 64 characters.") - return name - - def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: Agent, is_sending: bool) -> bool: - """Append a message to the ChatCompletion conversation. - - If the message received is a string, it will be put in the "content" field of the new dictionary. - If the message received is a dictionary but does not have any of the three fields "content", "function_call", or "tool_calls", - this message is not a valid ChatCompletion message. - If only "function_call" or "tool_calls" is provided, "content" will be set to None if not provided, and the role of the message will be forced "assistant". - - Args: - message (dict or str): message to be appended to the ChatCompletion conversation. - role (str): role of the message, can be "assistant" or "function". - conversation_id (Agent): id of the conversation, should be the recipient or sender. - is_sending (bool): If the agent (aka self) is sending to the conversation_id agent, otherwise receiving. - - Returns: - bool: whether the message is appended to the ChatCompletion conversation. - """ - message = self._message_to_dict(message) - # create oai message to be appended to the oai conversation that can be passed to oai directly. 
- oai_message = { - k: message[k] - for k in ("content", "function_call", "tool_calls", "tool_responses", "tool_call_id", "name", "context") - if k in message and message[k] is not None - } - if "content" not in oai_message: - if "function_call" in oai_message or "tool_calls" in oai_message: - oai_message["content"] = None # if only function_call is provided, content will be set to None. - else: - return False - - if message.get("role") in ["function", "tool"]: - oai_message["role"] = message.get("role") - elif "override_role" in message: - # If we have a direction to override the role then set the - # role accordingly. Used to customise the role for the - # select speaker prompt. - oai_message["role"] = message.get("override_role") - else: - oai_message["role"] = role - - if oai_message.get("function_call", False) or oai_message.get("tool_calls", False): - oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call. - elif "name" not in oai_message: - # If we don't have a name field, append it - if is_sending: - oai_message["name"] = self.name - else: - oai_message["name"] = conversation_id.name - - self._oai_messages[conversation_id].append(oai_message) - - return True - - def _process_message_before_send( - self, message: Union[Dict, str], recipient: Agent, silent: bool - ) -> Union[Dict, str]: - """Process the message before sending it to the recipient.""" - hook_list = self.hook_lists["process_message_before_send"] - for hook in hook_list: - message = hook( - sender=self, message=message, recipient=recipient, silent=ConversableAgent._is_silent(self, silent) - ) - return message - - def send( - self, - message: Union[Dict, str], - recipient: Agent, - request_reply: Optional[bool] = None, - silent: Optional[bool] = False, - ): - """Send a message to another agent. - - Args: - message (dict or str): message to be sent. - The message could contain the following fields: - - content (str or List): Required, the content of the message. (Can be None) - - function_call (str): the name of the function to be called. - - name (str): the name of the function to be called. - - role (str): the role of the message, any role that is not "function" - will be modified to "assistant". - - context (dict): the context of the message, which will be passed to - [OpenAIWrapper.create](../oai/client#create). - For example, one agent can send a message A as: - ```python - { - "content": lambda context: context["use_tool_msg"], - "context": { - "use_tool_msg": "Use tool X if they are relevant." - } - } - ``` - Next time, one agent can send a message B with a different "use_tool_msg". - Then the content of message A will be refreshed to the new "use_tool_msg". - So effectively, this provides a way for an agent to send a "link" and modify - the content of the "link" later. - recipient (Agent): the recipient of the message. - request_reply (bool or None): whether to request a reply from the recipient. - silent (bool or None): (Experimental) whether to print the message sent. - - Raises: - ValueError: if the message can't be converted into a valid ChatCompletion message. - """ - message = self._process_message_before_send(message, recipient, ConversableAgent._is_silent(self, silent)) - # When the agent composes and sends the message, the role of the message is "assistant" - # unless it's "function". 
- valid = self._append_oai_message(message, "assistant", recipient, is_sending=True) - if valid: - recipient.receive(message, self, request_reply, silent) - else: - raise ValueError( - "Message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided." - ) - - async def a_send( - self, - message: Union[Dict, str], - recipient: Agent, - request_reply: Optional[bool] = None, - silent: Optional[bool] = False, - ): - """(async) Send a message to another agent. - - Args: - message (dict or str): message to be sent. - The message could contain the following fields: - - content (str or List): Required, the content of the message. (Can be None) - - function_call (str): the name of the function to be called. - - name (str): the name of the function to be called. - - role (str): the role of the message, any role that is not "function" - will be modified to "assistant". - - context (dict): the context of the message, which will be passed to - [OpenAIWrapper.create](../oai/client#create). - For example, one agent can send a message A as: - ```python - { - "content": lambda context: context["use_tool_msg"], - "context": { - "use_tool_msg": "Use tool X if they are relevant." - } - } - ``` - Next time, one agent can send a message B with a different "use_tool_msg". - Then the content of message A will be refreshed to the new "use_tool_msg". - So effectively, this provides a way for an agent to send a "link" and modify - the content of the "link" later. - recipient (Agent): the recipient of the message. - request_reply (bool or None): whether to request a reply from the recipient. - silent (bool or None): (Experimental) whether to print the message sent. - - Raises: - ValueError: if the message can't be converted into a valid ChatCompletion message. - """ - message = self._process_message_before_send(message, recipient, ConversableAgent._is_silent(self, silent)) - # When the agent composes and sends the message, the role of the message is "assistant" - # unless it's "function". - valid = self._append_oai_message(message, "assistant", recipient, is_sending=True) - if valid: - await recipient.a_receive(message, self, request_reply, silent) - else: - raise ValueError( - "Message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided." 
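The callable-content plus `context` pattern documented for `send` above can be exercised by delivering a message without requesting a reply and inspecting what was stored. The agent names and the tool hint are invented:

```python
from autogen import ConversableAgent

planner = ConversableAgent("planner", llm_config=False, human_input_mode="NEVER")
assistant = ConversableAgent("assistant", llm_config=False, human_input_mode="NEVER")

# Content can be a callable of the context, so a later message can "refresh" it
# by supplying a new context (the "link" pattern described above).
planner.send(
    {
        "content": lambda context: context["use_tool_msg"],
        "context": {"use_tool_msg": "Use tool X if it is relevant."},
    },
    assistant,
    request_reply=False,  # just deliver the message, no auto-reply
)

stored = assistant.last_message(planner)
print(stored["context"])            # {'use_tool_msg': 'Use tool X if it is relevant.'}
print(callable(stored["content"]))  # True: the stored content stays a callable
```

The callable is only instantiated (via `OpenAIWrapper.instantiate`) when the message is printed or passed to the LLM, which is what lets a later context refresh the "link".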
- ) - - def _print_received_message(self, message: Union[Dict, str], sender: Agent): - iostream = IOStream.get_default() - # print the message received - iostream.print(colored(sender.name, "yellow"), "(to", f"{self.name}):\n", flush=True) - message = self._message_to_dict(message) - - if message.get("tool_responses"): # Handle tool multi-call responses - for tool_response in message["tool_responses"]: - self._print_received_message(tool_response, sender) - if message.get("role") == "tool": - return # If role is tool, then content is just a concatenation of all tool_responses - - if message.get("role") in ["function", "tool"]: - if message["role"] == "function": - id_key = "name" - else: - id_key = "tool_call_id" - id = message.get(id_key, "No id found") - func_print = f"***** Response from calling {message['role']} ({id}) *****" - iostream.print(colored(func_print, "green"), flush=True) - iostream.print(message["content"], flush=True) - iostream.print(colored("*" * len(func_print), "green"), flush=True) - else: - content = message.get("content") - if content is not None: - if "context" in message: - content = OpenAIWrapper.instantiate( - content, - message["context"], - self.llm_config and self.llm_config.get("allow_format_str_template", False), - ) - iostream.print(content_str(content), flush=True) - if "function_call" in message and message["function_call"]: - function_call = dict(message["function_call"]) - func_print = ( - f"***** Suggested function call: {function_call.get('name', '(No function name found)')} *****" - ) - iostream.print(colored(func_print, "green"), flush=True) - iostream.print( - "Arguments: \n", - function_call.get("arguments", "(No arguments found)"), - flush=True, - sep="", - ) - iostream.print(colored("*" * len(func_print), "green"), flush=True) - if "tool_calls" in message and message["tool_calls"]: - for tool_call in message["tool_calls"]: - id = tool_call.get("id", "No tool call id found") - function_call = dict(tool_call.get("function", {})) - func_print = f"***** Suggested tool call ({id}): {function_call.get('name', '(No function name found)')} *****" - iostream.print(colored(func_print, "green"), flush=True) - iostream.print( - "Arguments: \n", - function_call.get("arguments", "(No arguments found)"), - flush=True, - sep="", - ) - iostream.print(colored("*" * len(func_print), "green"), flush=True) - - iostream.print("\n", "-" * 80, flush=True, sep="") - - def _process_received_message(self, message: Union[Dict, str], sender: Agent, silent: bool): - # When the agent receives a message, the role of the message is "user". (If 'role' exists and is 'function', it will remain unchanged.) - valid = self._append_oai_message(message, "user", sender, is_sending=False) - if logging_enabled(): - log_event(self, "received_message", message=message, sender=sender.name, valid=valid) - - if not valid: - raise ValueError( - "Received message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided." - ) - - if not ConversableAgent._is_silent(sender, silent): - self._print_received_message(message, sender) - - def receive( - self, - message: Union[Dict, str], - sender: Agent, - request_reply: Optional[bool] = None, - silent: Optional[bool] = False, - ): - """Receive a message from another agent. - - Once a message is received, this function sends a reply to the sender or stop. - The reply can be generated automatically or entered manually by a human. - - Args: - message (dict or str): message from the sender. 
If the type is dict, it may contain the following reserved fields (either content or function_call need to be provided). - 1. "content": content of the message, can be None. - 2. "function_call": a dictionary containing the function name and arguments. (deprecated in favor of "tool_calls") - 3. "tool_calls": a list of dictionaries containing the function name and arguments. - 4. "role": role of the message, can be "assistant", "user", "function", "tool". - This field is only needed to distinguish between "function" or "assistant"/"user". - 5. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name. - 6. "context" (dict): the context of the message, which will be passed to - [OpenAIWrapper.create](../oai/client#create). - sender: sender of an Agent instance. - request_reply (bool or None): whether a reply is requested from the sender. - If None, the value is determined by `self.reply_at_receive[sender]`. - silent (bool or None): (Experimental) whether to print the message received. - - Raises: - ValueError: if the message can't be converted into a valid ChatCompletion message. - """ - self._process_received_message(message, sender, silent) - if request_reply is False or request_reply is None and self.reply_at_receive[sender] is False: - return - reply = self.generate_reply(messages=self.chat_messages[sender], sender=sender) - if reply is not None: - self.send(reply, sender, silent=silent) - - async def a_receive( - self, - message: Union[Dict, str], - sender: Agent, - request_reply: Optional[bool] = None, - silent: Optional[bool] = False, - ): - """(async) Receive a message from another agent. - - Once a message is received, this function sends a reply to the sender or stop. - The reply can be generated automatically or entered manually by a human. - - Args: - message (dict or str): message from the sender. If the type is dict, it may contain the following reserved fields (either content or function_call need to be provided). - 1. "content": content of the message, can be None. - 2. "function_call": a dictionary containing the function name and arguments. (deprecated in favor of "tool_calls") - 3. "tool_calls": a list of dictionaries containing the function name and arguments. - 4. "role": role of the message, can be "assistant", "user", "function". - This field is only needed to distinguish between "function" or "assistant"/"user". - 5. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name. - 6. "context" (dict): the context of the message, which will be passed to - [OpenAIWrapper.create](../oai/client#create). - sender: sender of an Agent instance. - request_reply (bool or None): whether a reply is requested from the sender. - If None, the value is determined by `self.reply_at_receive[sender]`. - silent (bool or None): (Experimental) whether to print the message received. - - Raises: - ValueError: if the message can't be converted into a valid ChatCompletion message. 
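Because `receive` only auto-replies when `request_reply` is true (or `reply_at_receive` is set for that sender), an exchange can also be driven by hand with `generate_reply`. A small sketch with invented names and a canned `default_auto_reply`:

```python
from autogen import ConversableAgent

student = ConversableAgent("student", llm_config=False, human_input_mode="NEVER")
teacher = ConversableAgent(
    "teacher", llm_config=False, human_input_mode="NEVER",
    default_auto_reply="Try breaking the problem into smaller steps.",
)

# Deliver a message without triggering an automatic reply.
teacher.receive("How should I start this proof?", student, request_reply=False)

# Later, explicitly ask the teacher to produce a reply from the stored history.
reply = teacher.generate_reply(messages=teacher.chat_messages[student], sender=student)
print(reply)  # expected: the default_auto_reply, since no LLM client is configured
teacher.send(reply, student, request_reply=False)
```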
- """ - self._process_received_message(message, sender, silent) - if request_reply is False or request_reply is None and self.reply_at_receive[sender] is False: - return - reply = await self.a_generate_reply(sender=sender) - if reply is not None: - await self.a_send(reply, sender, silent=silent) - - def _prepare_chat( - self, - recipient: "ConversableAgent", - clear_history: bool, - prepare_recipient: bool = True, - reply_at_receive: bool = True, - ) -> None: - self.reset_consecutive_auto_reply_counter(recipient) - self.reply_at_receive[recipient] = reply_at_receive - if clear_history: - self.clear_history(recipient) - self._human_input = [] - if prepare_recipient: - recipient._prepare_chat(self, clear_history, False, reply_at_receive) - - def _raise_exception_on_async_reply_functions(self) -> None: - """Raise an exception if any async reply functions are registered. - - Raises: - RuntimeError: if any async reply functions are registered. - """ - reply_functions = { - f["reply_func"] for f in self._reply_func_list if not f.get("ignore_async_in_sync_chat", False) - } - - async_reply_functions = [f for f in reply_functions if inspect.iscoroutinefunction(f)] - if async_reply_functions: - msg = ( - "Async reply functions can only be used with ConversableAgent.a_initiate_chat(). The following async reply functions are found: " - + ", ".join([f.__name__ for f in async_reply_functions]) - ) - - raise RuntimeError(msg) - - def initiate_chat( - self, - recipient: "ConversableAgent", - clear_history: bool = True, - silent: Optional[bool] = False, - cache: Optional[AbstractCache] = None, - max_turns: Optional[int] = None, - summary_method: Optional[Union[str, Callable]] = DEFAULT_SUMMARY_METHOD, - summary_args: Optional[dict] = {}, - message: Optional[Union[Dict, str, Callable]] = None, - **kwargs, - ) -> ChatResult: - """Initiate a chat with the recipient agent. - - Reset the consecutive auto reply counter. - If `clear_history` is True, the chat history with the recipient agent will be cleared. - - - Args: - recipient: the recipient agent. - clear_history (bool): whether to clear the chat history with the agent. Default is True. - silent (bool or None): (Experimental) whether to print the messages for this conversation. Default is False. - cache (AbstractCache or None): the cache client to be used for this conversation. Default is None. - max_turns (int or None): the maximum number of turns for the chat between the two agents. One turn means one conversation round trip. Note that this is different from - [max_consecutive_auto_reply](#max_consecutive_auto_reply) which is the maximum number of consecutive auto replies; and it is also different from [max_rounds in GroupChat](./groupchat#groupchat-objects) which is the maximum number of rounds in a group chat session. - If max_turns is set to None, the chat will continue until a termination condition is met. Default is None. - summary_method (str or callable): a method to get a summary from the chat. Default is DEFAULT_SUMMARY_METHOD, i.e., "last_msg". - - Supported strings are "last_msg" and "reflection_with_llm": - - when set to "last_msg", it returns the last message of the dialog as the summary. - - when set to "reflection_with_llm", it returns a summary extracted using an llm client. - `llm_config` must be set in either the recipient or sender. - - A callable summary_method should take the recipient and sender agent in a chat as input and return a string of summary. 
E.g., - - ```python - def my_summary_method( - sender: ConversableAgent, - recipient: ConversableAgent, - summary_args: dict, - ): - return recipient.last_message(sender)["content"] - ``` - summary_args (dict): a dictionary of arguments to be passed to the summary_method. - One example key is "summary_prompt", and value is a string of text used to prompt a LLM-based agent (the sender or receiver agent) to reflect - on the conversation and extract a summary when summary_method is "reflection_with_llm". - The default summary_prompt is DEFAULT_SUMMARY_PROMPT, i.e., "Summarize takeaway from the conversation. Do not add any introductory phrases. If the intended request is NOT properly addressed, please point it out." - Another available key is "summary_role", which is the role of the message sent to the agent in charge of summarizing. Default is "system". - message (str, dict or Callable): the initial message to be sent to the recipient. Needs to be provided. Otherwise, input() will be called to get the initial message. - - If a string or a dict is provided, it will be used as the initial message. `generate_init_message` is called to generate the initial message for the agent based on this string and the context. - If dict, it may contain the following reserved fields (either content or tool_calls need to be provided). - - 1. "content": content of the message, can be None. - 2. "function_call": a dictionary containing the function name and arguments. (deprecated in favor of "tool_calls") - 3. "tool_calls": a list of dictionaries containing the function name and arguments. - 4. "role": role of the message, can be "assistant", "user", "function". - This field is only needed to distinguish between "function" or "assistant"/"user". - 5. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name. - 6. "context" (dict): the context of the message, which will be passed to - [OpenAIWrapper.create](../oai/client#create). - - - If a callable is provided, it will be called to get the initial message in the form of a string or a dict. - If the returned type is dict, it may contain the reserved fields mentioned above. - - Example of a callable message (returning a string): - - ```python - def my_message(sender: ConversableAgent, recipient: ConversableAgent, context: dict) -> Union[str, Dict]: - carryover = context.get("carryover", "") - if isinstance(message, list): - carryover = carryover[-1] - final_msg = "Write a blogpost." + "\\nContext: \\n" + carryover - return final_msg - ``` - - Example of a callable message (returning a dict): - - ```python - def my_message(sender: ConversableAgent, recipient: ConversableAgent, context: dict) -> Union[str, Dict]: - final_msg = {} - carryover = context.get("carryover", "") - if isinstance(message, list): - carryover = carryover[-1] - final_msg["content"] = "Write a blogpost." + "\\nContext: \\n" + carryover - final_msg["context"] = {"prefix": "Today I feel"} - return final_msg - ``` - **kwargs: any additional information. It has the following reserved fields: - - "carryover": a string or a list of string to specify the carryover information to be passed to this chat. - If provided, we will combine this carryover (by attaching a "context: " string and the carryover content after the message content) with the "message" content when generating the initial chat - message in `generate_init_message`. - - "verbose": a boolean to specify whether to print the message and carryover in a chat. 
Default is False. - - Raises: - RuntimeError: if any async reply functions are registered and not ignored in sync chat. - - Returns: - ChatResult: an ChatResult object. - """ - _chat_info = locals().copy() - _chat_info["sender"] = self - consolidate_chat_info(_chat_info, uniform_sender=self) - for agent in [self, recipient]: - agent._raise_exception_on_async_reply_functions() - agent.previous_cache = agent.client_cache - agent.client_cache = cache - if isinstance(max_turns, int): - self._prepare_chat(recipient, clear_history, reply_at_receive=False) - for _ in range(max_turns): - if _ == 0: - if isinstance(message, Callable): - msg2send = message(_chat_info["sender"], _chat_info["recipient"], kwargs) - else: - msg2send = self.generate_init_message(message, **kwargs) - else: - msg2send = self.generate_reply(messages=self.chat_messages[recipient], sender=recipient) - if msg2send is None: - break - self.send(msg2send, recipient, request_reply=True, silent=silent) - else: - self._prepare_chat(recipient, clear_history) - if isinstance(message, Callable): - msg2send = message(_chat_info["sender"], _chat_info["recipient"], kwargs) - else: - msg2send = self.generate_init_message(message, **kwargs) - self.send(msg2send, recipient, silent=silent) - summary = self._summarize_chat( - summary_method, - summary_args, - recipient, - cache=cache, - ) - for agent in [self, recipient]: - agent.client_cache = agent.previous_cache - agent.previous_cache = None - chat_result = ChatResult( - chat_history=self.chat_messages[recipient], - summary=summary, - cost=gather_usage_summary([self, recipient]), - human_input=self._human_input, - ) - return chat_result - - async def a_initiate_chat( - self, - recipient: "ConversableAgent", - clear_history: bool = True, - silent: Optional[bool] = False, - cache: Optional[AbstractCache] = None, - max_turns: Optional[int] = None, - summary_method: Optional[Union[str, Callable]] = DEFAULT_SUMMARY_METHOD, - summary_args: Optional[dict] = {}, - message: Optional[Union[str, Callable]] = None, - **kwargs, - ) -> ChatResult: - """(async) Initiate a chat with the recipient agent. - - Reset the consecutive auto reply counter. - If `clear_history` is True, the chat history with the recipient agent will be cleared. - `a_generate_init_message` is called to generate the initial message for the agent. - - Args: Please refer to `initiate_chat`. - - Returns: - ChatResult: an ChatResult object. 
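A hedged end-to-end sketch of `initiate_chat` above, combining a custom callable `summary_method`, `summary_args`, the `carryover` kwarg, and the fields of the returned `ChatResult`. Agent names and replies are invented and `llm_config=False` keeps it offline; with a real LLM config, `summary_method="reflection_with_llm"` would be the typical choice:

```python
from autogen import ConversableAgent


def tagged_summary(sender: ConversableAgent, recipient: ConversableAgent, summary_args: dict) -> str:
    # Custom summary: prefix the recipient's last message with a tag from summary_args.
    return f"{summary_args.get('prefix', 'summary')}: {recipient.last_message(sender)['content']}"


analyst = ConversableAgent("analyst", llm_config=False, human_input_mode="NEVER",
                           default_auto_reply="Revenue grew 12% quarter over quarter.")
reviewer = ConversableAgent("reviewer", llm_config=False, human_input_mode="NEVER",
                            default_auto_reply="Noted, please add a chart.")

chat_result = reviewer.initiate_chat(
    analyst,
    message="Summarize the Q3 numbers.",
    max_turns=2,                    # two round trips; independent of max_consecutive_auto_reply
    summary_method=tagged_summary,  # or "last_msg" / "reflection_with_llm"
    summary_args={"prefix": "chat summary"},
    carryover="Earlier chat: focus on revenue.",  # appended to the initial message as context
)

print(chat_result.summary)
print(len(chat_result.chat_history), "messages exchanged")
print(chat_result.cost)  # usage summary gathered from both agents
```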
- """ - _chat_info = locals().copy() - _chat_info["sender"] = self - consolidate_chat_info(_chat_info, uniform_sender=self) - for agent in [self, recipient]: - agent.previous_cache = agent.client_cache - agent.client_cache = cache - if isinstance(max_turns, int): - self._prepare_chat(recipient, clear_history, reply_at_receive=False) - for _ in range(max_turns): - if _ == 0: - if isinstance(message, Callable): - msg2send = message(_chat_info["sender"], _chat_info["recipient"], kwargs) - else: - msg2send = await self.a_generate_init_message(message, **kwargs) - else: - msg2send = await self.a_generate_reply(messages=self.chat_messages[recipient], sender=recipient) - if msg2send is None: - break - await self.a_send(msg2send, recipient, request_reply=True, silent=silent) - else: - self._prepare_chat(recipient, clear_history) - if isinstance(message, Callable): - msg2send = message(_chat_info["sender"], _chat_info["recipient"], kwargs) - else: - msg2send = await self.a_generate_init_message(message, **kwargs) - await self.a_send(msg2send, recipient, silent=silent) - summary = self._summarize_chat( - summary_method, - summary_args, - recipient, - cache=cache, - ) - for agent in [self, recipient]: - agent.client_cache = agent.previous_cache - agent.previous_cache = None - chat_result = ChatResult( - chat_history=self.chat_messages[recipient], - summary=summary, - cost=gather_usage_summary([self, recipient]), - human_input=self._human_input, - ) - return chat_result - - def _summarize_chat( - self, - summary_method, - summary_args, - recipient: Optional[Agent] = None, - cache: Optional[AbstractCache] = None, - ) -> str: - """Get a chat summary from an agent participating in a chat. - - Args: - summary_method (str or callable): the summary_method to get the summary. - The callable summary_method should take the recipient and sender agent in a chat as input and return a string of summary. E.g, - ```python - def my_summary_method( - sender: ConversableAgent, - recipient: ConversableAgent, - summary_args: dict, - ): - return recipient.last_message(sender)["content"] - ``` - summary_args (dict): a dictionary of arguments to be passed to the summary_method. - recipient: the recipient agent in a chat. - prompt (str): the prompt used to get a summary when summary_method is "reflection_with_llm". - - Returns: - str: a chat summary from the agent. - """ - summary = "" - if summary_method is None: - return summary - if "cache" not in summary_args: - summary_args["cache"] = cache - if summary_method == "reflection_with_llm": - summary_method = self._reflection_with_llm_as_summary - elif summary_method == "last_msg": - summary_method = self._last_msg_as_summary - - if isinstance(summary_method, Callable): - summary = summary_method(self, recipient, summary_args) - else: - raise ValueError( - "If not None, the summary_method must be a string from [`reflection_with_llm`, `last_msg`] or a callable." - ) - return summary - - @staticmethod - def _last_msg_as_summary(sender, recipient, summary_args) -> str: - """Get a chat summary from the last message of the recipient.""" - summary = "" - try: - content = recipient.last_message(sender)["content"] - if isinstance(content, str): - summary = content.replace("TERMINATE", "") - elif isinstance(content, list): - # Remove the `TERMINATE` word in the content list. 
- summary = "\n".join( - x["text"].replace("TERMINATE", "") for x in content if isinstance(x, dict) and "text" in x - ) - except (IndexError, AttributeError) as e: - warnings.warn(f"Cannot extract summary using last_msg: {e}. Using an empty str as summary.", UserWarning) - return summary - - @staticmethod - def _reflection_with_llm_as_summary(sender, recipient, summary_args): - prompt = summary_args.get("summary_prompt") - prompt = ConversableAgent.DEFAULT_SUMMARY_PROMPT if prompt is None else prompt - if not isinstance(prompt, str): - raise ValueError("The summary_prompt must be a string.") - msg_list = recipient.chat_messages_for_summary(sender) - agent = sender if recipient is None else recipient - role = summary_args.get("summary_role", None) - if role and not isinstance(role, str): - raise ValueError("The summary_role in summary_arg must be a string.") - try: - summary = sender._reflection_with_llm( - prompt, msg_list, llm_agent=agent, cache=summary_args.get("cache"), role=role - ) - except BadRequestError as e: - warnings.warn( - f"Cannot extract summary using reflection_with_llm: {e}. Using an empty str as summary.", UserWarning - ) - summary = "" - return summary - - def _reflection_with_llm( - self, - prompt, - messages, - llm_agent: Optional[Agent] = None, - cache: Optional[AbstractCache] = None, - role: Union[str, None] = None, - ) -> str: - """Get a chat summary using reflection with an llm client based on the conversation history. - - Args: - prompt (str): The prompt (in this method it is used as system prompt) used to get the summary. - messages (list): The messages generated as part of a chat conversation. - llm_agent: the agent with an llm client. - cache (AbstractCache or None): the cache client to be used for this conversation. - role (str): the role of the message, usually "system" or "user". Default is "system". - """ - if not role: - role = "system" - - system_msg = [ - { - "role": role, - "content": prompt, - } - ] - - messages = messages + system_msg - if llm_agent and llm_agent.client is not None: - llm_client = llm_agent.client - elif self.client is not None: - llm_client = self.client - else: - raise ValueError("No OpenAIWrapper client is found.") - response = self._generate_oai_reply_from_client(llm_client=llm_client, messages=messages, cache=cache) - return response - - def _check_chat_queue_for_sender(self, chat_queue: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Check the chat queue and add the "sender" key if it's missing. - - Args: - chat_queue (List[Dict[str, Any]]): A list of dictionaries containing chat information. - - Returns: - List[Dict[str, Any]]: A new list of dictionaries with the "sender" key added if it was missing. - """ - chat_queue_with_sender = [] - for chat_info in chat_queue: - if chat_info.get("sender") is None: - chat_info["sender"] = self - chat_queue_with_sender.append(chat_info) - return chat_queue_with_sender - - def initiate_chats(self, chat_queue: List[Dict[str, Any]]) -> List[ChatResult]: - """(Experimental) Initiate chats with multiple agents. - - Args: - chat_queue (List[Dict]): a list of dictionaries containing the information of the chats. - Each dictionary should contain the input arguments for [`initiate_chat`](conversable_agent#initiate_chat) - - Returns: a list of ChatResult objects corresponding to the finished chats in the chat_queue. 
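`initiate_chats` above runs a queue of `initiate_chat` calls, filling in the sender and carrying each finished chat's summary forward. A rough offline sketch (all agents and messages are invented):

```python
from autogen import ConversableAgent

coordinator = ConversableAgent("coordinator", llm_config=False, human_input_mode="NEVER",
                               default_auto_reply="Understood.")
researcher = ConversableAgent("researcher", llm_config=False, human_input_mode="NEVER",
                              default_auto_reply="Key finding: usage doubled in March.")
writer = ConversableAgent("writer", llm_config=False, human_input_mode="NEVER",
                          default_auto_reply="Draft ready: usage doubled in March.")

results = coordinator.initiate_chats(
    [
        {   # "sender" is filled in as the coordinator itself
            "recipient": researcher,
            "message": "Collect the March usage numbers.",
            "max_turns": 1,
            "summary_method": "last_msg",
        },
        {   # the previous chat's summary is carried over into this one
            "recipient": writer,
            "message": "Write a short update from the findings.",
            "max_turns": 1,
            "summary_method": "last_msg",
        },
    ]
)

for i, res in enumerate(results):
    print(f"chat {i} summary:", res.summary)
```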
- """ - _chat_queue = self._check_chat_queue_for_sender(chat_queue) - self._finished_chats = initiate_chats(_chat_queue) - return self._finished_chats - - async def a_initiate_chats(self, chat_queue: List[Dict[str, Any]]) -> Dict[int, ChatResult]: - _chat_queue = self._check_chat_queue_for_sender(chat_queue) - self._finished_chats = await a_initiate_chats(_chat_queue) - return self._finished_chats - - def get_chat_results(self, chat_index: Optional[int] = None) -> Union[List[ChatResult], ChatResult]: - """A summary from the finished chats of particular agents.""" - if chat_index is not None: - return self._finished_chats[chat_index] - else: - return self._finished_chats - - def reset(self): - """Reset the agent.""" - self.clear_history() - self.reset_consecutive_auto_reply_counter() - self.stop_reply_at_receive() - if self.client is not None: - self.client.clear_usage_summary() - for reply_func_tuple in self._reply_func_list: - if reply_func_tuple["reset_config"] is not None: - reply_func_tuple["reset_config"](reply_func_tuple["config"]) - else: - reply_func_tuple["config"] = copy.copy(reply_func_tuple["init_config"]) - - def stop_reply_at_receive(self, sender: Optional[Agent] = None): - """Reset the reply_at_receive of the sender.""" - if sender is None: - self.reply_at_receive.clear() - else: - self.reply_at_receive[sender] = False - - def reset_consecutive_auto_reply_counter(self, sender: Optional[Agent] = None): - """Reset the consecutive_auto_reply_counter of the sender.""" - if sender is None: - self._consecutive_auto_reply_counter.clear() - else: - self._consecutive_auto_reply_counter[sender] = 0 - - def clear_history(self, recipient: Optional[Agent] = None, nr_messages_to_preserve: Optional[int] = None): - """Clear the chat history of the agent. - - Args: - recipient: the agent with whom the chat history to clear. If None, clear the chat history with all agents. - nr_messages_to_preserve: the number of newest messages to preserve in the chat history. - """ - iostream = IOStream.get_default() - if recipient is None: - if nr_messages_to_preserve: - for key in self._oai_messages: - nr_messages_to_preserve_internal = nr_messages_to_preserve - # if breaking history between function call and function response, save function call message - # additionally, otherwise openai will return error - first_msg_to_save = self._oai_messages[key][-nr_messages_to_preserve_internal] - if "tool_responses" in first_msg_to_save: - nr_messages_to_preserve_internal += 1 - iostream.print( - f"Preserving one more message for {self.name} to not divide history between tool call and " - f"tool response." - ) - # Remove messages from history except last `nr_messages_to_preserve` messages. 
- self._oai_messages[key] = self._oai_messages[key][-nr_messages_to_preserve_internal:] - else: - self._oai_messages.clear() - else: - self._oai_messages[recipient].clear() - if nr_messages_to_preserve: - iostream.print( - colored( - "WARNING: `nr_preserved_messages` is ignored when clearing chat history with a specific agent.", - "yellow", - ), - flush=True, - ) - - def generate_oai_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[OpenAIWrapper] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Generate a reply using autogen.oai.""" - client = self.client if config is None else config - if client is None: - return False, None - if messages is None: - messages = self._oai_messages[sender] - extracted_response = self._generate_oai_reply_from_client( - client, self._oai_system_message + messages, self.client_cache - ) - return (False, None) if extracted_response is None else (True, extracted_response) - - def _generate_oai_reply_from_client(self, llm_client, messages, cache) -> Union[str, Dict, None]: - # unroll tool_responses - all_messages = [] - for message in messages: - tool_responses = message.get("tool_responses", []) - if tool_responses: - all_messages += tool_responses - # tool role on the parent message means the content is just concatenation of all of the tool_responses - if message.get("role") != "tool": - all_messages.append({key: message[key] for key in message if key != "tool_responses"}) - else: - all_messages.append(message) - - # TODO: #1143 handle token limit exceeded error - response = llm_client.create( - context=messages[-1].pop("context", None), messages=all_messages, cache=cache, agent=self - ) - extracted_response = llm_client.extract_text_or_completion_object(response)[0] - - if extracted_response is None: - warnings.warn(f"Extracted_response from {response} is None.", UserWarning) - return None - # ensure function and tool calls will be accepted when sent back to the LLM - if not isinstance(extracted_response, str) and hasattr(extracted_response, "model_dump"): - extracted_response = model_dump(extracted_response) - if isinstance(extracted_response, dict): - if extracted_response.get("function_call"): - extracted_response["function_call"]["name"] = self._normalize_name( - extracted_response["function_call"]["name"] - ) - for tool_call in extracted_response.get("tool_calls") or []: - tool_call["function"]["name"] = self._normalize_name(tool_call["function"]["name"]) - # Remove id and type if they are not present. - # This is to make the tool call object compatible with Mistral API. 
- if tool_call.get("id") is None: - tool_call.pop("id") - if tool_call.get("type") is None: - tool_call.pop("type") - return extracted_response - - async def a_generate_oai_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, Dict, None]]: - """Generate a reply using autogen.oai asynchronously.""" - iostream = IOStream.get_default() - - def _generate_oai_reply( - self, iostream: IOStream, *args: Any, **kwargs: Any - ) -> Tuple[bool, Union[str, Dict, None]]: - with IOStream.set_default(iostream): - return self.generate_oai_reply(*args, **kwargs) - - return await asyncio.get_event_loop().run_in_executor( - None, - functools.partial( - _generate_oai_reply, self=self, iostream=iostream, messages=messages, sender=sender, config=config - ), - ) - - def _generate_code_execution_reply_using_executor( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Union[Dict, Literal[False]]] = None, - ): - """Generate a reply using code executor.""" - iostream = IOStream.get_default() - - if config is not None: - raise ValueError("config is not supported for _generate_code_execution_reply_using_executor.") - if self._code_execution_config is False: - return False, None - if messages is None: - messages = self._oai_messages[sender] - last_n_messages = self._code_execution_config.get("last_n_messages", "auto") - - if not (isinstance(last_n_messages, (int, float)) and last_n_messages >= 0) and last_n_messages != "auto": - raise ValueError("last_n_messages must be either a non-negative integer, or the string 'auto'.") - - num_messages_to_scan = last_n_messages - if last_n_messages == "auto": - # Find when the agent last spoke - num_messages_to_scan = 0 - for message in reversed(messages): - if "role" not in message: - break - elif message["role"] != "user": - break - else: - num_messages_to_scan += 1 - num_messages_to_scan = min(len(messages), num_messages_to_scan) - messages_to_scan = messages[-num_messages_to_scan:] - - # iterate through the last n messages in reverse - # if code blocks are found, execute the code blocks and return the output - # if no code blocks are found, continue - for message in reversed(messages_to_scan): - if not message["content"]: - continue - code_blocks = self._code_executor.code_extractor.extract_code_blocks(message["content"]) - if len(code_blocks) == 0: - continue - - num_code_blocks = len(code_blocks) - if num_code_blocks == 1: - iostream.print( - colored( - f"\n>>>>>>>> EXECUTING CODE BLOCK (inferred language is {code_blocks[0].language})...", - "red", - ), - flush=True, - ) - else: - iostream.print( - colored( - f"\n>>>>>>>> EXECUTING {num_code_blocks} CODE BLOCKS (inferred languages are [{', '.join([x.language for x in code_blocks])}])...", - "red", - ), - flush=True, - ) - - # found code blocks, execute code. 
- code_result = self._code_executor.execute_code_blocks(code_blocks) - exitcode2str = "execution succeeded" if code_result.exit_code == 0 else "execution failed" - return True, f"exitcode: {code_result.exit_code} ({exitcode2str})\nCode output: {code_result.output}" - - return False, None - - def generate_code_execution_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Union[Dict, Literal[False]]] = None, - ): - """Generate a reply using code execution.""" - code_execution_config = config if config is not None else self._code_execution_config - if code_execution_config is False: - return False, None - if messages is None: - messages = self._oai_messages[sender] - last_n_messages = code_execution_config.pop("last_n_messages", "auto") - - if not (isinstance(last_n_messages, (int, float)) and last_n_messages >= 0) and last_n_messages != "auto": - raise ValueError("last_n_messages must be either a non-negative integer, or the string 'auto'.") - - messages_to_scan = last_n_messages - if last_n_messages == "auto": - # Find when the agent last spoke - messages_to_scan = 0 - for i in range(len(messages)): - message = messages[-(i + 1)] - if "role" not in message: - break - elif message["role"] != "user": - break - else: - messages_to_scan += 1 - - # iterate through the last n messages in reverse - # if code blocks are found, execute the code blocks and return the output - # if no code blocks are found, continue - for i in range(min(len(messages), messages_to_scan)): - message = messages[-(i + 1)] - if not message["content"]: - continue - code_blocks = extract_code(message["content"]) - if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN: - continue - - # found code blocks, execute code and push "last_n_messages" back - exitcode, logs = self.execute_code_blocks(code_blocks) - code_execution_config["last_n_messages"] = last_n_messages - exitcode2str = "execution succeeded" if exitcode == 0 else "execution failed" - return True, f"exitcode: {exitcode} ({exitcode2str})\nCode output: {logs}" - - # no code blocks are found, push last_n_messages back and return. - code_execution_config["last_n_messages"] = last_n_messages - - return False, None - - def generate_function_call_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[Dict, None]]: - """ - Generate a reply using function call. 
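As a usage sketch of the executor-based code-execution reply above: an agent configured with a local executor scans the recent messages, extracts fenced code blocks, runs them, and replies with the exit code and output. Names are invented; it assumes `LocalCommandLineCodeExecutor` from `autogen.coding`:

```python
from pathlib import Path

from autogen import ConversableAgent
from autogen.coding import LocalCommandLineCodeExecutor

Path("coding").mkdir(exist_ok=True)
runner = ConversableAgent(
    "runner",
    llm_config=False,
    human_input_mode="NEVER",
    code_execution_config={"executor": LocalCommandLineCodeExecutor(work_dir="coding")},
)
author = ConversableAgent("author", llm_config=False, human_input_mode="NEVER")

# A message whose markdown code block will be extracted and executed.
task = "Run this:\n```python\nprint(21 * 2)\n```"
reply = runner.generate_reply(messages=[{"role": "user", "content": task}], sender=author)
print(reply)  # expected: exitcode 0 and "Code output: 42"
```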
- - "function_call" replaced by "tool_calls" as of [OpenAI API v1.1.0](https://github.com/openai/openai-python/releases/tag/v1.1.0) - See https://platform.openai.com/docs/api-reference/chat/create#chat-create-functions - """ - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - if "function_call" in message and message["function_call"]: - func_call = message["function_call"] - func = self._function_map.get(func_call.get("name", None), None) - if inspect.iscoroutinefunction(func): - try: - # get the running loop if it was already created - loop = asyncio.get_running_loop() - close_loop = False - except RuntimeError: - # create a loop if there is no running loop - loop = asyncio.new_event_loop() - close_loop = True - - _, func_return = loop.run_until_complete(self.a_execute_function(func_call)) - if close_loop: - loop.close() - else: - _, func_return = self.execute_function(message["function_call"]) - return True, func_return - return False, None - - async def a_generate_function_call_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[Dict, None]]: - """ - Generate a reply using async function call. - - "function_call" replaced by "tool_calls" as of [OpenAI API v1.1.0](https://github.com/openai/openai-python/releases/tag/v1.1.0) - See https://platform.openai.com/docs/api-reference/chat/create#chat-create-functions - """ - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - func_call = message.get("function_call") - if func_call: - func_name = func_call.get("name", "") - func = self._function_map.get(func_name, None) - if func and inspect.iscoroutinefunction(func): - _, func_return = await self.a_execute_function(func_call) - else: - _, func_return = self.execute_function(func_call) - return True, func_return - - return False, None - - def _str_for_tool_response(self, tool_response): - return str(tool_response.get("content", "")) - - def generate_tool_calls_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[Dict, None]]: - """Generate a reply using tool call.""" - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - tool_returns = [] - for tool_call in message.get("tool_calls", []): - function_call = tool_call.get("function", {}) - func = self._function_map.get(function_call.get("name", None), None) - if inspect.iscoroutinefunction(func): - try: - # get the running loop if it was already created - loop = asyncio.get_running_loop() - close_loop = False - except RuntimeError: - # create a loop if there is no running loop - loop = asyncio.new_event_loop() - close_loop = True - - _, func_return = loop.run_until_complete(self.a_execute_function(function_call)) - if close_loop: - loop.close() - else: - _, func_return = self.execute_function(function_call) - content = func_return.get("content", "") - if content is None: - content = "" - tool_call_id = tool_call.get("id", None) - if tool_call_id is not None: - tool_call_response = { - "tool_call_id": tool_call_id, - "role": "tool", - "content": content, - } - else: - # Do not include tool_call_id if it is not present. - # This is to make the tool call object compatible with Mistral API. 
- tool_call_response = { - "role": "tool", - "content": content, - } - tool_returns.append(tool_call_response) - if tool_returns: - return True, { - "role": "tool", - "tool_responses": tool_returns, - "content": "\n\n".join([self._str_for_tool_response(tool_return) for tool_return in tool_returns]), - } - return False, None - - async def _a_execute_tool_call(self, tool_call): - id = tool_call["id"] - function_call = tool_call.get("function", {}) - _, func_return = await self.a_execute_function(function_call) - return { - "tool_call_id": id, - "role": "tool", - "content": func_return.get("content", ""), - } - - async def a_generate_tool_calls_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[Dict, None]]: - """Generate a reply using async function call.""" - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - async_tool_calls = [] - for tool_call in message.get("tool_calls", []): - async_tool_calls.append(self._a_execute_tool_call(tool_call)) - if async_tool_calls: - tool_returns = await asyncio.gather(*async_tool_calls) - return True, { - "role": "tool", - "tool_responses": tool_returns, - "content": "\n\n".join([self._str_for_tool_response(tool_return) for tool_return in tool_returns]), - } - - return False, None - - def check_termination_and_human_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, None]]: - """Check if the conversation should be terminated, and if human reply is provided. - - This method checks for conditions that require the conversation to be terminated, such as reaching - a maximum number of consecutive auto-replies or encountering a termination message. Additionally, - it prompts for and processes human input based on the configured human input mode, which can be - 'ALWAYS', 'NEVER', or 'TERMINATE'. The method also manages the consecutive auto-reply counter - for the conversation and prints relevant messages based on the human input received. - - Args: - - messages (Optional[List[Dict]]): A list of message dictionaries, representing the conversation history. - - sender (Optional[Agent]): The agent object representing the sender of the message. - - config (Optional[Any]): Configuration object, defaults to the current instance if not provided. - - Returns: - - Tuple[bool, Union[str, Dict, None]]: A tuple containing a boolean indicating if the conversation - should be terminated, and a human reply which can be a string, a dictionary, or None. - """ - iostream = IOStream.get_default() - - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] if sender else [] - message = messages[-1] - reply = "" - no_human_input_msg = "" - sender_name = "the sender" if sender is None else sender.name - if self.human_input_mode == "ALWAYS": - reply = self.get_human_input( - f"Replying as {self.name}. Provide feedback to {sender_name}. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." 
if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply if reply or not self._is_termination_msg(message) else "exit" - else: - if self._consecutive_auto_reply_counter[sender] >= self._max_consecutive_auto_reply_dict[sender]: - if self.human_input_mode == "NEVER": - reply = "exit" - else: - # self.human_input_mode == "TERMINATE": - terminate = self._is_termination_msg(message) - reply = self.get_human_input( - f"Please give feedback to {sender_name}. Press enter or type 'exit' to stop the conversation: " - if terminate - else f"Please give feedback to {sender_name}. Press enter to skip and use auto-reply, or type 'exit' to stop the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply if reply or not terminate else "exit" - elif self._is_termination_msg(message): - if self.human_input_mode == "NEVER": - reply = "exit" - else: - # self.human_input_mode == "TERMINATE": - reply = self.get_human_input( - f"Please give feedback to {sender_name}. Press enter or type 'exit' to stop the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply or "exit" - - # print the no_human_input_msg - if no_human_input_msg: - iostream.print(colored(f"\n>>>>>>>> {no_human_input_msg}", "red"), flush=True) - - # stop the conversation - if reply == "exit": - # reset the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] = 0 - return True, None - - # send the human reply - if reply or self._max_consecutive_auto_reply_dict[sender] == 0: - # reset the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] = 0 - # User provided a custom response, return function and tool failures indicating user interruption - tool_returns = [] - if message.get("function_call", False): - tool_returns.append( - { - "role": "function", - "name": message["function_call"].get("name", ""), - "content": "USER INTERRUPTED", - } - ) - - if message.get("tool_calls", False): - tool_returns.extend( - [ - {"role": "tool", "tool_call_id": tool_call.get("id", ""), "content": "USER INTERRUPTED"} - for tool_call in message["tool_calls"] - ] - ) - - response = {"role": "user", "content": reply} - if tool_returns: - response["tool_responses"] = tool_returns - - return True, response - - # increment the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] += 1 - if self.human_input_mode != "NEVER": - iostream.print(colored("\n>>>>>>>> USING AUTO REPLY...", "red"), flush=True) - - return False, None - - async def a_check_termination_and_human_reply( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[Any] = None, - ) -> Tuple[bool, Union[str, None]]: - """(async) Check if the conversation should be terminated, and if human reply is provided. - - This method checks for conditions that require the conversation to be terminated, such as reaching - a maximum number of consecutive auto-replies or encountering a termination message. Additionally, - it prompts for and processes human input based on the configured human input mode, which can be - 'ALWAYS', 'NEVER', or 'TERMINATE'. 
The method also manages the consecutive auto-reply counter - for the conversation and prints relevant messages based on the human input received. - - Args: - - messages (Optional[List[Dict]]): A list of message dictionaries, representing the conversation history. - - sender (Optional[Agent]): The agent object representing the sender of the message. - - config (Optional[Any]): Configuration object, defaults to the current instance if not provided. - - Returns: - - Tuple[bool, Union[str, Dict, None]]: A tuple containing a boolean indicating if the conversation - should be terminated, and a human reply which can be a string, a dictionary, or None. - """ - iostream = IOStream.get_default() - - if config is None: - config = self - if messages is None: - messages = self._oai_messages[sender] if sender else [] - message = messages[-1] if messages else {} - reply = "" - no_human_input_msg = "" - sender_name = "the sender" if sender is None else sender.name - if self.human_input_mode == "ALWAYS": - reply = await self.a_get_human_input( - f"Replying as {self.name}. Provide feedback to {sender_name}. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply if reply or not self._is_termination_msg(message) else "exit" - else: - if self._consecutive_auto_reply_counter[sender] >= self._max_consecutive_auto_reply_dict[sender]: - if self.human_input_mode == "NEVER": - reply = "exit" - else: - # self.human_input_mode == "TERMINATE": - terminate = self._is_termination_msg(message) - reply = await self.a_get_human_input( - f"Please give feedback to {sender_name}. Press enter or type 'exit' to stop the conversation: " - if terminate - else f"Please give feedback to {sender_name}. Press enter to skip and use auto-reply, or type 'exit' to stop the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply if reply or not terminate else "exit" - elif self._is_termination_msg(message): - if self.human_input_mode == "NEVER": - reply = "exit" - else: - # self.human_input_mode == "TERMINATE": - reply = await self.a_get_human_input( - f"Please give feedback to {sender_name}. Press enter or type 'exit' to stop the conversation: " - ) - no_human_input_msg = "NO HUMAN INPUT RECEIVED." 
if not reply else "" - # if the human input is empty, and the message is a termination message, then we will terminate the conversation - reply = reply or "exit" - - # print the no_human_input_msg - if no_human_input_msg: - iostream.print(colored(f"\n>>>>>>>> {no_human_input_msg}", "red"), flush=True) - - # stop the conversation - if reply == "exit": - # reset the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] = 0 - return True, None - - # send the human reply - if reply or self._max_consecutive_auto_reply_dict[sender] == 0: - # User provided a custom response, return function and tool results indicating user interruption - # reset the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] = 0 - tool_returns = [] - if message.get("function_call", False): - tool_returns.append( - { - "role": "function", - "name": message["function_call"].get("name", ""), - "content": "USER INTERRUPTED", - } - ) - - if message.get("tool_calls", False): - tool_returns.extend( - [ - {"role": "tool", "tool_call_id": tool_call.get("id", ""), "content": "USER INTERRUPTED"} - for tool_call in message["tool_calls"] - ] - ) - - response = {"role": "user", "content": reply} - if tool_returns: - response["tool_responses"] = tool_returns - - return True, response - - # increment the consecutive_auto_reply_counter - self._consecutive_auto_reply_counter[sender] += 1 - if self.human_input_mode != "NEVER": - iostream.print(colored("\n>>>>>>>> USING AUTO REPLY...", "red"), flush=True) - - return False, None - - def generate_reply( - self, - messages: Optional[List[Dict[str, Any]]] = None, - sender: Optional["Agent"] = None, - **kwargs: Any, - ) -> Union[str, Dict, None]: - """Reply based on the conversation history and the sender. - - Either messages or sender must be provided. - Register a reply_func with `None` as one trigger for it to be activated when `messages` is non-empty and `sender` is `None`. - Use registered auto reply functions to generate replies. - By default, the following functions are checked in order: - 1. check_termination_and_human_reply - 2. generate_function_call_reply (deprecated in favor of tool_calls) - 3. generate_tool_calls_reply - 4. generate_code_execution_reply - 5. generate_oai_reply - Every function returns a tuple (final, reply). - When a function returns final=False, the next function will be checked. - So by default, termination and human reply will be checked first. - If not terminating and human reply is skipped, execute function or code and return the result. - AI replies are generated only when no code execution is performed. - - Args: - messages: a list of messages in the conversation history. - sender: sender of an Agent instance. - - Additional keyword arguments: - exclude (List[Callable]): a list of reply functions to be excluded. - - Returns: - str or dict or None: reply. None if no reply is generated. - """ - if all((messages is None, sender is None)): - error_msg = f"Either {messages=} or {sender=} must be provided." - logger.error(error_msg) - raise AssertionError(error_msg) - - if messages is None: - messages = self._oai_messages[sender] - - # Call the hookable method that gives registered hooks a chance to process the last message. - # Message modifications do not affect the incoming messages or self._oai_messages. - messages = self.process_last_received_message(messages) - - # Call the hookable method that gives registered hooks a chance to process all messages. 
- # Message modifications do not affect the incoming messages or self._oai_messages. - messages = self.process_all_messages_before_reply(messages) - - for reply_func_tuple in self._reply_func_list: - reply_func = reply_func_tuple["reply_func"] - if "exclude" in kwargs and reply_func in kwargs["exclude"]: - continue - if inspect.iscoroutinefunction(reply_func): - continue - if self._match_trigger(reply_func_tuple["trigger"], sender): - final, reply = reply_func(self, messages=messages, sender=sender, config=reply_func_tuple["config"]) - if logging_enabled(): - log_event( - self, - "reply_func_executed", - reply_func_module=reply_func.__module__, - reply_func_name=reply_func.__name__, - final=final, - reply=reply, - ) - if final: - return reply - return self._default_auto_reply - - async def a_generate_reply( - self, - messages: Optional[List[Dict[str, Any]]] = None, - sender: Optional["Agent"] = None, - **kwargs: Any, - ) -> Union[str, Dict[str, Any], None]: - """(async) Reply based on the conversation history and the sender. - - Either messages or sender must be provided. - Register a reply_func with `None` as one trigger for it to be activated when `messages` is non-empty and `sender` is `None`. - Use registered auto reply functions to generate replies. - By default, the following functions are checked in order: - 1. check_termination_and_human_reply - 2. generate_function_call_reply - 3. generate_tool_calls_reply - 4. generate_code_execution_reply - 5. generate_oai_reply - Every function returns a tuple (final, reply). - When a function returns final=False, the next function will be checked. - So by default, termination and human reply will be checked first. - If not terminating and human reply is skipped, execute function or code and return the result. - AI replies are generated only when no code execution is performed. - - Args: - messages: a list of messages in the conversation history. - sender: sender of an Agent instance. - - Additional keyword arguments: - exclude (List[Callable]): a list of reply functions to be excluded. - - Returns: - str or dict or None: reply. None if no reply is generated. - """ - if all((messages is None, sender is None)): - error_msg = f"Either {messages=} or {sender=} must be provided." - logger.error(error_msg) - raise AssertionError(error_msg) - - if messages is None: - messages = self._oai_messages[sender] - - # Call the hookable method that gives registered hooks a chance to process all messages. - # Message modifications do not affect the incoming messages or self._oai_messages. - messages = self.process_all_messages_before_reply(messages) - - # Call the hookable method that gives registered hooks a chance to process the last message. - # Message modifications do not affect the incoming messages or self._oai_messages. 
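The dispatch behavior documented above (registered reply functions tried in order, each returning `(final, reply)`, first final result wins, otherwise the default auto-reply) can be sketched as a plain loop; the lambdas below are stand-ins for the default reply functions.

```python
def dispatch_reply(reply_funcs, messages, default_reply=""):
    """Try each registered reply function in order (mirrors generate_reply).

    Each function returns (final, reply); the first final result is returned,
    otherwise the default auto-reply is used.
    """
    for reply_func in reply_funcs:
        final, reply = reply_func(messages)
        if final:
            return reply
    return default_reply


funcs = [
    lambda msgs: (False, None),              # e.g. check_termination_and_human_reply passes
    lambda msgs: (True, "exitcode: 0 (...)"),  # e.g. generate_code_execution_reply answers
]
print(dispatch_reply(funcs, messages=[{"role": "user", "content": "run it"}]))
```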
- messages = self.process_last_received_message(messages) - - for reply_func_tuple in self._reply_func_list: - reply_func = reply_func_tuple["reply_func"] - if "exclude" in kwargs and reply_func in kwargs["exclude"]: - continue - - if self._match_trigger(reply_func_tuple["trigger"], sender): - if inspect.iscoroutinefunction(reply_func): - final, reply = await reply_func( - self, messages=messages, sender=sender, config=reply_func_tuple["config"] - ) - else: - final, reply = reply_func(self, messages=messages, sender=sender, config=reply_func_tuple["config"]) - if final: - return reply - return self._default_auto_reply - - def _match_trigger(self, trigger: Union[None, str, type, Agent, Callable, List], sender: Optional[Agent]) -> bool: - """Check if the sender matches the trigger. - - Args: - - trigger (Union[None, str, type, Agent, Callable, List]): The condition to match against the sender. - Can be `None`, string, type, `Agent` instance, callable, or a list of these. - - sender (Agent): The sender object or type to be matched against the trigger. - - Returns: - - bool: Returns `True` if the sender matches the trigger, otherwise `False`. - - Raises: - - ValueError: If the trigger type is unsupported. - """ - if trigger is None: - return sender is None - elif isinstance(trigger, str): - if sender is None: - raise SenderRequired() - return trigger == sender.name - elif isinstance(trigger, type): - return isinstance(sender, trigger) - elif isinstance(trigger, Agent): - # return True if the sender is the same type (class) as the trigger - return trigger == sender - elif isinstance(trigger, Callable): - rst = trigger(sender) - assert isinstance(rst, bool), f"trigger {trigger} must return a boolean value." - return rst - elif isinstance(trigger, list): - return any(self._match_trigger(t, sender) for t in trigger) - else: - raise ValueError(f"Unsupported trigger type: {type(trigger)}") - - def get_human_input(self, prompt: str) -> str: - """Get human input. - - Override this method to customize the way to get human input. - - Args: - prompt (str): prompt for the human input. - - Returns: - str: human input. - """ - iostream = IOStream.get_default() - - reply = iostream.input(prompt) - self._human_input.append(reply) - return reply - - async def a_get_human_input(self, prompt: str) -> str: - """(Async) Get human input. - - Override this method to customize the way to get human input. - - Args: - prompt (str): prompt for the human input. - - Returns: - str: human input. - """ - loop = asyncio.get_running_loop() - reply = await loop.run_in_executor(None, functools.partial(self.get_human_input, prompt)) - return reply - - def run_code(self, code, **kwargs): - """Run the code and return the result. - - Override this function to modify the way to run the code. - Args: - code (str): the code to be executed. - **kwargs: other keyword arguments. - - Returns: - A tuple of (exitcode, logs, image). - exitcode (int): the exit code of the code execution. - logs (str): the logs of the code execution. - image (str or None): the docker image used for the code execution. 
- """ - return execute_code(code, **kwargs) - - def execute_code_blocks(self, code_blocks): - """Execute the code blocks and return the result.""" - iostream = IOStream.get_default() - - logs_all = "" - for i, code_block in enumerate(code_blocks): - lang, code = code_block - if not lang: - lang = infer_lang(code) - iostream.print( - colored( - f"\n>>>>>>>> EXECUTING CODE BLOCK {i} (inferred language is {lang})...", - "red", - ), - flush=True, - ) - if lang in ["bash", "shell", "sh"]: - exitcode, logs, image = self.run_code(code, lang=lang, **self._code_execution_config) - elif lang in PYTHON_VARIANTS: - if code.startswith("# filename: "): - filename = code[11 : code.find("\n")].strip() - else: - filename = None - exitcode, logs, image = self.run_code( - code, - lang="python", - filename=filename, - **self._code_execution_config, - ) - else: - # In case the language is not supported, we return an error message. - exitcode, logs, image = ( - 1, - f"unknown language {lang}", - None, - ) - # raise NotImplementedError - if image is not None: - self._code_execution_config["use_docker"] = image - logs_all += "\n" + logs - if exitcode != 0: - return exitcode, logs_all - return exitcode, logs_all - - @staticmethod - def _format_json_str(jstr): - """Remove newlines outside of quotes, and handle JSON escape sequences. - - 1. this function removes the newline in the query outside of quotes otherwise json.loads(s) will fail. - Ex 1: - "{\n"tool": "python",\n"query": "print('hello')\nprint('world')"\n}" -> "{"tool": "python","query": "print('hello')\nprint('world')"}" - Ex 2: - "{\n \"location\": \"Boston, MA\"\n}" -> "{"location": "Boston, MA"}" - - 2. this function also handles JSON escape sequences inside quotes. - Ex 1: - '{"args": "a\na\na\ta"}' -> '{"args": "a\\na\\na\\ta"}' - """ - result = [] - inside_quotes = False - last_char = " " - for char in jstr: - if last_char != "\\" and char == '"': - inside_quotes = not inside_quotes - last_char = char - if not inside_quotes and char == "\n": - continue - if inside_quotes and char == "\n": - char = "\\n" - if inside_quotes and char == "\t": - char = "\\t" - result.append(char) - return "".join(result) - - def execute_function(self, func_call, verbose: bool = False) -> Tuple[bool, Dict[str, str]]: - """Execute a function call and return the result. - - Override this function to modify the way to execute function and tool calls. - - Args: - func_call: a dictionary extracted from openai message at "function_call" or "tool_calls" with keys "name" and "arguments". - - Returns: - A tuple of (is_exec_success, result_dict). - is_exec_success (boolean): whether the execution is successful. - result_dict: a dictionary with keys "name", "role", and "content". Value of "role" is "function". - - "function_call" deprecated as of [OpenAI API v1.1.0](https://github.com/openai/openai-python/releases/tag/v1.1.0) - See https://platform.openai.com/docs/api-reference/chat/create#chat-create-function_call - """ - iostream = IOStream.get_default() - - func_name = func_call.get("name", "") - func = self._function_map.get(func_name, None) - - is_exec_success = False - if func is not None: - # Extract arguments from a json-like string and put it into a dict. - input_string = self._format_json_str(func_call.get("arguments", "{}")) - try: - arguments = json.loads(input_string) - except json.JSONDecodeError as e: - arguments = None - content = f"Error: {e}\n The argument must be in JSON format." 
- - # Try to execute the function - if arguments is not None: - iostream.print( - colored(f"\n>>>>>>>> EXECUTING FUNCTION {func_name}...", "magenta"), - flush=True, - ) - try: - content = func(**arguments) - is_exec_success = True - except Exception as e: - content = f"Error: {e}" - else: - content = f"Error: Function {func_name} not found." - - if verbose: - iostream.print( - colored(f"\nInput arguments: {arguments}\nOutput:\n{content}", "magenta"), - flush=True, - ) - - return is_exec_success, { - "name": func_name, - "role": "function", - "content": str(content), - } - - async def a_execute_function(self, func_call): - """Execute an async function call and return the result. - - Override this function to modify the way async functions and tools are executed. - - Args: - func_call: a dictionary extracted from openai message at key "function_call" or "tool_calls" with keys "name" and "arguments". - - Returns: - A tuple of (is_exec_success, result_dict). - is_exec_success (boolean): whether the execution is successful. - result_dict: a dictionary with keys "name", "role", and "content". Value of "role" is "function". - - "function_call" deprecated as of [OpenAI API v1.1.0](https://github.com/openai/openai-python/releases/tag/v1.1.0) - See https://platform.openai.com/docs/api-reference/chat/create#chat-create-function_call - """ - iostream = IOStream.get_default() - - func_name = func_call.get("name", "") - func = self._function_map.get(func_name, None) - - is_exec_success = False - if func is not None: - # Extract arguments from a json-like string and put it into a dict. - input_string = self._format_json_str(func_call.get("arguments", "{}")) - try: - arguments = json.loads(input_string) - except json.JSONDecodeError as e: - arguments = None - content = f"Error: {e}\n The argument must be in JSON format." - - # Try to execute the function - if arguments is not None: - iostream.print( - colored(f"\n>>>>>>>> EXECUTING ASYNC FUNCTION {func_name}...", "magenta"), - flush=True, - ) - try: - if inspect.iscoroutinefunction(func): - content = await func(**arguments) - else: - # Fallback to sync function if the function is not async - content = func(**arguments) - is_exec_success = True - except Exception as e: - content = f"Error: {e}" - else: - content = f"Error: Function {func_name} not found." - - return is_exec_success, { - "name": func_name, - "role": "function", - "content": str(content), - } - - def generate_init_message(self, message: Union[Dict, str, None], **kwargs) -> Union[str, Dict]: - """Generate the initial message for the agent. - If message is None, input() will be called to get the initial message. - - Args: - message (str or None): the message to be processed. - **kwargs: any additional information. It has the following reserved fields: - "carryover": a string or a list of string to specify the carryover information to be passed to this chat. It can be a string or a list of string. - If provided, we will combine this carryover with the "message" content when generating the initial chat - message. - Returns: - str or dict: the processed message. 
- """ - if message is None: - message = self.get_human_input(">") - - return self._handle_carryover(message, kwargs) - - def _handle_carryover(self, message: Union[str, Dict], kwargs: dict) -> Union[str, Dict]: - if not kwargs.get("carryover"): - return message - - if isinstance(message, str): - return self._process_carryover(message, kwargs) - - elif isinstance(message, dict): - if isinstance(message.get("content"), str): - # Makes sure the original message is not mutated - message = message.copy() - message["content"] = self._process_carryover(message["content"], kwargs) - elif isinstance(message.get("content"), list): - # Makes sure the original message is not mutated - message = message.copy() - message["content"] = self._process_multimodal_carryover(message["content"], kwargs) - else: - raise InvalidCarryOverType("Carryover should be a string or a list of strings.") - - return message - - def _process_carryover(self, content: str, kwargs: dict) -> str: - # Makes sure there's a carryover - if not kwargs.get("carryover"): - return content - - # if carryover is string - if isinstance(kwargs["carryover"], str): - content += "\nContext: \n" + kwargs["carryover"] - elif isinstance(kwargs["carryover"], list): - content += "\nContext: \n" + ("\n").join([_post_process_carryover_item(t) for t in kwargs["carryover"]]) - else: - raise InvalidCarryOverType( - "Carryover should be a string or a list of strings. Not adding carryover to the message." - ) - return content - - def _process_multimodal_carryover(self, content: List[Dict], kwargs: dict) -> List[Dict]: - """Prepends the context to a multimodal message.""" - # Makes sure there's a carryover - if not kwargs.get("carryover"): - return content - - return [{"type": "text", "text": self._process_carryover("", kwargs)}] + content - - async def a_generate_init_message(self, message: Union[Dict, str, None], **kwargs) -> Union[str, Dict]: - """Generate the initial message for the agent. - If message is None, input() will be called to get the initial message. - - Args: - Please refer to `generate_init_message` for the description of the arguments. - - Returns: - str or dict: the processed message. - """ - if message is None: - message = await self.a_get_human_input(">") - - return self._handle_carryover(message, kwargs) - - def register_function(self, function_map: Dict[str, Union[Callable, None]]): - """Register functions to the agent. - - Args: - function_map: a dictionary mapping function names to functions. if function_map[name] is None, the function will be removed from the function_map. - """ - for name, func in function_map.items(): - self._assert_valid_name(name) - if func is None and name not in self._function_map.keys(): - warnings.warn(f"The function {name} to remove doesn't exist", name) - if name in self._function_map: - warnings.warn(f"Function '{name}' is being overridden.", UserWarning) - self._function_map.update(function_map) - self._function_map = {k: v for k, v in self._function_map.items() if v is not None} - - def update_function_signature(self, func_sig: Union[str, Dict], is_remove: None): - """update a function_signature in the LLM configuration for function_call. - - Args: - func_sig (str or dict): description/name of the function to update/remove to the model. 
See: https://platform.openai.com/docs/api-reference/chat/create#chat/create-functions - is_remove: whether removing the function from llm_config with name 'func_sig' - - Deprecated as of [OpenAI API v1.1.0](https://github.com/openai/openai-python/releases/tag/v1.1.0) - See https://platform.openai.com/docs/api-reference/chat/create#chat-create-function_call - """ - - if not isinstance(self.llm_config, dict): - error_msg = "To update a function signature, agent must have an llm_config" - logger.error(error_msg) - raise AssertionError(error_msg) - - if is_remove: - if "functions" not in self.llm_config.keys(): - error_msg = "The agent config doesn't have function {name}.".format(name=func_sig) - logger.error(error_msg) - raise AssertionError(error_msg) - else: - self.llm_config["functions"] = [ - func for func in self.llm_config["functions"] if func["name"] != func_sig - ] - else: - if not isinstance(func_sig, dict): - raise ValueError( - f"The function signature must be of the type dict. Received function signature type {type(func_sig)}" - ) - - self._assert_valid_name(func_sig["name"]) - if "functions" in self.llm_config.keys(): - if any(func["name"] == func_sig["name"] for func in self.llm_config["functions"]): - warnings.warn(f"Function '{func_sig['name']}' is being overridden.", UserWarning) - - self.llm_config["functions"] = [ - func for func in self.llm_config["functions"] if func.get("name") != func_sig["name"] - ] + [func_sig] - else: - self.llm_config["functions"] = [func_sig] - - if len(self.llm_config["functions"]) == 0: - del self.llm_config["functions"] - - self.client = OpenAIWrapper(**self.llm_config) - - def update_tool_signature(self, tool_sig: Union[str, Dict], is_remove: None): - """update a tool_signature in the LLM configuration for tool_call. - - Args: - tool_sig (str or dict): description/name of the tool to update/remove to the model. See: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tools - is_remove: whether removing the tool from llm_config with name 'tool_sig' - """ - - if not self.llm_config: - error_msg = "To update a tool signature, agent must have an llm_config" - logger.error(error_msg) - raise AssertionError(error_msg) - - if is_remove: - if "tools" not in self.llm_config.keys(): - error_msg = "The agent config doesn't have tool {name}.".format(name=tool_sig) - logger.error(error_msg) - raise AssertionError(error_msg) - else: - self.llm_config["tools"] = [ - tool for tool in self.llm_config["tools"] if tool["function"]["name"] != tool_sig - ] - else: - if not isinstance(tool_sig, dict): - raise ValueError( - f"The tool signature must be of the type dict. 
Received tool signature type {type(tool_sig)}" - ) - self._assert_valid_name(tool_sig["function"]["name"]) - if "tools" in self.llm_config: - if any(tool["function"]["name"] == tool_sig["function"]["name"] for tool in self.llm_config["tools"]): - warnings.warn(f"Function '{tool_sig['function']['name']}' is being overridden.", UserWarning) - self.llm_config["tools"] = [ - tool - for tool in self.llm_config["tools"] - if tool.get("function", {}).get("name") != tool_sig["function"]["name"] - ] + [tool_sig] - else: - self.llm_config["tools"] = [tool_sig] - - if len(self.llm_config["tools"]) == 0: - del self.llm_config["tools"] - - self.client = OpenAIWrapper(**self.llm_config) - - def can_execute_function(self, name: Union[List[str], str]) -> bool: - """Whether the agent can execute the function.""" - names = name if isinstance(name, list) else [name] - return all([n in self._function_map for n in names]) - - @property - def function_map(self) -> Dict[str, Callable]: - """Return the function map.""" - return self._function_map - - def _wrap_function(self, func: F) -> F: - """Wrap the function to dump the return value to json. - - Handles both sync and async functions. - - Args: - func: the function to be wrapped. - - Returns: - The wrapped function. - """ - - @load_basemodels_if_needed - @functools.wraps(func) - def _wrapped_func(*args, **kwargs): - retval = func(*args, **kwargs) - if logging_enabled(): - log_function_use(self, func, kwargs, retval) - return serialize_to_str(retval) - - @load_basemodels_if_needed - @functools.wraps(func) - async def _a_wrapped_func(*args, **kwargs): - retval = await func(*args, **kwargs) - if logging_enabled(): - log_function_use(self, func, kwargs, retval) - return serialize_to_str(retval) - - wrapped_func = _a_wrapped_func if inspect.iscoroutinefunction(func) else _wrapped_func - - # needed for testing - wrapped_func._origin = func - - return wrapped_func - - def register_for_llm( - self, - *, - name: Optional[str] = None, - description: Optional[str] = None, - api_style: Literal["function", "tool"] = "tool", - ) -> Callable[[F], F]: - """Decorator factory for registering a function to be used by an agent. - - It's return value is used to decorate a function to be registered to the agent. The function uses type hints to - specify the arguments and return type. The function name is used as the default name for the function, - but a custom name can be provided. The function description is used to describe the function in the - agent's configuration. - - Args: - name (optional(str)): name of the function. If None, the function name will be used (default: None). - description (optional(str)): description of the function (default: None). It is mandatory - for the initial decorator, but the following ones can omit it. - api_style: (literal): the API style for function call. - For Azure OpenAI API, use version 2023-12-01-preview or later. - `"function"` style will be deprecated. For earlier version use - `"function"` if `"tool"` doesn't work. - See [Azure OpenAI documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/function-calling?tabs=python) for details. - - Returns: - The decorator for registering a function to be used by an agent. 
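The tool signatures handled by `update_tool_signature` above follow the OpenAI `tools` schema, a `function` entry with `name`, `description`, and `parameters`. A hand-written signature might look like the sketch below; the field values are illustrative and `agent` is assumed to be an agent with an `llm_config` dict.

```python
tool_sig = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string", "description": "City name"}},
            "required": ["city"],
        },
    },
}

# Assuming `agent` has an llm_config dict:
# agent.update_tool_signature(tool_sig, is_remove=False)      # add or overwrite
# agent.update_tool_signature("get_weather", is_remove=True)  # remove by name
```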
- - Examples: - ``` - @user_proxy.register_for_execution() - @agent2.register_for_llm() - @agent1.register_for_llm(description="This is a very useful function") - def my_function(a: Annotated[str, "description of a parameter"] = "a", b: int, c=3.14) -> str: - return a + str(b * c) - ``` - - For Azure OpenAI versions prior to 2023-12-01-preview, set `api_style` - to `"function"` if `"tool"` doesn't work: - ``` - @agent2.register_for_llm(api_style="function") - def my_function(a: Annotated[str, "description of a parameter"] = "a", b: int, c=3.14) -> str: - return a + str(b * c) - ``` - - """ - - def _decorator(func: F) -> F: - """Decorator for registering a function to be used by an agent. - - Args: - func: the function to be registered. - - Returns: - The function to be registered, with the _description attribute set to the function description. - - Raises: - ValueError: if the function description is not provided and not propagated by a previous decorator. - RuntimeError: if the LLM config is not set up before registering a function. - - """ - # name can be overwritten by the parameter, by default it is the same as function name - if name: - func._name = name - elif not hasattr(func, "_name"): - func._name = func.__name__ - - # description is propagated from the previous decorator, but it is mandatory for the first one - if description: - func._description = description - else: - if not hasattr(func, "_description"): - raise ValueError("Function description is required, none found.") - - # get JSON schema for the function - f = get_function_schema(func, name=func._name, description=func._description) - - # register the function to the agent if there is LLM config, raise an exception otherwise - if self.llm_config is None: - raise RuntimeError("LLM config must be setup before registering a function for LLM.") - - if api_style == "function": - f = f["function"] - self.update_function_signature(f, is_remove=False) - elif api_style == "tool": - self.update_tool_signature(f, is_remove=False) - else: - raise ValueError(f"Unsupported API style: {api_style}") - - return func - - return _decorator - - def register_for_execution( - self, - name: Optional[str] = None, - ) -> Callable[[F], F]: - """Decorator factory for registering a function to be executed by an agent. - - It's return value is used to decorate a function to be registered to the agent. - - Args: - name (optional(str)): name of the function. If None, the function name will be used (default: None). - - Returns: - The decorator for registering a function to be used by an agent. - - Examples: - ``` - @user_proxy.register_for_execution() - @agent2.register_for_llm() - @agent1.register_for_llm(description="This is a very useful function") - def my_function(a: Annotated[str, "description of a parameter"] = "a", b: int, c=3.14): - return a + str(b * c) - ``` - - """ - - def _decorator(func: F) -> F: - """Decorator for registering a function to be used by an agent. - - Args: - func: the function to be registered. - - Returns: - The function to be registered, with the _description attribute set to the function description. - - Raises: - ValueError: if the function description is not provided and not propagated by a previous decorator. 
- - """ - # name can be overwritten by the parameter, by default it is the same as function name - if name: - func._name = name - elif not hasattr(func, "_name"): - func._name = func.__name__ - - self.register_function({func._name: self._wrap_function(func)}) - - return func - - return _decorator - - def register_model_client(self, model_client_cls: ModelClient, **kwargs): - """Register a model client. - - Args: - model_client_cls: A custom client class that follows the Client interface - **kwargs: The kwargs for the custom client class to be initialized with - """ - self.client.register_model_client(model_client_cls, **kwargs) - - def register_hook(self, hookable_method: str, hook: Callable): - """ - Registers a hook to be called by a hookable method, in order to add a capability to the agent. - Registered hooks are kept in lists (one per hookable method), and are called in their order of registration. - - Args: - hookable_method: A hookable method name implemented by ConversableAgent. - hook: A method implemented by a subclass of AgentCapability. - """ - assert hookable_method in self.hook_lists, f"{hookable_method} is not a hookable method." - hook_list = self.hook_lists[hookable_method] - assert hook not in hook_list, f"{hook} is already registered as a hook." - hook_list.append(hook) - - def process_all_messages_before_reply(self, messages: List[Dict]) -> List[Dict]: - """ - Calls any registered capability hooks to process all messages, potentially modifying the messages. - """ - hook_list = self.hook_lists["process_all_messages_before_reply"] - # If no hooks are registered, or if there are no messages to process, return the original message list. - if len(hook_list) == 0 or messages is None: - return messages - - # Call each hook (in order of registration) to process the messages. - processed_messages = messages - for hook in hook_list: - processed_messages = hook(processed_messages) - return processed_messages - - def process_last_received_message(self, messages: List[Dict]) -> List[Dict]: - """ - Calls any registered capability hooks to use and potentially modify the text of the last message, - as long as the last message is not a function call or exit command. - """ - - # If any required condition is not met, return the original message list. - hook_list = self.hook_lists["process_last_received_message"] - if len(hook_list) == 0: - return messages # No hooks registered. - if messages is None: - return None # No message to process. - if len(messages) == 0: - return messages # No message to process. - last_message = messages[-1] - if "function_call" in last_message: - return messages # Last message is a function call. - if "context" in last_message: - return messages # Last message contains a context key. - if "content" not in last_message: - return messages # Last message has no content. - - user_content = last_message["content"] - if not isinstance(user_content, str) and not isinstance(user_content, list): - # if the user_content is a string, it is for regular LLM - # if the user_content is a list, it should follow the multimodal LMM format. - return messages - if user_content == "exit": - return messages # Last message is an exit command. - - # Call each hook (in order of registration) to process the user's message. - processed_user_content = user_content - for hook in hook_list: - processed_user_content = hook(processed_user_content) - - if processed_user_content == user_content: - return messages # No hooks actually modified the user's message. 
- - # Replace the last user message with the expanded one. - messages = messages.copy() - messages[-1]["content"] = processed_user_content - return messages - - def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None: - """Print the usage summary.""" - iostream = IOStream.get_default() - - if self.client is None: - iostream.print(f"No cost incurred from agent '{self.name}'.") - else: - iostream.print(f"Agent '{self.name}':") - self.client.print_usage_summary(mode) - - def get_actual_usage(self) -> Union[None, Dict[str, int]]: - """Get the actual usage summary.""" - if self.client is None: - return None - else: - return self.client.actual_usage_summary - - def get_total_usage(self) -> Union[None, Dict[str, int]]: - """Get the total usage summary.""" - if self.client is None: - return None - else: - return self.client.total_usage_summary - - -def register_function( - f: Callable[..., Any], - *, - caller: ConversableAgent, - executor: ConversableAgent, - name: Optional[str] = None, - description: str, -) -> None: - """Register a function to be proposed by an agent and executed for an executor. - - This function can be used instead of function decorators `@ConversationAgent.register_for_llm` and - `@ConversationAgent.register_for_execution`. - - Args: - f: the function to be registered. - caller: the agent calling the function, typically an instance of ConversableAgent. - executor: the agent executing the function, typically an instance of UserProxy. - name: name of the function. If None, the function name will be used (default: None). - description: description of the function. The description is used by LLM to decode whether the function - is called. Make sure the description is properly describing what the function does or it might not be - called by LLM when needed. - - """ - f = caller.register_for_llm(name=name, description=description)(f) - executor.register_for_execution(name=name)(f) diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py deleted file mode 100644 index c6355a13b94d..000000000000 --- a/autogen/agentchat/groupchat.py +++ /dev/null @@ -1,1621 +0,0 @@ -import copy -import json -import logging -import random -import re -import sys -from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union - -from ..code_utils import content_str -from ..exception_utils import AgentNameConflict, NoEligibleSpeaker, UndefinedNextAgent -from ..formatting_utils import colored -from ..graph_utils import check_graph_validity, invert_disallowed_to_allowed -from ..io.base import IOStream -from ..runtime_logging import log_new_agent, logging_enabled -from .agent import Agent -from .chat import ChatResult -from .conversable_agent import ConversableAgent - -try: - # Non-core module - from .contrib.capabilities import transform_messages -except ImportError: - transform_messages = None - -logger = logging.getLogger(__name__) - - -@dataclass -class GroupChat: - """(In preview) A group chat class that contains the following data fields: - - agents: a list of participating agents. - - messages: a list of messages in the group chat. - - max_round: the maximum number of rounds. - - admin_name: the name of the admin agent if there is one. Default is "Admin". - KeyBoardInterrupt will make the admin agent take over. - - func_call_filter: whether to enforce function call filter. Default is True. 
- When set to True and when a message is a function call suggestion, - the next speaker will be chosen from an agent which contains the corresponding function name - in its `function_map`. - - select_speaker_message_template: customize the select speaker message (used in "auto" speaker selection), which appears first in the message context and generally includes the agent descriptions and list of agents. If the string contains "{roles}" it will replaced with the agent's and their role descriptions. If the string contains "{agentlist}" it will be replaced with a comma-separated list of agent names in square brackets. The default value is: - "You are in a role play game. The following roles are available: - {roles}. - Read the following conversation. - Then select the next role from {agentlist} to play. Only return the role." - - select_speaker_prompt_template: customize the select speaker prompt (used in "auto" speaker selection), which appears last in the message context and generally includes the list of agents and guidance for the LLM to select the next agent. If the string contains "{agentlist}" it will be replaced with a comma-separated list of agent names in square brackets. The default value is: - "Read the above conversation. Then select the next role from {agentlist} to play. Only return the role." - To ignore this prompt being used, set this to None. If set to None, ensure your instructions for selecting a speaker are in the select_speaker_message_template string. - - select_speaker_auto_multiple_template: customize the follow-up prompt used when selecting a speaker fails with a response that contains multiple agent names. This prompt guides the LLM to return just one agent name. Applies only to "auto" speaker selection method. If the string contains "{agentlist}" it will be replaced with a comma-separated list of agent names in square brackets. The default value is: - "You provided more than one name in your text, please return just the name of the next speaker. To determine the speaker use these prioritised rules: - 1. If the context refers to themselves as a speaker e.g. "As the..." , choose that speaker's name - 2. If it refers to the "next" speaker name, choose that name - 3. Otherwise, choose the first provided speaker's name in the context - The names are case-sensitive and should not be abbreviated or changed. - Respond with ONLY the name of the speaker and DO NOT provide a reason." - - select_speaker_auto_none_template: customize the follow-up prompt used when selecting a speaker fails with a response that contains no agent names. This prompt guides the LLM to return an agent name and provides a list of agent names. Applies only to "auto" speaker selection method. If the string contains "{agentlist}" it will be replaced with a comma-separated list of agent names in square brackets. The default value is: - "You didn't choose a speaker. As a reminder, to determine the speaker use these prioritised rules: - 1. If the context refers to themselves as a speaker e.g. "As the..." , choose that speaker's name - 2. If it refers to the "next" speaker name, choose that name - 3. Otherwise, choose the first provided speaker's name in the context - The names are case-sensitive and should not be abbreviated or changed. - The only names that are accepted are {agentlist}. - Respond with ONLY the name of the speaker and DO NOT provide a reason." - - speaker_selection_method: the method for selecting the next speaker. Default is "auto". 
- Could be any of the following (case insensitive), will raise ValueError if not recognized: - - "auto": the next speaker is selected automatically by LLM. - - "manual": the next speaker is selected manually by user input. - - "random": the next speaker is selected randomly. - - "round_robin": the next speaker is selected in a round robin fashion, i.e., iterating in the same order as provided in `agents`. - - a customized speaker selection function (Callable): the function will be called to select the next speaker. - The function should take the last speaker and the group chat as input and return one of the following: - 1. an `Agent` class, it must be one of the agents in the group chat. - 2. a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use. - 3. None, which would terminate the conversation gracefully. - ```python - def custom_speaker_selection_func( - last_speaker: Agent, groupchat: GroupChat - ) -> Union[Agent, str, None]: - ``` - - max_retries_for_selecting_speaker: the maximum number of times the speaker selection requery process will run. - If, during speaker selection, multiple agent names or no agent names are returned by the LLM as the next agent, it will be queried again up to the maximum number - of times until a single agent is returned or it exhausts the maximum attempts. - Applies only to "auto" speaker selection method. - Default is 2. - - select_speaker_transform_messages: (optional) the message transformations to apply to the nested select speaker agent-to-agent chat messages. - Takes a TransformMessages object, defaults to None and is only utilised when the speaker selection method is "auto". - - select_speaker_auto_verbose: whether to output the select speaker responses and selections - If set to True, the outputs from the two agents in the nested select speaker chat will be output, along with - whether the responses were successful, or not, in selecting an agent - Applies only to "auto" speaker selection method. - - allow_repeat_speaker: whether to allow the same speaker to speak consecutively. - Default is True, in which case all speakers are allowed to speak consecutively. - If `allow_repeat_speaker` is a list of Agents, then only those listed agents are allowed to repeat. - If set to False, then no speakers are allowed to repeat. - `allow_repeat_speaker` and `allowed_or_disallowed_speaker_transitions` are mutually exclusive. - - allowed_or_disallowed_speaker_transitions: dict. - The keys are source agents, and the values are agents that the key agent can/can't transit to, - depending on speaker_transitions_type. Default is None, which means all agents can transit to all other agents. - `allow_repeat_speaker` and `allowed_or_disallowed_speaker_transitions` are mutually exclusive. - - speaker_transitions_type: whether the speaker_transitions_type is a dictionary containing lists of allowed agents or disallowed agents. - "allowed" means the `allowed_or_disallowed_speaker_transitions` is a dictionary containing lists of allowed agents. - If set to "disallowed", then the `allowed_or_disallowed_speaker_transitions` is a dictionary containing lists of disallowed agents. - Must be supplied if `allowed_or_disallowed_speaker_transitions` is not None. - - enable_clear_history: enable possibility to clear history of messages for agents manually by providing - "clear history" phrase in user prompt. This is experimental feature. - See description of GroupChatManager.clear_agents_history function for more info. 
- - send_introductions: send a round of introductions at the start of the group chat, so agents know who they can speak to (default: False) - - role_for_select_speaker_messages: sets the role name for speaker selection when in 'auto' mode, typically 'user' or 'system'. (default: 'system') - """ - - agents: List[Agent] - messages: List[Dict] - max_round: int = 10 - admin_name: str = "Admin" - func_call_filter: bool = True - speaker_selection_method: Union[Literal["auto", "manual", "random", "round_robin"], Callable] = "auto" - max_retries_for_selecting_speaker: int = 2 - allow_repeat_speaker: Optional[Union[bool, List[Agent]]] = None - allowed_or_disallowed_speaker_transitions: Optional[Dict] = None - speaker_transitions_type: Literal["allowed", "disallowed", None] = None - enable_clear_history: bool = False - send_introductions: bool = False - select_speaker_message_template: str = """You are in a role play game. The following roles are available: - {roles}. - Read the following conversation. - Then select the next role from {agentlist} to play. Only return the role.""" - select_speaker_prompt_template: str = ( - "Read the above conversation. Then select the next role from {agentlist} to play. Only return the role." - ) - select_speaker_auto_multiple_template: str = """You provided more than one name in your text, please return just the name of the next speaker. To determine the speaker use these prioritised rules: - 1. If the context refers to themselves as a speaker e.g. "As the..." , choose that speaker's name - 2. If it refers to the "next" speaker name, choose that name - 3. Otherwise, choose the first provided speaker's name in the context - The names are case-sensitive and should not be abbreviated or changed. - Respond with ONLY the name of the speaker and DO NOT provide a reason.""" - select_speaker_auto_none_template: str = """You didn't choose a speaker. As a reminder, to determine the speaker use these prioritised rules: - 1. If the context refers to themselves as a speaker e.g. "As the..." , choose that speaker's name - 2. If it refers to the "next" speaker name, choose that name - 3. Otherwise, choose the first provided speaker's name in the context - The names are case-sensitive and should not be abbreviated or changed. - The only names that are accepted are {agentlist}. - Respond with ONLY the name of the speaker and DO NOT provide a reason.""" - select_speaker_transform_messages: Optional[Any] = None - select_speaker_auto_verbose: Optional[bool] = False - role_for_select_speaker_messages: Optional[str] = "system" - - _VALID_SPEAKER_SELECTION_METHODS = ["auto", "manual", "random", "round_robin"] - _VALID_SPEAKER_TRANSITIONS_TYPE = ["allowed", "disallowed", None] - - # Define a class attribute for the default introduction message - DEFAULT_INTRO_MSG = ( - "Hello everyone. We have assembled a great team today to answer questions and solve tasks. In attendance are:" - ) - - allowed_speaker_transitions_dict: Dict = field(init=False) - - def __post_init__(self): - # Post init steers clears of the automatically generated __init__ method from dataclass - - if self.allow_repeat_speaker is not None and not isinstance(self.allow_repeat_speaker, (bool, list)): - raise ValueError("GroupChat allow_repeat_speaker should be a bool or a list of Agents.") - - # Here, we create allowed_speaker_transitions_dict from the supplied allowed_or_disallowed_speaker_transitions and speaker_transitions_type, and lastly checks for validity. 
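A hedged construction example based on the fields documented above: a `GroupChat` with an explicit allowed-transition graph, which is mutually exclusive with `allow_repeat_speaker`. It assumes the pre-removal import paths and leaves the default "auto" speaker selection in place.

```python
from autogen import ConversableAgent, GroupChat  # assumed import paths for the modules shown above

planner = ConversableAgent("planner", llm_config=False, human_input_mode="NEVER")
coder = ConversableAgent("coder", llm_config=False, human_input_mode="NEVER")
reviewer = ConversableAgent("reviewer", llm_config=False, human_input_mode="NEVER")

group_chat = GroupChat(
    agents=[planner, coder, reviewer],
    messages=[],
    max_round=6,
    # Explicit transition graph; do not also set allow_repeat_speaker.
    allowed_or_disallowed_speaker_transitions={
        planner: [coder],
        coder: [reviewer],
        reviewer: [planner],
    },
    speaker_transitions_type="allowed",
)
```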
- - # Check input - if self.speaker_transitions_type is not None: - self.speaker_transitions_type = self.speaker_transitions_type.lower() - - if self.speaker_transitions_type not in self._VALID_SPEAKER_TRANSITIONS_TYPE: - raise ValueError( - f"GroupChat speaker_transitions_type is set to '{self.speaker_transitions_type}'. " - f"It should be one of {self._VALID_SPEAKER_TRANSITIONS_TYPE} (case insensitive). " - ) - - # If both self.allowed_or_disallowed_speaker_transitions is None and self.allow_repeat_speaker is None, set allow_repeat_speaker to True to ensure backward compatibility - # Discussed in https://github.com/microsoft/autogen/pull/857#discussion_r1451541204 - if self.allowed_or_disallowed_speaker_transitions is None and self.allow_repeat_speaker is None: - self.allow_repeat_speaker = True - - # self.allowed_or_disallowed_speaker_transitions and self.allow_repeat_speaker are mutually exclusive parameters. - # Discussed in https://github.com/microsoft/autogen/pull/857#discussion_r1451266661 - if self.allowed_or_disallowed_speaker_transitions is not None and self.allow_repeat_speaker is not None: - raise ValueError( - "Don't provide both allowed_or_disallowed_speaker_transitions and allow_repeat_speaker in group chat. " - "Please set one of them to None." - ) - - # Asks the user to specify whether the speaker_transitions_type is allowed or disallowed if speaker_transitions_type is supplied - # Discussed in https://github.com/microsoft/autogen/pull/857#discussion_r1451259524 - if self.allowed_or_disallowed_speaker_transitions is not None and self.speaker_transitions_type is None: - raise ValueError( - "GroupChat allowed_or_disallowed_speaker_transitions is not None, but speaker_transitions_type is None. " - "Please set speaker_transitions_type to either 'allowed' or 'disallowed'." 
- ) - - # Inferring self.allowed_speaker_transitions_dict - # Create self.allowed_speaker_transitions_dict if allowed_or_disallowed_speaker_transitions is None, using allow_repeat_speaker - if self.allowed_or_disallowed_speaker_transitions is None: - self.allowed_speaker_transitions_dict = {} - - # Create a fully connected allowed_speaker_transitions_dict not including self loops - for agent in self.agents: - self.allowed_speaker_transitions_dict[agent] = [ - other_agent for other_agent in self.agents if other_agent != agent - ] - - # If self.allow_repeat_speaker is True, add self loops to all agents - if self.allow_repeat_speaker is True: - for agent in self.agents: - self.allowed_speaker_transitions_dict[agent].append(agent) - - # Else if self.allow_repeat_speaker is a list of Agents, add self loops to the agents in the list - elif isinstance(self.allow_repeat_speaker, list): - for agent in self.allow_repeat_speaker: - self.allowed_speaker_transitions_dict[agent].append(agent) - - # Create self.allowed_speaker_transitions_dict if allowed_or_disallowed_speaker_transitions is not None, using allowed_or_disallowed_speaker_transitions - else: - # Process based on speaker_transitions_type - if self.speaker_transitions_type == "allowed": - self.allowed_speaker_transitions_dict = self.allowed_or_disallowed_speaker_transitions - else: - # Logic for processing disallowed allowed_or_disallowed_speaker_transitions to allowed_speaker_transitions_dict - self.allowed_speaker_transitions_dict = invert_disallowed_to_allowed( - self.allowed_or_disallowed_speaker_transitions, self.agents - ) - - # Check for validity - check_graph_validity( - allowed_speaker_transitions_dict=self.allowed_speaker_transitions_dict, - agents=self.agents, - ) - - # Check select speaker messages, prompts, roles, and retries have values - if self.select_speaker_message_template is None or len(self.select_speaker_message_template) == 0: - raise ValueError("select_speaker_message_template cannot be empty or None.") - - if self.select_speaker_prompt_template is not None and len(self.select_speaker_prompt_template) == 0: - self.select_speaker_prompt_template = None - - if self.role_for_select_speaker_messages is None or len(self.role_for_select_speaker_messages) == 0: - raise ValueError("role_for_select_speaker_messages cannot be empty or None.") - - if self.select_speaker_auto_multiple_template is None or len(self.select_speaker_auto_multiple_template) == 0: - raise ValueError("select_speaker_auto_multiple_template cannot be empty or None.") - - if self.select_speaker_auto_none_template is None or len(self.select_speaker_auto_none_template) == 0: - raise ValueError("select_speaker_auto_none_template cannot be empty or None.") - - if self.max_retries_for_selecting_speaker is None or len(self.role_for_select_speaker_messages) == 0: - raise ValueError("role_for_select_speaker_messages cannot be empty or None.") - - # Validate max select speakers retries - if self.max_retries_for_selecting_speaker is None or not isinstance( - self.max_retries_for_selecting_speaker, int - ): - raise ValueError("max_retries_for_selecting_speaker cannot be None or non-int") - elif self.max_retries_for_selecting_speaker < 0: - raise ValueError("max_retries_for_selecting_speaker must be greater than or equal to zero") - - # Load message transforms here (load once for the Group Chat so we don't have to re-initiate it and it maintains the cache across subsequent select speaker calls) - self._speaker_selection_transforms = None - if 
self.select_speaker_transform_messages is not None: - if transform_messages is not None: - if isinstance(self.select_speaker_transform_messages, transform_messages.TransformMessages): - self._speaker_selection_transforms = self.select_speaker_transform_messages - else: - raise ValueError("select_speaker_transform_messages must be None or MessageTransforms.") - else: - logger.warning( - "TransformMessages could not be loaded, the 'select_speaker_transform_messages' transform" - "will not apply." - ) - - # Validate select_speaker_auto_verbose - if self.select_speaker_auto_verbose is None or not isinstance(self.select_speaker_auto_verbose, bool): - raise ValueError("select_speaker_auto_verbose cannot be None or non-bool") - - @property - def agent_names(self) -> List[str]: - """Return the names of the agents in the group chat.""" - return [agent.name for agent in self.agents] - - def reset(self): - """Reset the group chat.""" - self.messages.clear() - - def append(self, message: Dict, speaker: Agent): - """Append a message to the group chat. - We cast the content to str here so that it can be managed by text-based - model. - """ - # set the name to speaker's name if the role is not function - # if the role is tool, it is OK to modify the name - if message["role"] != "function": - message["name"] = speaker.name - message["content"] = content_str(message["content"]) - self.messages.append(message) - - def agent_by_name( - self, name: str, recursive: bool = False, raise_on_name_conflict: bool = False - ) -> Optional[Agent]: - """Returns the agent with a given name. If recursive is True, it will search in nested teams.""" - agents = self.nested_agents() if recursive else self.agents - filtered_agents = [agent for agent in agents if agent.name == name] - - if raise_on_name_conflict and len(filtered_agents) > 1: - raise AgentNameConflict() - - return filtered_agents[0] if filtered_agents else None - - def nested_agents(self) -> List[Agent]: - """Returns all agents in the group chat manager.""" - agents = self.agents.copy() - for agent in agents: - if isinstance(agent, GroupChatManager): - # Recursive call for nested teams - agents.extend(agent.groupchat.nested_agents()) - return agents - - def next_agent(self, agent: Agent, agents: Optional[List[Agent]] = None) -> Agent: - """Return the next agent in the list.""" - if agents is None: - agents = self.agents - - # Ensure the provided list of agents is a subset of self.agents - if not set(agents).issubset(set(self.agents)): - raise UndefinedNextAgent() - - # What index is the agent? (-1 if not present) - idx = self.agent_names.index(agent.name) if agent.name in self.agent_names else -1 - - # Return the next agent - if agents == self.agents: - return agents[(idx + 1) % len(agents)] - else: - offset = idx + 1 - for i in range(len(self.agents)): - if self.agents[(offset + i) % len(self.agents)] in agents: - return self.agents[(offset + i) % len(self.agents)] - - # Explicitly handle cases where no valid next agent exists in the provided subset. - raise UndefinedNextAgent() - - def select_speaker_msg(self, agents: Optional[List[Agent]] = None) -> str: - """Return the system message for selecting the next speaker. 
This is always the *first* message in the context.""" - if agents is None: - agents = self.agents - - roles = self._participant_roles(agents) - agentlist = f"{[agent.name for agent in agents]}" - - return_msg = self.select_speaker_message_template.format(roles=roles, agentlist=agentlist) - return return_msg - - def select_speaker_prompt(self, agents: Optional[List[Agent]] = None) -> str: - """Return the floating system prompt selecting the next speaker. - This is always the *last* message in the context. - Will return None if the select_speaker_prompt_template is None.""" - - if self.select_speaker_prompt_template is None: - return None - - if agents is None: - agents = self.agents - - agentlist = f"{[agent.name for agent in agents]}" - - return_prompt = self.select_speaker_prompt_template.format(agentlist=agentlist) - return return_prompt - - def introductions_msg(self, agents: Optional[List[Agent]] = None) -> str: - """Return the system message for selecting the next speaker. This is always the *first* message in the context.""" - if agents is None: - agents = self.agents - - # Use the class attribute instead of a hardcoded string - intro_msg = self.DEFAULT_INTRO_MSG - participant_roles = self._participant_roles(agents) - - return f"{intro_msg}\n\n{participant_roles}" - - def manual_select_speaker(self, agents: Optional[List[Agent]] = None) -> Union[Agent, None]: - """Manually select the next speaker.""" - iostream = IOStream.get_default() - - if agents is None: - agents = self.agents - - iostream.print("Please select the next speaker from the following list:") - _n_agents = len(agents) - for i in range(_n_agents): - iostream.print(f"{i+1}: {agents[i].name}") - try_count = 0 - # Assume the user will enter a valid number within 3 tries, otherwise use auto selection to avoid blocking. - while try_count <= 3: - try_count += 1 - if try_count >= 3: - iostream.print(f"You have tried {try_count} times. The next speaker will be selected automatically.") - break - try: - i = iostream.input( - "Enter the number of the next speaker (enter nothing or `q` to use auto selection): " - ) - if i == "" or i == "q": - break - i = int(i) - if i > 0 and i <= _n_agents: - return agents[i - 1] - else: - raise ValueError - except ValueError: - iostream.print(f"Invalid input. Please enter a number between 1 and {_n_agents}.") - return None - - def random_select_speaker(self, agents: Optional[List[Agent]] = None) -> Union[Agent, None]: - """Randomly select the next speaker.""" - if agents is None: - agents = self.agents - return random.choice(agents) - - def _prepare_and_select_agents( - self, - last_speaker: Agent, - ) -> Tuple[Optional[Agent], List[Agent], Optional[List[Dict]]]: - # If self.speaker_selection_method is a callable, call it to get the next speaker. - # If self.speaker_selection_method is a string, return it. - speaker_selection_method = self.speaker_selection_method - if isinstance(self.speaker_selection_method, Callable): - selected_agent = self.speaker_selection_method(last_speaker, self) - if selected_agent is None: - raise NoEligibleSpeaker("Custom speaker selection function returned None. Terminating conversation.") - elif isinstance(selected_agent, Agent): - if selected_agent in self.agents: - return selected_agent, self.agents, None - else: - raise ValueError( - f"Custom speaker selection function returned an agent {selected_agent.name} not in the group chat." 
- ) - elif isinstance(selected_agent, str): - # If returned a string, assume it is a speaker selection method - speaker_selection_method = selected_agent - else: - raise ValueError( - f"Custom speaker selection function returned an object of type {type(selected_agent)} instead of Agent or str." - ) - - if speaker_selection_method.lower() not in self._VALID_SPEAKER_SELECTION_METHODS: - raise ValueError( - f"GroupChat speaker_selection_method is set to '{speaker_selection_method}'. " - f"It should be one of {self._VALID_SPEAKER_SELECTION_METHODS} (case insensitive). " - ) - - # If provided a list, make sure the agent is in the list - allow_repeat_speaker = ( - self.allow_repeat_speaker - if isinstance(self.allow_repeat_speaker, bool) or self.allow_repeat_speaker is None - else last_speaker in self.allow_repeat_speaker - ) - - agents = self.agents - n_agents = len(agents) - # Warn if GroupChat is underpopulated - if n_agents < 2: - raise ValueError( - f"GroupChat is underpopulated with {n_agents} agents. " - "Please add more agents to the GroupChat or use direct communication instead." - ) - elif n_agents == 2 and speaker_selection_method.lower() != "round_robin" and allow_repeat_speaker: - logger.warning( - f"GroupChat is underpopulated with {n_agents} agents. " - "Consider setting speaker_selection_method to 'round_robin' or allow_repeat_speaker to False, " - "or use direct communication, unless repeated speaker is desired." - ) - - if ( - self.func_call_filter - and self.messages - and ("function_call" in self.messages[-1] or "tool_calls" in self.messages[-1]) - ): - funcs = [] - if "function_call" in self.messages[-1]: - funcs += [self.messages[-1]["function_call"]["name"]] - if "tool_calls" in self.messages[-1]: - funcs += [ - tool["function"]["name"] for tool in self.messages[-1]["tool_calls"] if tool["type"] == "function" - ] - - # find agents with the right function_map which contains the function name - agents = [agent for agent in self.agents if agent.can_execute_function(funcs)] - if len(agents) == 1: - # only one agent can execute the function - return agents[0], agents, None - elif not agents: - # find all the agents with function_map - agents = [agent for agent in self.agents if agent.function_map] - if len(agents) == 1: - return agents[0], agents, None - elif not agents: - raise ValueError( - f"No agent can execute the function {', '.join(funcs)}. " - "Please check the function_map of the agents." 
- ) - # remove the last speaker from the list to avoid selecting the same speaker if allow_repeat_speaker is False - agents = [agent for agent in agents if agent != last_speaker] if allow_repeat_speaker is False else agents - - # Filter agents with allowed_speaker_transitions_dict - - is_last_speaker_in_group = last_speaker in self.agents - - # this condition means last_speaker is a sink in the graph, then no agents are eligible - if last_speaker not in self.allowed_speaker_transitions_dict and is_last_speaker_in_group: - raise NoEligibleSpeaker(f"Last speaker {last_speaker.name} is not in the allowed_speaker_transitions_dict.") - # last_speaker is not in the group, so all agents are eligible - elif last_speaker not in self.allowed_speaker_transitions_dict and not is_last_speaker_in_group: - graph_eligible_agents = [] - else: - # Extract agent names from the list of agents - graph_eligible_agents = [ - agent for agent in agents if agent in self.allowed_speaker_transitions_dict[last_speaker] - ] - - # If there is only one eligible agent, just return it to avoid the speaker selection prompt - if len(graph_eligible_agents) == 1: - return graph_eligible_agents[0], graph_eligible_agents, None - - # If there are no eligible agents, return None, which means all agents will be taken into consideration in the next step - if len(graph_eligible_agents) == 0: - graph_eligible_agents = None - - # Use the selected speaker selection method - select_speaker_messages = None - if speaker_selection_method.lower() == "manual": - selected_agent = self.manual_select_speaker(graph_eligible_agents) - elif speaker_selection_method.lower() == "round_robin": - selected_agent = self.next_agent(last_speaker, graph_eligible_agents) - elif speaker_selection_method.lower() == "random": - selected_agent = self.random_select_speaker(graph_eligible_agents) - else: # auto - selected_agent = None - select_speaker_messages = self.messages.copy() - # If last message is a tool call or function call, blank the call so the api doesn't throw - if select_speaker_messages[-1].get("function_call", False): - select_speaker_messages[-1] = dict(select_speaker_messages[-1], function_call=None) - if select_speaker_messages[-1].get("tool_calls", False): - select_speaker_messages[-1] = dict(select_speaker_messages[-1], tool_calls=None) - return selected_agent, graph_eligible_agents, select_speaker_messages - - def select_speaker(self, last_speaker: Agent, selector: ConversableAgent) -> Agent: - """Select the next speaker (with requery).""" - - # Prepare the list of available agents and select an agent if selection method allows (non-auto) - selected_agent, agents, messages = self._prepare_and_select_agents(last_speaker) - if selected_agent: - return selected_agent - elif self.speaker_selection_method == "manual": - # An agent has not been selected while in manual mode, so move to the next agent - return self.next_agent(last_speaker) - - # auto speaker selection with 2-agent chat - return self._auto_select_speaker(last_speaker, selector, messages, agents) - - async def a_select_speaker(self, last_speaker: Agent, selector: ConversableAgent) -> Agent: - """Select the next speaker (with requery), asynchronously.""" - - selected_agent, agents, messages = self._prepare_and_select_agents(last_speaker) - if selected_agent: - return selected_agent - elif self.speaker_selection_method == "manual": - # An agent has not been selected while in manual mode, so move to the next agent - return self.next_agent(last_speaker) - - # auto speaker selection with 
2-agent chat - return await self.a_auto_select_speaker(last_speaker, selector, messages, agents) - - def _finalize_speaker(self, last_speaker: Agent, final: bool, name: str, agents: Optional[List[Agent]]) -> Agent: - if not final: - # the LLM client is None, thus no reply is generated. Use round robin instead. - return self.next_agent(last_speaker, agents) - - # If exactly one agent is mentioned, use it. Otherwise, leave the OAI response unmodified - mentions = self._mentioned_agents(name, agents) - if len(mentions) == 1: - name = next(iter(mentions)) - else: - logger.warning( - f"GroupChat select_speaker failed to resolve the next speaker's name. This is because the speaker selection OAI call returned:\n{name}" - ) - - # Return the result - agent = self.agent_by_name(name) - return agent if agent else self.next_agent(last_speaker, agents) - - def _auto_select_speaker( - self, - last_speaker: Agent, - selector: ConversableAgent, - messages: Optional[List[Dict]], - agents: Optional[List[Agent]], - ) -> Agent: - """Selects next speaker for the "auto" speaker selection method. Utilises its own two-agent chat to determine the next speaker and supports requerying. - - Speaker selection for "auto" speaker selection method: - 1. Create a two-agent chat with a speaker selector agent and a speaker validator agent, like a nested chat - 2. Inject the group messages into the new chat - 3. Run the two-agent chat, evaluating the result of response from the speaker selector agent: - - If a single agent is provided then we return it and finish. If not, we add an additional message to this nested chat in an attempt to guide the LLM to a single agent response - 4. Chat continues until a single agent is nominated or there are no more attempts left - 5. If we run out of turns and no single agent can be determined, the next speaker in the list of agents is returned - - Args: - last_speaker Agent: The previous speaker in the group chat - selector ConversableAgent: - messages Optional[List[Dict]]: Current chat messages - agents Optional[List[Agent]]: Valid list of agents for speaker selection - - Returns: - Dict: a counter for mentioned agents. - """ - - # If no agents are passed in, assign all the group chat's agents - if agents is None: - agents = self.agents - - # The maximum number of speaker selection attempts (including requeries) - # is the initial speaker selection attempt plus the maximum number of retries. - # We track these and use them in the validation function as we can't - # access the max_turns from within validate_speaker_name. 
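As a rough, hypothetical illustration of the retry budget described above: the number of attempts is 1 + `max_retries_for_selecting_speaker`, and the nested selection chat is capped at `2 * max(1, max_attempts)` turns. The agent names below are assumptions.

```python
from autogen import ConversableAgent, GroupChat

agents = [
    ConversableAgent(name, llm_config=False, default_auto_reply=f"I'm the {name}.")
    for name in ("planner", "coder", "reviewer")
]

group_chat = GroupChat(
    agents=agents,
    messages=[],
    speaker_selection_method="auto",
    max_retries_for_selecting_speaker=2,  # 1 + 2 = 3 attempts, so the nested chat runs at most 2 * 3 = 6 turns
    select_speaker_auto_verbose=True,     # print the outcome of every selection attempt
)
```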
- max_attempts = 1 + self.max_retries_for_selecting_speaker - attempts_left = max_attempts - attempt = 0 - - # Registered reply function for checking_agent, checks the result of the response for agent names - def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Union[str, Dict, None]]: - # The number of retries left, starting at max_retries_for_selecting_speaker - nonlocal attempts_left - nonlocal attempt - - attempt = attempt + 1 - attempts_left = attempts_left - 1 - - return self._validate_speaker_name(recipient, messages, sender, config, attempts_left, attempt, agents) - - # Two-agent chat for speaker selection - - # Agent for checking the response from the speaker_select_agent - checking_agent = ConversableAgent("checking_agent", default_auto_reply=max_attempts) - - # Register the speaker validation function with the checking agent - checking_agent.register_reply( - [ConversableAgent, None], - reply_func=validate_speaker_name, # Validate each response - remove_other_reply_funcs=True, - ) - - # NOTE: Do we have a speaker prompt (select_speaker_prompt_template is not None)? If we don't, we need to feed in the last message to start the nested chat - - # Agent for selecting a single agent name from the response - speaker_selection_agent = ConversableAgent( - "speaker_selection_agent", - system_message=self.select_speaker_msg(agents), - chat_messages=( - {checking_agent: messages} - if self.select_speaker_prompt_template is not None - else {checking_agent: messages[:-1]} - ), - llm_config=selector.llm_config, - human_input_mode="NEVER", # Suppresses some extra terminal outputs, outputs will be handled by select_speaker_auto_verbose - ) - - # Create the starting message - if self.select_speaker_prompt_template is not None: - start_message = { - "content": self.select_speaker_prompt(agents), - "name": "checking_agent", - "override_role": self.role_for_select_speaker_messages, - } - else: - start_message = messages[-1] - - # Add the message transforms, if any, to the speaker selection agent - if self._speaker_selection_transforms is not None: - self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) - - # Run the speaker selection chat - result = checking_agent.initiate_chat( - speaker_selection_agent, - cache=None, # don't use caching for the speaker selection chat - message=start_message, - max_turns=2 - * max(1, max_attempts), # Limiting the chat to the number of attempts, including the initial one - clear_history=False, - silent=not self.select_speaker_auto_verbose, # Base silence on the verbose attribute - ) - - return self._process_speaker_selection_result(result, last_speaker, agents) - - async def a_auto_select_speaker( - self, - last_speaker: Agent, - selector: ConversableAgent, - messages: Optional[List[Dict]], - agents: Optional[List[Agent]], - ) -> Agent: - """(Asynchronous) Selects next speaker for the "auto" speaker selection method. Utilises its own two-agent chat to determine the next speaker and supports requerying. - - Speaker selection for "auto" speaker selection method: - 1. Create a two-agent chat with a speaker selector agent and a speaker validator agent, like a nested chat - 2. Inject the group messages into the new chat - 3. Run the two-agent chat, evaluating the result of response from the speaker selector agent: - - If a single agent is provided then we return it and finish. If not, we add an additional message to this nested chat in an attempt to guide the LLM to a single agent response - 4. 
Chat continues until a single agent is nominated or there are no more attempts left - 5. If we run out of turns and no single agent can be determined, the next speaker in the list of agents is returned - - Args: - last_speaker Agent: The previous speaker in the group chat - selector ConversableAgent: - messages Optional[List[Dict]]: Current chat messages - agents Optional[List[Agent]]: Valid list of agents for speaker selection - - Returns: - Dict: a counter for mentioned agents. - """ - - # If no agents are passed in, assign all the group chat's agents - if agents is None: - agents = self.agents - - # The maximum number of speaker selection attempts (including requeries) - # We track these and use them in the validation function as we can't - # access the max_turns from within validate_speaker_name - max_attempts = 1 + self.max_retries_for_selecting_speaker - attempts_left = max_attempts - attempt = 0 - - # Registered reply function for checking_agent, checks the result of the response for agent names - def validate_speaker_name(recipient, messages, sender, config) -> Tuple[bool, Union[str, Dict, None]]: - # The number of retries left, starting at max_retries_for_selecting_speaker - nonlocal attempts_left - nonlocal attempt - - attempt = attempt + 1 - attempts_left = attempts_left - 1 - - return self._validate_speaker_name(recipient, messages, sender, config, attempts_left, attempt, agents) - - # Two-agent chat for speaker selection - - # Agent for checking the response from the speaker_select_agent - checking_agent = ConversableAgent("checking_agent", default_auto_reply=max_attempts) - - # Register the speaker validation function with the checking agent - checking_agent.register_reply( - [ConversableAgent, None], - reply_func=validate_speaker_name, # Validate each response - remove_other_reply_funcs=True, - ) - - # NOTE: Do we have a speaker prompt (select_speaker_prompt_template is not None)? 
If we don't, we need to feed in the last message to start the nested chat - - # Agent for selecting a single agent name from the response - speaker_selection_agent = ConversableAgent( - "speaker_selection_agent", - system_message=self.select_speaker_msg(agents), - chat_messages={checking_agent: messages}, - llm_config=selector.llm_config, - human_input_mode="NEVER", # Suppresses some extra terminal outputs, outputs will be handled by select_speaker_auto_verbose - ) - - # Create the starting message - if self.select_speaker_prompt_template is not None: - start_message = { - "content": self.select_speaker_prompt(agents), - "override_role": self.role_for_select_speaker_messages, - } - else: - start_message = messages[-1] - - # Add the message transforms, if any, to the speaker selection agent - if self._speaker_selection_transforms is not None: - self._speaker_selection_transforms.add_to_agent(speaker_selection_agent) - - # Run the speaker selection chat - result = await checking_agent.a_initiate_chat( - speaker_selection_agent, - cache=None, # don't use caching for the speaker selection chat - message=start_message, - max_turns=2 - * max(1, max_attempts), # Limiting the chat to the number of attempts, including the initial one - clear_history=False, - silent=not self.select_speaker_auto_verbose, # Base silence on the verbose attribute - ) - - return self._process_speaker_selection_result(result, last_speaker, agents) - - def _validate_speaker_name( - self, recipient, messages, sender, config, attempts_left, attempt, agents - ) -> Tuple[bool, Union[str, Dict, None]]: - """Validates the speaker response for each round in the internal 2-agent - chat within the auto select speaker method. - - Used by auto_select_speaker and a_auto_select_speaker. - """ - - # Output the query and requery results - if self.select_speaker_auto_verbose: - iostream = IOStream.get_default() - - # Validate the speaker name selected - select_name = messages[-1]["content"].strip() - - mentions = self._mentioned_agents(select_name, agents) - - if len(mentions) == 1: - # Success on retry, we have just one name mentioned - selected_agent_name = next(iter(mentions)) - - # Add the selected agent to the response so we can return it - messages.append({"role": "user", "content": f"[AGENT SELECTED]{selected_agent_name}"}) - - if self.select_speaker_auto_verbose: - iostream.print( - colored( - f">>>>>>>> Select speaker attempt {attempt} of {attempt + attempts_left} successfully selected: {selected_agent_name}", - "green", - ), - flush=True, - ) - - elif len(mentions) > 1: - # More than one name on requery so add additional reminder prompt for next retry - - if self.select_speaker_auto_verbose: - iostream.print( - colored( - f">>>>>>>> Select speaker attempt {attempt} of {attempt + attempts_left} failed as it included multiple agent names.", - "red", - ), - flush=True, - ) - - if attempts_left: - # Message to return to the chat for the next attempt - agentlist = f"{[agent.name for agent in agents]}" - - return True, { - "content": self.select_speaker_auto_multiple_template.format(agentlist=agentlist), - "name": "checking_agent", - "override_role": self.role_for_select_speaker_messages, - } - else: - # Final failure, no attempts left - messages.append( - { - "role": "user", - "content": f"[AGENT SELECTION FAILED]Select speaker attempt #{attempt} of {attempt + attempts_left} failed as it returned multiple names.", - } - ) - - else: - # No names at all on requery so add additional reminder prompt for next retry - - if 
self.select_speaker_auto_verbose: - iostream.print( - colored( - f">>>>>>>> Select speaker attempt #{attempt} failed as it did not include any agent names.", - "red", - ), - flush=True, - ) - - if attempts_left: - # Message to return to the chat for the next attempt - agentlist = f"{[agent.name for agent in agents]}" - - return True, { - "content": self.select_speaker_auto_none_template.format(agentlist=agentlist), - "name": "checking_agent", - "override_role": self.role_for_select_speaker_messages, - } - else: - # Final failure, no attempts left - messages.append( - { - "role": "user", - "content": f"[AGENT SELECTION FAILED]Select speaker attempt #{attempt} of {attempt + attempts_left} failed as it did not include any agent names.", - } - ) - - return True, None - - def _process_speaker_selection_result(self, result, last_speaker: ConversableAgent, agents: Optional[List[Agent]]): - """Checks the result of the auto_select_speaker function, returning the - agent to speak. - - Used by auto_select_speaker and a_auto_select_speaker.""" - if len(result.chat_history) > 0: - # Use the final message, which will have the selected agent or reason for failure - final_message = result.chat_history[-1]["content"] - - if "[AGENT SELECTED]" in final_message: - # Have successfully selected an agent, return it - return self.agent_by_name(final_message.replace("[AGENT SELECTED]", "")) - - else: # "[AGENT SELECTION FAILED]" - # Failed to select an agent, so we'll select the next agent in the list - next_agent = self.next_agent(last_speaker, agents) - - # No agent, return the failed reason - return next_agent - - def _participant_roles(self, agents: List[Agent] = None) -> str: - # Default to all agents registered - if agents is None: - agents = self.agents - - roles = [] - for agent in agents: - if agent.description.strip() == "": - logger.warning( - f"The agent '{agent.name}' has an empty description, and may not work well with GroupChat." - ) - roles.append(f"{agent.name}: {agent.description}".strip()) - return "\n".join(roles) - - def _mentioned_agents(self, message_content: Union[str, List], agents: Optional[List[Agent]]) -> Dict: - """Counts the number of times each agent is mentioned in the provided message content. - Agent names will match under any of the following conditions (all case-sensitive): - - Exact name match - - If the agent name has underscores it will match with spaces instead (e.g. 'Story_writer' == 'Story writer') - - If the agent name has underscores it will match with '\\_' instead of '_' (e.g. 'Story_writer' == 'Story\\_writer') - - Args: - message_content (Union[str, List]): The content of the message, either as a single string or a list of strings. - agents (List[Agent]): A list of Agent objects, each having a 'name' attribute to be searched in the message content. - - Returns: - Dict: a counter for mentioned agents. 
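A small, self-contained demonstration of the matching rules listed above, using the same regex construction as `_mentioned_agents`; the agent name `Story_writer` is an illustrative assumption.

```python
import re

name = "Story_writer"
regex = (
    r"(?<=\W)("
    + re.escape(name)
    + r"|"
    + re.escape(name.replace("_", " "))
    + r"|"
    + re.escape(name.replace("_", r"\_"))
    + r")(?=\W)"
)

# Exact name, underscores-as-spaces, and escaped underscores all count as mentions.
for text in ("Next up: Story_writer.", "Next up: Story writer.", r"Next up: Story\_writer."):
    print(len(re.findall(regex, f" {text} ")))  # 1, 1, 1
```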
- """ - if agents is None: - agents = self.agents - - # Cast message content to str - if isinstance(message_content, dict): - message_content = message_content["content"] - message_content = content_str(message_content) - - mentions = dict() - for agent in agents: - # Finds agent mentions, taking word boundaries into account, - # accommodates escaping underscores and underscores as spaces - regex = ( - r"(?<=\W)(" - + re.escape(agent.name) - + r"|" - + re.escape(agent.name.replace("_", " ")) - + r"|" - + re.escape(agent.name.replace("_", r"\_")) - + r")(?=\W)" - ) - count = len(re.findall(regex, f" {message_content} ")) # Pad the message to help with matching - if count > 0: - mentions[agent.name] = count - return mentions - - -class GroupChatManager(ConversableAgent): - """(In preview) A chat manager agent that can manage a group chat of multiple agents.""" - - def __init__( - self, - groupchat: GroupChat, - name: Optional[str] = "chat_manager", - # unlimited consecutive auto reply by default - max_consecutive_auto_reply: Optional[int] = sys.maxsize, - human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER", - system_message: Optional[Union[str, List]] = "Group chat manager.", - silent: bool = False, - **kwargs, - ): - if ( - kwargs.get("llm_config") - and isinstance(kwargs["llm_config"], dict) - and (kwargs["llm_config"].get("functions") or kwargs["llm_config"].get("tools")) - ): - raise ValueError( - "GroupChatManager is not allowed to make function/tool calls. Please remove the 'functions' or 'tools' config in 'llm_config' you passed in." - ) - - super().__init__( - name=name, - max_consecutive_auto_reply=max_consecutive_auto_reply, - human_input_mode=human_input_mode, - system_message=system_message, - **kwargs, - ) - if logging_enabled(): - log_new_agent(self, locals()) - # Store groupchat - self._groupchat = groupchat - - self._last_speaker = None - self._silent = silent - - # Order of register_reply is important. - # Allow sync chat if initiated using initiate_chat - self.register_reply(Agent, GroupChatManager.run_chat, config=groupchat, reset_config=GroupChat.reset) - # Allow async chat if initiated using a_initiate_chat - self.register_reply( - Agent, - GroupChatManager.a_run_chat, - config=groupchat, - reset_config=GroupChat.reset, - ignore_async_in_sync_chat=True, - ) - - @property - def groupchat(self) -> GroupChat: - """Returns the group chat managed by the group chat manager.""" - return self._groupchat - - def chat_messages_for_summary(self, agent: Agent) -> List[Dict]: - """The list of messages in the group chat as a conversation to summarize. - The agent is ignored. - """ - return self._groupchat.messages - - def _prepare_chat( - self, - recipient: ConversableAgent, - clear_history: bool, - prepare_recipient: bool = True, - reply_at_receive: bool = True, - ) -> None: - super()._prepare_chat(recipient, clear_history, prepare_recipient, reply_at_receive) - - if clear_history: - self._groupchat.reset() - - for agent in self._groupchat.agents: - if (recipient != agent or prepare_recipient) and isinstance(agent, ConversableAgent): - agent._prepare_chat(self, clear_history, False, reply_at_receive) - - @property - def last_speaker(self) -> Agent: - """Return the agent who sent the last message to group chat manager. - - In a group chat, an agent will always send a message to the group chat manager, and the group chat manager will - send the message to all other agents in the group chat. 
So, when an agent receives a message, it will always be - from the group chat manager. With this property, the agent receiving the message can know who actually sent the - message. - - Example: - ```python - from autogen import ConversableAgent - from autogen import GroupChat, GroupChatManager - - - def print_messages(recipient, messages, sender, config): - # Print the message immediately - print( - f"Sender: {sender.name} | Recipient: {recipient.name} | Message: {messages[-1].get('content')}" - ) - print(f"Real Sender: {sender.last_speaker.name}") - assert sender.last_speaker.name in messages[-1].get("content") - return False, None # Required to ensure the agent communication flow continues - - - agent_a = ConversableAgent("agent A", default_auto_reply="I'm agent A.") - agent_b = ConversableAgent("agent B", default_auto_reply="I'm agent B.") - agent_c = ConversableAgent("agent C", default_auto_reply="I'm agent C.") - for agent in [agent_a, agent_b, agent_c]: - agent.register_reply( - [ConversableAgent, None], reply_func=print_messages, config=None - ) - group_chat = GroupChat( - [agent_a, agent_b, agent_c], - messages=[], - max_round=6, - speaker_selection_method="random", - allow_repeat_speaker=True, - ) - chat_manager = GroupChatManager(group_chat) - groupchat_result = agent_a.initiate_chat( - chat_manager, message="Hi, there, I'm agent A." - ) - ``` - """ - return self._last_speaker - - def run_chat( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[GroupChat] = None, - ) -> Tuple[bool, Optional[str]]: - """Run a group chat.""" - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - speaker = sender - groupchat = config - send_introductions = getattr(groupchat, "send_introductions", False) - silent = getattr(self, "_silent", False) - - if send_introductions: - # Broadcast the intro - intro = groupchat.introductions_msg() - for agent in groupchat.agents: - self.send(intro, agent, request_reply=False, silent=True) - # NOTE: We do not also append to groupchat.messages, - # since groupchat handles its own introductions - - if self.client_cache is not None: - for a in groupchat.agents: - a.previous_cache = a.client_cache - a.client_cache = self.client_cache - for i in range(groupchat.max_round): - self._last_speaker = speaker - groupchat.append(message, speaker) - # broadcast the message to all agents except the speaker - for agent in groupchat.agents: - if agent != speaker: - self.send(message, agent, request_reply=False, silent=True) - if self._is_termination_msg(message) or i == groupchat.max_round - 1: - # The conversation is over or it's the last round - break - try: - # select the next speaker - speaker = groupchat.select_speaker(speaker, self) - if not silent: - iostream = IOStream.get_default() - iostream.print(colored(f"\nNext speaker: {speaker.name}\n", "green"), flush=True) - # let the speaker speak - reply = speaker.generate_reply(sender=self) - except KeyboardInterrupt: - # let the admin agent speak if interrupted - if groupchat.admin_name in groupchat.agent_names: - # admin agent is one of the participants - speaker = groupchat.agent_by_name(groupchat.admin_name) - reply = speaker.generate_reply(sender=self) - else: - # admin agent is not found in the participants - raise - except NoEligibleSpeaker: - # No eligible speaker, terminate the conversation - break - - if reply is None: - # no reply is generated, exit the chat - break - - # check for "clear history" phrase in reply and activate 
clear history function if found - if ( - groupchat.enable_clear_history - and isinstance(reply, dict) - and reply["content"] - and "CLEAR HISTORY" in reply["content"].upper() - ): - reply["content"] = self.clear_agents_history(reply, groupchat) - - # The speaker sends the message without requesting a reply - speaker.send(reply, self, request_reply=False, silent=silent) - message = self.last_message(speaker) - if self.client_cache is not None: - for a in groupchat.agents: - a.client_cache = a.previous_cache - a.previous_cache = None - return True, None - - async def a_run_chat( - self, - messages: Optional[List[Dict]] = None, - sender: Optional[Agent] = None, - config: Optional[GroupChat] = None, - ): - """Run a group chat asynchronously.""" - if messages is None: - messages = self._oai_messages[sender] - message = messages[-1] - speaker = sender - groupchat = config - send_introductions = getattr(groupchat, "send_introductions", False) - silent = getattr(self, "_silent", False) - - if send_introductions: - # Broadcast the intro - intro = groupchat.introductions_msg() - for agent in groupchat.agents: - await self.a_send(intro, agent, request_reply=False, silent=True) - # NOTE: We do not also append to groupchat.messages, - # since groupchat handles its own introductions - - if self.client_cache is not None: - for a in groupchat.agents: - a.previous_cache = a.client_cache - a.client_cache = self.client_cache - for i in range(groupchat.max_round): - groupchat.append(message, speaker) - - if self._is_termination_msg(message): - # The conversation is over - break - - # broadcast the message to all agents except the speaker - for agent in groupchat.agents: - if agent != speaker: - await self.a_send(message, agent, request_reply=False, silent=True) - if i == groupchat.max_round - 1: - # the last round - break - try: - # select the next speaker - speaker = await groupchat.a_select_speaker(speaker, self) - # let the speaker speak - reply = await speaker.a_generate_reply(sender=self) - except KeyboardInterrupt: - # let the admin agent speak if interrupted - if groupchat.admin_name in groupchat.agent_names: - # admin agent is one of the participants - speaker = groupchat.agent_by_name(groupchat.admin_name) - reply = await speaker.a_generate_reply(sender=self) - else: - # admin agent is not found in the participants - raise - if reply is None: - break - # The speaker sends the message without requesting a reply - await speaker.a_send(reply, self, request_reply=False, silent=silent) - message = self.last_message(speaker) - if self.client_cache is not None: - for a in groupchat.agents: - a.client_cache = a.previous_cache - a.previous_cache = None - return True, None - - def resume( - self, - messages: Union[List[Dict], str], - remove_termination_string: Union[str, Callable[[str], str]] = None, - silent: Optional[bool] = False, - ) -> Tuple[ConversableAgent, Dict]: - """Resumes a group chat using the previous messages as a starting point. Requires the agents, group chat, and group chat manager to be established - as per the original group chat. - - Args: - - messages Union[List[Dict], str]: The content of the previous chat's messages, either as a Json string or a list of message dictionaries. - - remove_termination_string (str or function): Remove the termination string from the last message to prevent immediate termination - If a string is provided, this string will be removed from last message. - If a function is provided, the last message will be passed to this function. 
- - silent (bool or None): (Experimental) whether to print the messages for this conversation. Default is False. - - Returns: - - Tuple[ConversableAgent, Dict]: A tuple containing the last agent who spoke and their message - """ - - # Convert messages from string to messages list, if needed - if isinstance(messages, str): - messages = self.messages_from_string(messages) - elif isinstance(messages, list) and all(isinstance(item, dict) for item in messages): - messages = copy.deepcopy(messages) - else: - raise Exception("Messages is not of type str or List[Dict]") - - # Clean up the objects, ensuring there are no messages in the agents and group chat - - # Clear agent message history - for agent in self._groupchat.agents: - if isinstance(agent, ConversableAgent): - agent.clear_history() - - # Clear Manager message history - self.clear_history() - - # Clear GroupChat messages - self._groupchat.reset() - - # Validation of message and agents - - try: - self._valid_resume_messages(messages) - except: - raise - - # Load the messages into the group chat - for i, message in enumerate(messages): - if "name" in message: - message_speaker_agent = self._groupchat.agent_by_name(message["name"]) - else: - # If there's no name, assign the group chat manager (this is an indication the ChatResult messages was used instead of groupchat.messages as state) - message_speaker_agent = self - message["name"] = self.name - - # If it wasn't an agent speaking, it may be the manager - if not message_speaker_agent and message["name"] == self.name: - message_speaker_agent = self - - # Add previous messages to each agent (except the last message, as we'll kick off the conversation with it) - if i != len(messages) - 1: - for agent in self._groupchat.agents: - self.send(message, self._groupchat.agent_by_name(agent.name), request_reply=False, silent=True) - - # Add previous message to the new groupchat, if it's an admin message the name may not match so add the message directly - if message_speaker_agent: - self._groupchat.append(message, message_speaker_agent) - else: - self._groupchat.messages.append(message) - - # Last speaker agent - last_speaker_name = message["name"] - - # Last message to check for termination (we could avoid this by ignoring termination check for resume in the future) - last_message = message - - # Get last speaker as an agent - previous_last_agent = self._groupchat.agent_by_name(name=last_speaker_name) - - # If we didn't match a last speaker agent, we check that it's the group chat's admin name and assign the manager, if so - if not previous_last_agent and ( - last_speaker_name == self._groupchat.admin_name or last_speaker_name == self.name - ): - previous_last_agent = self - - # Termination removal and check - self._process_resume_termination(remove_termination_string, messages) - - if not silent: - iostream = IOStream.get_default() - iostream.print( - f"Prepared group chat with {len(messages)} messages, the last speaker is", - colored(last_speaker_name, "yellow"), - flush=True, - ) - - # Update group chat settings for resuming - self._groupchat.send_introductions = False - - return previous_last_agent, last_message - - async def a_resume( - self, - messages: Union[List[Dict], str], - remove_termination_string: Union[str, Callable[[str], str]] = None, - silent: Optional[bool] = False, - ) -> Tuple[ConversableAgent, Dict]: - """Resumes a group chat using the previous messages as a starting point, asynchronously. 
Requires the agents, group chat, and group chat manager to be established - as per the original group chat. - - Args: - - messages Union[List[Dict], str]: The content of the previous chat's messages, either as a Json string or a list of message dictionaries. - - remove_termination_string (str or function): Remove the termination string from the last message to prevent immediate termination - If a string is provided, this string will be removed from last message. - If a function is provided, the last message will be passed to this function, and the function returns the string after processing. - - silent (bool or None): (Experimental) whether to print the messages for this conversation. Default is False. - - Returns: - - Tuple[ConversableAgent, Dict]: A tuple containing the last agent who spoke and their message - """ - - # Convert messages from string to messages list, if needed - if isinstance(messages, str): - messages = self.messages_from_string(messages) - elif isinstance(messages, list) and all(isinstance(item, dict) for item in messages): - messages = copy.deepcopy(messages) - else: - raise Exception("Messages is not of type str or List[Dict]") - - # Clean up the objects, ensuring there are no messages in the agents and group chat - - # Clear agent message history - for agent in self._groupchat.agents: - if isinstance(agent, ConversableAgent): - agent.clear_history() - - # Clear Manager message history - self.clear_history() - - # Clear GroupChat messages - self._groupchat.reset() - - # Validation of message and agents - - try: - self._valid_resume_messages(messages) - except: - raise - - # Load the messages into the group chat - for i, message in enumerate(messages): - if "name" in message: - message_speaker_agent = self._groupchat.agent_by_name(message["name"]) - else: - # If there's no name, assign the group chat manager (this is an indication the ChatResult messages was used instead of groupchat.messages as state) - message_speaker_agent = self - message["name"] = self.name - - # If it wasn't an agent speaking, it may be the manager - if not message_speaker_agent and message["name"] == self.name: - message_speaker_agent = self - - # Add previous messages to each agent (except the last message, as we'll kick off the conversation with it) - if i != len(messages) - 1: - for agent in self._groupchat.agents: - await self.a_send( - message, self._groupchat.agent_by_name(agent.name), request_reply=False, silent=True - ) - - # Add previous message to the new groupchat, if it's an admin message the name may not match so add the message directly - if message_speaker_agent: - self._groupchat.append(message, message_speaker_agent) - else: - self._groupchat.messages.append(message) - - # Last speaker agent - last_speaker_name = message["name"] - - # Last message to check for termination (we could avoid this by ignoring termination check for resume in the future) - last_message = message - - # Get last speaker as an agent - previous_last_agent = self._groupchat.agent_by_name(name=last_speaker_name) - - # If we didn't match a last speaker agent, we check that it's the group chat's admin name and assign the manager, if so - if not previous_last_agent and ( - last_speaker_name == self._groupchat.admin_name or last_speaker_name == self.name - ): - previous_last_agent = self - - # Termination removal and check - self._process_resume_termination(remove_termination_string, messages) - - if not silent: - iostream = IOStream.get_default() - iostream.print( - f"Prepared group chat with {len(messages)} 
messages, the last speaker is", - colored(last_speaker_name, "yellow"), - flush=True, - ) - - # Update group chat settings for resuming - self._groupchat.send_introductions = False - - return previous_last_agent, last_message - - def _valid_resume_messages(self, messages: List[Dict]): - """Validates the messages used for resuming - - args: - messages (List[Dict]): list of messages to resume with - - returns: - - bool: Whether they are valid for resuming - """ - # Must have messages to start with, otherwise they should run run_chat - if not messages: - raise Exception( - "Cannot resume group chat as no messages were provided. Use GroupChatManager.run_chat or ConversableAgent.initiate_chat to start a new chat." - ) - - # Check that all agents in the chat messages exist in the group chat - for message in messages: - if message.get("name"): - if ( - not self._groupchat.agent_by_name(message["name"]) - and not message["name"] == self._groupchat.admin_name # ignore group chat's name - and not message["name"] == self.name # ignore group chat manager's name - ): - raise Exception(f"Agent name in message doesn't exist as agent in group chat: {message['name']}") - - def _process_resume_termination( - self, remove_termination_string: Union[str, Callable[[str], str]], messages: List[Dict] - ): - """Removes termination string, if required, and checks if termination may occur. - - args: - remove_termination_string (str or function): Remove the termination string from the last message to prevent immediate termination - If a string is provided, this string will be removed from last message. - If a function is provided, the last message will be passed to this function, and the function returns the string after processing. - - returns: - None - """ - - last_message = messages[-1] - - # Replace any given termination string in the last message - if isinstance(remove_termination_string, str): - - def _remove_termination_string(content: str) -> str: - return content.replace(remove_termination_string, "") - - else: - _remove_termination_string = remove_termination_string - - if _remove_termination_string: - if messages[-1].get("content"): - messages[-1]["content"] = _remove_termination_string(messages[-1]["content"]) - - # Check if the last message meets termination (if it has one) - if self._is_termination_msg: - if self._is_termination_msg(last_message): - logger.warning("WARNING: Last message meets termination criteria and this may terminate the chat.") - - def messages_from_string(self, message_string: str) -> List[Dict]: - """Reads the saved state of messages in Json format for resume and returns as a messages list - - args: - - message_string: Json string, the saved state - - returns: - - List[Dict]: List of messages - """ - try: - state = json.loads(message_string) - except json.JSONDecodeError: - raise Exception("Messages string is not a valid JSON string") - - return state - - def messages_to_string(self, messages: List[Dict]) -> str: - """Converts the provided messages into a Json string that can be used for resuming the chat. - The state is made up of a list of messages - - args: - - messages (List[Dict]): set of messages to convert to a string - - returns: - - str: Json representation of the messages which can be persisted for resuming later - """ - - return json.dumps(messages) - - def _raise_exception_on_async_reply_functions(self) -> None: - """Raise an exception if any async reply functions are registered. - - Raises: - RuntimeError: if any async reply functions are registered. 
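A hedged sketch of the save-and-resume flow implemented by `messages_to_string` and `resume` above, assuming the agents, group chat, and manager are re-created exactly as in the original chat; the JSON state below is hand-written purely for illustration (normally it comes from `messages_to_string`).

```python
from autogen import ConversableAgent, GroupChat, GroupChatManager

agent_a = ConversableAgent("agent_A", llm_config=False, default_auto_reply="I'm agent A.")
agent_b = ConversableAgent("agent_B", llm_config=False, default_auto_reply="I'm agent B.")

group_chat = GroupChat(agents=[agent_a, agent_b], messages=[], max_round=4, speaker_selection_method="round_robin")
manager = GroupChatManager(groupchat=group_chat, llm_config=False)

# Previously saved state, e.g. produced by manager.messages_to_string(group_chat.messages)
previous_state = (
    '[{"name": "agent_A", "role": "user", "content": "Hello agent B."},'
    ' {"name": "agent_B", "role": "user", "content": "Hello agent A. TERMINATE"}]'
)

# Rehydrate the group chat and strip the termination string so the chat can continue.
last_agent, last_message = manager.resume(messages=previous_state, remove_termination_string="TERMINATE")

# Continue from where the chat left off, keeping the restored history.
result = last_agent.initiate_chat(manager, message=last_message, clear_history=False)
```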
- """ - super()._raise_exception_on_async_reply_functions() - - for agent in self._groupchat.agents: - agent._raise_exception_on_async_reply_functions() - - def clear_agents_history(self, reply: dict, groupchat: GroupChat) -> str: - """Clears history of messages for all agents or a selected one. Can preserve a selected number of last messages. - This function is called when the user manually provides the "clear history" phrase in their reply. - When "clear history" is provided, the history of messages for all agents is cleared. - When "clear history <agent_name>" is provided, the history of messages for the selected agent is cleared. - When "clear history <nr_messages_to_preserve>" is provided, the history of messages for all agents is cleared - except the last <nr_messages_to_preserve> messages. - When "clear history <agent_name> <nr_messages_to_preserve>" is provided, the history of messages for the selected - agent is cleared except the last <nr_messages_to_preserve> messages. - The phrase "clear history" and its optional arguments are cut out from the reply before it is passed to the chat. - - Args: - reply (dict): reply message dict to analyze. - groupchat (GroupChat): GroupChat object. - - Returns: - str: the reply content with the "clear history" command and its arguments removed. - """ - iostream = IOStream.get_default() - - reply_content = reply["content"] - # Split the reply into words - words = reply_content.split() - # Find the position of "clear" to determine where to start processing - clear_word_index = next(i for i in reversed(range(len(words))) if words[i].upper() == "CLEAR") - # Extract potential agent name and steps - words_to_check = words[clear_word_index + 2 : clear_word_index + 4] - nr_messages_to_preserve = None - nr_messages_to_preserve_provided = False - agent_to_memory_clear = None - - for word in words_to_check: - if word.isdigit(): - nr_messages_to_preserve = int(word) - nr_messages_to_preserve_provided = True - elif word[:-1].isdigit(): # for the case when number of messages is followed by dot or other sign - nr_messages_to_preserve = int(word[:-1]) - nr_messages_to_preserve_provided = True - else: - for agent in groupchat.agents: - if agent.name == word: - agent_to_memory_clear = agent - break - elif agent.name == word[:-1]: # for the case when agent name is followed by dot or other sign - agent_to_memory_clear = agent - break - # preserve last tool call message if clear history called inside of tool response - if "tool_responses" in reply and not nr_messages_to_preserve: - nr_messages_to_preserve = 1 - logger.warning( - "The last tool call message will be saved to prevent errors caused by tool response without tool call." - ) - # clear history - if agent_to_memory_clear: - if nr_messages_to_preserve: - iostream.print( - f"Clearing history for {agent_to_memory_clear.name} except last {nr_messages_to_preserve} messages."
- ) - else: - iostream.print(f"Clearing history for {agent_to_memory_clear.name}.") - agent_to_memory_clear.clear_history(nr_messages_to_preserve=nr_messages_to_preserve) - else: - if nr_messages_to_preserve: - iostream.print(f"Clearing history for all agents except last {nr_messages_to_preserve} messages.") - # clearing history for groupchat here - temp = groupchat.messages[-nr_messages_to_preserve:] - groupchat.messages.clear() - groupchat.messages.extend(temp) - else: - iostream.print("Clearing history for all agents.") - # clearing history for groupchat here - groupchat.messages.clear() - # clearing history for agents - for agent in groupchat.agents: - agent.clear_history(nr_messages_to_preserve=nr_messages_to_preserve) - - # Reconstruct the reply without the "clear history" command and parameters - skip_words_number = 2 + int(bool(agent_to_memory_clear)) + int(nr_messages_to_preserve_provided) - reply_content = " ".join(words[:clear_word_index] + words[clear_word_index + skip_words_number :]) - - return reply_content diff --git a/autogen/agentchat/user_proxy_agent.py b/autogen/agentchat/user_proxy_agent.py deleted file mode 100644 index d50e4d8b89c5..000000000000 --- a/autogen/agentchat/user_proxy_agent.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Callable, Dict, List, Literal, Optional, Union - -from ..runtime_logging import log_new_agent, logging_enabled -from .conversable_agent import ConversableAgent - - -class UserProxyAgent(ConversableAgent): - """(In preview) A proxy agent for the user, that can execute code and provide feedback to the other agents. - - UserProxyAgent is a subclass of ConversableAgent configured with `human_input_mode` to ALWAYS - and `llm_config` to False. By default, the agent will prompt for human input every time a message is received. - Code execution is enabled by default. LLM-based auto reply is disabled by default. - To modify auto reply, register a method with [`register_reply`](conversable_agent#register_reply). - To modify the way to get human input, override `get_human_input` method. - To modify the way to execute code blocks, single code block, or function call, override `execute_code_blocks`, - `run_code`, and `execute_function` methods respectively. 
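A minimal, hypothetical example of the proxy described above: never ask for human input, execute generated code locally (Docker disabled), and stop when the other side says "TERMINATE". The `work_dir` value is an assumption.

```python
from autogen import UserProxyAgent

user_proxy = UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=5,
    is_termination_msg=lambda msg: "TERMINATE" in (msg.get("content") or ""),
    code_execution_config={"work_dir": "coding", "use_docker": False},
)
```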
- """ - - # Default UserProxyAgent.description values, based on human_input_mode - DEFAULT_USER_PROXY_AGENT_DESCRIPTIONS = { - "ALWAYS": "An attentive HUMAN user who can answer questions about the task, and can perform tasks such as running Python code or inputting command line commands at a Linux terminal and reporting back the execution results.", - "TERMINATE": "A user that can run Python code or input command line commands at a Linux terminal and report back the execution results.", - "NEVER": "A computer terminal that performs no other action than running Python scripts (provided to it quoted in ```python code blocks), or sh shell scripts (provided to it quoted in ```sh code blocks).", - } - - def __init__( - self, - name: str, - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, - human_input_mode: Literal["ALWAYS", "TERMINATE", "NEVER"] = "ALWAYS", - function_map: Optional[Dict[str, Callable]] = None, - code_execution_config: Union[Dict, Literal[False]] = {}, - default_auto_reply: Optional[Union[str, Dict, None]] = "", - llm_config: Optional[Union[Dict, Literal[False]]] = False, - system_message: Optional[Union[str, List]] = "", - description: Optional[str] = None, - **kwargs, - ): - """ - Args: - name (str): name of the agent. - is_termination_msg (function): a function that takes a message in the form of a dictionary - and returns a boolean value indicating if this received message is a termination message. - The dict can contain the following keys: "content", "role", "name", "function_call". - max_consecutive_auto_reply (int): the maximum number of consecutive auto replies. - default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case). - The limit only plays a role when human_input_mode is not "ALWAYS". - human_input_mode (str): whether to ask for human inputs every time a message is received. - Possible values are "ALWAYS", "TERMINATE", "NEVER". - (1) When "ALWAYS", the agent prompts for human input every time a message is received. - Under this mode, the conversation stops when the human input is "exit", - or when is_termination_msg is True and there is no human input. - (2) When "TERMINATE", the agent only prompts for human input only when a termination message is received or - the number of auto reply reaches the max_consecutive_auto_reply. - (3) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops - when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True. - function_map (dict[str, callable]): Mapping function names (passed to openai) to callable functions. - code_execution_config (dict or False): config for the code execution. - To disable code execution, set to False. Otherwise, set to a dictionary with the following keys: - - work_dir (Optional, str): The working directory for the code execution. - If None, a default working directory will be used. - The default working directory is the "extensions" directory under - "path_to_autogen". - - use_docker (Optional, list, str or bool): The docker image to use for code execution. - Default is True, which means the code will be executed in a docker container. A default list of images will be used. - If a list or a str of image name(s) is provided, the code will be executed in a docker container - with the first image successfully pulled. - If False, the code will be executed in the current environment. 
- We strongly recommend using docker for code execution. - - timeout (Optional, int): The maximum execution time in seconds. - - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1. - default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated. - llm_config (dict or False or None): llm inference configuration. - Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) - for available options. - Default to False, which disables llm-based auto reply. - When set to None, will use self.DEFAULT_CONFIG, which defaults to False. - system_message (str or List): system message for ChatCompletion inference. - Only used when llm_config is not False. Use it to reprogram the agent. - description (str): a short description of the agent. This description is used by other agents - (e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message) - **kwargs (dict): Please refer to other kwargs in - [ConversableAgent](conversable_agent#__init__). - """ - super().__init__( - name=name, - system_message=system_message, - is_termination_msg=is_termination_msg, - max_consecutive_auto_reply=max_consecutive_auto_reply, - human_input_mode=human_input_mode, - function_map=function_map, - code_execution_config=code_execution_config, - llm_config=llm_config, - default_auto_reply=default_auto_reply, - description=( - description if description is not None else self.DEFAULT_USER_PROXY_AGENT_DESCRIPTIONS[human_input_mode] - ), - **kwargs, - ) - - if logging_enabled(): - log_new_agent(self, locals()) diff --git a/autogen/agentchat/utils.py b/autogen/agentchat/utils.py deleted file mode 100644 index b32c2f5f0a07..000000000000 --- a/autogen/agentchat/utils.py +++ /dev/null @@ -1,201 +0,0 @@ -import re -from typing import Any, Callable, Dict, List, Union - -from .agent import Agent - - -def consolidate_chat_info(chat_info, uniform_sender=None) -> None: - if isinstance(chat_info, dict): - chat_info = [chat_info] - for c in chat_info: - if uniform_sender is None: - assert "sender" in c, "sender must be provided." - sender = c["sender"] - else: - sender = uniform_sender - assert "recipient" in c, "recipient must be provided." - summary_method = c.get("summary_method") - assert ( - summary_method is None - or isinstance(summary_method, Callable) - or summary_method in ("last_msg", "reflection_with_llm") - ), "summary_method must be a string chosen from 'reflection_with_llm' or 'last_msg' or a callable, or None." - if summary_method == "reflection_with_llm": - assert ( - sender.client is not None or c["recipient"].client is not None - ), "llm client must be set in either the recipient or sender when summary_method is reflection_with_llm." - - -def gather_usage_summary(agents: List[Agent]) -> Dict[Dict[str, Dict], Dict[str, Dict]]: - r"""Gather usage summary from all agents. - - Args: - agents: (list): List of agents. - - Returns: - dictionary: A dictionary containing two keys: - - "usage_including_cached_inference": Cost information on the total usage, including the tokens in cached inference. - - "usage_excluding_cached_inference": Cost information on the usage of tokens, excluding the tokens in cache. No larger than "usage_including_cached_inference". 
- - Example: - - ```python - { - "usage_including_cached_inference" : { - "total_cost": 0.0006090000000000001, - "gpt-35-turbo": { - "cost": 0.0006090000000000001, - "prompt_tokens": 242, - "completion_tokens": 123, - "total_tokens": 365 - }, - }, - - "usage_excluding_cached_inference" : { - "total_cost": 0.0006090000000000001, - "gpt-35-turbo": { - "cost": 0.0006090000000000001, - "prompt_tokens": 242, - "completion_tokens": 123, - "total_tokens": 365 - }, - } - } - ``` - - Note: - - If none of the agents incurred any cost (not having a client), then the usage_including_cached_inference and usage_excluding_cached_inference will be `{'total_cost': 0}`. - """ - - def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, Any]) -> None: - if agent_summary is None: - return - usage_summary["total_cost"] += agent_summary.get("total_cost", 0) - for model, data in agent_summary.items(): - if model != "total_cost": - if model not in usage_summary: - usage_summary[model] = data.copy() - else: - usage_summary[model]["cost"] += data.get("cost", 0) - usage_summary[model]["prompt_tokens"] += data.get("prompt_tokens", 0) - usage_summary[model]["completion_tokens"] += data.get("completion_tokens", 0) - usage_summary[model]["total_tokens"] += data.get("total_tokens", 0) - - usage_including_cached_inference = {"total_cost": 0} - usage_excluding_cached_inference = {"total_cost": 0} - - for agent in agents: - if getattr(agent, "client", None): - aggregate_summary(usage_including_cached_inference, agent.client.total_usage_summary) - aggregate_summary(usage_excluding_cached_inference, agent.client.actual_usage_summary) - - return { - "usage_including_cached_inference": usage_including_cached_inference, - "usage_excluding_cached_inference": usage_excluding_cached_inference, - } - - -def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Dict[str, str]]]: - """Parses HTML style tags from message contents. - - The parsing is done by looking for patterns in the text that match the format of HTML tags. The tag to be parsed is - specified as an argument to the function. The function looks for this tag in the text and extracts its content. The - content of a tag is everything that is inside the tag, between the opening and closing angle brackets. The content - can be a single string or a set of attribute-value pairs. - - Examples: - -> [{"tag": "img", "attr": {"src": "http://example.com/image.png"}, "match": re.Match}] -
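-    To make the usage aggregation above concrete, here is a self-contained sketch. The stand-in agent objects are hypothetical and merely expose the `client` attributes that `gather_usage_summary` reads, and it assumes a pyautogen version that still ships `autogen/agentchat/utils.py` (the module removed in this diff):
-
-    ```python
-    from types import SimpleNamespace
-
-    from autogen.agentchat.utils import gather_usage_summary
-
-    usage = {
-        "total_cost": 0.0006,
-        "gpt-35-turbo": {"cost": 0.0006, "prompt_tokens": 242, "completion_tokens": 123, "total_tokens": 365},
-    }
-
-    # Stand-in "agents": agent_a also incurred actual (non-cached) usage; agent_b only hit the cache.
-    agent_a = SimpleNamespace(client=SimpleNamespace(total_usage_summary=usage, actual_usage_summary=usage))
-    agent_b = SimpleNamespace(client=SimpleNamespace(total_usage_summary=usage, actual_usage_summary=None))
-
-    report = gather_usage_summary([agent_a, agent_b])
-    print(report["usage_including_cached_inference"]["total_cost"])  # 0.0012 -> both agents counted
-    print(report["usage_excluding_cached_inference"]["total_cost"])  # 0.0006 -> agent_b reported no actual usage
-    ```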