From c17067a0fbb0e174d43fdad9d9da1fdab9999685 Mon Sep 17 00:00:00 2001 From: Ben Constable Date: Wed, 19 Feb 2025 13:51:24 +0000 Subject: [PATCH] Add Unit Tests for Image Processing + Page Number Tracking --- .github/workflows/ci-checks.yaml | 26 + .pre-commit-config.yaml | 1 + .../src/deploy_ai_search_indexes/ai_search.py | 36 +- .../image_processing.py | 44 +- image_processing/.coveragerc | 11 + image_processing/pyproject.toml | 5 + image_processing/pytest.ini | 2 + .../src/image_processing/__init__.py | 0 .../src/image_processing/layout_analysis.py | 42 +- .../src/image_processing/layout_holders.py | 22 +- .../src/image_processing/mark_up_cleaner.py | 62 ++- .../src/image_processing/requirements.txt | 14 +- .../image_processing/semantic_text_chunker.py | 53 +- .../tests/image_processing/__init__.py | 0 .../image_processing/test_figure_analysis.py | 298 +++++++++++ .../image_processing/test_layout_analysis.py | 493 ++++++++++++++++++ .../test_layout_and_figure_merger.py | 114 ++++ .../image_processing/test_layout_holders.py | 107 ++++ .../image_processing/test_mark_up_cleaner.py | 249 +++++++++ .../test_semantic_text_chunker.py | 355 +++++++++++++ pyproject.toml | 5 + uv.lock | 395 ++++++++++---- 22 files changed, 2160 insertions(+), 174 deletions(-) create mode 100644 image_processing/.coveragerc create mode 100644 image_processing/pytest.ini create mode 100644 image_processing/src/image_processing/__init__.py create mode 100644 image_processing/tests/image_processing/__init__.py create mode 100644 image_processing/tests/image_processing/test_figure_analysis.py create mode 100644 image_processing/tests/image_processing/test_layout_analysis.py create mode 100644 image_processing/tests/image_processing/test_layout_and_figure_merger.py create mode 100644 image_processing/tests/image_processing/test_layout_holders.py create mode 100644 image_processing/tests/image_processing/test_mark_up_cleaner.py create mode 100644 
image_processing/tests/image_processing/test_semantic_text_chunker.py diff --git a/.github/workflows/ci-checks.yaml b/.github/workflows/ci-checks.yaml index e8c59ffb..3e244d52 100644 --- a/.github/workflows/ci-checks.yaml +++ b/.github/workflows/ci-checks.yaml @@ -36,3 +36,29 @@ jobs: - name: Run pre-commit run: uv run pre-commit run --all-files + + job-image-processing-unit-tests: + name: Image Processing Unit Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: ${{ env.MIN_PYTHON_VERSION }} + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Install the project + run: uv sync + working-directory: image_processing + + - name: Run PyTest + run: uv run pytest --cov=. --cov-config=.coveragerc + working-directory: image_processing diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96101597..563d8062 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,7 @@ repos: # Python checks - id: name-tests-test + args: [--pytest-test-first] # JSON files - id: pretty-format-json diff --git a/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py index 44be7f0a..8a872f89 100644 --- a/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py @@ -219,7 +219,11 @@ def get_mark_up_cleaner_skill(self, chunk_by_page: False) -> WebApiSkill: mark_up_cleaner_context = "/document/page_wise_layout/*" inputs = [ InputFieldMappingEntry( - name="chunk", source="/document/page_wise_layout/*/merged_content" + name="mark_up", source="/document/page_wise_layout/*/merged_content" + ), + InputFieldMappingEntry( + name="page_number", + source="/document/page_wise_layout/*/page_number", ), InputFieldMappingEntry( name="figures", @@ 
-230,7 +234,10 @@ def get_mark_up_cleaner_skill(self, chunk_by_page: False) -> WebApiSkill: mark_up_cleaner_context = "/document/chunk_mark_ups/*" inputs = [ InputFieldMappingEntry( - name="chunk", source="/document/chunk_mark_ups/*" + name="mark_up", source="/document/chunk_mark_ups/*/mark_up" + ), + InputFieldMappingEntry( + name="page_number", source="/document/chunk_mark_ups/*/page_number" ), InputFieldMappingEntry( name="figures", source="/document/layout/figures/*/updated_figure" @@ -238,12 +245,15 @@ def get_mark_up_cleaner_skill(self, chunk_by_page: False) -> WebApiSkill: ] mark_up_cleaner_skill_outputs = [ - OutputFieldMappingEntry(name="chunk_cleaned", target_name="chunk_cleaned"), OutputFieldMappingEntry( - name="chunk_sections", target_name="chunk_sections" + name="cleaned_text", target_name="final_cleaned_text" + ), + OutputFieldMappingEntry(name="sections", target_name="final_sections"), + OutputFieldMappingEntry(name="mark_up", target_name="final_mark_up"), + OutputFieldMappingEntry(name="figures", target_name="final_chunk_figures"), + OutputFieldMappingEntry( + name="page_number", target_name="final_page_number" ), - OutputFieldMappingEntry(name="chunk_mark_up", target_name="chunk_mark_up"), - OutputFieldMappingEntry(name="chunk_figures", target_name="chunk_figures"), ] mark_up_cleaner_skill = WebApiSkill( @@ -302,7 +312,11 @@ def get_semantic_chunker_skill( semantic_text_chunker_skill_inputs = [ InputFieldMappingEntry( name="content", source="/document/layout_merged_content" - ) + ), + InputFieldMappingEntry( + name="per_page_starting_sentences", + source="/document/per_page_starting_sentences", + ), ] semantic_text_chunker_skill_outputs = [ @@ -368,7 +382,13 @@ def get_layout_analysis_skill( ) ] else: - output = [OutputFieldMappingEntry(name="layout", target_name="layout")] + output = [ + OutputFieldMappingEntry(name="layout", target_name="layout"), + OutputFieldMappingEntry( + name="per_page_starting_sentences", + 
target_name="per_page_starting_sentences", + ), + ] layout_analysis_skill = WebApiSkill( name="Layout Analysis Skill", diff --git a/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py index eb11fba6..b1f875b4 100644 --- a/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py @@ -81,6 +81,13 @@ def get_index_fields(self) -> list[SearchableField]: type=SearchFieldDataType.String, collection=True, ), + SimpleField( + name="PageNumber", + type=SearchFieldDataType.Int64, + sortable=True, + filterable=True, + facetable=True, + ), SearchField( name="ChunkEmbedding", type=SearchFieldDataType.Collection(SearchFieldDataType.Single), @@ -137,19 +144,6 @@ def get_index_fields(self) -> list[SearchableField]: ), ] - if self.enable_page_by_chunking: - fields.extend( - [ - SimpleField( - name="PageNumber", - type=SearchFieldDataType.Int64, - sortable=True, - filterable=True, - facetable=True, - ) - ] - ) - return fields def get_semantic_search(self) -> SemanticSearch: @@ -194,11 +188,12 @@ def get_skills(self) -> list: if self.enable_page_by_chunking: embedding_skill = self.get_vector_skill( "/document/page_wise_layout/*", - "/document/page_wise_layout/*/chunk_cleaned", + "/document/page_wise_layout/*/final_cleaned_text", ) else: embedding_skill = self.get_vector_skill( - "/document/chunk_mark_ups/*", "/document/chunk_mark_ups/*/chunk_cleaned" + "/document/chunk_mark_ups/*", + "/document/chunk_mark_ups/*/final_cleaned_text", ) if self.enable_page_by_chunking: @@ -229,7 +224,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjection: source_context = "/document/page_wise_layout/*" mappings = [ InputFieldMappingEntry( - name="Chunk", source="/document/page_wise_layout/*/chunk_mark_up" + name="Chunk", source="/document/page_wise_layout/*/final_mark_up" ), InputFieldMappingEntry( 
name="ChunkEmbedding", @@ -239,24 +234,25 @@ def get_index_projections(self) -> SearchIndexerIndexProjection: InputFieldMappingEntry(name="SourceUri", source="/document/SourceUri"), InputFieldMappingEntry( name="Sections", - source="/document/page_wise_layout/*/chunk_sections", + source="/document/page_wise_layout/*/final_sections", ), InputFieldMappingEntry( name="ChunkFigures", - source="/document/page_wise_layout/*/chunk_figures/*", + source="/document/page_wise_layout/*/final_chunk_figures/*", ), InputFieldMappingEntry( name="DateLastModified", source="/document/DateLastModified" ), InputFieldMappingEntry( - name="PageNumber", source="/document/page_wise_layout/*/page_number" + name="PageNumber", + source="/document/page_wise_layout/*/final_page_number", ), ] else: source_context = "/document/chunk_mark_ups/*" mappings = [ InputFieldMappingEntry( - name="Chunk", source="/document/chunk_mark_ups/*/chunk_mark_up" + name="Chunk", source="/document/chunk_mark_ups/*/final_mark_up" ), InputFieldMappingEntry( name="ChunkEmbedding", @@ -265,15 +261,19 @@ def get_index_projections(self) -> SearchIndexerIndexProjection: InputFieldMappingEntry(name="Title", source="/document/Title"), InputFieldMappingEntry(name="SourceUri", source="/document/SourceUri"), InputFieldMappingEntry( - name="Sections", source="/document/chunk_mark_ups/*/chunk_sections" + name="Sections", source="/document/chunk_mark_ups/*/final_sections" ), InputFieldMappingEntry( name="ChunkFigures", - source="/document/chunk_mark_ups/*/chunk_figures/*", + source="/document/chunk_mark_ups/*/final_chunk_figures/*", ), InputFieldMappingEntry( name="DateLastModified", source="/document/DateLastModified" ), + InputFieldMappingEntry( + name="PageNumber", + source="/document/chunk_mark_ups/*/final_page_number", + ), ] index_projections = SearchIndexerIndexProjection( diff --git a/image_processing/.coveragerc b/image_processing/.coveragerc new file mode 100644 index 00000000..50cceb10 --- /dev/null +++ 
b/image_processing/.coveragerc @@ -0,0 +1,11 @@ +[run] +omit = + tests/* + */__init__.py + +[report] +omit = + tests/* + */__init__.py +exclude_lines = + if __name__ == "__main__": diff --git a/image_processing/pyproject.toml b/image_processing/pyproject.toml index c7b082e2..6b153d18 100644 --- a/image_processing/pyproject.toml +++ b/image_processing/pyproject.toml @@ -43,4 +43,9 @@ dev = [ "pygments>=2.18.0", "ruff>=0.8.1", "python-dotenv>=1.0.1", + "coverage>=7.6.12", + "pytest>=8.3.4", + "pytest-asyncio>=0.25.3", + "pytest-cov>=6.0.0", + "pytest-mock>=3.14.0", ] diff --git a/image_processing/pytest.ini b/image_processing/pytest.ini new file mode 100644 index 00000000..84624a01 --- /dev/null +++ b/image_processing/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = src/image_processing diff --git a/image_processing/src/image_processing/__init__.py b/image_processing/src/image_processing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/image_processing/src/image_processing/layout_analysis.py b/image_processing/src/image_processing/layout_analysis.py index 081b76fa..5a1ef4f9 100644 --- a/image_processing/src/image_processing/layout_analysis.py +++ b/image_processing/src/image_processing/layout_analysis.py @@ -22,6 +22,7 @@ LayoutHolder, PageWiseContentHolder, NonPageWiseContentHolder, + PerPageStartingSentenceHolder, ) @@ -340,6 +341,40 @@ def create_page_wise_content(self) -> list[LayoutHolder]: return page_wise_contents + def create_per_page_starting_sentence(self) -> list[PerPageStartingSentenceHolder]: + """Create a list of the starting sentence of each page so we can assign the starting sentence to the page number. + + Returns: + -------- + list: A list of the starting sentence of each page.""" + + per_page_starting_sentences = [] + + for page in self.result.pages: + page_content = self.result.content[ + page.spans[0]["offset"] : page.spans[0]["offset"] + + page.spans[0]["length"] + ] + + # Remove any leading whitespace/newlines. 
+ cleaned_content = page_content.lstrip() + # If a newline appears before a period, split on newline; otherwise, on period. + if "\n" in cleaned_content: + first_line = cleaned_content.split("\n", 1)[0] + elif "." in cleaned_content: + first_line = cleaned_content.split(".", 1)[0] + else: + first_line = cleaned_content + + per_page_starting_sentences.append( + PerPageStartingSentenceHolder( + page_number=page.page_number, + starting_sentence=first_line.strip(), + ) + ) + + return per_page_starting_sentences + async def get_document_intelligence_client(self) -> DocumentIntelligenceClient: """Get the Azure Document Intelligence client. @@ -487,7 +522,12 @@ async def analyse(self): if self.extract_figures: await self.process_figures_from_extracted_content(text_content) - output_record = NonPageWiseContentHolder(layout=text_content) + per_page_starting_sentences = self.create_per_page_starting_sentence() + + output_record = NonPageWiseContentHolder( + layout=text_content, + per_page_starting_sentences=per_page_starting_sentences, + ) except Exception as e: logging.error(e) diff --git a/image_processing/src/image_processing/layout_holders.py b/image_processing/src/image_processing/layout_holders.py index 08d1ab37..8d1535fe 100644 --- a/image_processing/src/image_processing/layout_holders.py +++ b/image_processing/src/image_processing/layout_holders.py @@ -6,7 +6,6 @@ class FigureHolder(BaseModel): - """A class to hold the figure extracted from the document.""" figure_id: str = Field(..., alias="FigureId") @@ -48,7 +47,28 @@ class PageWiseContentHolder(BaseModel): page_wise_layout: list[LayoutHolder] +class PerPageStartingSentenceHolder(BaseModel): + """A class to hold the starting sentence of each page.""" + + page_number: int + starting_sentence: str + + class NonPageWiseContentHolder(BaseModel): """A class to hold the non-page-wise content extracted from the document.""" layout: LayoutHolder + per_page_starting_sentences: list[PerPageStartingSentenceHolder] = Field( + 
default_factory=list + ) + + +class ChunkHolder(BaseModel): + """A class to hold the text extracted from the document after it has been chunked.""" + + mark_up: str + sections: Optional[list[str]] = Field(default_factory=list) + figures: Optional[list[FigureHolder]] = Field(default_factory=list) + starting_sentence: Optional[str] = None + cleaned_text: Optional[str] = None + page_number: Optional[int] = Field(default=None) diff --git a/image_processing/src/image_processing/mark_up_cleaner.py b/image_processing/src/image_processing/mark_up_cleaner.py index 30a58133..3daac77b 100644 --- a/image_processing/src/image_processing/mark_up_cleaner.py +++ b/image_processing/src/image_processing/mark_up_cleaner.py @@ -3,7 +3,7 @@ import logging import json import regex as re -from layout_holders import FigureHolder +from layout_holders import FigureHolder, ChunkHolder class MarkUpCleaner: @@ -18,8 +18,8 @@ def get_sections(self, text) -> list: list: The sections related to text """ # Updated regex pattern to capture markdown headers like ### Header - combined_pattern = r"(?<=\n|^)[#]+\s*(.*?)(?=\n)" - doc_metadata = re.findall(combined_pattern, text, re.DOTALL) + combined_pattern = r"^\s*[#]+\s*(.*?)(?=\n|$)" + doc_metadata = re.findall(combined_pattern, text, re.MULTILINE) return self.clean_sections(doc_metadata) def get_figure_ids(self, text: str) -> list: @@ -61,12 +61,14 @@ def remove_markdown_tags(self, text: str, tag_patterns: dict) -> str: for tag, pattern in tag_patterns.items(): try: # Replace the tags using the specific pattern, keeping the content inside the tags - if tag == "header": + if tag in ["header", "figure"]: text = re.sub( pattern, r"\2", text, flags=re.DOTALL | re.MULTILINE ) else: - text = re.sub(pattern, r"\1", text, flags=re.DOTALL) + text = re.sub( + pattern, r"\1", text, flags=re.DOTALL | re.MULTILINE + ) except re.error as e: logging.error(f"Regex error for tag '{tag}': {e}") except Exception as e: @@ -74,7 +76,7 @@ def remove_markdown_tags(self, 
text: str, tag_patterns: dict) -> str: return text def clean_text_and_extract_metadata( - self, text: str, figures: list[FigureHolder] + self, chunk: ChunkHolder, figures: list[FigureHolder] ) -> tuple[str, str]: """This function performs following cleanup activities on the text, remove all unicode characters remove line spacing,remove stop words, normalize characters @@ -86,36 +88,39 @@ def clean_text_and_extract_metadata( Returns: str: The clean text.""" - return_record = {} - try: - logging.info(f"Input text: {text}") - if len(text) == 0: + logging.info(f"Input text: {chunk.mark_up}") + if len(chunk.mark_up) == 0: logging.error("Input text is empty") raise ValueError("Input text is empty") - return_record["chunk_mark_up"] = text - - figure_ids = self.get_figure_ids(text) + figure_ids = self.get_figure_ids(chunk.mark_up) - return_record["chunk_sections"] = self.get_sections(text) - return_record["chunk_figures"] = [ - figure.model_dump(by_alias=True) - for figure in figures - if figure.figure_id in figure_ids + chunk.sections = self.get_sections(chunk.mark_up) + chunk.figures = [ + figure for figure in figures if figure.figure_id in figure_ids ] - logging.info(f"Sections: {return_record['chunk_sections']}") + logging.info(f"Sections: {chunk.sections}") + + # Check if the chunk contains only figure tags (plus whitespace). + figure_tag_pattern = ( + r"(.*?)" + ) + text_without_figures = re.sub(figure_tag_pattern, "", chunk.mark_up).strip() + if not text_without_figures and chunk.figures: + # When no text outside of figure tags is present, set page_number from the first figure. + chunk.page_number = chunk.figures[0].page_number # Define specific patterns for each tag tag_patterns = { "figurecontent": r"", - "figure": r"(.*?)", + "figure": r"(.*?)", "figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)", "figcaption": r"
(.*?)
", "header": r"^\s*(#{1,6})\s*(.*?)\s*$", } - cleaned_text = self.remove_markdown_tags(text, tag_patterns) + cleaned_text = self.remove_markdown_tags(chunk.mark_up, tag_patterns) logging.info(f"Removed markdown tags: {cleaned_text}") @@ -128,11 +133,11 @@ def clean_text_and_extract_metadata( logging.error("Cleaned text is empty") raise ValueError("Cleaned text is empty") else: - return_record["chunk_cleaned"] = cleaned_text + chunk.cleaned_text = cleaned_text except Exception as e: logging.error(f"An error occurred in clean_text_and_extract_metadata: {e}") - return "" - return return_record + raise e + return chunk.model_dump(by_alias=True) async def clean(self, record: dict) -> dict: """Cleanup the data using standard python libraries. @@ -157,12 +162,17 @@ async def clean(self, record: dict) -> dict: figures = [FigureHolder(**figure) for figure in record["data"]["figures"]] + chunk_holder = ChunkHolder(mark_up=record["data"]["mark_up"]) + + if "page_number" in record["data"]: + chunk_holder.page_number = record["data"]["page_number"] + cleaned_record["data"] = self.clean_text_and_extract_metadata( - record["data"]["chunk"], figures + chunk_holder, figures ) except Exception as e: - logging.error("string cleanup Error: %s", e) + logging.error("Cleanup Error: %s", e) return { "recordId": record["recordId"], "data": None, diff --git a/image_processing/src/image_processing/requirements.txt b/image_processing/src/image_processing/requirements.txt index b755870f..519759b6 100644 --- a/image_processing/src/image_processing/requirements.txt +++ b/image_processing/src/image_processing/requirements.txt @@ -1,6 +1,6 @@ # This file was autogenerated by uv via the following command: # uv export --frozen --no-hashes --no-editable --no-sources --no-group dev --directory image_processing -o src/image_processing/requirements.txt -aiohappyeyeballs==2.4.4 +aiohappyeyeballs==2.4.6 aiohttp==3.11.12 aiosignal==1.3.2 annotated-types==0.7.0 @@ -12,7 +12,7 @@ 
azure-ai-vision-imageanalysis==1.0.0 azure-common==1.1.28 azure-core==1.32.0 azure-functions==1.21.3 -azure-identity==1.19.0 +azure-identity==1.20.0 azure-search==1.0.0b2 azure-search-documents==11.6.0b8 azure-storage-blob==12.24.1 @@ -27,7 +27,7 @@ click==8.1.8 cloudpathlib==0.20.0 colorama==0.4.6 ; sys_platform == 'win32' confection==0.1.5 -cryptography==44.0.0 +cryptography==44.0.1 cymem==2.0.11 distro==1.9.0 en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz @@ -38,7 +38,7 @@ fsspec==2025.2.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 -huggingface-hub==0.28.1 +huggingface-hub==0.29.0 idna==3.10 isodate==0.7.2 jinja2==3.1.5 @@ -50,7 +50,7 @@ marisa-trie==1.2.1 markdown-it-py==3.0.0 markupsafe==3.0.2 mdurl==0.1.2 -model2vec==0.3.9 +model2vec==0.4.0 msal==1.31.1 msal-extensions==1.2.0 msrest==0.7.1 @@ -58,7 +58,7 @@ multidict==6.1.0 murmurhash==1.0.12 numpy==1.26.4 oauthlib==3.2.2 -openai==1.61.1 +openai==1.63.2 openpyxl==3.1.5 packaging==24.2 pandas==2.2.3 @@ -94,7 +94,7 @@ spacy-loggers==1.0.5 srsly==2.5.1 tenacity==9.0.0 thinc==8.2.5 -tiktoken==0.8.0 +tiktoken==0.9.0 tokenizers==0.21.0 tqdm==4.67.1 typer==0.15.1 diff --git a/image_processing/src/image_processing/semantic_text_chunker.py b/image_processing/src/image_processing/semantic_text_chunker.py index 5a2c5b6c..b97c667f 100644 --- a/image_processing/src/image_processing/semantic_text_chunker.py +++ b/image_processing/src/image_processing/semantic_text_chunker.py @@ -7,6 +7,7 @@ import spacy import numpy as np from model2vec import StaticModel +from layout_holders import PerPageStartingSentenceHolder, ChunkHolder class SemanticTextChunker: @@ -75,7 +76,7 @@ def clean_chunks_and_map(self, chunks, is_table_or_figure_map): return cleaned_chunks, cleaned_is_table_or_figure_map - async def chunk(self, text: str) -> list[dict]: + async def chunk(self, text: str) -> list[ChunkHolder]: """Attempts to chunk the text by: Splitting into 
sentences Grouping sentences that contain figures and tables @@ -128,7 +129,7 @@ async def chunk(self, text: str) -> list[dict]: for chunk in reversed_backwards_pass_chunks: stripped_chunk = chunk.strip() if len(stripped_chunk) > 0: - cleaned_final_chunks.append(stripped_chunk) + cleaned_final_chunks.append(ChunkHolder(mark_up=stripped_chunk)) logging.info(f"Number of final chunks: {len(cleaned_final_chunks)}") logging.info(f"Chunks: {cleaned_final_chunks}") @@ -491,6 +492,34 @@ def sentence_similarity(self, text_1, text_2): ) return similarity + def assign_page_number_to_chunks( + self, + chunks: list[ChunkHolder], + per_page_starting_sentences: list[PerPageStartingSentenceHolder], + ) -> list[ChunkHolder]: + """Assigns page numbers to the chunks based on the starting sentences of each page. + + Args: + chunks (list[ChunkHolder]): The list of chunks. + per_page_starting_sentences (list[PerPageStartingSentenceHolder]): The list of starting sentences of each page. + + Returns: + list[ChunkHolder]: The list of chunks with page numbers assigned.""" + page_number = 1 + for chunk in chunks: + for per_page_starting_sentence in per_page_starting_sentences[ + page_number - 1 : + ]: + if per_page_starting_sentence.starting_sentence in chunk: + logging.info( + "Assigning page number %i to chunk", + per_page_starting_sentence.page_number, + ) + page_number = per_page_starting_sentence.page_number + break + chunk.page_number = page_number + return chunks + async def process_semantic_text_chunker(record: dict, text_chunker) -> dict: """Chunk the data. 
@@ -514,9 +543,23 @@ async def process_semantic_text_chunker(record: dict, text_chunker) -> dict: } # scenarios when page by chunking is enabled - cleaned_record["data"]["chunks"] = await text_chunker.chunk( - record["data"]["content"] - ) + chunks = await text_chunker.chunk(record["data"]["content"]) + + if "per_page_starting_sentences" in record["data"]: + per_page_starting_sentences = [ + PerPageStartingSentenceHolder(**sentence) + for sentence in record["data"]["per_page_starting_sentences"] + ] + + logging.info(f"Per page starting sentences: {per_page_starting_sentences}") + + chunks = text_chunker.assign_page_number_to_chunks( + chunks, per_page_starting_sentences + ) + + cleaned_record["data"]["chunks"] = [ + chunk.model_dump(by_alias=True) for chunk in chunks + ] except Exception as e: logging.error("Chunking Error: %s", e) diff --git a/image_processing/tests/image_processing/__init__.py b/image_processing/tests/image_processing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/image_processing/tests/image_processing/test_figure_analysis.py b/image_processing/tests/image_processing/test_figure_analysis.py new file mode 100644 index 00000000..9c1d58fa --- /dev/null +++ b/image_processing/tests/image_processing/test_figure_analysis.py @@ -0,0 +1,298 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import pytest +import base64 +import io +from PIL import Image +from unittest.mock import AsyncMock, MagicMock +from tenacity import RetryError +from openai import OpenAIError, RateLimitError +from figure_analysis import FigureAnalysis +from layout_holders import FigureHolder +from httpx import Response, Request + +# ------------------------ +# Fixtures for Image Data +# ------------------------ + + +@pytest.fixture +def image_data_100x100(): + """Return a base64-encoded PNG image of size 100x100.""" + img = Image.new("RGB", (100, 100), color="red") + buffer = io.BytesIO() + img.save(buffer, format="PNG") + data = buffer.getvalue() + return base64.b64encode(data).decode("utf-8") + + +@pytest.fixture +def image_data_50x50(): + """Return a base64-encoded PNG image of size 50x50 (small image).""" + img = Image.new("RGB", (50, 50), color="blue") + buffer = io.BytesIO() + img.save(buffer, format="PNG") + data = buffer.getvalue() + return base64.b64encode(data).decode("utf-8") + + +# ------------------------ +# Fixtures for FigureHolder +# ------------------------ + + +@pytest.fixture +def valid_figure(image_data_100x100): + """ + A valid figure with sufficient size. 
+ Example: FigureHolder(figure_id='12345', description="Figure 1", uri="https://example.com/12345.png", offset=50, length=17) + """ + return FigureHolder( + figure_id="12345", + description="Figure 1", + uri="https://example.com/12345.png", + offset=50, + length=17, + data=image_data_100x100, + ) + + +@pytest.fixture +def small_figure(image_data_50x50): + """A figure whose image is too small (both dimensions below 75).""" + return FigureHolder( + figure_id="small1", + description="", + uri="https://example.com/small1.png", + offset=0, + length=10, + data=image_data_50x50, + ) + + +# ------------------------ +# Tests for get_image_size +# ------------------------ + + +def test_get_image_size(valid_figure): + analysis = FigureAnalysis() + width, height = analysis.get_image_size(valid_figure) + assert width == 100 + assert height == 100 + + +def test_get_image_size_small(small_figure): + analysis = FigureAnalysis() + width, height = analysis.get_image_size(small_figure) + assert width == 50 + assert height == 50 + + +# ------------------------ +# Tests for understand_image_with_gptv +# ------------------------ + + +@pytest.mark.asyncio +async def test_understand_image_with_gptv_small(small_figure): + """ + If both width and height are below 75, the image should be considered too small, + and its description set to "Irrelevant Image". + """ + analysis = FigureAnalysis() + updated_figure = await analysis.understand_image_with_gptv(small_figure) + assert updated_figure.description == "Irrelevant Image" + + +@pytest.mark.asyncio +async def test_understand_image_with_gptv_success(valid_figure, monkeypatch): + """ + Test the success branch of understand_image_with_gptv. + Patch AsyncAzureOpenAI to simulate a successful response. + """ + analysis = FigureAnalysis() + + # Set up required environment variables. 
+ monkeypatch.setenv("OpenAI__ApiVersion", "2023-07-01-preview") + monkeypatch.setenv("OpenAI__MiniCompletionDeployment", "deployment123") + monkeypatch.setenv("OpenAI__Endpoint", "https://example.openai.azure.com") + + # Create a dummy response object to mimic the client's response. + dummy_response = MagicMock() + dummy_choice = MagicMock() + dummy_message = MagicMock() + dummy_message.content = "Generated image description" + dummy_choice.message = dummy_message + dummy_response.choices = [dummy_choice] + + # Create a dummy async client whose chat.completions.create returns dummy_response. + dummy_client = AsyncMock() + dummy_client.chat.completions.create.return_value = dummy_response + + # Create a dummy async context manager that returns dummy_client. + dummy_async_context = AsyncMock() + dummy_async_context.__aenter__.return_value = dummy_client + + # Patch AsyncAzureOpenAI so that instantiating it returns our dummy context manager. + monkeypatch.setattr( + "figure_analysis.AsyncAzureOpenAI", lambda **kwargs: dummy_async_context + ) + + # Call the function and verify the description is set from the dummy response. + updated_figure = await analysis.understand_image_with_gptv(valid_figure) + assert updated_figure.description == "Generated image description" + + # Now simulate the case when the API returns an empty description. + dummy_message.content = "" + updated_figure = await analysis.understand_image_with_gptv(valid_figure) + assert updated_figure.description == "Irrelevant Image" + + +@pytest.mark.asyncio +async def test_understand_image_with_gptv_policy_violation(valid_figure, monkeypatch): + """ + If the OpenAI API raises an error with "ResponsibleAIPolicyViolation" in its message, + the description should be set to "Irrelevant Image". 
+ """ + analysis = FigureAnalysis() + monkeypatch.setenv("OpenAI__ApiVersion", "2023-07-01-preview") + monkeypatch.setenv("OpenAI__MiniCompletionDeployment", "deployment123") + monkeypatch.setenv("OpenAI__Endpoint", "https://example.openai.azure.com") + + # Define a dummy exception that mimics an OpenAI error with a ResponsibleAIPolicyViolation message. + class DummyOpenAIError(OpenAIError): + def __init__(self, message): + self.message = message + + async def dummy_create(*args, **kwargs): + raise DummyOpenAIError("Error: ResponsibleAIPolicyViolation occurred") + + dummy_client = AsyncMock() + dummy_client.chat.completions.create.side_effect = dummy_create + dummy_async_context = AsyncMock() + dummy_async_context.__aenter__.return_value = dummy_client + monkeypatch.setattr( + "figure_analysis.AsyncAzureOpenAI", lambda **kwargs: dummy_async_context + ) + + updated_figure = await analysis.understand_image_with_gptv(valid_figure) + assert updated_figure.description == "Irrelevant Image" + + +@pytest.mark.asyncio +async def test_understand_image_with_gptv_general_error(valid_figure, monkeypatch): + """ + If the OpenAI API raises an error that does not include "ResponsibleAIPolicyViolation", + the error should propagate. 
+ """ + analysis = FigureAnalysis() + monkeypatch.setenv("OpenAI__ApiVersion", "2023-07-01-preview") + monkeypatch.setenv("OpenAI__MiniCompletionDeployment", "deployment123") + monkeypatch.setenv("OpenAI__Endpoint", "https://example.openai.azure.com") + + class DummyOpenAIError(OpenAIError): + def __init__(self, message): + self.message = message + + async def dummy_create(*args, **kwargs): + raise DummyOpenAIError("Some other error") + + dummy_client = AsyncMock() + dummy_client.chat.completions.create.side_effect = dummy_create + dummy_async_context = AsyncMock() + dummy_async_context.__aenter__.return_value = dummy_client + monkeypatch.setattr( + "figure_analysis.AsyncAzureOpenAI", lambda **kwargs: dummy_async_context + ) + + with pytest.raises(RetryError) as e: + await analysis.understand_image_with_gptv(valid_figure) + + root_cause = e.last_attempt.exception() + assert isinstance(root_cause, DummyOpenAIError) + + +# ------------------------ +# Tests for analyse +# ------------------------ + + +@pytest.mark.asyncio +async def test_analyse_success(valid_figure, monkeypatch): + """ + Test the successful execution of the analyse method. + Patch understand_image_with_gptv to return a figure with an updated description. + """ + analysis = FigureAnalysis() + record = {"recordId": "rec1", "data": {"figure": valid_figure.model_dump()}} + + async def dummy_understand(figure): + figure.description = "Updated Description" + return figure + + monkeypatch.setattr(analysis, "understand_image_with_gptv", dummy_understand) + result = await analysis.analyse(record) + assert result["recordId"] == "rec1" + assert result["data"]["updated_figure"]["description"] == "Updated Description" + assert result["errors"] is None + + +@pytest.mark.asyncio +async def test_analyse_retry_rate_limit(valid_figure, monkeypatch): + """ + Simulate a RetryError whose last attempt raised a RateLimitError. + The analyse method should return an error message indicating a rate limit error. 
+ """ + analysis = FigureAnalysis() + record = {"recordId": "rec2", "data": {"figure": valid_figure.model_dump()}} + + # Create a mock request object + dummy_request = Request( + method="POST", url="https://api.openai.com/v1/chat/completions" + ) + + # Create a mock response object with the request set + dummy_response = Response( + status_code=429, content=b"Rate limit exceeded", request=dummy_request + ) + + # Create a RateLimitError instance + dummy_rate_error = RateLimitError( + message="Rate limit exceeded", + response=dummy_response, + body="Rate limit exceeded", + ) + dummy_retry_error = RetryError( + last_attempt=MagicMock(exception=lambda: dummy_rate_error) + ) + + async def dummy_understand(figure): + raise dummy_retry_error + + monkeypatch.setattr(analysis, "understand_image_with_gptv", dummy_understand) + result = await analysis.analyse(record) + assert result["recordId"] == "rec2" + assert result["data"] is None + assert result["errors"] is not None + assert "rate limit error" in result["errors"][0]["message"].lower() + + +@pytest.mark.asyncio +async def test_analyse_general_exception(valid_figure, monkeypatch): + """ + If understand_image_with_gptv raises a general Exception, + analyse should catch it and return an error response. 
+ """ + analysis = FigureAnalysis() + record = {"recordId": "rec3", "data": {"figure": valid_figure.model_dump()}} + + async def dummy_understand(figure): + raise Exception("General error") + + monkeypatch.setattr(analysis, "understand_image_with_gptv", dummy_understand) + result = await analysis.analyse(record) + assert result["recordId"] == "rec3" + assert result["data"] is None + assert result["errors"] is not None + assert "check the logs for more details" in result["errors"][0]["message"].lower() diff --git a/image_processing/tests/image_processing/test_layout_analysis.py b/image_processing/tests/image_processing/test_layout_analysis.py new file mode 100644 index 00000000..e9de95ad --- /dev/null +++ b/image_processing/tests/image_processing/test_layout_analysis.py @@ -0,0 +1,493 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import os +import pytest +import tempfile +import base64 +from unittest.mock import AsyncMock + +from layout_analysis import ( + process_layout_analysis, + LayoutAnalysis, +) + + +# --- Dummy classes to simulate ADI results and figures --- +class DummySpan: + def __init__(self, offset, length): + self.offset = offset + self.length = length + + +class DummyPage: + def __init__(self, offset, length, page_number): + # Simulate a page span as a dictionary. 
+ self.spans = [{"offset": offset, "length": length}] + self.page_number = page_number + + +class DummyRegion: + def __init__(self, page_number): + self.page_number = page_number + + +class DummyCaption: + def __init__(self, content): + self.content = content + + +class DummyPoller: + def __init__(self, result, operation_id): + self._result = result + self.details = {"operation_id": operation_id} + + async def result(self): + return self._result + + +class DummyDocIntelligenceClient: + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + pass + + async def begin_analyze_document(self, **kwargs): + # Create a dummy page spanning the first 5 characters. + dummy_page = DummyPage(0, 5, 1) + dummy_result = DummyResult("HelloWorld", pages=[dummy_page], figures=[]) + return DummyPoller(dummy_result, "dummy_op") + + +class DummyFigure: + def __init__(self, id, offset, length, page_number, caption_content): + self.id = id # note: process_figures_from_extracted_content checks "if figure.id is None" + self.bounding_regions = [DummyRegion(page_number)] + self.caption = DummyCaption(caption_content) + self.spans = [DummySpan(offset, length)] + + +class DummyResult: + def __init__(self, content, pages, figures, model_id="model123"): + self.content = content + self.pages = pages + self.figures = figures + self.model_id = model_id + + +# --- Dummy StorageAccountHelper for testing purposes --- +class DummyStorageAccountHelper: + @property + def account_url(self): + return "http://dummy.storage" + + async def upload_blob(self, container, blob, data, content_type): + # Simulate a successful upload returning a URL. + return f"http://dummy.url/{blob}" + + async def download_blob_to_temp_dir(self, source, container, target_file_name): + # Write dummy content to a temp file and return its path along with empty metadata. 
+ temp_file_path = os.path.join(tempfile.gettempdir(), target_file_name) + with open(temp_file_path, "wb") as f: + f.write(b"dummy file content") + return temp_file_path, {} + + async def add_metadata_to_blob(self, source, container, metadata, upsert=False): + # Dummy method; do nothing. + return + + +# --- Fixtures and environment setup --- +@pytest.fixture(autouse=True) +def set_env_vars(monkeypatch): + monkeypatch.setenv("StorageAccount__Name", "dummyaccount") + monkeypatch.setenv( + "AIService__DocumentIntelligence__Endpoint", "http://dummy.ai.endpoint" + ) + + +@pytest.fixture +def dummy_storage_helper(): + return DummyStorageAccountHelper() + + +# --- Tests for LayoutAnalysis and process_layout_analysis --- + + +def test_extract_file_info(): + # Given a typical blob URL, extract_file_info should correctly set properties. + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.pdf" + la = LayoutAnalysis(record_id=1, source=source) + la.extract_file_info() + assert la.blob == "path/to/file.pdf" + assert la.container == "container" + assert la.images_container == "container-figures" + assert la.file_extension == "pdf" + assert la.target_file_name == "1.pdf" + + +# Test non-page-wise analysis without figures. +@pytest.mark.asyncio +async def test_analyse_non_page_wise_no_figures(monkeypatch, dummy_storage_helper): + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt" + la = LayoutAnalysis( + page_wise=False, extract_figures=True, record_id=123, source=source + ) + la.extract_file_info() + # Patch get_storage_account_helper to return our dummy helper. + monkeypatch.setattr( + la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper) + ) + # Patch download_blob_to_temp_dir to simulate a successful download. 
+ monkeypatch.setattr( + dummy_storage_helper, + "download_blob_to_temp_dir", + AsyncMock(return_value=("/tmp/dummy.txt", {})), + ) + # Patch analyse_document to simulate a successful ADI analysis. + dummy_result = DummyResult( + content="Full document content", pages=[DummyPage(0, 21, 1)], figures=[] + ) + + async def dummy_analyse_document(file_path): + la.result = dummy_result + la.operation_id = "op123" + + monkeypatch.setattr(la, "analyse_document", dummy_analyse_document) + # Patch process_figures_from_extracted_content to do nothing (since there are no figures). + monkeypatch.setattr(la, "process_figures_from_extracted_content", AsyncMock()) + result = await la.analyse() + assert result["recordId"] == 123 + data = result["data"] + # In non-page-wise mode, the output record is a NonPageWiseContentHolder + assert "layout" in data + layout = data["layout"] + assert layout["content"] == "Full document content" + # No figures were processed. + assert layout.get("figures", []) == [] + assert result["errors"] is None + + +# Test page-wise analysis without figures. +@pytest.mark.asyncio +async def test_analyse_page_wise_no_figures(monkeypatch, dummy_storage_helper): + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt" + la = LayoutAnalysis( + page_wise=True, extract_figures=True, record_id=456, source=source + ) + la.extract_file_info() + monkeypatch.setattr( + la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper) + ) + monkeypatch.setattr( + dummy_storage_helper, + "download_blob_to_temp_dir", + AsyncMock(return_value=("/tmp/dummy.txt", {})), + ) + # Create a dummy result with one page and no figures. 
+ dummy_page = DummyPage(0, 12, 1) + dummy_result = DummyResult(content="Page content", pages=[dummy_page], figures=[]) + + async def dummy_analyse_document(file_path): + la.result = dummy_result + la.operation_id = "op456" + + monkeypatch.setattr(la, "analyse_document", dummy_analyse_document) + result = await la.analyse() + assert result["recordId"] == 456 + data = result["data"] + # In page-wise mode, the output should have a "page_wise_layout" key. + assert "page_wise_layout" in data + layouts = data["page_wise_layout"] + assert len(layouts) == 1 + layout = layouts[0] + # The content is extracted from dummy_result.content using the page span. + expected_content = dummy_result.content[0:12] + assert layout["content"] == expected_content + assert layout["page_number"] == 1 + assert result["errors"] is None + + +# Test page-wise analysis with figures (covering figure download and upload). +@pytest.mark.asyncio +async def test_analyse_page_wise_with_figures(monkeypatch, dummy_storage_helper): + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt" + la = LayoutAnalysis( + page_wise=True, extract_figures=True, record_id=789, source=source + ) + la.extract_file_info() + monkeypatch.setattr( + la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper) + ) + monkeypatch.setattr( + dummy_storage_helper, + "download_blob_to_temp_dir", + AsyncMock(return_value=("/tmp/dummy.txt", {})), + ) + # Create a dummy page and a dummy figure. + dummy_page = DummyPage(0, 12, 1) + dummy_figure = DummyFigure( + "fig1", offset=5, length=5, page_number=1, caption_content="Caption text" + ) + dummy_result = DummyResult( + content="Page content", pages=[dummy_page], figures=[dummy_figure] + ) + + async def dummy_analyse_document(file_path): + la.result = dummy_result + la.operation_id = "op789" + + monkeypatch.setattr(la, "analyse_document", dummy_analyse_document) + # Patch download_figure_image to simulate downloading image bytes. 
+ monkeypatch.setattr( + la, "download_figure_image", AsyncMock(return_value=b"fake_image") + ) + # Patch upload_blob to simulate a successful upload. + monkeypatch.setattr( + dummy_storage_helper, + "upload_blob", + AsyncMock(return_value="http://dummy.url/fig1.png"), + ) + result = await la.analyse() + assert result["recordId"] == 789 + data = result["data"] + assert "page_wise_layout" in data + layouts = data["page_wise_layout"] + # The page layout should have a figures list containing our processed figure. + assert len(layouts) == 1 + layout = layouts[0] + assert "figures" in layout + figures_list = layout["figures"] + assert len(figures_list) == 1 + figure_data = figures_list[0] + assert figure_data["figure_id"] == "fig1" + # The data field should contain the base64-encoded image. + expected_b64 = base64.b64encode(b"fake_image").decode("utf-8") + assert figure_data["data"] == expected_b64 + # Verify that the caption are set as expected. + assert figure_data["caption"] == "Caption text" + assert result["errors"] is None + + +# Test failure during blob download. +@pytest.mark.asyncio +async def test_analyse_download_blob_failure(monkeypatch, dummy_storage_helper): + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt" + la = LayoutAnalysis( + page_wise=False, extract_figures=True, record_id=321, source=source + ) + la.extract_file_info() + monkeypatch.setattr( + la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper) + ) + # Simulate a failure in download_blob_to_temp_dir. + monkeypatch.setattr( + dummy_storage_helper, + "download_blob_to_temp_dir", + AsyncMock(side_effect=Exception("Download error")), + ) + result = await la.analyse() + assert result["recordId"] == 321 + assert result["data"] is None + assert result["errors"] is not None + assert "Failed to download the blob" in result["errors"][0]["message"] + + +# Test failure during analyse_document (simulate ADI failure) and ensure metadata is updated. 
+@pytest.mark.asyncio
+async def test_analyse_document_failure(monkeypatch, dummy_storage_helper):
+    """A failing analyse_document must yield an error record and a metadata update.
+
+    The storage helper is stubbed so the download step succeeds, analyse_document
+    is replaced with a mock that raises, and the test then checks both the error
+    payload returned by analyse() and that add_metadata_to_blob was awaited.
+    """
+    source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt"
+    la = LayoutAnalysis(
+        page_wise=False, extract_figures=True, record_id=654, source=source
+    )
+    la.extract_file_info()
+
+    # Stub the storage layer: the download succeeds, and the metadata call is
+    # recorded by an AsyncMock (same mocking style as the other tests in this file).
+    add_metadata_mock = AsyncMock()
+    monkeypatch.setattr(
+        la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper)
+    )
+    monkeypatch.setattr(
+        dummy_storage_helper,
+        "download_blob_to_temp_dir",
+        AsyncMock(return_value=("/tmp/dummy.txt", {})),
+    )
+    monkeypatch.setattr(
+        dummy_storage_helper, "add_metadata_to_blob", add_metadata_mock
+    )
+
+    # Simulate analyse_document throwing an exception when awaited.
+    monkeypatch.setattr(
+        la,
+        "analyse_document",
+        AsyncMock(side_effect=Exception("Analyse document error")),
+    )
+
+    result = await la.analyse()
+
+    assert result["recordId"] == 654
+    assert result["data"] is None
+    assert result["errors"] is not None
+    assert (
+        "Failed to analyze the document with Azure Document Intelligence"
+        in result["errors"][0]["message"]
+    )
+    # The stubbed add_metadata_to_blob must have been awaited at least once.
+    add_metadata_mock.assert_awaited()
+
+
+# Test failure during processing of extracted content (e.g. page-wise content creation).
+@pytest.mark.asyncio +async def test_analyse_processing_content_failure(monkeypatch, dummy_storage_helper): + source = "https://dummyaccount.blob.core.windows.net/container/path/to/file.txt" + la = LayoutAnalysis( + page_wise=True, extract_figures=True, record_id=987, source=source + ) + la.extract_file_info() + monkeypatch.setattr( + la, "get_storage_account_helper", AsyncMock(return_value=dummy_storage_helper) + ) + monkeypatch.setattr( + dummy_storage_helper, + "download_blob_to_temp_dir", + AsyncMock(return_value=("/tmp/dummy.txt", {})), + ) + # Simulate a successful analyse_document. + dummy_page = DummyPage(0, 12, 1) + dummy_result = DummyResult(content="Page content", pages=[dummy_page], figures=[]) + + async def dummy_analyse_document(file_path): + la.result = dummy_result + la.operation_id = "op987" + + monkeypatch.setattr(la, "analyse_document", dummy_analyse_document) + + # Patch create_page_wise_content to raise an exception. + def raise_exception(): + raise Exception("Processing error") + + monkeypatch.setattr(la, "create_page_wise_content", raise_exception) + result = await la.analyse() + assert result["recordId"] == 987 + assert result["data"] is None + assert result["errors"] is not None + assert "Failed to process the extracted content" in result["errors"][0]["message"] + + +# Test process_layout_analysis when 'source' is missing (KeyError branch). +@pytest.mark.asyncio +async def test_process_layout_analysis_missing_source(): + record = {"recordId": "111", "data": {}} # missing 'source' key + result = await process_layout_analysis(record) + assert result["recordId"] == "111" + assert result["data"] is None + assert result["errors"] is not None + assert "Pass a valid source" in result["errors"][0]["message"] + + +@pytest.mark.asyncio +async def test_analyse_document_success(monkeypatch, tmp_path): + # Create a temporary file with dummy content. 
+ tmp_file = tmp_path / "dummy.txt" + tmp_file.write_bytes(b"dummy content") + + la = LayoutAnalysis( + record_id=999, + source="https://dummyaccount.blob.core.windows.net/container/path/to/dummy.txt", + ) + + # Use an async function to return our dummy Document Intelligence client. + async def dummy_get_doc_intelligence_client(): + return DummyDocIntelligenceClient() + + monkeypatch.setattr( + la, "get_document_intelligence_client", dummy_get_doc_intelligence_client + ) + + await la.analyse_document(str(tmp_file)) + + assert la.result is not None + assert la.operation_id == "dummy_op" + # Check that the dummy result contains the expected content. + assert la.result.content == "HelloWorld" + + +def test_create_page_wise_content(): + # Test create_page_wise_content using a dummy result with one page. + la = LayoutAnalysis(record_id=100, source="dummy") + + # Create a dummy result with content "HelloWorld" + # and a page with a span from index 0 with length 5. + class DummyResultContent: + pass + + dummy_result = DummyResultContent() + dummy_result.content = "HelloWorld" + dummy_result.pages = [DummyPage(0, 5, 1)] + la.result = dummy_result + + layouts = la.create_page_wise_content() + assert isinstance(layouts, list) + assert len(layouts) == 1 + layout = layouts[0] + # The page content should be the substring "Hello" + assert layout.content == "Hello" + assert layout.page_number == 1 + assert layout.page_offsets == 0 + + +def test_create_per_page_starting_sentence(): + # Create a LayoutAnalysis instance. + la = LayoutAnalysis(record_id=200, source="dummy") + + # Create a dummy result with content and pages. + # For this test, the first page's content slice will be "HelloWorld" (from index 0 with length 10), + # so the starting sentence extracted should be "HelloWorld". + class DummyResultContent: + pass + + dummy_result = DummyResultContent() + dummy_result.content = "HelloWorld. This is a test sentence." 
+ # DummyPage creates a page with spans as a list of dictionaries. + dummy_result.pages = [DummyPage(0, 10, 1)] + la.result = dummy_result + + sentences = la.create_per_page_starting_sentence() + assert len(sentences) == 1 + sentence = sentences[0] + assert sentence.page_number == 1 + assert sentence.starting_sentence == "HelloWorld" + + +def test_create_per_page_starting_sentence_multiple_pages(): + # Create a LayoutAnalysis instance. + la = LayoutAnalysis(record_id=300, source="dummy") + + # Create a dummy result with content spanning two pages. + # Use DummyPage to simulate pages; DummyPage expects "spans" as a list of dicts. + class DummyResultContent: + pass + + dummy_result = DummyResultContent() + # Define content as two parts: + # Page 1: Offset 0, length 10 gives "Page one." (starting sentence "Page one") + # Page 2: Offset 10, length 15 gives " Page two text" (starting sentence " Page two text") + dummy_result.content = "Page one.Page two text and more content. This is more random content that is on page 2." + dummy_result.pages = [ + DummyPage(0, 9, 1), # "Page one." (9 characters: indices 0-8) + DummyPage(9, 78, 2), # "Page two text and" (16 characters: indices 9-24) + ] + la.result = dummy_result + + # Call create_per_page_starting_sentence and check results. + sentences = la.create_per_page_starting_sentence() + assert len(sentences) == 2 + + # For page 1, the substring is "Page one." -> split on "." gives "Page one" + assert sentences[0].page_number == 1 + assert sentences[0].starting_sentence == "Page one" + + # For page 2, the substring is "Page two text and" -> split on "." gives the entire string + assert sentences[1].page_number == 2 + # We strip potential leading/trailing spaces for validation. 
+ assert sentences[1].starting_sentence.strip() == "Page two text and more content" diff --git a/image_processing/tests/image_processing/test_layout_and_figure_merger.py b/image_processing/tests/image_processing/test_layout_and_figure_merger.py new file mode 100644 index 00000000..3deb271c --- /dev/null +++ b/image_processing/tests/image_processing/test_layout_and_figure_merger.py @@ -0,0 +1,114 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import pytest +from layout_holders import LayoutHolder, FigureHolder +from layout_and_figure_merger import LayoutAndFigureMerger + + +@pytest.fixture +def layout_holder(): + return LayoutHolder( + content="This is a sample layout with a figure placeholder.
This is a sentence after." + ) + + +@pytest.fixture +def figure_holder(): + return FigureHolder( + figure_id="12345", + description="Figure 1", + uri="https://example.com/12345.png", + offset=50, + length=17, + ) + + +@pytest.fixture +def merger(): + return LayoutAndFigureMerger() + + +def test_insert_figure_description(merger, layout_holder, figure_holder): + updated_layout, inserted_length = merger.insert_figure_description( + layout_holder, figure_holder + ) + assert "
Figure 1
" in updated_layout.content + assert ( + inserted_length + == len("
Figure 1
") - figure_holder.length + ) + assert ( + updated_layout.content + == "This is a sample layout with a figure placeholder.
Figure 1
This is a sentence after." + ) + + +def test_insert_figure_invalid_offset(merger, layout_holder): + invalid_figure = FigureHolder( + figure_id="12345", + offset=100, + length=5, + description="Invalid figure", + uri="https://example.com/12345.png", + ) + with pytest.raises(ValueError, match="Figure offset is out of bounds"): + merger.insert_figure_description(layout_holder, invalid_figure) + + +@pytest.mark.asyncio +async def test_merge_figures_into_layout(merger, layout_holder, figure_holder): + figures = [figure_holder] + updated_layout = await merger.merge_figures_into_layout(layout_holder, figures) + assert "
Figure 1
" in updated_layout.content + assert ( + updated_layout.content + == "This is a sample layout with a figure placeholder.
Figure 1
This is a sentence after." + ) + + +@pytest.mark.asyncio +async def test_merge_removes_irrelevant_figures(merger): + layout_holder = LayoutHolder( + content="Before
'Irrelevant Image'
After" + ) + updated_layout = await merger.merge_figures_into_layout(layout_holder, []) + assert "Irrelevant Image" not in updated_layout.content + assert "Before After" in updated_layout.content + + +@pytest.mark.asyncio +async def test_merge_removes_empty_figures(merger): + layout_holder = LayoutHolder(content="Before
After") + updated_layout = await merger.merge_figures_into_layout(layout_holder, []) + assert "
" not in updated_layout.content + assert "Before After" in updated_layout.content + + +@pytest.mark.asyncio +async def test_merge_removes_html_comments(merger): + layout_holder = LayoutHolder(content="Before After") + updated_layout = await merger.merge_figures_into_layout(layout_holder, []) + assert "" not in updated_layout.content + assert "Before After" in updated_layout.content + + +@pytest.mark.asyncio +async def test_merge_handles_exception(merger): + record = { + "recordId": "1", + "data": { + "layout": {"content": "Sample"}, + "figures": [ + { + "figure_id": "12345", + "offset": 1000, + "length": 5, + "description": "Invalid", + "uri": "https://example.com/12345.png", + } + ], + }, + } + response = await merger.merge(record) + assert response["data"] is None + assert response["errors"] is not None diff --git a/image_processing/tests/image_processing/test_layout_holders.py b/image_processing/tests/image_processing/test_layout_holders.py new file mode 100644 index 00000000..3d2d1c4a --- /dev/null +++ b/image_processing/tests/image_processing/test_layout_holders.py @@ -0,0 +1,107 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import pytest +from pydantic import ValidationError +from layout_holders import ( + FigureHolder, + LayoutHolder, + PageWiseContentHolder, + NonPageWiseContentHolder, + ChunkHolder, + PerPageStartingSentenceHolder, +) + + +def test_figure_holder_creation(): + figure = FigureHolder( + FigureId="fig1", + offset=10, + length=5, + Uri="http://example.com/fig1.png", + Description="Sample figure", + ) + + assert figure.figure_id == "fig1" + assert figure.offset == 10 + assert figure.length == 5 + assert figure.uri == "http://example.com/fig1.png" + assert figure.description == "Sample figure" + assert figure.markdown == "
Sample figure
" + + +def test_figure_holder_missing_required_fields(): + with pytest.raises(ValidationError): + FigureHolder(offset=10, length=5, Uri="http://example.com/fig1.png") + + +def test_layout_holder_creation(): + layout = LayoutHolder(content="Sample content") + assert layout.content == "Sample content" + assert layout.page_number is None + assert layout.page_offsets == 0 + assert layout.figures == [] + + +def test_layout_holder_with_figures(): + figure = FigureHolder( + FigureId="fig1", + offset=10, + length=5, + Uri="http://example.com/fig1.png", + Description="Sample figure", + ) + layout = LayoutHolder(content="Sample content", figures=[figure]) + assert len(layout.figures) == 1 + assert layout.figures[0].figure_id == "fig1" + + +def test_page_wise_content_holder(): + layout1 = LayoutHolder(content="Page 1") + layout2 = LayoutHolder(content="Page 2") + page_holder = PageWiseContentHolder(page_wise_layout=[layout1, layout2]) + assert len(page_holder.page_wise_layout) == 2 + assert page_holder.page_wise_layout[0].content == "Page 1" + + +def test_non_page_wise_content_holder(): + layout = LayoutHolder(content="Full document") + non_page_holder = NonPageWiseContentHolder(layout=layout) + assert non_page_holder.layout.content == "Full document" + + +def test_chunk_holder_creation(): + chunk = ChunkHolder( + mark_up="Sample markup", + sections=["Section1", "Section2"], + figures=[], + starting_sentence="First sentence", + cleaned_text="Cleaned text content", + page_number=1, + ) + assert chunk.mark_up == "Sample markup" + assert chunk.sections == ["Section1", "Section2"] + assert chunk.starting_sentence == "First sentence" + assert chunk.cleaned_text == "Cleaned text content" + assert chunk.page_number == 1 + + +def test_per_page_starting_sentence_holder_creation(): + sentence = PerPageStartingSentenceHolder( + page_number=1, starting_sentence="This is the starting sentence." 
+ ) + assert sentence.page_number == 1 + assert sentence.starting_sentence == "This is the starting sentence." + + +def test_non_page_wise_content_holder_with_sentences(): + layout = LayoutHolder(content="Full document") + sentences = [ + PerPageStartingSentenceHolder(page_number=1, starting_sentence="Start 1"), + PerPageStartingSentenceHolder(page_number=2, starting_sentence="Start 2"), + ] + non_page_holder = NonPageWiseContentHolder( + layout=layout, per_page_starting_sentences=sentences + ) + assert non_page_holder.layout.content == "Full document" + assert len(non_page_holder.per_page_starting_sentences) == 2 + assert non_page_holder.per_page_starting_sentences[0].starting_sentence == "Start 1" diff --git a/image_processing/tests/image_processing/test_mark_up_cleaner.py b/image_processing/tests/image_processing/test_mark_up_cleaner.py new file mode 100644 index 00000000..82497dc2 --- /dev/null +++ b/image_processing/tests/image_processing/test_mark_up_cleaner.py @@ -0,0 +1,249 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import pytest +from mark_up_cleaner import MarkUpCleaner +from layout_holders import FigureHolder, ChunkHolder + + +# Fixtures +@pytest.fixture +def cleaner(): + return MarkUpCleaner() + + +@pytest.fixture +def sample_text(): + return """ + # Header 1 + Some text. + ## Header 2 + More text. +
+ """ + + +@pytest.fixture +def figures(): + # We'll use the object-based representation for figures. + return [ + FigureHolder( + FigureId="fig1", + offset=10, + length=5, + Uri="http://example.com/fig1.png", + Description="Sample figure", + ), + # This figure won't appear since its id won't be matched. + FigureHolder( + FigureId="12345", + offset=0, + length=8, + Uri="https://example.com/12345.png", + Description="Figure 1", + ), + ] + + +# Test get_sections: It calls get_sections, then clean_sections internally. +def test_get_sections(cleaner, sample_text): + sections = cleaner.get_sections(sample_text) + # Expecting headers extracted and cleaned. + assert sections == ["Header 1", "Header 2"] + + +# Test get_figure_ids: using regex extraction. +def test_get_figure_ids(cleaner, sample_text): + figure_ids = cleaner.get_figure_ids(sample_text) + assert figure_ids == ["12345"] + + +# Test clean_sections: Remove leading hashes and extra chars. +def test_clean_sections(cleaner): + sections = ["### Section 1", "## Section 2"] + cleaned = cleaner.clean_sections(sections) + assert cleaned == ["Section 1", "Section 2"] + + +# Test remove_markdown_tags: Ensure tags are removed/replaced. +def test_remove_markdown_tags(cleaner): + text = """ +
Some figure
+ + # Header + Random sentence + """ + tag_patterns = { + "figurecontent": r"", + "figure": r"(.*?)", + } + cleaned_text = cleaner.remove_markdown_tags(text, tag_patterns) + # Check that the inner contents are retained but tags removed. + assert "Some figure" in cleaned_text + assert "Some content" in cleaned_text + assert "" not in cleaned_text + assert "
Some figure
" not in cleaned_text + + +# Test clean_text_and_extract_metadata: Pass a ChunkHolder instance +def test_clean_text_and_extract_metadata(cleaner, sample_text, figures): + # Create a ChunkHolder from the sample text. + chunk = ChunkHolder(mark_up=sample_text) + result = cleaner.clean_text_and_extract_metadata(chunk, figures) + # result is a dict returned from model_dump (by alias) + assert isinstance(result, dict) + # The input text is stored under 'mark_up' + assert result["mark_up"] == sample_text + # get_sections should extract the headers. + assert result["sections"] == ["Header 1", "Header 2"] + # get_figure_ids returns ["12345"] so only the matching figure is kept. + assert len(result["figures"]) == 1 + # FigureHolder uses alias "FigureId" for its id. + assert result["figures"][0]["FigureId"] == "12345" + # The cleaned text should have removed markup such as FigureId info. + assert "FigureId='12345'" not in result["cleaned_text"] + + +# Async test for clean: using record dict with data holding a chunk sub-dict. +@pytest.mark.asyncio +async def test_clean(cleaner, sample_text): + record = { + "recordId": "1", + "data": { + "mark_up": sample_text, + "figures": [ + { + "figure_id": "12345", + "uri": "https://example.com/12345.png", + "description": "Figure 1", + "offset": 0, + "length": 8, + }, + { + "figure_id": "123456789", + "uri": "https://example.com/123456789.png", + "description": "Figure 2", + "offset": 10, + "length": 8, + }, + ], + }, + } + result = await cleaner.clean(record) + assert isinstance(result, dict) + assert result["recordId"] == "1" + # Ensure data was successfully cleaned + assert result["data"] is not None + assert result["data"]["cleaned_text"] + # Check that the expected keys are in the cleaned data. 
+ assert "mark_up" in result["data"] + assert "sections" in result["data"] + assert "figures" in result["data"] + # Only one figure must match because get_figure_ids extracted "12345" + assert len(result["data"]["figures"]) == 1 + assert result["data"]["figures"][0]["FigureId"] == "12345" + + +# Test get_sections with empty text returns empty list. +def test_get_sections_empty_text(cleaner): + sections = cleaner.get_sections("") + assert sections == [] + + +# Test get_figure_ids with no figure tags. +def test_get_figure_ids_no_figures(cleaner): + text = "This text does not include any figures." + assert cleaner.get_figure_ids(text) == [] + + +# Test remove_markdown_tags with unknown tag patterns (should remain unchanged). +def test_remove_markdown_tags_unknown_tag(cleaner): + text = "This is a basic text without markdown." + tag_patterns = {"nonexistent": r"(pattern)"} + result = cleaner.remove_markdown_tags(text, tag_patterns) + assert result == text + + +# Test clean_text_and_extract_metadata with empty text: Should raise ValueError. +def test_clean_text_and_extract_metadata_empty_text(cleaner, figures): + chunk = ChunkHolder(mark_up="") + with pytest.raises(ValueError): + cleaner.clean_text_and_extract_metadata(chunk, figures) + + +# Async test: missing "chunk" key in record -> error branch of clean(). +@pytest.mark.asyncio +async def test_clean_missing_chunk(cleaner): + record = { + "recordId": "3", + "data": {"figures": []}, + } + result = await cleaner.clean(record) + assert result["recordId"] == "3" + assert result["data"] is None + assert result["errors"] is not None + assert "Failed to cleanup data" in result["errors"][0]["message"] + + +# Async test: invalid figure structure causing an exception in clean() +@pytest.mark.asyncio +async def test_clean_with_invalid_figures_structure(cleaner): + record = { + "recordId": "4", + "data": { + "chunk": {"mark_up": "Some text with # Header"}, + # Figures missing required keys for FigureHolder. 
+ "figures": [{"invalid_key": "no_fig_id"}], + }, + } + result = await cleaner.clean(record) + assert result["recordId"] == "4" + assert result["data"] is None + assert result["errors"] is not None + + +def test_clean_only_figures_sets_page_number(cleaner): + # Input contains only a figure tag. + text = "
I am a random description
" + chunk = ChunkHolder(mark_up=text, page_number=1) + figs = [ + FigureHolder( + FigureId="12345", + offset=0, + length=10, + Uri="http://example.com/12345.png", + Description="Figure 1", + page_number=2, # This page number should be picked up. + ), + FigureHolder( + FigureId="67890", + offset=20, + length=10, + Uri="http://example.com/67890.png", + Description="Figure 2", + page_number=4, + ), + ] + result = cleaner.clean_text_and_extract_metadata(chunk, figs) + # Because no text outside the figure tag is present, sections should be empty, + # and page_number should be set from the first matching figure. + assert result.get("sections") == [] + assert result["page_number"] == 2 + + +def test_clean_text_with_mixed_content_leaves_page_number_unset(cleaner): + # Input contains text outside of the figure tag. + # Even though a figure appears, the presence of other text means page_number should not be auto-set as the chunk could overlap pages. + text = "More text before the figure.
" + chunk = ChunkHolder(mark_up=text, page_number=4) + figs = [ + FigureHolder( + FigureId="12345", + offset=0, + length=10, + Uri="http://example.com/12345.png", + Description="Figure 1", + page_number=5, # This should be ignored since text exists. + ) + ] + result = cleaner.clean_text_and_extract_metadata(chunk, figs) + assert result.get("page_number") == 4 diff --git a/image_processing/tests/image_processing/test_semantic_text_chunker.py b/image_processing/tests/image_processing/test_semantic_text_chunker.py new file mode 100644 index 00000000..59e83644 --- /dev/null +++ b/image_processing/tests/image_processing/test_semantic_text_chunker.py @@ -0,0 +1,355 @@ +import pytest +from unittest.mock import AsyncMock, MagicMock + +from semantic_text_chunker import ( + process_semantic_text_chunker, + SemanticTextChunker, +) + +# --- Dummy Classes for Process-Level Tests --- + + +class DummyChunkHolder: + def __init__(self, mark_up, page_number=None): + self.mark_up = mark_up + self.page_number = page_number + + def model_dump(self, by_alias=False): + return {"mark_up": self.mark_up, "page_number": self.page_number} + + +class DummyPerPageStartingSentenceHolder: + def __init__(self, starting_sentence, page_number): + self.starting_sentence = starting_sentence + self.page_number = page_number + + +# --- Process-Level Tests (Using Dummy Chunker) --- + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_success_without_page(): + """Test a successful chunking when no per-page starting sentences are provided.""" + record = {"recordId": "1", "data": {"content": "Some content to be chunked."}} + + dummy_chunk = DummyChunkHolder("chunk1") + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(return_value=[dummy_chunk]) + dummy_text_chunker.assign_page_number_to_chunks = MagicMock() + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "1" + assert result["data"] is not None + chunks = 
result["data"]["chunks"] + assert isinstance(chunks, list) + assert len(chunks) == 1 + assert chunks[0]["mark_up"] == "chunk1" + # When no page info is provided, page_number remains unchanged (None in our dummy). + assert chunks[0]["page_number"] is None + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_success_with_page(): + """Test a successful chunking when per-page starting sentences are provided and match a chunk.""" + record = { + "recordId": "2", + "data": { + "content": "Some content to be chunked.", + "per_page_starting_sentences": [ + {"starting_sentence": "chunk", "page_number": 5} + ], + }, + } + + dummy_chunk = DummyChunkHolder("This dummy chunk contains chunk in its text") + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(return_value=[dummy_chunk]) + + def dummy_assign_page(chunks, per_page_starting_sentences): + ps_objs = [ + DummyPerPageStartingSentenceHolder(**ps.__dict__) + for ps in per_page_starting_sentences + ] + page_number = 1 + for chunk in chunks: + for ps in ps_objs: + if ps.starting_sentence in chunk.mark_up: + page_number = ps.page_number + break + chunk.page_number = page_number + return chunks + + dummy_text_chunker.assign_page_number_to_chunks = dummy_assign_page + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "2" + chunks = result["data"]["chunks"] + assert isinstance(chunks, list) + assert len(chunks) == 1 + assert chunks[0]["page_number"] == 5 + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_failure(): + """Test that an exception during chunking is caught and returns an error record.""" + record = { + "recordId": "3", + "data": {"content": "Content that will trigger an error."}, + } + + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(side_effect=Exception("Chunking error")) + dummy_text_chunker.assign_page_number_to_chunks = MagicMock() + + result = await 
process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "3" + assert result["data"] is None + assert "errors" in result + assert isinstance(result["errors"], list) + assert result["errors"][0]["message"].startswith("Failed to chunk data") + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_multiple_chunks(): + """ + Test a record where chunk() returns multiple chunks and per-page starting sentences + assign different page numbers to different chunks. + """ + record = { + "recordId": "4", + "data": { + "content": "Content that generates multiple chunks.", + "per_page_starting_sentences": [ + {"starting_sentence": "first_page", "page_number": 3}, + {"starting_sentence": "second_page", "page_number": 4}, + ], + }, + } + + dummy_chunk1 = DummyChunkHolder("This chunk contains first_page indicator") + dummy_chunk2 = DummyChunkHolder("This chunk contains second_page indicator") + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(return_value=[dummy_chunk1, dummy_chunk2]) + + def dummy_assign_page(chunks, per_page_starting_sentences): + ps_objs = [ + DummyPerPageStartingSentenceHolder(**ps.__dict__) + for ps in per_page_starting_sentences + ] + page_number = 1 + for chunk in chunks: + for ps in ps_objs: + if ps.starting_sentence in chunk.mark_up: + page_number = ps.page_number + break + chunk.page_number = page_number + return chunks + + dummy_text_chunker.assign_page_number_to_chunks = dummy_assign_page + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "4" + chunks = result["data"]["chunks"] + assert isinstance(chunks, list) + assert len(chunks) == 2 + assert chunks[0]["page_number"] == 3 + assert chunks[1]["page_number"] == 4 + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_empty_page_sentences(): + """ + Test a record where 'per_page_starting_sentences' exists but is empty. 
+ In this case, the default page (1) is assigned. + """ + record = { + "recordId": "5", + "data": { + "content": "Some content to be chunked.", + "per_page_starting_sentences": [], + }, + } + + dummy_chunk = DummyChunkHolder("Chunk without any page indicator") + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(return_value=[dummy_chunk]) + + def dummy_assign_page(chunks, per_page_starting_sentences): + for chunk in chunks: + chunk.page_number = 1 + return chunks + + dummy_text_chunker.assign_page_number_to_chunks = dummy_assign_page + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "5" + chunks = result["data"]["chunks"] + assert isinstance(chunks, list) + assert len(chunks) == 1 + assert chunks[0]["page_number"] == 1 + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_missing_data(): + """ + Test that if the record is missing the 'data' key, the function returns an error. + """ + record = {"recordId": "6"} + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock(return_value=[DummyChunkHolder("chunk")]) + dummy_text_chunker.assign_page_number_to_chunks = MagicMock() + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "6" + assert result["data"] is None + assert "errors" in result + + +@pytest.mark.asyncio +async def test_process_semantic_text_chunker_empty_content(): + """ + Test that if the content is empty and chunk() raises a ValueError (e.g. because no chunks were generated), + the error is handled and an error record is returned. 
+ """ + record = {"recordId": "7", "data": {"content": ""}} + dummy_text_chunker = MagicMock() + dummy_text_chunker.chunk = AsyncMock( + side_effect=ValueError("No chunks were generated") + ) + dummy_text_chunker.assign_page_number_to_chunks = MagicMock() + + result = await process_semantic_text_chunker(record, dummy_text_chunker) + assert result["recordId"] == "7" + assert result["data"] is None + assert "errors" in result + assert isinstance(result["errors"], list) + assert result["errors"][0]["message"].startswith("Failed to chunk data") + + +# --- Helper Classes for Chunk Splitting Tests --- + + +# A simple dummy spaCy-like model for sentence segmentation. +class DummySpan: + def __init__(self, text): + self.text = text + + +class DummyDoc: + def __init__(self, text): + # Naively split on period. + # (Ensure test texts include periods as sentence delimiters.) + sentences = [s.strip() for s in text.split(".") if s.strip()] + self.sents = [DummySpan(s) for s in sentences] + + +class DummyNLP: + def __call__(self, text): + return DummyDoc(text) + + +# Fixture that returns a SemanticTextChunker instance with patched components. +@pytest.fixture +def chunker(): + # Use relaxed thresholds so that even short sentences qualify. + stc = SemanticTextChunker( + num_surrounding_sentences=1, + similarity_threshold=0.8, + max_chunk_tokens=1000, + min_chunk_tokens=1, + ) + # Override the spaCy model with our dummy. + stc._nlp_model = DummyNLP() + # Override token counting to simply count words. + stc.num_tokens_from_string = lambda s: len(s.split()) + # For these tests, assume all sentences are very similar (so merge_similar_chunks doesn’t force a split). + stc.sentence_similarity = lambda a, b: 1.0 + return stc + + +# --- Chunk Splitting Tests Using Real (Patched) Chunker --- + + +@pytest.mark.asyncio +async def test_chunk_complete_figure(chunker): + """ + Test a text containing a complete
element. + Expect that the sentence with the complete figure is detected and grouped. + """ + text = "Text before.
Figure content
. Text after." + chunks = await chunker.chunk(text) + # For our dummy segmentation, we expect two final chunks: + # one that combines "Text before" and the figure, and one for "Text after". + assert len(chunks) == 2 + # Check that the first chunk contains a complete figure. + assert "" in chunks[0].mark_up + + +@pytest.mark.asyncio +async def test_chunk_incomplete_figure(chunker): + """ + Test a text with an incomplete figure element spanning multiple sentences. + The start and end of the figure should be grouped together. + """ + text = ( + "Text before.
Start of figure. Figure continues
. Text after." + ) + chunks = await chunker.chunk(text) + # Expected grouping: one chunk combining the normal text and the grouped figure, + # and another chunk for text after. + assert len(chunks) == 2 + # Check that the grouped chunk contains both the start and the end of the figure. + assert "" in chunks[0].mark_up + + +@pytest.mark.asyncio +async def test_chunk_markdown_heading(chunker): + """ + Test that a markdown heading is padded with newlines. + """ + text = "Introduction. # Heading. More text." + chunks = await chunker.chunk(text) + # The heading should have been transformed to include "\n\n" before and after. + # Because merge_chunks may merge sentences, check that the final text contains the padded heading. + combined = " ".join(chunk.mark_up for chunk in chunks) + assert "\n\n# Heading\n\n" in combined + + +@pytest.mark.asyncio +async def test_chunk_table(chunker): + """ + Test that a complete table element is detected. + """ + text = "Before table. Table content
. After table." + chunks = await chunker.chunk(text) + # Expect at least one chunk containing a complete table. + table_chunks = [ + c.mark_up for c in chunks if "" in c.mark_up + ] + assert len(table_chunks) >= 1 + + +@pytest.mark.asyncio +async def test_chunk_long_sentence(): + """ + Test that a sentence with many words (exceeding max_chunk_tokens) is immediately emitted as a chunk. + """ + # Create a chunker that forces a long sentence to exceed the max token threshold. + stc = SemanticTextChunker( + num_surrounding_sentences=1, + similarity_threshold=0.8, + max_chunk_tokens=5, # set low so even a few words exceed it + min_chunk_tokens=1, + ) + stc._nlp_model = DummyNLP() + stc.num_tokens_from_string = lambda s: len(s.split()) + stc.sentence_similarity = lambda a, b: 1.0 + # This sentence has 12 words. + text = "This sentence has many words that exceed the maximum chunk token limit." + chunks = await stc.chunk(text) + # Since our dummy NLP splits on period, we expect one sentence. + # And because 12 >= 5, that sentence is immediately appended as a chunk. 
+ assert len(chunks) == 1 + assert "exceed" in chunks[0].mark_up diff --git a/pyproject.toml b/pyproject.toml index ab9e813f..2f248f71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,11 @@ dev = [ "pygments>=2.18.0", "ruff>=0.8.1", "python-dotenv>=1.0.1", + "coverage>=7.6.12", + "pytest>=8.3.4", + "pytest-asyncio>=0.25.3", + "pytest-cov>=6.0.0", + "pytest-mock>=3.14.0", ] [tool.uv.workspace] diff --git a/uv.lock b/uv.lock index 0711baf8..8d052e9d 100644 --- a/uv.lock +++ b/uv.lock @@ -25,11 +25,11 @@ wheels = [ [[package]] name = "aiohappyeyeballs" -version = "2.4.4" +version = "2.4.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7f/55/e4373e888fdacb15563ef6fa9fa8c8252476ea071e96fb46defac9f18bf2/aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745", size = 21977 } +sdist = { url = "https://files.pythonhosted.org/packages/08/07/508f9ebba367fc3370162e53a3cfd12f5652ad79f0e0bfdf9f9847c6f159/aiohappyeyeballs-2.4.6.tar.gz", hash = "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0", size = 21726 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/74/fbb6559de3607b3300b9be3cc64e97548d55678e44623db17820dbd20002/aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8", size = 14756 }, + { url = "https://files.pythonhosted.org/packages/44/4c/03fb05f56551828ec67ceb3665e5dc51638042d204983a03b0a1541475b6/aiohappyeyeballs-2.4.6-py3-none-any.whl", hash = "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1", size = 14543 }, ] [[package]] @@ -401,16 +401,16 @@ wheels = [ [[package]] name = "azure-ai-inference" -version = "1.0.0b8" +version = "1.0.0b9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/73/bf/0c352b13299613124c4cb249c225702c807e4b5ee22d190e84756685c79a/azure_ai_inference-1.0.0b8.tar.gz", hash = "sha256:b7bcaaac5f53f2be06804ac6c755be9583ac6ba99df533a3970da081838b4cc1", size = 177657 } +sdist = { url = "https://files.pythonhosted.org/packages/4e/6a/ed85592e5c64e08c291992f58b1a94dab6869f28fb0f40fd753dced73ba6/azure_ai_inference-1.0.0b9.tar.gz", hash = "sha256:1feb496bd84b01ee2691befc04358fa25d7c344d8288e99364438859ad7cd5a4", size = 182408 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/5f/5d09fdef2a67a646bdc1f39027b2d5b134d5403d39be3043b03b945c3e67/azure_ai_inference-1.0.0b8-py3-none-any.whl", hash = "sha256:9bfcfe6ef5b1699fed6c70058027c253bcbc88f4730e7409fbfc675636ec05e4", size = 123426 }, + { url = "https://files.pythonhosted.org/packages/4f/0f/27520da74769db6e58327d96c98e7b9a07ce686dff582c9a5ec60b03f9dd/azure_ai_inference-1.0.0b9-py3-none-any.whl", hash = "sha256:49823732e674092dad83bb8b0d1b65aa73111fab924d61349eb2a8cdc0493990", size = 124885 }, ] [[package]] @@ -489,7 +489,7 @@ wheels = [ [[package]] name = "azure-identity" -version = "1.19.0" +version = "1.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, @@ -498,9 +498,9 @@ dependencies = [ { name = "msal-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/aa/91/cbaeff9eb0b838f0d35b4607ac1c6195c735c8eb17db235f8f60e622934c/azure_identity-1.19.0.tar.gz", hash = "sha256:500144dc18197d7019b81501165d4fa92225f03778f17d7ca8a2a180129a9c83", size = 263058 } +sdist = { url = "https://files.pythonhosted.org/packages/ee/89/7d170fab0b85d9650cdb7abda087e849644beb52bd28f6804620dd0cecd9/azure_identity-1.20.0.tar.gz", hash = "sha256:40597210d56c83e15031b0fe2ea3b26420189e1e7f3e20bdbb292315da1ba014", size = 264447 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f0/d5/3995ed12f941f4a41a273d9b1709282e825ef87ed8eab3833038fee54d59/azure_identity-1.19.0-py3-none-any.whl", hash = "sha256:e3f6558c181692d7509f09de10cca527c7dce426776454fb97df512a46527e81", size = 187587 }, + { url = "https://files.pythonhosted.org/packages/de/aa/819513c1dbef990af690bb5eefb5e337f8698d75dfdb7302528f50ce1994/azure_identity-1.20.0-py3-none-any.whl", hash = "sha256:5f23fc4889a66330e840bd78830287e14f3761820fe3c5f77ac875edcb9ec998", size = 188243 }, ] [[package]] @@ -857,35 +857,93 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451 }, ] +[[package]] +name = "coverage" +version = "7.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/d6/2b53ab3ee99f2262e6f0b8369a43f6d66658eab45510331c0b3d5c8c4272/coverage-7.6.12.tar.gz", hash = "sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2", size = 805941 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/2d/da78abbfff98468c91fd63a73cccdfa0e99051676ded8dd36123e3a2d4d5/coverage-7.6.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015", size = 208464 }, + { url = "https://files.pythonhosted.org/packages/31/f2/c269f46c470bdabe83a69e860c80a82e5e76840e9f4bbd7f38f8cebbee2f/coverage-7.6.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45", size = 208893 }, + { url = "https://files.pythonhosted.org/packages/47/63/5682bf14d2ce20819998a49c0deadb81e608a59eed64d6bc2191bc8046b9/coverage-7.6.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702", size = 241545 
}, + { url = "https://files.pythonhosted.org/packages/6a/b6/6b6631f1172d437e11067e1c2edfdb7238b65dff965a12bce3b6d1bf2be2/coverage-7.6.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0", size = 239230 }, + { url = "https://files.pythonhosted.org/packages/c7/01/9cd06cbb1be53e837e16f1b4309f6357e2dfcbdab0dd7cd3b1a50589e4e1/coverage-7.6.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f", size = 241013 }, + { url = "https://files.pythonhosted.org/packages/4b/26/56afefc03c30871326e3d99709a70d327ac1f33da383cba108c79bd71563/coverage-7.6.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f", size = 239750 }, + { url = "https://files.pythonhosted.org/packages/dd/ea/88a1ff951ed288f56aa561558ebe380107cf9132facd0b50bced63ba7238/coverage-7.6.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d", size = 238462 }, + { url = "https://files.pythonhosted.org/packages/6e/d4/1d9404566f553728889409eff82151d515fbb46dc92cbd13b5337fa0de8c/coverage-7.6.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba", size = 239307 }, + { url = "https://files.pythonhosted.org/packages/12/c1/e453d3b794cde1e232ee8ac1d194fde8e2ba329c18bbf1b93f6f5eef606b/coverage-7.6.12-cp311-cp311-win32.whl", hash = "sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f", size = 211117 }, + { url = "https://files.pythonhosted.org/packages/d5/db/829185120c1686fa297294f8fcd23e0422f71070bf85ef1cc1a72ecb2930/coverage-7.6.12-cp311-cp311-win_amd64.whl", hash = "sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558", size = 212019 }, + { 
url = "https://files.pythonhosted.org/packages/e2/7f/4af2ed1d06ce6bee7eafc03b2ef748b14132b0bdae04388e451e4b2c529b/coverage-7.6.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad", size = 208645 }, + { url = "https://files.pythonhosted.org/packages/dc/60/d19df912989117caa95123524d26fc973f56dc14aecdec5ccd7d0084e131/coverage-7.6.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3", size = 208898 }, + { url = "https://files.pythonhosted.org/packages/bd/10/fecabcf438ba676f706bf90186ccf6ff9f6158cc494286965c76e58742fa/coverage-7.6.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574", size = 242987 }, + { url = "https://files.pythonhosted.org/packages/4c/53/4e208440389e8ea936f5f2b0762dcd4cb03281a7722def8e2bf9dc9c3d68/coverage-7.6.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985", size = 239881 }, + { url = "https://files.pythonhosted.org/packages/c4/47/2ba744af8d2f0caa1f17e7746147e34dfc5f811fb65fc153153722d58835/coverage-7.6.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750", size = 242142 }, + { url = "https://files.pythonhosted.org/packages/e9/90/df726af8ee74d92ee7e3bf113bf101ea4315d71508952bd21abc3fae471e/coverage-7.6.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea", size = 241437 }, + { url = "https://files.pythonhosted.org/packages/f6/af/995263fd04ae5f9cf12521150295bf03b6ba940d0aea97953bb4a6db3e2b/coverage-7.6.12-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3", size = 239724 }, + { url = "https://files.pythonhosted.org/packages/1c/8e/5bb04f0318805e190984c6ce106b4c3968a9562a400180e549855d8211bd/coverage-7.6.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a", size = 241329 }, + { url = "https://files.pythonhosted.org/packages/9e/9d/fa04d9e6c3f6459f4e0b231925277cfc33d72dfab7fa19c312c03e59da99/coverage-7.6.12-cp312-cp312-win32.whl", hash = "sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95", size = 211289 }, + { url = "https://files.pythonhosted.org/packages/53/40/53c7ffe3c0c3fff4d708bc99e65f3d78c129110d6629736faf2dbd60ad57/coverage-7.6.12-cp312-cp312-win_amd64.whl", hash = "sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288", size = 212079 }, + { url = "https://files.pythonhosted.org/packages/76/89/1adf3e634753c0de3dad2f02aac1e73dba58bc5a3a914ac94a25b2ef418f/coverage-7.6.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1", size = 208673 }, + { url = "https://files.pythonhosted.org/packages/ce/64/92a4e239d64d798535c5b45baac6b891c205a8a2e7c9cc8590ad386693dc/coverage-7.6.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd", size = 208945 }, + { url = "https://files.pythonhosted.org/packages/b4/d0/4596a3ef3bca20a94539c9b1e10fd250225d1dec57ea78b0867a1cf9742e/coverage-7.6.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9", size = 242484 }, + { url = "https://files.pythonhosted.org/packages/1c/ef/6fd0d344695af6718a38d0861408af48a709327335486a7ad7e85936dc6e/coverage-7.6.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e", size = 239525 }, + { url = "https://files.pythonhosted.org/packages/0c/4b/373be2be7dd42f2bcd6964059fd8fa307d265a29d2b9bcf1d044bcc156ed/coverage-7.6.12-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4", size = 241545 }, + { url = "https://files.pythonhosted.org/packages/a6/7d/0e83cc2673a7790650851ee92f72a343827ecaaea07960587c8f442b5cd3/coverage-7.6.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6", size = 241179 }, + { url = "https://files.pythonhosted.org/packages/ff/8c/566ea92ce2bb7627b0900124e24a99f9244b6c8c92d09ff9f7633eb7c3c8/coverage-7.6.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3", size = 239288 }, + { url = "https://files.pythonhosted.org/packages/7d/e4/869a138e50b622f796782d642c15fb5f25a5870c6d0059a663667a201638/coverage-7.6.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc", size = 241032 }, + { url = "https://files.pythonhosted.org/packages/ae/28/a52ff5d62a9f9e9fe9c4f17759b98632edd3a3489fce70154c7d66054dd3/coverage-7.6.12-cp313-cp313-win32.whl", hash = "sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3", size = 211315 }, + { url = "https://files.pythonhosted.org/packages/bc/17/ab849b7429a639f9722fa5628364c28d675c7ff37ebc3268fe9840dda13c/coverage-7.6.12-cp313-cp313-win_amd64.whl", hash = "sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef", size = 212099 }, + { url = "https://files.pythonhosted.org/packages/d2/1c/b9965bf23e171d98505eb5eb4fb4d05c44efd256f2e0f19ad1ba8c3f54b0/coverage-7.6.12-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e", size = 209511 }, + { url = "https://files.pythonhosted.org/packages/57/b3/119c201d3b692d5e17784fee876a9a78e1b3051327de2709392962877ca8/coverage-7.6.12-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703", size = 209729 }, + { url = "https://files.pythonhosted.org/packages/52/4e/a7feb5a56b266304bc59f872ea07b728e14d5a64f1ad3a2cc01a3259c965/coverage-7.6.12-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0", size = 253988 }, + { url = "https://files.pythonhosted.org/packages/65/19/069fec4d6908d0dae98126aa7ad08ce5130a6decc8509da7740d36e8e8d2/coverage-7.6.12-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924", size = 249697 }, + { url = "https://files.pythonhosted.org/packages/1c/da/5b19f09ba39df7c55f77820736bf17bbe2416bbf5216a3100ac019e15839/coverage-7.6.12-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b", size = 252033 }, + { url = "https://files.pythonhosted.org/packages/1e/89/4c2750df7f80a7872267f7c5fe497c69d45f688f7b3afe1297e52e33f791/coverage-7.6.12-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d", size = 251535 }, + { url = "https://files.pythonhosted.org/packages/78/3b/6d3ae3c1cc05f1b0460c51e6f6dcf567598cbd7c6121e5ad06643974703c/coverage-7.6.12-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827", size = 249192 }, + { url = 
"https://files.pythonhosted.org/packages/6e/8e/c14a79f535ce41af7d436bbad0d3d90c43d9e38ec409b4770c894031422e/coverage-7.6.12-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9", size = 250627 }, + { url = "https://files.pythonhosted.org/packages/cb/79/b7cee656cfb17a7f2c1b9c3cee03dd5d8000ca299ad4038ba64b61a9b044/coverage-7.6.12-cp313-cp313t-win32.whl", hash = "sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3", size = 212033 }, + { url = "https://files.pythonhosted.org/packages/b6/c3/f7aaa3813f1fa9a4228175a7bd368199659d392897e184435a3b66408dd3/coverage-7.6.12-cp313-cp313t-win_amd64.whl", hash = "sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f", size = 213240 }, + { url = "https://files.pythonhosted.org/packages/fb/b2/f655700e1024dec98b10ebaafd0cedbc25e40e4abe62a3c8e2ceef4f8f0a/coverage-7.6.12-py3-none-any.whl", hash = "sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953", size = 200552 }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + [[package]] name = "cryptography" -version = "44.0.0" +version = "44.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/91/4c/45dfa6829acffa344e3967d6006ee4ae8be57af746ae2eba1c431949b32c/cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02", size = 710657 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/09/8cc67f9b84730ad330b3b72cf867150744bf07ff113cda21a15a1c6d2c7c/cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123", size = 6541833 }, - { url = 
"https://files.pythonhosted.org/packages/7e/5b/3759e30a103144e29632e7cb72aec28cedc79e514b2ea8896bb17163c19b/cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092", size = 3922710 }, - { url = "https://files.pythonhosted.org/packages/5f/58/3b14bf39f1a0cfd679e753e8647ada56cddbf5acebffe7db90e184c76168/cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f", size = 4137546 }, - { url = "https://files.pythonhosted.org/packages/98/65/13d9e76ca19b0ba5603d71ac8424b5694415b348e719db277b5edc985ff5/cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb", size = 3915420 }, - { url = "https://files.pythonhosted.org/packages/b1/07/40fe09ce96b91fc9276a9ad272832ead0fddedcba87f1190372af8e3039c/cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b", size = 4154498 }, - { url = "https://files.pythonhosted.org/packages/75/ea/af65619c800ec0a7e4034207aec543acdf248d9bffba0533342d1bd435e1/cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543", size = 3932569 }, - { url = "https://files.pythonhosted.org/packages/c7/af/d1deb0c04d59612e3d5e54203159e284d3e7a6921e565bb0eeb6269bdd8a/cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e", size = 4016721 }, - { url = "https://files.pythonhosted.org/packages/bd/69/7ca326c55698d0688db867795134bdfac87136b80ef373aaa42b225d6dd5/cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e", size = 4240915 }, - { url = 
"https://files.pythonhosted.org/packages/ef/d4/cae11bf68c0f981e0413906c6dd03ae7fa864347ed5fac40021df1ef467c/cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053", size = 2757925 }, - { url = "https://files.pythonhosted.org/packages/64/b1/50d7739254d2002acae64eed4fc43b24ac0cc44bf0a0d388d1ca06ec5bb1/cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd", size = 3202055 }, - { url = "https://files.pythonhosted.org/packages/11/18/61e52a3d28fc1514a43b0ac291177acd1b4de00e9301aaf7ef867076ff8a/cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591", size = 6542801 }, - { url = "https://files.pythonhosted.org/packages/1a/07/5f165b6c65696ef75601b781a280fc3b33f1e0cd6aa5a92d9fb96c410e97/cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7", size = 3922613 }, - { url = "https://files.pythonhosted.org/packages/28/34/6b3ac1d80fc174812486561cf25194338151780f27e438526f9c64e16869/cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc", size = 4137925 }, - { url = "https://files.pythonhosted.org/packages/d0/c7/c656eb08fd22255d21bc3129625ed9cd5ee305f33752ef2278711b3fa98b/cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289", size = 3915417 }, - { url = "https://files.pythonhosted.org/packages/ef/82/72403624f197af0db6bac4e58153bc9ac0e6020e57234115db9596eee85d/cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7", size = 4155160 }, - { url = 
"https://files.pythonhosted.org/packages/a2/cd/2f3c440913d4329ade49b146d74f2e9766422e1732613f57097fea61f344/cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c", size = 3932331 }, - { url = "https://files.pythonhosted.org/packages/7f/df/8be88797f0a1cca6e255189a57bb49237402b1880d6e8721690c5603ac23/cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64", size = 4017372 }, - { url = "https://files.pythonhosted.org/packages/af/36/5ccc376f025a834e72b8e52e18746b927f34e4520487098e283a719c205e/cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285", size = 4239657 }, - { url = "https://files.pythonhosted.org/packages/46/b0/f4f7d0d0bcfbc8dd6296c1449be326d04217c57afb8b2594f017eed95533/cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417", size = 2758672 }, - { url = "https://files.pythonhosted.org/packages/97/9b/443270b9210f13f6ef240eff73fd32e02d381e7103969dc66ce8e89ee901/cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede", size = 3202071 }, +sdist = { url = "https://files.pythonhosted.org/packages/c7/67/545c79fe50f7af51dbad56d16b23fe33f63ee6a5d956b3cb68ea110cbe64/cryptography-44.0.1.tar.gz", hash = "sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14", size = 710819 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/27/5e3524053b4c8889da65cf7814a9d0d8514a05194a25e1e34f46852ee6eb/cryptography-44.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009", size = 6642022 }, + { url = 
"https://files.pythonhosted.org/packages/34/b9/4d1fa8d73ae6ec350012f89c3abfbff19fc95fe5420cf972e12a8d182986/cryptography-44.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f", size = 3943865 }, + { url = "https://files.pythonhosted.org/packages/6e/57/371a9f3f3a4500807b5fcd29fec77f418ba27ffc629d88597d0d1049696e/cryptography-44.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2", size = 4162562 }, + { url = "https://files.pythonhosted.org/packages/c5/1d/5b77815e7d9cf1e3166988647f336f87d5634a5ccecec2ffbe08ef8dd481/cryptography-44.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911", size = 3951923 }, + { url = "https://files.pythonhosted.org/packages/28/01/604508cd34a4024467cd4105887cf27da128cba3edd435b54e2395064bfb/cryptography-44.0.1-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69", size = 3685194 }, + { url = "https://files.pythonhosted.org/packages/c6/3d/d3c55d4f1d24580a236a6753902ef6d8aafd04da942a1ee9efb9dc8fd0cb/cryptography-44.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026", size = 4187790 }, + { url = "https://files.pythonhosted.org/packages/ea/a6/44d63950c8588bfa8594fd234d3d46e93c3841b8e84a066649c566afb972/cryptography-44.0.1-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd", size = 3951343 }, + { url = "https://files.pythonhosted.org/packages/c1/17/f5282661b57301204cbf188254c1a0267dbd8b18f76337f0a7ce1038888c/cryptography-44.0.1-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0", size = 4187127 }, + { url = 
"https://files.pythonhosted.org/packages/f3/68/abbae29ed4f9d96596687f3ceea8e233f65c9645fbbec68adb7c756bb85a/cryptography-44.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf", size = 4070666 }, + { url = "https://files.pythonhosted.org/packages/0f/10/cf91691064a9e0a88ae27e31779200b1505d3aee877dbe1e4e0d73b4f155/cryptography-44.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864", size = 4288811 }, + { url = "https://files.pythonhosted.org/packages/38/78/74ea9eb547d13c34e984e07ec8a473eb55b19c1451fe7fc8077c6a4b0548/cryptography-44.0.1-cp37-abi3-win32.whl", hash = "sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a", size = 2771882 }, + { url = "https://files.pythonhosted.org/packages/cf/6c/3907271ee485679e15c9f5e93eac6aa318f859b0aed8d369afd636fafa87/cryptography-44.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00", size = 3206989 }, + { url = "https://files.pythonhosted.org/packages/9f/f1/676e69c56a9be9fd1bffa9bc3492366901f6e1f8f4079428b05f1414e65c/cryptography-44.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008", size = 6643714 }, + { url = "https://files.pythonhosted.org/packages/ba/9f/1775600eb69e72d8f9931a104120f2667107a0ee478f6ad4fe4001559345/cryptography-44.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862", size = 3943269 }, + { url = "https://files.pythonhosted.org/packages/25/ba/e00d5ad6b58183829615be7f11f55a7b6baa5a06910faabdc9961527ba44/cryptography-44.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3", size = 4166461 }, + { url = 
"https://files.pythonhosted.org/packages/b3/45/690a02c748d719a95ab08b6e4decb9d81e0ec1bac510358f61624c86e8a3/cryptography-44.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7", size = 3950314 }, + { url = "https://files.pythonhosted.org/packages/e6/50/bf8d090911347f9b75adc20f6f6569ed6ca9b9bff552e6e390f53c2a1233/cryptography-44.0.1-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a", size = 3686675 }, + { url = "https://files.pythonhosted.org/packages/e1/e7/cfb18011821cc5f9b21efb3f94f3241e3a658d267a3bf3a0f45543858ed8/cryptography-44.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c", size = 4190429 }, + { url = "https://files.pythonhosted.org/packages/07/ef/77c74d94a8bfc1a8a47b3cafe54af3db537f081742ee7a8a9bd982b62774/cryptography-44.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62", size = 3950039 }, + { url = "https://files.pythonhosted.org/packages/6d/b9/8be0ff57c4592382b77406269b1e15650c9f1a167f9e34941b8515b97159/cryptography-44.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41", size = 4189713 }, + { url = "https://files.pythonhosted.org/packages/78/e1/4b6ac5f4100545513b0847a4d276fe3c7ce0eacfa73e3b5ebd31776816ee/cryptography-44.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b", size = 4071193 }, + { url = "https://files.pythonhosted.org/packages/3d/cb/afff48ceaed15531eab70445abe500f07f8f96af2bb35d98af6bfa89ebd4/cryptography-44.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7", size = 4289566 }, + { url = 
"https://files.pythonhosted.org/packages/30/6f/4eca9e2e0f13ae459acd1ca7d9f0257ab86e68f44304847610afcb813dc9/cryptography-44.0.1-cp39-abi3-win32.whl", hash = "sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9", size = 2772371 }, + { url = "https://files.pythonhosted.org/packages/d2/05/5533d30f53f10239616a357f080892026db2d550a40c393d0a8a7af834a9/cryptography-44.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f", size = 3207303 }, ] [[package]] @@ -1076,10 +1134,15 @@ source = { virtual = "." } [package.dev-dependencies] dev = [ { name = "black" }, + { name = "coverage" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "pre-commit" }, { name = "pygments" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, { name = "python-dotenv" }, { name = "ruff" }, ] @@ -1089,10 +1152,15 @@ dev = [ [package.metadata.requires-dev] dev = [ { name = "black", specifier = ">=24.10.0" }, + { name = "coverage", specifier = ">=7.6.12" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "jupyter", specifier = ">=1.1.1" }, { name = "pre-commit", specifier = ">=4.0.1" }, { name = "pygments", specifier = ">=2.18.0" }, + { name = "pytest", specifier = ">=8.3.4" }, + { name = "pytest-asyncio", specifier = ">=0.25.3" }, + { name = "pytest-cov", specifier = ">=6.0.0" }, + { name = "pytest-mock", specifier = ">=3.14.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "ruff", specifier = ">=0.8.1" }, ] @@ -1291,7 +1359,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.28.1" +version = "0.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1302,18 +1370,18 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/e7/ce/a734204aaae6c35a22f9956ebcd8d8708ae5b842e15d6f42bd6f49e634a4/huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae", size = 387074 } +sdist = { url = "https://files.pythonhosted.org/packages/e2/ac/9f7010c8b050d80b64bfddcc09ef4a4450ae4369940d1b01fa13f5d083de/huggingface_hub-0.29.0.tar.gz", hash = "sha256:64034c852be270cac16c5743fe1f659b14515a9de6342d6f42cbb2ede191fc80", size = 389753 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ea/da/6c2bea5327b640920267d3bf2c9fc114cfbd0a5de234d81cda80cc9e33c8/huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7", size = 464068 }, + { url = "https://files.pythonhosted.org/packages/2a/4d/8092df2cb0cafa9fcaf691db851b2fccfe9cad4048e081436bbbdf56e4e1/huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe", size = 468012 }, ] [[package]] name = "identify" -version = "2.6.6" +version = "2.6.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/bf/c68c46601bacd4c6fb4dd751a42b6e7087240eaabc6487f2ef7a48e0e8fc/identify-2.6.6.tar.gz", hash = "sha256:7bec12768ed44ea4761efb47806f0a41f86e7c0a5fdf5950d4648c90eca7e251", size = 99217 } +sdist = { url = "https://files.pythonhosted.org/packages/83/d1/524aa3350f78bcd714d148ade6133d67d6b7de2cdbae7d99039c024c9a25/identify-2.6.7.tar.gz", hash = "sha256:3fa266b42eba321ee0b2bb0936a6a6b9e36a1351cbb69055b3082f4193035684", size = 99260 } wheels = [ - { url = "https://files.pythonhosted.org/packages/74/a1/68a395c17eeefb04917034bd0a1bfa765e7654fa150cca473d669aa3afb5/identify-2.6.6-py2.py3-none-any.whl", hash = "sha256:cbd1810bce79f8b671ecb20f53ee0ae8e86ae84b557de31d89709dc2a48ba881", size = 99083 }, + { url = 
"https://files.pythonhosted.org/packages/03/00/1fd4a117c6c93f2dcc5b7edaeaf53ea45332ef966429be566ca16c2beb94/identify-2.6.7-py2.py3-none-any.whl", hash = "sha256:155931cb617a401807b09ecec6635d6c692d180090a1cedca8ef7d58ba5b6aa0", size = 99097 }, ] [[package]] @@ -1359,10 +1427,15 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "black" }, + { name = "coverage" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "pre-commit" }, { name = "pygments" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, { name = "python-dotenv" }, { name = "ruff" }, ] @@ -1398,10 +1471,15 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "black", specifier = ">=24.10.0" }, + { name = "coverage", specifier = ">=7.6.12" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "jupyter", specifier = ">=1.1.1" }, { name = "pre-commit", specifier = ">=4.0.1" }, { name = "pygments", specifier = ">=2.18.0" }, + { name = "pytest", specifier = ">=8.3.4" }, + { name = "pytest-asyncio", specifier = ">=0.25.3" }, + { name = "pytest-cov", specifier = ">=6.0.0" }, + { name = "pytest-mock", specifier = ">=3.14.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "ruff", specifier = ">=0.8.1" }, ] @@ -1418,6 +1496,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + [[package]] name = "ipykernel" version = "6.29.5" @@ -2023,16 +2110,16 @@ wheels = [ [[package]] name = "mistune" -version = "3.1.1" +version = "3.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/1d/6b2b634e43bacc3239006e61800676aa6c41ac1836b2c57497ed27a7310b/mistune-3.1.1.tar.gz", hash = "sha256:e0740d635f515119f7d1feb6f9b192ee60f0cc649f80a8f944f905706a21654c", size = 94645 } +sdist = { url = "https://files.pythonhosted.org/packages/80/f7/f6d06304c61c2a73213c0a4815280f70d985429cda26272f490e42119c1a/mistune-3.1.2.tar.gz", hash = "sha256:733bf018ba007e8b5f2d3a9eb624034f6ee26c4ea769a98ec533ee111d504dff", size = 94613 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/02/c66bdfdadbb021adb642ca4e8a5ed32ada0b4a3e4b39c5d076d19543452f/mistune-3.1.1-py3-none-any.whl", hash = "sha256:02106ac2aa4f66e769debbfa028509a275069dcffce0dfa578edd7b991ee700a", size = 53696 }, + { url = "https://files.pythonhosted.org/packages/12/92/30b4e54c4d7c48c06db61595cffbbf4f19588ea177896f9b78f0fbe021fd/mistune-3.1.2-py3-none-any.whl", hash = "sha256:4b47731332315cdca99e0ded46fc0004001c1299ff773dfb48fbe1fd226de319", size = 53696 }, ] [[package]] name = "model2vec" -version = "0.3.9" +version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jinja2" }, @@ -2044,9 +2131,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/35/bd/2d6dc9395b4f32c3f91c244d51387227102d98c8b8a8c022626d567fd1af/model2vec-0.3.9.tar.gz", hash = "sha256:43b163822b97377264a4008b9a234727b4895ca6650cbfbc6ad497582508ea5e", size = 2431808 } +sdist = { url = 
"https://files.pythonhosted.org/packages/83/e2/3fb7bd8c612f71ad3abded92e7401f97f1e71427d3a68a3fb85f39394b17/model2vec-0.4.0.tar.gz", hash = "sha256:48d4a3da040499b0090f736eb8f22ea0fdd35b67462d81d789c70004423adbae", size = 2486998 } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/a9/eea35652dcc53c064d2ebf9617267a7c751d58dd275ff40dc5bb6f9886a1/model2vec-0.3.9-py3-none-any.whl", hash = "sha256:b66ca217ddb2218876c359e1cd49a094c31e5839dcc6999391bd95769bb41913", size = 28643 }, + { url = "https://files.pythonhosted.org/packages/93/7d/39ff093c4e45303a06e3c5825c6144cbd21f18a1393a154bbf93232b0f1a/model2vec-0.4.0-py3-none-any.whl", hash = "sha256:df30685a55841c61c6638e4f329648e76b148507bd778801d7bfcd6b970a4f2f", size = 38593 }, ] [[package]] @@ -2344,7 +2431,7 @@ wheels = [ [[package]] name = "openai" -version = "1.61.1" +version = "1.63.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2356,9 +2443,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d9/cf/61e71ce64cf0a38f029da0f9a5f10c9fa0e69a7a977b537126dac50adfea/openai-1.61.1.tar.gz", hash = "sha256:ce1851507218209961f89f3520e06726c0aa7d0512386f0f977e3ac3e4f2472e", size = 350784 } +sdist = { url = "https://files.pythonhosted.org/packages/e6/1c/11b520deb71f9ea54ced3c52cd6a5f7131215deba63ad07f23982e328141/openai-1.63.2.tar.gz", hash = "sha256:aeabeec984a7d2957b4928ceaa339e2ead19c61cfcf35ae62b7c363368d26360", size = 356902 } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/b6/2e2a011b2dc27a6711376808b4cd8c922c476ea0f1420b39892117fa8563/openai-1.61.1-py3-none-any.whl", hash = "sha256:72b0826240ce26026ac2cd17951691f046e5be82ad122d20a8e1b30ca18bd11e", size = 463126 }, + { url = "https://files.pythonhosted.org/packages/15/64/db3462b358072387b8e93e6e6a38d3c741a17b4a84171ef01d6c85c63f25/openai-1.63.2-py3-none-any.whl", hash = 
"sha256:1f38b27b5a40814c2b7d8759ec78110df58c4a614c25f182809ca52b080ff4d4", size = 472282 }, ] [[package]] @@ -2542,6 +2629,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, ] +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + [[package]] name = "portalocker" version = "2.10.1" @@ -2686,17 +2782,17 @@ wheels = [ [[package]] name = "psutil" -version = "6.1.1" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/5a/07871137bb752428aa4b659f910b399ba6f291156bdea939be3e96cae7cb/psutil-6.1.1.tar.gz", hash = "sha256:cf8496728c18f2d0b45198f06895be52f36611711746b7f30c464b422b50e2f5", size = 508502 } +sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003 } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/99/ca79d302be46f7bdd8321089762dd4476ee725fce16fc2b2e1dbba8cac17/psutil-6.1.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:fc0ed7fe2231a444fc219b9c42d0376e0a9a1a72f16c5cfa0f68d19f1a0663e8", size = 247511 }, - { url = 
"https://files.pythonhosted.org/packages/0b/6b/73dbde0dd38f3782905d4587049b9be64d76671042fdcaf60e2430c6796d/psutil-6.1.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bdd4eab935276290ad3cb718e9809412895ca6b5b334f5a9111ee6d9aff9377", size = 248985 }, - { url = "https://files.pythonhosted.org/packages/17/38/c319d31a1d3f88c5b79c68b3116c129e5133f1822157dd6da34043e32ed6/psutil-6.1.1-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6e06c20c05fe95a3d7302d74e7097756d4ba1247975ad6905441ae1b5b66003", size = 284488 }, - { url = "https://files.pythonhosted.org/packages/9c/39/0f88a830a1c8a3aba27fededc642da37613c57cbff143412e3536f89784f/psutil-6.1.1-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97f7cb9921fbec4904f522d972f0c0e1f4fabbdd4e0287813b21215074a0f160", size = 287477 }, - { url = "https://files.pythonhosted.org/packages/47/da/99f4345d4ddf2845cb5b5bd0d93d554e84542d116934fde07a0c50bd4e9f/psutil-6.1.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33431e84fee02bc84ea36d9e2c4a6d395d479c9dd9bba2376c1f6ee8f3a4e0b3", size = 289017 }, - { url = "https://files.pythonhosted.org/packages/38/53/bd755c2896f4461fd4f36fa6a6dcb66a88a9e4b9fd4e5b66a77cf9d4a584/psutil-6.1.1-cp37-abi3-win32.whl", hash = "sha256:eaa912e0b11848c4d9279a93d7e2783df352b082f40111e078388701fd479e53", size = 250602 }, - { url = "https://files.pythonhosted.org/packages/7b/d7/7831438e6c3ebbfa6e01a927127a6cb42ad3ab844247f3c5b96bea25d73d/psutil-6.1.1-cp37-abi3-win_amd64.whl", hash = "sha256:f35cfccb065fff93529d2afb4a2e89e363fe63ca1e4a5da22b603a85833c2649", size = 254444 }, + { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051 }, + { url = 
"https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535 }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004 }, + { url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986 }, + { url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544 }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053 }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 }, ] [[package]] @@ -2906,6 +3002,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/22/40f9162e943f86f0fc927ebc648078be87def360d9d8db346619fb97df2b/pyOpenSSL-24.3.0-py3-none-any.whl", hash = "sha256:e474f5a473cd7f92221cc04976e48f4d11502804657a08a989fb3be5514c904a", size = 
56111 }, ] +[[package]] +name = "pytest" +version = "8.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, +] + +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 }, +] + +[[package]] +name = "pytest-cov" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/45/9b538de8cef30e17c7b45ef42f538a94889ed6a16f2387a6c89e73220651/pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0", size = 66945 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/36/3b/48e79f2cd6a61dbbd4807b4ed46cb564b4fd50a76166b1c4ea5c1d9e2371/pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35", size = 22949 }, +] + +[[package]] +name = "pytest-mock" +version = "3.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/90/a955c3ab35ccd41ad4de556596fa86685bf4fc5ffcc62d22d856cfd4e29a/pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0", size = 32814 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3255,27 +3403,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 }, - { url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 }, - { url = 
"https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 }, - { url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 }, - { url = "https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 }, - { url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 }, - { url = "https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 }, - { url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 }, - { url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 }, - { url = 
"https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 }, - { url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 }, - { url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 }, - { url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 }, - { url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 }, - { url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 }, - { url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 }, - { url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", 
hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 }, +version = "0.9.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/e1/e265aba384343dd8ddd3083f5e33536cd17e1566c41453a5517b5dd443be/ruff-0.9.6.tar.gz", hash = "sha256:81761592f72b620ec8fa1068a6fd00e98a5ebee342a3642efd84454f3031dca9", size = 3639454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/e3/3d2c022e687e18cf5d93d6bfa2722d46afc64eaa438c7fbbdd603b3597be/ruff-0.9.6-py3-none-linux_armv6l.whl", hash = "sha256:2f218f356dd2d995839f1941322ff021c72a492c470f0b26a34f844c29cdf5ba", size = 11714128 }, + { url = "https://files.pythonhosted.org/packages/e1/22/aff073b70f95c052e5c58153cba735748c9e70107a77d03420d7850710a0/ruff-0.9.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b908ff4df65dad7b251c9968a2e4560836d8f5487c2f0cc238321ed951ea0504", size = 11682539 }, + { url = "https://files.pythonhosted.org/packages/75/a7/f5b7390afd98a7918582a3d256cd3e78ba0a26165a467c1820084587cbf9/ruff-0.9.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b109c0ad2ececf42e75fa99dc4043ff72a357436bb171900714a9ea581ddef83", size = 11132512 }, + { url = "https://files.pythonhosted.org/packages/a6/e3/45de13ef65047fea2e33f7e573d848206e15c715e5cd56095589a7733d04/ruff-0.9.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1de4367cca3dac99bcbd15c161404e849bb0bfd543664db39232648dc00112dc", size = 11929275 }, + { url = "https://files.pythonhosted.org/packages/7d/f2/23d04cd6c43b2e641ab961ade8d0b5edb212ecebd112506188c91f2a6e6c/ruff-0.9.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac3ee4d7c2c92ddfdaedf0bf31b2b176fa7aa8950efc454628d477394d35638b", size = 11466502 }, + { url = "https://files.pythonhosted.org/packages/b5/6f/3a8cf166f2d7f1627dd2201e6cbc4cb81f8b7d58099348f0c1ff7b733792/ruff-0.9.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:5dc1edd1775270e6aa2386119aea692039781429f0be1e0949ea5884e011aa8e", size = 12676364 }, + { url = "https://files.pythonhosted.org/packages/f5/c4/db52e2189983c70114ff2b7e3997e48c8318af44fe83e1ce9517570a50c6/ruff-0.9.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4a091729086dffa4bd070aa5dab7e39cc6b9d62eb2bef8f3d91172d30d599666", size = 13335518 }, + { url = "https://files.pythonhosted.org/packages/66/44/545f8a4d136830f08f4d24324e7db957c5374bf3a3f7a6c0bc7be4623a37/ruff-0.9.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1bbc6808bf7b15796cef0815e1dfb796fbd383e7dbd4334709642649625e7c5", size = 12823287 }, + { url = "https://files.pythonhosted.org/packages/c5/26/8208ef9ee7431032c143649a9967c3ae1aae4257d95e6f8519f07309aa66/ruff-0.9.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:589d1d9f25b5754ff230dce914a174a7c951a85a4e9270613a2b74231fdac2f5", size = 14592374 }, + { url = "https://files.pythonhosted.org/packages/31/70/e917781e55ff39c5b5208bda384fd397ffd76605e68544d71a7e40944945/ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc61dd5131742e21103fbbdcad683a8813be0e3c204472d520d9a5021ca8b217", size = 12500173 }, + { url = "https://files.pythonhosted.org/packages/84/f5/e4ddee07660f5a9622a9c2b639afd8f3104988dc4f6ba0b73ffacffa9a8c/ruff-0.9.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5e2d9126161d0357e5c8f30b0bd6168d2c3872372f14481136d13de9937f79b6", size = 11906555 }, + { url = "https://files.pythonhosted.org/packages/f1/2b/6ff2fe383667075eef8656b9892e73dd9b119b5e3add51298628b87f6429/ruff-0.9.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:68660eab1a8e65babb5229a1f97b46e3120923757a68b5413d8561f8a85d4897", size = 11538958 }, + { url = "https://files.pythonhosted.org/packages/3c/db/98e59e90de45d1eb46649151c10a062d5707b5b7f76f64eb1e29edf6ebb1/ruff-0.9.6-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:c4cae6c4cc7b9b4017c71114115db0445b00a16de3bcde0946273e8392856f08", size = 12117247 }, + { url = "https://files.pythonhosted.org/packages/ec/bc/54e38f6d219013a9204a5a2015c09e7a8c36cedcd50a4b01ac69a550b9d9/ruff-0.9.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:19f505b643228b417c1111a2a536424ddde0db4ef9023b9e04a46ed8a1cb4656", size = 12554647 }, + { url = "https://files.pythonhosted.org/packages/a5/7d/7b461ab0e2404293c0627125bb70ac642c2e8d55bf590f6fce85f508f1b2/ruff-0.9.6-py3-none-win32.whl", hash = "sha256:194d8402bceef1b31164909540a597e0d913c0e4952015a5b40e28c146121b5d", size = 9949214 }, + { url = "https://files.pythonhosted.org/packages/ee/30/c3cee10f915ed75a5c29c1e57311282d1a15855551a64795c1b2bbe5cf37/ruff-0.9.6-py3-none-win_amd64.whl", hash = "sha256:03482d5c09d90d4ee3f40d97578423698ad895c87314c4de39ed2af945633caa", size = 10999914 }, + { url = "https://files.pythonhosted.org/packages/e8/a8/d71f44b93e3aa86ae232af1f2126ca7b95c0f515ec135462b3e1f351441c/ruff-0.9.6-py3-none-win_arm64.whl", hash = "sha256:0e2bb706a2be7ddfea4a4af918562fdc1bcb16df255e5fa595bbd800ce322a5a", size = 10177499 }, ] [[package]] @@ -3470,11 +3618,11 @@ wheels = [ [[package]] name = "sqlglot" -version = "26.4.1" +version = "26.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/86/38/a6b01d6f291c09dc4e17f87f131310cdc104a719910eca3878c893fcffa5/sqlglot-26.4.1.tar.gz", hash = "sha256:97cc367e364d0ac00a85ea1a1b7ff0e1a91e9e6e97c4824b34f3bf6b619ab3b0", size = 5315460 } +sdist = { url = "https://files.pythonhosted.org/packages/ec/0e/07d21d8b3e28e2cf8db91978ea4b39e6d60afc32f400616edd976cc68525/sqlglot-26.6.0.tar.gz", hash = "sha256:b73af723ee2d239b0ba544dc71b0f33ff6c99817dadb8994f399c144b2d9d46c", size = 5317429 } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/e4/0d9749dc9b837e47e19e5db12c8ca6b1f4f02c079145a8ab7278d0f5727d/sqlglot-26.4.1-py3-none-any.whl", hash = 
"sha256:c9dae7b974fdb88f443408ceac817a4627d9e8c33afdb2bb4ebdd87864b0d1ba", size = 447000 }, + { url = "https://files.pythonhosted.org/packages/e0/e6/d4f64343ceca4fc7f802772548eb460a5700733b3f8fddd8c7920f27a947/sqlglot-26.6.0-py3-none-any.whl", hash = "sha256:8c90ebcace2629f7d67c308d9fd148e0906b331357213d9b8905758e67da113f", size = 447607 }, ] [package.optional-dependencies] @@ -3739,32 +3887,32 @@ sdist = { url = "https://files.pythonhosted.org/packages/3c/2d/8946864f716ac82dc [[package]] name = "tiktoken" -version = "0.8.0" +version = "0.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 }, - { url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 }, - { url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 }, - { url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 }, - { url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 }, - { url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 }, - { url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 }, - { url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 }, - { url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 }, - { url = "https://files.pythonhosted.org/packages/26/32/e0e3a859136e95c85a572e4806dc58bf1ddf651108ae8b97d5f3ebe1a244/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04", size = 1175432 }, - { url = "https://files.pythonhosted.org/packages/c7/89/926b66e9025b97e9fbabeaa59048a736fe3c3e4530a204109571104f921c/tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc", size = 1236576 }, - { url = "https://files.pythonhosted.org/packages/45/e2/39d4aa02a52bba73b2cd21ba4533c84425ff8786cc63c511d68c8897376e/tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db", size = 883824 }, - { url = "https://files.pythonhosted.org/packages/e3/38/802e79ba0ee5fcbf240cd624143f57744e5d411d2e9d9ad2db70d8395986/tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24", size = 1039648 }, - { url = "https://files.pythonhosted.org/packages/b1/da/24cdbfc302c98663fbea66f5866f7fa1048405c7564ab88483aea97c3b1a/tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a", size = 982763 }, - { url = "https://files.pythonhosted.org/packages/e4/f0/0ecf79a279dfa41fc97d00adccf976ecc2556d3c08ef3e25e45eb31f665b/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5", size = 1144417 }, - { url = "https://files.pythonhosted.org/packages/ab/d3/155d2d4514f3471a25dc1d6d20549ef254e2aa9bb5b1060809b1d3b03d3a/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953", size = 1175108 }, - { url = "https://files.pythonhosted.org/packages/19/eb/5989e16821ee8300ef8ee13c16effc20dfc26c777d05fbb6825e3c037b81/tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7", size = 1236520 }, - { url = "https://files.pythonhosted.org/packages/40/59/14b20465f1d1cb89cfbc96ec27e5617b2d41c79da12b5e04e96d689be2a7/tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69", 
size = 883849 }, +sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/ae/4613a59a2a48e761c5161237fc850eb470b4bb93696db89da51b79a871f1/tiktoken-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f32cc56168eac4851109e9b5d327637f15fd662aa30dd79f964b7c39fbadd26e", size = 1065987 }, + { url = "https://files.pythonhosted.org/packages/3f/86/55d9d1f5b5a7e1164d0f1538a85529b5fcba2b105f92db3622e5d7de6522/tiktoken-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:45556bc41241e5294063508caf901bf92ba52d8ef9222023f83d2483a3055348", size = 1009155 }, + { url = "https://files.pythonhosted.org/packages/03/58/01fb6240df083b7c1916d1dcb024e2b761213c95d576e9f780dfb5625a76/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03935988a91d6d3216e2ec7c645afbb3d870b37bcb67ada1943ec48678e7ee33", size = 1142898 }, + { url = "https://files.pythonhosted.org/packages/b1/73/41591c525680cd460a6becf56c9b17468d3711b1df242c53d2c7b2183d16/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3d80aad8d2c6b9238fc1a5524542087c52b860b10cbf952429ffb714bc1136", size = 1197535 }, + { url = "https://files.pythonhosted.org/packages/7d/7c/1069f25521c8f01a1a182f362e5c8e0337907fae91b368b7da9c3e39b810/tiktoken-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b2a21133be05dc116b1d0372af051cd2c6aa1d2188250c9b553f9fa49301b336", size = 1259548 }, + { url = "https://files.pythonhosted.org/packages/6f/07/c67ad1724b8e14e2b4c8cca04b15da158733ac60136879131db05dda7c30/tiktoken-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:11a20e67fdf58b0e2dea7b8654a288e481bb4fc0289d3ad21291f8d0849915fb", size = 893895 }, + { url = 
"https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073 }, + { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075 }, + { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754 }, + { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678 }, + { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283 }, + { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897 }, + { url = "https://files.pythonhosted.org/packages/7a/11/09d936d37f49f4f494ffe660af44acd2d99eb2429d60a57c71318af214e0/tiktoken-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb", size = 1064919 }, + { url = 
"https://files.pythonhosted.org/packages/80/0e/f38ba35713edb8d4197ae602e80837d574244ced7fb1b6070b31c29816e0/tiktoken-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63", size = 1007877 }, + { url = "https://files.pythonhosted.org/packages/fe/82/9197f77421e2a01373e27a79dd36efdd99e6b4115746ecc553318ecafbf0/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01", size = 1140095 }, + { url = "https://files.pythonhosted.org/packages/f2/bb/4513da71cac187383541facd0291c4572b03ec23c561de5811781bbd988f/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139", size = 1195649 }, + { url = "https://files.pythonhosted.org/packages/fa/5c/74e4c137530dd8504e97e3a41729b1103a4ac29036cbfd3250b11fd29451/tiktoken-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a", size = 1258465 }, + { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 }, ] [[package]] @@ -3804,6 +3952,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, ] +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = 
"sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = 
"https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = 
"https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = 
"https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + [[package]] name = "tomlkit" version = "0.13.2" @@ -3914,16 +4101,16 @@ wheels = [ [[package]] name = "virtualenv" -version = "20.29.1" +version = "20.29.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/ca/f23dcb02e161a9bba141b1c08aa50e8da6ea25e6d780528f1d385a3efe25/virtualenv-20.29.1.tar.gz", hash = "sha256:b8b8970138d32fb606192cb97f6cd4bb644fa486be9308fb9b63f81091b5dc35", size = 7658028 } +sdist = { url = "https://files.pythonhosted.org/packages/f1/88/dacc875dd54a8acadb4bcbfd4e3e86df8be75527116c91d8f9784f5e9cab/virtualenv-20.29.2.tar.gz", hash = "sha256:fdaabebf6d03b5ba83ae0a02cfe96f48a716f4fae556461d180825866f75b728", size = 4320272 } wheels = [ - { url = "https://files.pythonhosted.org/packages/89/9b/599bcfc7064fbe5740919e78c5df18e5dceb0887e676256a1061bb5ae232/virtualenv-20.29.1-py3-none-any.whl", hash = "sha256:4e4cb403c0b0da39e13b46b1b2476e505cb0046b25f242bee80f62bf990b2779", size = 4282379 }, + { url = "https://files.pythonhosted.org/packages/93/fa/849483d56773ae29740ae70043ad88e068f98a6401aa819b5d6bee604683/virtualenv-20.29.2-py3-none-any.whl", hash = 
"sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a", size = 4301478 }, ] [[package]]