Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new parsing mode and prompt parameters #622

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 53 additions & 18 deletions llama_cloud_services/parse/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,7 @@ class LlamaParse(BasePydanticReader):
default=None,
description="The top margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page height.",
)
complemental_formatting_instruction: Optional[str] = Field(
default=None,
description="The complemental formatting instruction for the parser. Tell llamaParse how some thing should to be formatted, while retaining the markdown output.",
)
content_guideline_instruction: Optional[str] = Field(
default=None,
description="The content guideline for the parser. Tell LlamaParse how the content should be changed / transformed.",
)

continuous_mode: Optional[bool] = Field(
default=False,
description="Parse documents continuously, leading to better results on documents where tables span across two pages.",
Expand Down Expand Up @@ -203,10 +196,7 @@ class LlamaParse(BasePydanticReader):
default=False,
description="Note: Non compatible with gpt-4o. If set to true, the parser will use a faster mode to extract text from documents. This mode will skip OCR of images, and table/heading reconstruction.",
)
formatting_instruction: Optional[str] = Field(
default=None,
description="The Formatting instruction for the parser. Override default llamaParse behavior. In most case you want to use complemental_formatting_instruction instead.",
)

guess_xlsx_sheet_names: Optional[bool] = Field(
default=False,
description="Whether to guess the sheet names of the xlsx file.",
Expand Down Expand Up @@ -282,6 +272,10 @@ class LlamaParse(BasePydanticReader):
default=None,
description="A templated suffix to add to the beginning of each page. If it contain `{page_number}`, it will be replaced by the page number.",
)
parsing_mode: Optional[str] = Field(
default=None,
description="The parsing mode to use, see ParsingMode enum for possible values ",
)
premium_mode: Optional[bool] = Field(
default=False,
description="Use our best parser mode if set to True.",
Expand Down Expand Up @@ -327,6 +321,14 @@ class LlamaParse(BasePydanticReader):
default=None,
description="The named JSON Schema to use to structure the output of the parsing job. For convenience / testing, LlamaParse provides a few named JSON Schema that can be used directly. Use 'imFeelingLucky' to let llamaParse dream the schema.",
)
system_prompt: Optional[str] = Field(
default=None,
description="The system prompt. Replace llamaParse default system prompt, may impact accuracy",
)
system_prompt_append: Optional[str] = Field(
default=None,
description="String to append to default system prompt.",
)
take_screenshot: Optional[bool] = Field(
default=False,
description="Whether to take screenshot of each page of the document.",
Expand All @@ -335,9 +337,9 @@ class LlamaParse(BasePydanticReader):
default=None,
description="The target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0",
)
use_vendor_multimodal_model: Optional[bool] = Field(
default=False,
description="Whether to use the vendor multimodal API.",
user_prompt: Optional[str] = Field(
default=None,
description="The user prompt. Replace llamaParse default user prompt",
)
vendor_multimodal_api_key: Optional[str] = Field(
default=None,
Expand All @@ -357,6 +359,18 @@ class LlamaParse(BasePydanticReader):
default=None,
description="The bounding box to use to extract text from documents describe as a string containing the bounding box margins",
)
complemental_formatting_instruction: Optional[str] = Field(
default=None,
description="The complemental formatting instruction for the parser. Tell llamaParse how some thing should to be formatted, while retaining the markdown output.",
)
content_guideline_instruction: Optional[str] = Field(
default=None,
description="The content guideline for the parser. Tell LlamaParse how the content should be changed / transformed.",
)
formatting_instruction: Optional[str] = Field(
default=None,
description="The Formatting instruction for the parser. Override default llamaParse behavior. In most case you want to use complemental_formatting_instruction instead.",
)
gpt4o_mode: Optional[bool] = Field(
default=False,
description="Whether to use gpt-4o extract text from documents.",
Expand All @@ -373,6 +387,11 @@ class LlamaParse(BasePydanticReader):
default="", description="The parsing instruction for the parser."
)

use_vendor_multimodal_model: Optional[bool] = Field(
default=False,
description="Whether to use the vendor multimodal API.",
)

@field_validator("api_key", mode="before", check_fields=True)
@classmethod
def validate_api_key(cls, v: str) -> str:
Expand Down Expand Up @@ -552,11 +571,17 @@ async def _create_job(
data["bbox_top"] = self.bbox_top

if self.complemental_formatting_instruction:
print(
"WARNING: complemental_formatting_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead."
)
data[
"complemental_formatting_instruction"
] = self.complemental_formatting_instruction

if self.content_guideline_instruction:
print(
"WARNING: content_guideline_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead."
)
data["content_guideline_instruction"] = self.content_guideline_instruction

if self.continuous_mode:
Expand Down Expand Up @@ -584,6 +609,9 @@ async def _create_job(
data["fast_mode"] = self.fast_mode

if self.formatting_instruction:
print(
"WARNING: formatting_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead."
)
data["formatting_instruction"] = self.formatting_instruction

if self.guess_xlsx_sheet_names:
Expand Down Expand Up @@ -623,6 +651,9 @@ async def _create_job(
data["invalidate_cache"] = self.invalidate_cache

if self.is_formatting_instruction:
print(
"WARNING: formatting_instruction is deprecated and may be remove in a future release. Use system_prompt, system_prompt_append or user_prompt instead."
)
data["is_formatting_instruction"] = self.is_formatting_instruction

if self.job_timeout_extra_time_per_page_in_seconds is not None:
Expand Down Expand Up @@ -664,7 +695,7 @@ async def _create_job(

if self.parsing_instruction:
print(
"WARNING: parsing_instruction is deprecated. Use complemental_formatting_instruction or content_guideline_instruction instead."
"WARNING: parsing_instruction is deprecated. Use system_prompt, system_prompt_append or user_prompt instead."
)
data["parsing_instruction"] = self.parsing_instruction

Expand Down Expand Up @@ -699,13 +730,17 @@ async def _create_job(
data[
"structured_output_json_schema_name"
] = self.structured_output_json_schema_name

if self.system_prompt is not None:
data["system_prompt"] = self.system_prompt
if self.system_prompt_append is not None:
data["system_prompt_append"] = self.system_prompt_append
if self.take_screenshot:
data["take_screenshot"] = self.take_screenshot

if self.target_pages is not None:
data["target_pages"] = self.target_pages

if self.user_prompt is not None:
data["user_prompt"] = self.user_prompt
if self.use_vendor_multimodal_model:
data["use_vendor_multimodal_model"] = self.use_vendor_multimodal_model

Expand Down
10 changes: 10 additions & 0 deletions llama_cloud_services/parse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ class ResultType(str, Enum):
STRUCTURED = "structured"


class ParsingMode(str, Enum):
    """Parsing modes selectable through the ``parsing_mode`` option.

    Every member's value is the same string as its name. Because the
    enum mixes in ``str``, a member compares equal to that plain string,
    and ``ParsingMode("<value>")`` returns the matching member.
    """

    # NOTE(review): the names suggest page-level vs. document-level parsing
    # and which kind of model is involved; confirm the exact semantics
    # against the LlamaParse API documentation.
    parse_page_without_llm = "parse_page_without_llm"
    parse_page_with_llm = "parse_page_with_llm"
    parse_page_with_lvm = "parse_page_with_lvm"
    parse_page_with_agent = "parse_page_with_agent"
    parse_document_with_llm = "parse_document_with_llm"


class Language(str, Enum):
BAZA = "abq"
ADYGHE = "ady"
Expand Down
Loading