From d98186d6e29439d677ce3c4cb70aa38668e5f59e Mon Sep 17 00:00:00 2001 From: Jambay Kinley Date: Wed, 29 Jan 2025 20:19:17 -0800 Subject: [PATCH] CLI: Expose chat template in data options (#1582) ## Describe your changes Provide a `--use_chat_template` option in the CLI to use the default chat template instead of `text_template` or `text_field`. ## Checklist before requesting a review - [ ] Add unit tests for this change. - [ ] Make sure all tests pass. - [ ] Update documentation if necessary. - [ ] Lint and apply fixes to your code by running `lintrunner -a` - [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. - [ ] Does this PR include example changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR. ## (Optional) Issue link --- olive/cli/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/olive/cli/base.py b/olive/cli/base.py index 55f2fdaaf..9ccbca71f 100644 --- a/olive/cli/base.py +++ b/olive/cli/base.py @@ -481,6 +481,7 @@ def add_dataset_options(sub_parser, required=True, include_train=True, include_e type=unescaped_str, help=r"Template to generate text field from. E.g. '### Question: {prompt} \n### Answer: {response}'", ) + text_group.add_argument("--use_chat_template", action="store_true", help="Use chat template for text field.") dataset_group.add_argument( "--max_seq_len", type=int, @@ -526,6 +527,7 @@ def update_dataset_options(args, config): ), ((*preprocess_key, "text_cols"), args.text_field), ((*preprocess_key, "text_template"), args.text_template), + ((*preprocess_key, "chat_template"), args.use_chat_template), ((*preprocess_key, "max_seq_len"), args.max_seq_len), ((*preprocess_key, "add_special_tokens"), args.add_special_tokens), ((*preprocess_key, "max_samples"), args.max_samples),