Bump Transformer v4.49.0 #1735

Merged
53 commits merged into main from bump-transformer-v4.49.0 on Mar 12, 2025
Changes shown are from 40 of the 53 commits.

Commits
a0a1eaa - bump transformer (Mar 5, 2025)
24a7ddc - test (Mar 6, 2025)
9d50606 - Merge branch 'main' into bump-transformer-v4.49.0 (KuuCi, Mar 6, 2025)
f7a3286 - test (Mar 6, 2025)
9497b7d - cpu tests (Mar 6, 2025)
6a36a51 - typo (Mar 6, 2025)
0793be0 - cpu test (Mar 6, 2025)
fabcd99 - rm debug code (Mar 6, 2025)
b07e13d - rm debug code (Mar 6, 2025)
a6f8fed - gpu test 1 (Mar 6, 2025)
fb67ad5 - clip (Mar 6, 2025)
c707e8e - fix (Mar 7, 2025)
b222487 - test (Mar 7, 2025)
c1310de - test (Mar 7, 2025)
c0a56fc - test (Mar 7, 2025)
fa642c6 - fix (Mar 7, 2025)
eecdf6b - fix (Mar 7, 2025)
7fcd88e - fix (Mar 7, 2025)
7fb5d46 - test (Mar 7, 2025)
c022016 - test (Mar 7, 2025)
4f94d8e - test (Mar 7, 2025)
52c2205 - test (Mar 8, 2025)
8b93963 - test (Mar 9, 2025)
83bcc90 - flash (Mar 10, 2025)
b5282f7 - typo (Mar 10, 2025)
dae4572 - precommit (Mar 10, 2025)
8bcce0a - test (Mar 10, 2025)
6d2d5cb - precisionchanges (Mar 10, 2025)
26dc16e - pyright (Mar 10, 2025)
f0d6650 - clean check_hf_tokenizer_equivalence (Mar 10, 2025)
78b2d59 - precommit (Mar 11, 2025)
80a909b - more patches (Mar 11, 2025)
834c668 - test (Mar 11, 2025)
2d5f897 - precommit (Mar 11, 2025)
7525f6f - additional precision tests (Mar 11, 2025)
0036564 - clean (Mar 11, 2025)
bd1488b - clean (Mar 11, 2025)
567f882 - rm duplicate (Mar 11, 2025)
0c4ba54 - update yamls (Mar 11, 2025)
b616b8f - no longer support mpt :( (Mar 11, 2025)
fc11ced - llama 3.1 (Mar 11, 2025)
c160580 - update error (Mar 11, 2025)
1884078 - precommit (Mar 11, 2025)
363566c - composer (Mar 11, 2025)
0af94af - Update llmfoundry/models/hf/hf_base.py (dakinggg, Mar 11, 2025)
6ea80d3 - 8b, lol (Mar 11, 2025)
7203209 - precommit (Mar 11, 2025)
3a2ec93 - typo (Mar 11, 2025)
348b0bf - update path (Mar 11, 2025)
f0aa50b - update docs (Mar 11, 2025)
4d05bc5 - revert test (Mar 12, 2025)
59d65fe - fix test (Mar 12, 2025)
61e2ede - fix (Mar 12, 2025)
2 changes: 1 addition & 1 deletion llmfoundry/data/text_data.py
@@ -197,7 +197,7 @@ def __init__(

# How to tokenize a text sample to a token sample
def _tokenize(self, text_sample: Mapping) -> dict[str, list[int]]:
- if self.tokenizer._pad_token is None:
+ if self.tokenizer.pad_token is None:
# Some tokenizers (e.g. GPT2 tokenizer) have no padding token which causes bugs
raise RuntimeError(
'If tokenizing on-the-fly, tokenizer must have a pad_token_id',
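Note on this change: the check now goes through the public `pad_token` accessor rather than the private `_pad_token` attribute, presumably because transformers v4.49 changed the underscore-prefixed internals. A minimal sketch of the same check outside foundry, assuming GPT-2's tokenizer (which ships without a padding token):

```python
# Sketch only: `pad_token` is the public accessor and returns None when unset.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('gpt2')
if tokenizer.pad_token is None:
    # Common workaround for GPT-2-style tokenizers: reuse EOS as the pad token.
    tokenizer.pad_token = tokenizer.eos_token
print(tokenizer.pad_token_id)
```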
2 changes: 1 addition & 1 deletion llmfoundry/data/utils.py
@@ -211,7 +211,7 @@ def get_text_collator(
collate_fn = transformers.DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=mlm_probability is not None,
- mlm_probability=mlm_probability,
+ mlm_probability=mlm_probability if mlm_probability else 0,
)

if (eos_token_id is not None) or (bos_token_id is not None):
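The `else 0` fallback presumably exists because newer collator versions expect a numeric `mlm_probability` even when `mlm=False`. A small sketch of the resulting call pattern (the GPT-2 tokenizer here is only an illustration, not the foundry configuration):

```python
# Sketch: for causal LM the probability is unused, but pass a number rather than None.
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

collate_fn = transformers.DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,            # causal LM: labels are a copy of input_ids
    mlm_probability=0.0,  # numeric placeholder instead of None
)
batch = collate_fn([tokenizer('hello world'), tokenizer('foundry test')])
print(batch['input_ids'].shape)
```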
6 changes: 2 additions & 4 deletions llmfoundry/models/hf/hf_base.py
@@ -228,13 +228,11 @@ def build_inner_model(
Returns:
Union[PreTrainedModel, 'PeftModel']: The built inner model.
"""
- if not trust_remote_code and pretrained_model_name_or_path.startswith(
+ if pretrained_model_name_or_path.startswith(
'mosaicml/mpt',
):
raise ValueError(
- 'trust_remote_code must be set to True for MPT models. Without this, the MPT model code will come from the transformers library, '
- +
- 'which is significantly slower and not compatible with the LLM foundry training code, rather than the code release by MosaicML.',
+ 'MPT models are no longer supported by LLM Foundry due to transformer version v4.49.0 incompatibilities'
)
# Resolve "mixed" init device to either "cpu" or "meta"
resolved_init_device = hf_get_init_device(init_device)
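For context, the guard now fails fast for any `mosaicml/mpt*` checkpoint instead of falling back to the transformers implementation. A hedged, self-contained sketch of the behavior (this is not the actual `build_inner_model` signature, just the guard in isolation):

```python
# Illustrative only; mirrors the guard added in the diff.
def assert_supported(pretrained_model_name_or_path: str) -> None:
    if pretrained_model_name_or_path.startswith('mosaicml/mpt'):
        raise ValueError(
            'MPT models are no longer supported by LLM Foundry due to '
            'transformer version v4.49.0 incompatibilities',
        )

assert_supported('meta-llama/Llama-2-7b')  # passes silently
try:
    assert_supported('mosaicml/mpt-7b')
except ValueError as err:
    print(err)                              # raised for any MPT checkpoint
```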
9 changes: 5 additions & 4 deletions mcli/mcli-hf-eval.yaml
@@ -28,19 +28,20 @@ parameters:

models:
-
- model_name: mosaicml/mpt-7b-instruct
+ model_name: meta-llama/Llama-2-7b
# Tokenizer
tokenizer:
- name: EleutherAI/gpt-neox-20b
+ name: meta-llama/Llama-2-7b
kwargs:
model_max_length: ${max_seq_len}

model:
name: hf_causal_lm
- pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
+ pretrained_model_name_or_path: meta-llama/Llama-2-7b
init_device: mixed
pretrained: true
- use_auth_token: false
+ # Note: you must have set the HF_TOKEN environment variable and have access to the llama2 models
+ use_auth_token: true

# FSDP config for model sharding
fsdp_config:
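Since the example configs now point at gated Llama 2 weights, the run needs Hugging Face credentials. A minimal sketch of one way to supply them before launching (the `HF_TOKEN` variable is the one referenced in the YAML comment above; `huggingface_hub` is assumed to be installed and the Llama 2 license accepted on the Hub):

```python
# Sketch: authenticate against the Hub so `use_auth_token: true` can resolve the gated repo.
import os
from huggingface_hub import login

login(token=os.environ['HF_TOKEN'])  # or simply export HF_TOKEN in the job environment
```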
7 changes: 4 additions & 3 deletions scripts/eval/yamls/hf_eval.yaml
@@ -23,16 +23,17 @@ models:
model_max_length: ${variables.max_seq_len}
# # if you are evaluating more than one model, list them all as YAML blocks without variable interpolation
# -
- # model_name: mosaicml/mpt-7b
+ # model_name: meta-llama/Llama-2-7b
# model:
# name: hf_causal_lm
- # pretrained_model_name_or_path: mosaicml/mpt-7b
+ # pretrained_model_name_or_path: meta-llama/Llama-2-7b
# init_device: cpu
# pretrained: true
# config_overrides:
# max_seq_len: ${variables.max_seq_len}
+ # use_auth_token: true
# tokenizer:
- # name: mosaicml/mpt-7b
+ # name: meta-llama/Llama-2-7b
# kwargs:
# model_max_length: ${variables.max_seq_len}

(file name not captured)
@@ -7,18 +7,20 @@ run_name: # If left blank, will be read from env var $COMPOSER_RUN_NAME
# Model
model:
name: hf_causal_lm
- pretrained_model_name_or_path: mosaicml/mpt-7b
+ pretrained_model_name_or_path: meta-llama/Llama-2-7b
pretrained: true # false: only use the architecture; true: initialize with pretrained weights
config_overrides:
max_seq_len: ${max_seq_len}
attn_config:
attn_impl: flash
# Set this to `true` if using `train_loader.dataset.packing_ratio` below
attn_uses_sequence_id: false
+ # Note: you must have set the HF_TOKEN environment variable and have access to the llama2 models
+ use_auth_token: true

# Tokenizer
tokenizer:
- name: mosaicml/mpt-7b
+ name: meta-llama/Llama-2-7b
kwargs:
model_max_length: ${max_seq_len}

(file name not captured)
@@ -12,17 +12,19 @@ run_name: ${variables.run_name}
model:
name: hf_causal_lm
pretrained: true
- pretrained_model_name_or_path: mosaicml/mpt-7b
+ pretrained_model_name_or_path: meta-llama/Llama-2-7b
config_overrides:
max_seq_len: ${variables.max_seq_len}
attn_config:
attn_impl: flash
# Set this to `true` if using `train_loader.dataset.packing_ratio` below
attn_uses_sequence_id: false
+ # Note: you must have set the HF_TOKEN environment variable and have access to the llama2 models
+ use_auth_token: true

# Tokenizer
tokenizer:
- name: mosaicml/mpt-7b
+ name: meta-llama/Llama-2-7b
kwargs:
model_max_length: ${variables.max_seq_len}

(file name not captured)
@@ -15,16 +15,18 @@ run_name: # If left blank, will be read from env var $COMPOSER_RUN_NAME
model:
name: hf_causal_lm
pretrained: true
- pretrained_model_name_or_path: mosaicml/mpt-7b
+ pretrained_model_name_or_path: meta-llama/Llama-2-7b
config_overrides:
max_seq_len: ${variables.max_seq_len}
attn_config:
attn_impl: flash
attn_uses_sequence_id: false
+ # Note: you must have set the HF_TOKEN environment variable and have access to the llama2 models
+ use_auth_token: true

# Tokenizer
tokenizer:
- name: mosaicml/mpt-7b
+ name: meta-llama/Llama-2-7b
kwargs:
model_max_length: ${variables.max_seq_len}

2 changes: 1 addition & 1 deletion setup.py
@@ -55,7 +55,7 @@
'mosaicml[libcloud,wandb,oci,gcs,mlflow]>=0.29.0,<0.30',
'mlflow>=2.14.1,<2.19',
'accelerate>=0.25,<1.4', # for HF inference `device_map`
- 'transformers>=4.43.2,<4.47',
+ 'transformers>=v4.49.0,<4.50',
'mosaicml-streaming>=0.11.0,<0.12',
'torch>=2.5.1,<2.5.2',
'datasets>=3.3.2,<3.4',
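With the pin moved to `transformers>=v4.49.0,<4.50`, a quick sanity check of the installed version can save a confusing failure later. A small sketch, assuming the `packaging` helper is available in the environment:

```python
# Illustrative check against the new lower bound from setup.py.
import transformers
from packaging import version

installed = version.parse(transformers.__version__)
assert installed >= version.parse('4.49.0'), (
    f'transformers {installed} is older than the 4.49.0 pin in setup.py'
)
```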
126 changes: 109 additions & 17 deletions tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

import contextlib
+ import glob
import json
import math
import os
@@ -220,11 +221,43 @@ def check_hf_tokenizer_equivalence(
if attr1 is None and attr2 is None:
continue

- attr_value1 = attr1 if isinstance(attr1, str) else attr1.content
- attr_value2 = attr2 if isinstance(attr2, str) else attr2.content
+ # Handle the case when the attribute is an AddedToken object
+ attr_value1 = attr1 if isinstance(
+ attr1,
+ str,
+ ) else attr1.content if hasattr(attr1, 'content') else str(attr1)
+ attr_value2 = attr2 if isinstance(
+ attr2,
+ str,
+ ) else attr2.content if hasattr(attr2, 'content') else str(attr2)
assert attr_value1 == attr_value2

- assert tokenizer1.__dict__ == tokenizer2.__dict__
+ # Ignore 'extra_special_tokens' as it was added by the transformers library during save/load
+ if 'extra_special_tokens' in tokenizer2.init_kwargs and 'extra_special_tokens' not in tokenizer1.init_kwargs:
+ tokenizer2.init_kwargs.pop('extra_special_tokens')
+
+ # Process special tokens map and added tokens decoder
+ for dict_map_key in ['_special_tokens_map', '_added_tokens_decoder']:
+ if dict_map_key in tokenizer1.__dict__ and dict_map_key in tokenizer2.__dict__:
+ # Get the nested dictionaries
+ token_map1 = tokenizer1.__dict__[dict_map_key]
+ token_map2 = tokenizer2.__dict__[dict_map_key]
+
+ # Process values in the first tokenizer's map
+ for key in list(token_map1.keys()):
+ if hasattr(token_map1[key], 'content'):
+ token_map1[key] = token_map1[key].content
+
+ # Process values in the second tokenizer's map
+ for key in list(token_map2.keys()):
+ if hasattr(token_map2[key], 'content'):
+ token_map2[key] = token_map2[key].content
+
+ # Final comparison of dictionaries
+ t1_dict = tokenizer1.__dict__
+ t2_dict = tokenizer2.__dict__
+
+ assert t1_dict == t2_dict, 'Tokenizer dictionaries are not equal'


def remove_moe_world_size(config: MPTConfig):
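The extra `hasattr(..., 'content')` handling above exists because, after a save/load round trip, special-token attributes may come back as `AddedToken` objects rather than plain strings. A small sketch of the normalization the test now applies (assuming the `tokenizers` package that transformers depends on):

```python
# Sketch: normalize AddedToken objects and plain strings to comparable values.
from tokenizers import AddedToken

def token_str(tok):
    # Mirrors the test's normalization: use .content when present, else str().
    return tok.content if hasattr(tok, 'content') else str(tok)

assert token_str(AddedToken('<|endoftext|>')) == token_str('<|endoftext|>')
```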
@@ -274,6 +307,52 @@ def check_hf_model_equivalence(
assert torch.equal(p1.cpu(), p2.cpu())


+ def check_safetensors_precision(
+ model_path: str,
+ model: torch.nn.Module,
+ expected_precision: torch.dtype,
+ tolerance: float = 0.2,
+ ):
+ """Verify that the safetensors files in model_path have the expected size.
+
+ Args:
+ model_path: Path to the directory containing the safetensors files
+ model: The original model to count parameters from
+ expected_precision: The expected precision (torch.float32, torch.bfloat16, etc.)
+ tolerance: Allowed deviation from expected file size (as a ratio)
+
+ Returns:
+ bool: True if the safetensors files have the expected size, False otherwise
+ """
+ total_params = sum(p.numel() for p in model.parameters())
+ # Determine expected bytes per parameter based on precision
+ bytes_per_param = {
+ torch.float32: 4,
+ torch.float16: 2,
+ torch.bfloat16: 2,
+ torch.int8: 1,
+ }.get(expected_precision)
+ assert bytes_per_param
+
+ expected_size = total_params * bytes_per_param
+
+ safetensors_files = glob.glob(os.path.join(model_path, '*.safetensors'))
+ if not safetensors_files:
+ # If no safetensors files found, check pytorch_model.bin
+ safetensors_files = glob.glob(
+ os.path.join(model_path, 'pytorch_model*.bin'),
+ )
+
+ if not safetensors_files:
+ return False
+
+ total_size = sum(os.path.getsize(f) for f in safetensors_files)
+ size_ratio = total_size / expected_size
+
+ is_correct_size = (1.0 - tolerance) <= size_ratio <= (1.0 + tolerance)
+ return is_correct_size
+
+
# TODO(GRT-2435): Change to fixture
def delete_transformers_cache():
# Only delete the files on local rank 0, otherwise race conditions are created
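The helper's size check boils down to parameter count times bytes per dtype, compared against the files on disk within a tolerance. A tiny worked example of that arithmetic (the Linear layer is purely illustrative):

```python
# Sketch of the expected-size arithmetic used by check_safetensors_precision.
import torch.nn as nn

model = nn.Linear(1024, 1024)                  # ~1.05M parameters (weights + bias)
n_params = sum(p.numel() for p in model.parameters())
expected_bytes = n_params * 2                  # bfloat16/float16 -> 2 bytes per param
print(f'{n_params} params -> ~{expected_bytes / 1e6:.1f} MB expected on disk')
```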
@@ -578,15 +657,27 @@ def test_huggingface_conversion_callback_interval(
assert len(normal_checkpoints) == expected_normal_checkpoints
assert len(huggingface_checkpoints) == expected_hf_checkpoints

+ # Get path to the last checkpoint
+ checkpoint_path = os.path.join(
+ tmp_path,
+ 'checkpoints',
+ 'huggingface',
+ f'ba{batches_per_epoch}',
+ )
+
+ # Verify the safetensors file size matches the expected precision
+ is_size_correct = check_safetensors_precision(
+ model_path=checkpoint_path,
+ model=trainer.state.model.model,
+ expected_precision=precision,
+ )
+ assert is_size_correct, f"Safetensors file size doesn't match expected precision {precision_str}"
+
# Load the last huggingface checkpoint
loaded_model = transformers.AutoModelForCausalLM.from_pretrained(
- os.path.join(
- tmp_path,
- 'checkpoints',
- 'huggingface',
- f'ba{batches_per_epoch}',
- ),
+ checkpoint_path,
trust_remote_code=True,
+ torch_dtype=precision,
)

# Check that the loaded model has the correct precision, and then set it back
@@ -603,15 +694,16 @@
loaded_model.config.init_device = original_model.model.config.init_device

loaded_tokenizer = transformers.AutoTokenizer.from_pretrained(
- os.path.join(
- tmp_path,
- 'checkpoints',
- 'huggingface',
- f'ba{batches_per_epoch}',
- ),
+ checkpoint_path,
trust_remote_code=True,
)

+ # Also check that at least one parameter has the expected precision
+ for param_name, param in loaded_model.named_parameters():
+ assert param.dtype == precision, \
+ f'Parameter {param_name} has dtype {param.dtype}, expected {precision}'
+ break

check_hf_model_equivalence(
trainer.state.model.model.to(precision),
loaded_model,
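The new `torch_dtype=precision` argument and the per-parameter dtype assertion both rely on `from_pretrained` materializing weights in the requested dtype. A short sketch of that pattern with a public model (gpt2 here is only a stand-in for the exported checkpoint directory):

```python
# Sketch: torch_dtype controls the dtype the checkpoint weights are loaded in.
import torch
import transformers

loaded = transformers.AutoModelForCausalLM.from_pretrained(
    'gpt2',                        # stand-in for the exported HF checkpoint path
    torch_dtype=torch.bfloat16,
)
assert next(loaded.parameters()).dtype == torch.bfloat16
```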
@@ -873,6 +965,7 @@ def _assert_checkpoint_equivalence(
loaded_model = transformers.AutoModelForCausalLM.from_pretrained(
checkpoint_path,
trust_remote_code=True,
+ torch_dtype=precision,
)

# Check that the loaded model has the correct precision, and then set it back
@@ -1426,7 +1519,6 @@ def test_mptmoe_huggingface_conversion_callback(
device_batch_size = 1
dataset_size = 2
precision_str = 'float32'
- precision = torch.float32
batches_per_epoch = math.ceil(dataset_size / (device_batch_size * 2))

checkpointer_callback = HuggingFaceCheckpointer(
@@ -1617,7 +1709,7 @@ def test_mptmoe_huggingface_conversion_callback(

# Check that the loaded model has the correct precision, and then set it back
# to the original for the equivalence check
- assert loaded_model.config.torch_dtype == precision
+ assert loaded_model.config.torch_dtype == precision_str
loaded_model.config.torch_dtype = original_model.model.config.torch_dtype

loaded_tokenizer = transformers.AutoTokenizer.from_pretrained(
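The last assertion now compares against `precision_str` rather than the `torch.dtype` object, presumably because the config round-trips the dtype as a plain string in this path. The distinction is easy to see in isolation:

```python
# Sketch: a torch.dtype never compares equal to its string name.
import torch

assert torch.float32 != 'float32'
assert str(torch.float32) == 'torch.float32'
```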