# mcli-hf-eval.yaml
integrations:
- integration_type: git_repo
  git_repo: mosaicml/llm-foundry
  git_branch: v0.16.0
  # git_commit:  # OR use your commit hash
  pip_install: .[gpu]
  ssh_clone: false  # Should be true if using a private repo
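  # With this integration, mcli clones the pinned branch and runs `pip install .[gpu]`
  # inside the image before the command below executes.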
command: |
  cd llm-foundry/scripts
  composer eval/eval.py /mnt/config/parameters.yaml
# Mosaic Cloud will use run_name (with a unique suffix) to populate the env var $RUN_NAME
run_name: mpt-eval
gpu_num: 8
# gpu_type:
# cluster: # replace with your cluster here!
image: mosaicml/llm-foundry:2.5.1_cu124-latest
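# The image tag encodes the PyTorch (2.5.1) and CUDA (12.4) versions; keep it in sync
# with the llm-foundry release pinned above.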
# The below is injected as a YAML file: /mnt/config/parameters.yaml
parameters:
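  # Top-level eval settings: dist_timeout is the distributed-collective timeout in seconds,
  # device_eval_batch_size is the per-GPU eval batch size, and max_seq_len is reused below
  # via ${max_seq_len} interpolation.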
  dist_timeout: 6000
  seed: 1
  max_seq_len: 1024
  device_eval_batch_size: 4
  precision: amp_fp16
  models:
  -
    model_name: meta-llama/Llama-3.1-70B-Instruct
    # Tokenizer
    tokenizer:
      name: meta-llama/Llama-3.1-70B-Instruct
      kwargs:
        model_max_length: ${max_seq_len}
    model:
      name: hf_causal_lm
      pretrained_model_name_or_path: meta-llama/Llama-3.1-70B-Instruct
      init_device: mixed
      pretrained: true
      # Note: you must have set the HF_TOKEN environment variable and have access to the Llama 3.1 models
      use_auth_token: true
  # FSDP config for model sharding
  fsdp_config:
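    # FULL_SHARD shards the model parameters across all ranks (ZeRO-3 style), so the 70B
    # checkpoint never has to fit in a single GPU's memory.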
    sharding_strategy: FULL_SHARD
    mixed_precision: FULL
    forward_prefetch: True
    limit_all_gathers: True
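  # Paths below are relative to llm-foundry/scripts (the working directory set in `command` above).
  # icl_tasks lists the in-context-learning benchmarks; eval_gauntlet aggregates their results
  # into category scores.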
  icl_tasks: "eval/yamls/tasks_v0.3.yaml"
  eval_gauntlet: "eval/yamls/eval_gauntlet_v0.3.yaml"
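
# To launch (a sketch, assuming the MosaicML CLI is installed and configured, that
# cluster/gpu_type above have been filled in for your environment, and that HF_TOKEN
# is available to the run):
#   mcli run -f mcli-hf-eval.yaml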