Merge pull request #6713 from oobabooga/dev
Merge dev branch
oobabooga authored Jan 29, 2025
2 parents a1c353a + b614ea6 commit 9ac4d81
Showing 24 changed files with 359 additions and 226 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -26,6 +26,7 @@
 .DS_Store
 .eslintrc.js
 .idea
+.installer_state.json
 .venv
 venv
 .envrc
2 changes: 1 addition & 1 deletion README.md
@@ -380,7 +380,7 @@ text-generation-webui
 │   │   └── tokenizer.model
 ```

-In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with
+In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with:

 ```
 python download-model.py organization/model
10 changes: 10 additions & 0 deletions css/main.css
@@ -1259,6 +1259,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   left: 25px;
 }

+.footer-button.footer-continue-button {
+  bottom: -23px;
+  left: 50px;
+}
+
+.footer-button.footer-remove-button {
+  bottom: -23px;
+  left: 75px;
+}
+
 .message:hover .footer-button,
 .user-message:hover .footer-button,
 .assistant-message:hover .footer-button {
32 changes: 24 additions & 8 deletions docs/12 - OpenAI API.md
@@ -14,7 +14,7 @@ Add `--api` to your command-line flags.
 * To create a public Cloudflare URL, add the `--public-api` flag.
 * To listen on your local network, add the `--listen` flag.
 * To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number).
-* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. Note that it doesn't work with `--public-api`.
+* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default.
 * To use an API key for authentication, add `--api-key yourkey`.

 ### Examples

@@ -51,8 +51,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \
       "content": "Hello!"
     }
   ],
-  "mode": "instruct",
-  "instruction_template": "Alpaca"
+  "mode": "instruct"
 }'
 ```

@@ -86,7 +85,6 @@ curl http://127.0.0.1:5000/v1/chat/completions \
     }
   ],
   "mode": "instruct",
-  "instruction_template": "Alpaca",
   "stream": true
 }'
 ```

@@ -131,9 +129,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
     "args": {
       "load_in_4bit": true,
       "n_gpu_layers": 12
-    },
-    "settings": {
-      "instruction_template": "Alpaca"
     }
   }'
 ```

@@ -198,7 +193,7 @@ while True:
     assistant_message = ''
     for event in client.events():
         payload = json.loads(event.data)
-        chunk = payload['choices'][0]['message']['content']
+        chunk = payload['choices'][0]['delta']['content']
         assistant_message += chunk
         print(chunk, end='')

@@ -241,6 +236,27 @@ for event in client.events():
     print()
 ```

+#### Python example with API key
+
+Replace
+
+```python
+headers = {
+    "Content-Type": "application/json"
+}
+```
+
+with
+
+```python
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": "Bearer yourPassword123"
+}
+```
+
+in any of the examples above.
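For completeness, here is what a full request looks like with the key applied; a minimal sketch, assuming the server was started with `--api --api-key yourPassword123` on the default port:

```python
# Minimal sketch: a chat completion request authenticated with an API key.
# Assumes the server was launched with: --api --api-key yourPassword123
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer yourPassword123"
}
body = {
    "mode": "instruct",
    "messages": [{"role": "user", "content": "Hello!"}]
}

response = requests.post(url, headers=headers, json=body)
print(response.json()["choices"][0]["message"]["content"])
```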

 ### Environment variables

 The following environment variables can be used (they take precedence over everything else):
165 changes: 103 additions & 62 deletions download-model.py
@@ -14,6 +14,7 @@
 import os
 import re
 import sys
+from multiprocessing import Array
 from pathlib import Path
 from time import sleep
@@ -27,9 +27,10 @@


 class ModelDownloader:
-    def __init__(self, max_retries=5):
+    def __init__(self, max_retries=7):
         self.max_retries = max_retries
         self.session = self.get_session()
+        self._progress_bar_slots = None

     def get_session(self):
         session = requests.Session()
@@ -186,73 +188,112 @@ def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir
             output_folder = Path(base_folder) / output_folder
         return output_folder

+    @property
+    def progress_bar_slots(self):
+        if self._progress_bar_slots is None:
+            raise RuntimeError("Progress bar slots not initialized. Start download threads first.")
+
+        return self._progress_bar_slots
+
+    def initialize_progress_bar_slots(self, num_threads):
+        self._progress_bar_slots = Array("B", [0] * num_threads)
+
+    def get_progress_bar_position(self):
+        with self.progress_bar_slots.get_lock():
+            for i in range(len(self.progress_bar_slots)):
+                if self.progress_bar_slots[i] == 0:
+                    self.progress_bar_slots[i] = 1
+                    return i
+
+        return 0  # fallback
+
+    def release_progress_bar_position(self, slot):
+        with self.progress_bar_slots.get_lock():
+            self.progress_bar_slots[slot] = 0
+
     def get_single_file(self, url, output_folder, start_from_scratch=False):
         filename = Path(url.rsplit('/', 1)[1])
         output_path = output_folder / filename
+        progress_bar_position = self.get_progress_bar_position()

-        max_retries = 7
+        max_retries = self.max_retries
         attempt = 0
-        while attempt < max_retries:
-            attempt += 1
-            session = self.session
-            headers = {}
-            mode = 'wb'
-
-            try:
-                if output_path.exists() and not start_from_scratch:
-                    # Resume download
-                    r = session.get(url, stream=True, timeout=20)
-                    total_size = int(r.headers.get('content-length', 0))
-                    if output_path.stat().st_size >= total_size:
-                        return
-
-                    headers = {'Range': f'bytes={output_path.stat().st_size}-'}
-                    mode = 'ab'
-
-                with session.get(url, stream=True, headers=headers, timeout=30) as r:
-                    r.raise_for_status()  # If status is not 2xx, raise an error
-                    total_size = int(r.headers.get('content-length', 0))
-                    block_size = 1024 * 1024  # 1MB
-
-                    filename_str = str(filename)  # Convert PosixPath to string if necessary
-
-                    tqdm_kwargs = {
-                        'total': total_size,
-                        'unit': 'B',
-                        'unit_scale': True,
-                        'unit_divisor': 1024,
-                        'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
-                        'desc': f"{filename_str}: "
-                    }
-
-                    if 'COLAB_GPU' in os.environ:
-                        tqdm_kwargs.update({
-                            'position': 0,
-                            'leave': True
-                        })
-
-                    with open(output_path, mode) as f:
-                        with tqdm.tqdm(**tqdm_kwargs) as t:
-                            count = 0
-                            for data in r.iter_content(block_size):
-                                f.write(data)
-                                t.update(len(data))
-                                if total_size != 0 and self.progress_bar is not None:
-                                    count += len(data)
-                                    self.progress_bar(float(count) / float(total_size), f"{filename_str}")
-
-                break  # Exit loop if successful
-            except (RequestException, ConnectionError, Timeout) as e:
-                print(f"Error downloading {filename}: {e}.")
-                print(f"That was attempt {attempt}/{max_retries}.", end=' ')
-                if attempt < max_retries:
-                    print(f"Retry begins in {2 ** attempt} seconds.")
-                    sleep(2 ** attempt)
-                else:
-                    print("Failed to download after the maximum number of attempts.")
+        try:
+            while attempt < max_retries:
+                attempt += 1
+                session = self.session
+                headers = {}
+                mode = 'wb'
+
+                try:
+                    if output_path.exists() and not start_from_scratch:
+                        # Resume download
+                        r = session.get(url, stream=True, timeout=20)
+                        total_size = int(r.headers.get('content-length', 0))
+                        if output_path.stat().st_size >= total_size:
+                            return
+
+                        headers = {'Range': f'bytes={output_path.stat().st_size}-'}
+                        mode = 'ab'
+
+                    with session.get(url, stream=True, headers=headers, timeout=30) as r:
+                        r.raise_for_status()  # If status is not 2xx, raise an error
+                        total_size = int(r.headers.get('content-length', 0))
+                        block_size = 1024 * 1024  # 1MB
+
+                        filename_str = str(filename)  # Convert PosixPath to string if necessary
+
+                        tqdm_kwargs = {
+                            'total': total_size,
+                            'unit': 'B',
+                            'unit_scale': True,
+                            'unit_divisor': 1024,
+                            'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
+                            'desc': f"{filename_str}: ",
+                            'position': progress_bar_position,
+                            'leave': False
+                        }
+
+                        if 'COLAB_GPU' in os.environ:
+                            tqdm_kwargs.update({
+                                'position': 0,
+                                'leave': True
+                            })
+
+                        with open(output_path, mode) as f:
+                            with tqdm.tqdm(**tqdm_kwargs) as t:
+                                count = 0
+                                for data in r.iter_content(block_size):
+                                    f.write(data)
+                                    t.update(len(data))
+                                    if total_size != 0 and self.progress_bar is not None:
+                                        count += len(data)
+                                        self.progress_bar(float(count) / float(total_size), f"{filename_str}")
+
+                    break  # Exit loop if successful
+                except (RequestException, ConnectionError, Timeout) as e:
+                    print(f"Error downloading {filename}: {e}.")
+                    print(f"That was attempt {attempt}/{max_retries}.", end=' ')
+                    if attempt < max_retries:
+                        print(f"Retry begins in {2 ** attempt} seconds.")
+                        sleep(2 ** attempt)
+                    else:
+                        print("Failed to download after the maximum number of attempts.")
+        finally:
+            self.release_progress_bar_position(progress_bar_position)

     def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
-        thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
+        self.initialize_progress_bar_slots(threads)
+        tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())
+        try:
+            thread_map(
+                lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),
+                file_list,
+                max_workers=threads,
+                disable=True
+            )
+        finally:
+            print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")

     def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
         self.progress_bar = progress_bar
@@ -318,7 +359,7 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
     parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
     parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
     parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
-    parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
+    parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
     args = parser.parse_args()

     branch = args.branch
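For readers skimming the diff: the slot bookkeeping added above exists so that each download thread claims a stable `tqdm` row instead of all progress bars fighting over one line. A standalone sketch of the same pattern, with hypothetical names that are not part of the repository:

```python
# Sketch of the progress-bar slot pattern: each worker claims a free slot
# index under a lock and uses it as its tqdm 'position' (terminal row).
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Array
from time import sleep

import tqdm

slots = Array("B", [0] * 4)  # one byte per slot; 0 = free, 1 = taken


def claim_slot():
    with slots.get_lock():
        for i in range(len(slots)):
            if slots[i] == 0:
                slots[i] = 1
                return i
    return 0  # fallback if every slot is taken


def release_slot(i):
    with slots.get_lock():
        slots[i] = 0


def fake_download(name):
    position = claim_slot()
    try:
        with tqdm.tqdm(total=100, desc=name, position=position, leave=False) as bar:
            for _ in range(100):
                sleep(0.01)
                bar.update(1)
    finally:
        release_slot(position)  # free the row for the next file


with ThreadPoolExecutor(max_workers=4) as pool:
    list(pool.map(fake_download, [f"file{i}" for i in range(8)]))
```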
19 changes: 0 additions & 19 deletions extensions/Training_PRO/script.py
@@ -557,12 +557,6 @@ def calc_trainable_parameters(model):

 def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int,neft_noise_alpha:float):

-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
-            replace_peft_model_with_int4_lora_model
-        )
-        replace_peft_model_with_int4_lora_model()
-
     global train_log_graph
     global WANT_INTERRUPT
     WANT_INTERRUPT = False

@@ -600,10 +594,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch

     time.sleep(5)

-    if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
-        yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`", zero_pd
-        return
-
     if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
         yield "Cannot input zeroes.", zero_pd
         return

@@ -865,15 +855,6 @@ def generate_and_tokenize_prompt(data_point):
         yield traceback.format_exc().replace('\n', '\n\n'), zero_pd
         return

-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
-        from alpaca_lora_4bit.models import Linear4bitLt
-        for _, m in lora_model.named_modules():
-            if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
-                if m.is_v1_model:
-                    m.zeros = m.zeros.half()
-                    m.scales = m.scales.half()
-
     class Tracked():
         def __init__(self):
             self.current_steps = 0
4 changes: 2 additions & 2 deletions extensions/openai/completions.py
@@ -146,7 +146,7 @@ def convert_history(history):
             for item in entry['content']:
                 if not isinstance(item, dict):
                     continue
-
+
                 image_url = None
                 content = None
                 if item['type'] == 'image_url' and isinstance(item['image_url'], dict):

@@ -205,7 +205,7 @@ def convert_history(history):
             else:
                 chat_dialogue.append(['', current_reply])
         elif role == "system":
-            system_message = content
+            system_message += f"\n{content}" if system_message else content

     if not user_input_last:
         user_input = ""
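The second hunk is a behavior fix: when a request contains several `system` messages, they are now concatenated with newlines instead of each one overwriting the last. A minimal sketch of that accumulation logic in isolation:

```python
# Sketch of the new system-message handling: successive "system" entries
# are joined with newlines rather than replacing one another.
messages = [
    {"role": "system", "content": "You are concise."},
    {"role": "system", "content": "Answer in English."},
    {"role": "user", "content": "Hello!"},
]

system_message = ""
for entry in messages:
    if entry["role"] == "system":
        content = entry["content"]
        system_message += f"\n{content}" if system_message else content

print(system_message)
# You are concise.
# Answer in English.
```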
8 changes: 8 additions & 0 deletions js/global_scope_js.js
@@ -22,6 +22,14 @@ function regenerateClick() {
   document.getElementById("Regenerate").click();
 }

+function continueClick() {
+  document.getElementById("Continue").click();
+}
+
+function removeLastClick() {
+  document.getElementById("Remove-last").click();
+}
+
 function handleMorphdomUpdate(text) {
   morphdom(
     document.getElementById("chat").parentNode,
7 changes: 6 additions & 1 deletion modules/chat.py
@@ -30,8 +30,13 @@
 )
 from modules.utils import delete_file, get_available_characters, save_file

-# Copied from the Transformers library
+
+def strftime_now(format):
+    return datetime.now().strftime(format)
+
+
 jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
+jinja_env.globals["strftime_now"] = strftime_now


 def str_presenter(dumper, data):
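The new `strftime_now` global mirrors the helper that the Transformers library exposes to chat templates (the removed comment noted the environment was copied from there), letting templates render the current date or time. A small standalone sketch of how a template can use it:

```python
# Sketch: a Jinja2 chat template calling the strftime_now global
# registered above (mirrors the jinja_env setup in modules/chat.py).
from datetime import datetime

from jinja2.sandbox import ImmutableSandboxedEnvironment


def strftime_now(format):
    return datetime.now().strftime(format)


jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
jinja_env.globals["strftime_now"] = strftime_now

template = jinja_env.from_string("Today is {{ strftime_now('%B %d, %Y') }}.")
print(template.render())  # e.g. "Today is January 29, 2025."
```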
[Diffs for the remaining 15 changed files are not shown here.]
