 from ftllm import llm
-from ftllm.llm import ComputeGraph
+from qwen2 import Qwen2Model
 import os
-import math
 
 root_path = "/mnt/hfmodels/"
 model_path = os.path.join(root_path, "Qwen/Qwen2-7B-Instruct")
 
-class Qwen2Model(ComputeGraph):
-    def build(self):
-        weight, data, config = self.weight, self.data, self.config
-        head_dim = config["hidden_size"] // config["num_attention_heads"]
-        self.Embedding(data["inputIds"], weight["model.embed_tokens.weight"], data["hiddenStates"]);
-        self.DataTypeAs(data["hiddenStates"], data["atype"])
-        for i in range(config["num_hidden_layers"]):
-            pastKey = data["pastKey."][i]
-            pastValue = data["pastValue."][i]
-            layer = weight["model.layers."][i]
-            self.RMSNorm(data["hiddenStates"], layer[".input_layernorm.weight"], config["rms_norm_eps"], data["attenInput"])
-            self.Linear(data["attenInput"], layer[".self_attn.q_proj.weight"], layer[".self_attn.q_proj.bias"], data["q"])
-            self.Linear(data["attenInput"], layer[".self_attn.k_proj.weight"], layer[".self_attn.k_proj.bias"], data["k"])
-            self.Linear(data["attenInput"], layer[".self_attn.v_proj.weight"], layer[".self_attn.v_proj.bias"], data["v"])
-            self.ExpandHead(data["q"], head_dim)
-            self.ExpandHead(data["k"], head_dim)
-            self.ExpandHead(data["v"], head_dim)
-            self.LlamaRotatePosition2D(data["q"], data["positionIds"], data["sin"], data["cos"], head_dim // 2)
-            self.LlamaRotatePosition2D(data["k"], data["positionIds"], data["sin"], data["cos"], head_dim // 2)
-            self.FusedAttention(data["q"], pastKey, pastValue, data["k"], data["v"], data["attenInput"],
-                                data["attentionMask"], data["attenOutput"], data["seqLens"], 1.0 / math.sqrt(head_dim))
-            self.Linear(data["attenOutput"], layer[".self_attn.o_proj.weight"], layer[".self_attn.o_proj.bias"], data["attenLastOutput"]);
-            self.AddTo(data["hiddenStates"], data["attenLastOutput"]);
-            self.RMSNorm(data["hiddenStates"], layer[".post_attention_layernorm.weight"], config["rms_norm_eps"], data["attenInput"])
-            self.Linear(data["attenInput"], layer[".mlp.gate_proj.weight"], layer[".mlp.gate_proj.bias"], data["w1"])
-            self.Linear(data["attenInput"], layer[".mlp.up_proj.weight"], layer[".mlp.up_proj.bias"], data["w3"])
-            self.Silu(data["w1"], data["w1"])
-            self.MulTo(data["w1"], data["w3"])
-            self.Linear(data["w1"], layer[".mlp.down_proj.weight"], layer[".mlp.down_proj.bias"], data["w2"])
-            self.AddTo(data["hiddenStates"], data["w2"])
-        self.SplitLastTokenStates(data["hiddenStates"], data["seqLens"], data["lastTokensStates"])
-        self.RMSNorm(data["lastTokensStates"], weight["model.norm.weight"], config["rms_norm_eps"], data["lastTokensStates"])
-        self.Linear(data["lastTokensStates"], weight["lm_head.weight"], weight["lm_head.bias"], data["logits"])
-
 model = llm.model(model_path, graph = Qwen2Model)
 prompt = "北京有什么景点?"
 messages = [
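The removed ComputeGraph subclass is now imported from a separate qwen2 module, so the driver script shrinks to model construction plus chat setup. The diff is cut off after `messages = [`; below is a minimal sketch of how such a fastllm driver typically continues. The message contents and the `stream_response` call are assumptions based on fastllm's example scripts, not part of this commit (the prompt asks "What sights are there in Beijing?").

# Hypothetical continuation of the script above (not part of this commit).
# Assumes the ftllm model object exposes a stream_response() generator that
# accepts an OpenAI-style messages list, as in fastllm's example code.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},  # "北京有什么景点?" ("What sights are there in Beijing?")
]
for chunk in model.stream_response(messages):
    print(chunk, end = "", flush = True)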