add fourth chapter

This commit is contained in:
skindhu 2024-11-05 17:46:19 +08:00
parent be238a37c5
commit d9e1a27bf2
1 changed files with 15 additions and 14 deletions

View File

@ -164,7 +164,7 @@ inputs = torch.tensor(
[[0.43, 0.15, 0.89], # Your (x^1)
[0.55, 0.87, 0.66], # journey (x^2)
[0.57, 0.85, 0.64], # starts (x^3)
[0.22, 0.58, 0.33], # with (x^4)
[0.22, 0.58, 0.33], # with (x^4)
[0.77, 0.25, 0.10], # one (x^5)
[0.05, 0.80, 0.55]] # step (x^6)
@ -233,7 +233,7 @@ tensor([0.9544, 1.4950, 1.4754, 0.8434, 0.7070, 1.0865])
> ```python
> res = 0.
> for idx, element in enumerate(inputs[0]):
> res += inputs[0][idx] * query[idx]
> res += inputs[0][idx] * query[idx]
> print(res)
> print(torch.dot(inputs[0], query))
> ```
@ -609,7 +609,7 @@ print(attn_scores_2)
可以看到,输出中的第二个元素与我们之前计算的 `attn_score_22` 相同:
```python
tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
```
第三步是将注意力得分转换为注意力权重,如图 3.16 所示。
@ -627,7 +627,7 @@ print(attn_weights_2)
结果如下:
```python
tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
```
> [!NOTE]
@ -929,13 +929,13 @@ print(masked)
由此生成以下掩码:
```python
tensor([[0.2899, -inf, -inf, -inf, -inf, -inf],
[0.4656, 0.1723, -inf, -inf, -inf, -inf],
[0.4594, 0.1703, 0.1731, -inf, -inf, -inf],
[0.2642, 0.1024, 0.1036, 0.0186, -inf, -inf],
[0.2183, 0.0874, 0.0882, 0.0177, 0.0786, -inf],
[0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
grad_fn=<MaskedFillBackward0>)
tensor([[0.2899, -inf, -inf, -inf, -inf, -inf],
[0.4656, 0.1723, -inf, -inf, -inf, -inf],
[0.4594, 0.1703, 0.1731, -inf, -inf, -inf],
[0.2642, 0.1024, 0.1036, 0.0186, -inf, -inf],
[0.2183, 0.0874, 0.0882, 0.0177, 0.0786, -inf],
[0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
grad_fn=<MaskedFillBackward0>)
```
现在我们只需要对这些掩码后的结果应用 softmax 函数,就可以完成了:
@ -1076,7 +1076,7 @@ print(batch.shape) #A
以上代码生成一个三维张量,包含 2 个输入文本,每个文本包含 6 个 token,每个 token 表示为一个 3 维嵌入向量:
```python
torch.Size([2, 6, 3])
torch.Size([2, 6, 3])
```
以下的 CausalAttention 类与我们之前实现的 SelfAttention 类类似,不同之处在于我们现在添加了 dropout 和因果掩码组件,如以下代码所示:
@ -1095,7 +1095,7 @@ class CausalAttention(nn.Module):
'mask',
torch.triu(torch.ones(context_length, context_length),
diagonal=1)
) #B
) #B
def forward(self, x):
b, num_tokens, d_in = x.shape #C
@ -1204,6 +1204,7 @@ tensor([[[-0.4519, 0.2216, 0.4772, 0.1063],
[-0.5675, -0.0843, 0.5478, 0.3589],
[-0.5526, -0.0981, 0.5321, 0.3428],
[-0.5299, -0.1081, 0.5077, 0.3493]],
[[-0.4519, 0.2216, 0.4772, 0.1063],
[-0.5874, 0.0058, 0.5891, 0.3257],
[-0.6300, -0.0632, 0.6202, 0.3860],
@ -1253,7 +1254,7 @@ class MultiHeadAttention(nn.Module):
self.register_buffer(
'mask',
torch.triu(torch.ones(context_length, context_length), diagonal=1)
)
)
def forward(self, x):