add fourth chapter
This commit is contained in:
parent
be238a37c5
commit
d9e1a27bf2
|
|
@ -164,7 +164,7 @@ inputs = torch.tensor(
|
|||
[[0.43, 0.15, 0.89], # Your (x^1)
|
||||
[0.55, 0.87, 0.66], # journey (x^2)
|
||||
[0.57, 0.85, 0.64], # starts (x^3)
|
||||
[0.22, 0.58, 0.33], # with (x^4)
|
||||
[0.22, 0.58, 0.33], # with (x^4)
|
||||
[0.77, 0.25, 0.10], # one (x^5)
|
||||
[0.05, 0.80, 0.55]] # step (x^6)
|
||||
|
||||
|
|
@ -233,7 +233,7 @@ tensor([0.9544, 1.4950, 1.4754, 0.8434, 0.7070, 1.0865])
|
|||
> ```python
|
||||
> res = 0.
|
||||
> for idx, element in enumerate(inputs[0]):
|
||||
> res += inputs[0][idx] * query[idx]
|
||||
> res += inputs[0][idx] * query[idx]
|
||||
> print(res)
|
||||
> print(torch.dot(inputs[0], query))
|
||||
> ```
|
||||
|
|
@ -609,7 +609,7 @@ print(attn_scores_2)
|
|||
可以看到,输出中的第二个元素与我们之前计算的 `attn_score_22` 相同:
|
||||
|
||||
```python
|
||||
tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
|
||||
tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
|
||||
```
|
||||
|
||||
第三步是将注意力得分转换为注意力权重,如图 3.16 所示。
|
||||
|
|
@ -627,7 +627,7 @@ print(attn_weights_2)
|
|||
结果如下:
|
||||
|
||||
```python
|
||||
tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
|
||||
tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
|
|
@ -929,13 +929,13 @@ print(masked)
|
|||
由此生成以下掩码:
|
||||
|
||||
```python
|
||||
tensor([[0.2899, -inf, -inf, -inf, -inf, -inf],
|
||||
[0.4656, 0.1723, -inf, -inf, -inf, -inf],
|
||||
[0.4594, 0.1703, 0.1731, -inf, -inf, -inf],
|
||||
[0.2642, 0.1024, 0.1036, 0.0186, -inf, -inf],
|
||||
[0.2183, 0.0874, 0.0882, 0.0177, 0.0786, -inf],
|
||||
[0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
|
||||
grad_fn=<MaskedFillBackward0>)
|
||||
tensor([[0.2899, -inf, -inf, -inf, -inf, -inf],
|
||||
[0.4656, 0.1723, -inf, -inf, -inf, -inf],
|
||||
[0.4594, 0.1703, 0.1731, -inf, -inf, -inf],
|
||||
[0.2642, 0.1024, 0.1036, 0.0186, -inf, -inf],
|
||||
[0.2183, 0.0874, 0.0882, 0.0177, 0.0786, -inf],
|
||||
[0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
|
||||
grad_fn=<MaskedFillBackward0>)
|
||||
```
|
||||
|
||||
现在我们只需要对这些掩码后的结果应用 softmax 函数,就可以完成了:
|
||||
|
|
@ -1076,7 +1076,7 @@ print(batch.shape) #A
|
|||
以上代码生成一个三维张量,包含 2 个输入文本,每个文本包含 6 个 token,每个 token 表示为一个 3 维嵌入向量:
|
||||
|
||||
```python
|
||||
torch.Size([2, 6, 3])
|
||||
torch.Size([2, 6, 3])
|
||||
```
|
||||
|
||||
以下的 CausalAttention 类与我们之前实现的 SelfAttention 类类似,不同之处在于我们现在添加了 dropout 和因果掩码组件,如以下代码所示:
|
||||
|
|
@ -1095,7 +1095,7 @@ class CausalAttention(nn.Module):
|
|||
'mask',
|
||||
torch.triu(torch.ones(context_length, context_length),
|
||||
diagonal=1)
|
||||
) #B
|
||||
) #B
|
||||
|
||||
def forward(self, x):
|
||||
b, num_tokens, d_in = x.shape #C
|
||||
|
|
@ -1204,6 +1204,7 @@ tensor([[[-0.4519, 0.2216, 0.4772, 0.1063],
|
|||
[-0.5675, -0.0843, 0.5478, 0.3589],
|
||||
[-0.5526, -0.0981, 0.5321, 0.3428],
|
||||
[-0.5299, -0.1081, 0.5077, 0.3493]],
|
||||
|
||||
[[-0.4519, 0.2216, 0.4772, 0.1063],
|
||||
[-0.5874, 0.0058, 0.5891, 0.3257],
|
||||
[-0.6300, -0.0632, 0.6202, 0.3860],
|
||||
|
|
@ -1253,7 +1254,7 @@ class MultiHeadAttention(nn.Module):
|
|||
self.register_buffer(
|
||||
'mask',
|
||||
torch.triu(torch.ones(context_length, context_length), diagonal=1)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def forward(self, x):
|
||||
|
|
|
|||
Loading…
Reference in New Issue