diff --git a/cn-Book/3.实现注意力机制.md b/cn-Book/3.实现注意力机制.md
index f32ae1f..c1ba5cd 100644
--- a/cn-Book/3.实现注意力机制.md
+++ b/cn-Book/3.实现注意力机制.md
@@ -164,7 +164,7 @@ inputs = torch.tensor(
   [[0.43, 0.15, 0.89], # Your     (x^1)
    [0.55, 0.87, 0.66], # journey  (x^2)
    [0.57, 0.85, 0.64], # starts   (x^3)
-	 [0.22, 0.58, 0.33], # with     (x^4)
+   [0.22, 0.58, 0.33], # with     (x^4)
    [0.77, 0.25, 0.10], # one      (x^5)
    [0.05, 0.80, 0.55]] # step     (x^6)
 
@@ -233,7 +233,7 @@ tensor([0.9544, 1.4950, 1.4754, 0.8434, 0.7070, 1.0865])
 > ```python
 > res = 0.
 > for idx, element in enumerate(inputs[0]):
->  res += inputs[0][idx] * query[idx]
+>     res += inputs[0][idx] * query[idx]
 > print(res)
 > print(torch.dot(inputs[0], query))
 > ```
@@ -609,7 +609,7 @@ print(attn_scores_2)
 可以看到，输出中的第二个元素与我们之前计算的 `attn_score_22` 相同：
 
 ```python
- tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
+tensor([1.2705, 1.8524, 1.8111, 1.0795, 0.5577, 1.5440])
 ```
 
 第三步是将注意力得分转换为注意力权重，如图 3.16 所示。
@@ -627,7 +627,7 @@ print(attn_weights_2)
 结果如下：
 
 ```python
- tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
+tensor([0.1500, 0.2264, 0.2199, 0.1311, 0.0906, 0.1820])
 ```
 
 > [!NOTE]
@@ -929,13 +929,13 @@ print(masked)
 由此生成以下掩码：
 
 ```python
-tensor([[0.2899,   -inf,   -inf,   -inf,   -inf,   -inf],
-		   [0.4656, 0.1723,    -inf,   -inf,   -inf,   -inf],
-       [0.4594, 0.1703, 0.1731,    -inf,   -inf,   -inf],
-       [0.2642, 0.1024, 0.1036,  0.0186,   -inf,   -inf],
-       [0.2183, 0.0874, 0.0882,  0.0177,  0.0786,  -inf],
-       [0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
-       grad_fn=<MaskedFillBackward0>)
+tensor([[0.2899,   -inf,   -inf,   -inf,   -inf,   -inf],
+        [0.4656, 0.1723,   -inf,   -inf,   -inf,   -inf],
+        [0.4594, 0.1703, 0.1731,   -inf,   -inf,   -inf],
+        [0.2642, 0.1024, 0.1036, 0.0186,   -inf,   -inf],
+        [0.2183, 0.0874, 0.0882, 0.0177, 0.0786,   -inf],
+        [0.3408, 0.1270, 0.1290, 0.0198, 0.1290, 0.0078]],
+       grad_fn=<MaskedFillBackward0>)
 ```
 
 现在我们只需要对这些掩码后的结果应用 softmax 函数，就可以完成了：
@@ -1076,7 +1076,7 @@ print(batch.shape)                                              #A
 以上代码生成一个三维张量，包含 2 个输入文本，每个文本包含 6 个 token，每个 token 表示为一个 3 维嵌入向量：
 
 ```python
- torch.Size([2, 6, 3])
+torch.Size([2, 6, 3])
 ```
 
 以下的 CausalAttention 类与我们之前实现的 SelfAttention 类类似，不同之处在于我们现在添加了dropout和因果掩码组件，如以下代码所示：
@@ -1095,7 +1095,7 @@ class CausalAttention(nn.Module):
            'mask',
            torch.triu(torch.ones(context_length, context_length),
            diagonal=1)
-				)                                                         #B
+        )                                                         #B
 
     def forward(self, x):
         b, num_tokens, d_in = x.shape                             #C
@@ -1204,6 +1204,7 @@ tensor([[[-0.4519,  0.2216,  0.4772,  0.1063],
          [-0.5675, -0.0843,  0.5478,  0.3589],
          [-0.5526, -0.0981,  0.5321,  0.3428],
          [-0.5299, -0.1081,  0.5077,  0.3493]],
+
         [[-0.4519,  0.2216,  0.4772,  0.1063],
          [-0.5874,  0.0058,  0.5891,  0.3257],
          [-0.6300, -0.0632,  0.6202,  0.3860],
@@ -1253,7 +1254,7 @@ class MultiHeadAttention(nn.Module):
         self.register_buffer(
             'mask',
              torch.triu(torch.ones(context_length, context_length), diagonal=1)
-				)
+        )
 
 
     def forward(self, x):