{"id":2080,"date":"2024-02-17T15:15:29","date_gmt":"2024-02-17T07:15:29","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=2080"},"modified":"2024-04-28T20:04:46","modified_gmt":"2024-04-28T12:04:46","slug":"%e5%a6%82%e4%bd%95%e5%88%9b%e5%bb%ba%e4%b8%80%e4%b8%aa-gpt-%e6%a8%a1%e5%9e%8b%e5%be%ae%e8%b0%83gpt-2","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=2080","title":{"rendered":"\u5982\u4f55\u521b\u5efa\u4e00\u4e2a GPT \u6a21\u578b(\u5fae\u8c03GPT-2)"},"content":{"rendered":"\n<p>\u8981\u5c06<a href=\"https:\/\/www.aqwu.net\/wp\/?p=2038\">\u5b57\u7b26\u7ea7\u522b\u7684\u8bed\u8a00<\/a>\u6a21\u578b\u66f4\u6539\u4e3a\u7c7b\u4f3cGPT-2\u8fd9\u6837\u7684\u6a21\u578b\uff0c\u6211\u4eec\u9700\u8981\u8003\u8651\u4ee5\u4e0b\u51e0\u4e2a\u5173\u952e\u7684\u4fee\u6539\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u6a21\u578b\u67b6\u6784<\/strong>\uff1a\u4f7f\u7528\u66f4\u590d\u6742\u7684Transformer\u67b6\u6784\uff0cGPT-2\u5177\u6709\u66f4\u591a\u7684\u5c42\u3001\u5934\u548c\u9690\u85cf\u5355\u5143\u3002<\/li>\n\n\n\n<li><strong>\u8bcd\u6c47\u8868\u548c\u5d4c\u5165<\/strong>\uff1a\u4ece\u5b57\u7b26\u7ea7\u522b\u8f6c\u6362\u4e3a\u8bcd\u6216\u5b50\u8bcd\uff08token\uff09\u7ea7\u522b\uff0c\u8fd9\u9700\u8981\u4f7f\u7528\u9884\u8bad\u7ec3\u7684\u8bcd\u6c47\u8868\u548c\u5d4c\u5165\u3002<\/li>\n\n\n\n<li><strong>\u8f93\u5165\u6570\u636e\u9884\u5904\u7406<\/strong>\uff1a\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u8bcd\u6216\u5b50\u8bcd\u7684\u8868\u793a\uff0c\u800c\u4e0d\u662f\u5b57\u7b26\u7684\u8868\u793a\u3002<\/li>\n\n\n\n<li><strong>\u8f93\u51fa\u5904\u7406<\/strong>\uff1a\u7531\u4e8eGPT-2\u4f7f\u7528\u7684\u662f\u66f4\u5927\u7684\u8bcd\u6c47\u8868\uff0c\u56e0\u6b64\u6a21\u578b\u7684\u8f93\u51fa\u548c\u635f\u5931\u8ba1\u7b97\u4e5f\u9700\u8981\u76f8\u5e94\u7684\u8c03\u6574\u3002<\/li>\n\n\n\n<li><strong>\u9884\u8bad\u7ec3\u6a21\u578b\u7684\u52a0\u8f7d\u548c\u5fae\u8c03<\/strong>\uff1a\u53ef\u4ee5\u76f4\u63a5\u52a0\u8f7d\u9884\u8bad\u7ec3\u7684GPT-2\
u6a21\u578b\uff0c\u7136\u540e\u6839\u636e\u7279\u5b9a\u4efb\u52a1\u8fdb\u884c\u5fae\u8c03\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u4e0b\u9762\u662f\u4e00\u4e2a\u7b80\u5316\u7684\u793a\u4f8b\uff0c\u8bf4\u660e\u5982\u4f55\u4f7f\u7528Hugging Face\u7684Transformers\u5e93\u6765\u52a0\u8f7d\u9884\u8bad\u7ec3\u7684GPT-2\u6a21\u578b\u5e76\u8fdb\u884c\u5fae\u8c03\u3002\u8fd9\u9700\u8981\u5148\u5b89\u88c5<code>transformers<\/code>\u5e93\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>pip install transformers<\/code><\/pre>\n\n\n\n<p>\u4e5f\u6709\u53ef\u80fd\u9700\u8981\u5b89\u88c5\u4e0b\u9762\u7684\u6a21\u5757\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>pip install protobuf\npip install click<\/code><\/pre>\n\n\n\n<p>\u4e3a\u4e86\u8c03\u8bd5\u65b9\u4fbf\uff0c\u5efa\u8bae\u5148\u4e0b\u8f7dgpt2\u6a21\u578b<\/p>\n\n\n\n<p><a href=\"https:\/\/huggingface.co\/openai-community\/gpt2\">https:\/\/huggingface.co\/openai-community\/gpt2<\/a><\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>1. 
\u5fae\u8c03GPT-2\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>\u4ee3\u7801 setup1.1.py<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \" >from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '[PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = [\"\u8fd9\u662f\u4e00\u4e2a\u793a\u4f8b\u6587\u672c\u3002\", \"\u8fd9\u662f\u53e6\u4e00\u4e2a\u4f8b\u5b50\u3002\"]\ninput_ids = [tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\nfor epoch in range(1):  # \u5fae\u8c03epoch\n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    print(f\"Epoch {epoch}: Loss {loss.item()}\")\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs['input_ids']\nattention_mask = 
inputs['attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=50, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c setup1.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup1.1.py\nEpoch 0: Loss 50.36573791503906\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\n1: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\n2: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50<\/code><\/pre>\n\n\n\n<p>\u4ee3\u7801\u8fd0\u884c\u7ed3\u679c\u8868\u660e\uff0c\u6a21\u578b\u5df2\u7ecf\u80fd\u591f\u6210\u529f\u8fdb\u884c\u6587\u672c\u751f\u6210\uff0c\u4f46\u751f\u6210\u7684\u6587\u672c\u5728\u6240\u6709\u60c5\u51b5\u4e0b\u90fd\u662f\u76f8\u540c\u7684\uff0c\u8fd9\u53ef\u80fd\u5e76\u4e0d\u662f\u60a8\u671f\u671b\u7684\u7ed3\u679c\u3002\u8fd9\u79cd\u73b0\u8c61\u53ef\u80fd\u662f\u7531\u4e8e\u51e0\u4e2a\u539f\u56e0\u5bfc\u81f4\u7684\uff1a<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u8fc7\u9ad8\u7684\u635f\u5931\u503c<\/strong>\uff1a\u635f\u5931\u503c<code>50.36573791503906<\/code>\u76f8\u5bf9\u8f83\u9ad8\uff0c\u8fd9\u8868\u660e\u6a21\u578b\u5728\u5fae\u8c03\u8fc7\u7a0b\u4e2d\u53ef\u80fd\u6ca1\u6709\u5f88\u597d\u5730\u5b66\u4e60\u5230\u6570\u636e\u7684\u5206\u5e03\u3002\u8fd9\u53ef\u80fd\u662f\u56e0\u4e3a\u6570\u636e\u91cf\u592a\u5c11\u3001\u5b66\u4e60\u7387\u4e0d\u9002\u5f53\u3001\u6216\u8005\u662f\u56e0\u4e3a\u5fae\u8c03\u7684\u8fed\u4ee3\u6b21\u6570\u592a\u5c11\u3002<\/li>\n\n\n\n<li><strong>\u751f\u6210\u8bbe\u7f6e<\/strong>\uff1a\u5728\u60a8\u7684\u751f\u6210\u8bbe\u7f6e\u4e2d\uff0c\u867d\u7136\u5df2\u7ecf\u542f\u7528\u4e86\u91c7\u6837\uff08<code>do_sample=True<\/code>\uff09\u548cTop-p\u91c7\u6837\uff08<code>top_p=0.92<\/code>\uff09\uff0c\u4f46\u5982\u679c\u6a21\u578b\u6ca1\u6709\u5f88\u597d\u5730\u5b66\u4e60\u5230\u6570\u636e\u7684\u5206\u5e03\uff0c\u8fd9\u4e9b\u8bbe\u7f6e\u53ef\u80fd\u4e0d\u8db3\u4ee5\u751f\u6210\u591a\u6837\u5316\u7684\u6587\u672c\u3002<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u89e3\u51b3\u5efa\u8bae\uff1a<\/h3>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u589e\u52a0\u6570\u636e\u91cf<\/strong>\uff1a\u5982\u679c\u53ef\u80fd\uff0c\u589e\u52a0\u5fae\u8c03\u65f6\u4f7f\u7528\u7684\u6587\u672c\u91cf\u3002\u66f4\u591a\u7684\u6570\u636e\u53ef\u4ee5\u5e2e\u52a9\u6a21\u578b\u66f4\u597d\u5730\u7406\u89e3\u8bed\u8a00\u7684\u591a\u6837\u6027\u3002<\/li>\n\n\n\n<li><strong>\u8c03\u6574\u5b66\u4e60\u7387<\/strong>\uff1a\u5c1d\u8bd5\u4e0d\u540c\u7684\u5b66\u4e60\u7387\u3002\u6709\u65f6\u5019\uff0c\u8f83\u5c0f\u7684\u5b66\u4e60\u7387\u53ef\u80fd\u6709\u52a9\u4e8e\u6a21\u578b\u66f4\u7a33\u5b9a\u5730\u5b66\u4e60\u3002<\/li>\n\n\n\n<li><strong>\u589e\u52a0\u8fed\u4ee3\u6b21\u6570<\/strong>\uff1a\u53ea\u8fdb\u884c\u4e86\u4e00\u4e2aepoch\u7684\u8bad\u7ec3\u53ef\u80fd\u4e0d\u8db3\u4ee5\u4f7f\u6a21\u578b\u9002\u5e94\u60a8\u7684\u6570\u636e\u3002\u589e\u52a0\u8fed\u4ee3\u6b21\u6570\u53ef\u80fd\u6709\u52a9\u4e8e\u964d\u4f4e\u635f\u5931\u5e76\u6539\u5584\u751f\u6210\u7684\u8d28\u91cf\u3002<\/li>\n\n\n\n<li><strong>\u5b9e\u9a8c\u4e0d\u540c\u7684\u751f\u6210\u53c2\u6570<\/strong>\uff1a\u8c03\u6574\u751f\u6210\u6587\u672c\u65f6\u7684\u53c2\u6570\uff0c\u5982<code>temperature<\/code>\uff08\u63a7\u5236\u751f\u6210\u6587\u672c\u7684\u968f\u673a\u6027\uff09\uff0c<code>top_k<\/code>\uff08\u53ea\u4ece\u6982\u7387\u6700\u9ad8\u7684k\u4e2a\u4ee4\u724c\u4e2d\u62bd\u6837\uff09\uff0c\u6216\u8005\u589e\u52a0<code>num_beams<\/code>\u8fdb\u884c\u6ce2\u675f\u641c\u7d22\uff0c\u53ef\u80fd\u4f1a\u4ea7\u751f\u66f4\u591a\u6837\u5316\u7684\u8f93\u51fa\u3002<\/li>\n\n\n\n<li><strong>\u8bc4\u4f30\u6a21\u578b\u6027\u80fd<\/strong>\uff1a\u5728\u5fae\u8c03\u540e\uff0c\u4f7f\u7528\u4e00\u4e9b\u4fdd\u7559\u7684\u6d4b\u8bd5\u6570\u636e\u6765\u8bc4\u4f30\u6a21\u578b\u6027\u80fd\uff0c\u786e\u4fdd\u6a21\u578b\u4e0d\u4ec5\u5728\u8bad\u7ec3\u6570\u636e\u4e0a\u8868\u73b0\u826f\u597d\uff0c\u4e5f\u80fd\u6cdb\u5316\u5230\u672a\u89c1\u8fc7\u7684\u6570\u636e\u4e0a\u3002<\/li>\n\n\n\n<li><strong>\u6ce8\u610f\u529b\u63a9\u7801\u548c\u586b\u5145\u4ee
4\u724c<\/strong>\uff1a\u60a8\u7684\u4ee3\u7801\u5df2\u7ecf\u6b63\u786e\u8bbe\u7f6e\u4e86<code>attention_mask<\/code>\u548c<code>pad_token_id<\/code>\uff0c\u8fd9\u6709\u52a9\u4e8e\u63d0\u9ad8\u751f\u6210\u8d28\u91cf\u3002\u786e\u8ba4\u8fd9\u4e9b\u8bbe\u7f6e\u662f\u5426\u6309\u9884\u671f\u5de5\u4f5c\uff0c\u6709\u65f6\u5019\u8c03\u6574\u8fd9\u4e9b\u8bbe\u7f6e\u4e5f\u80fd\u6539\u5584\u7ed3\u679c\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u8bf7\u6ce8\u610f\uff0c\u6587\u672c\u751f\u6210\u4efb\u52a1\u901a\u5e38\u9700\u8981\u5927\u91cf\u7684\u6570\u636e\u548c\u8ba1\u7b97\u8d44\u6e90\u6765\u8fbe\u5230\u826f\u597d\u7684\u6548\u679c\uff0c\u7279\u522b\u662f\u5f53\u4f7f\u7528\u50cfGPT-2\u8fd9\u6837\u7684\u5927\u578b\u8bed\u8a00\u6a21\u578b\u65f6\u3002\u6301\u7eed\u5b9e\u9a8c\u548c\u8c03\u6574\u662f\u4f18\u5316\u6a21\u578b\u6027\u80fd\u7684\u5173\u952e\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>2. \u4fdd\u5b58\u5fae\u8c03\u540e\u7684\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>\u5728\u5b8c\u6210\u5fae\u8c03\u540e\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528<code>torch.save<\/code>\u6765\u4fdd\u5b58\u6a21\u578b\u7684\u72b6\u6001\u5b57\u5178\uff0c\u4ee5\u53ca\u5206\u8bcd\u5668\u7684\u914d\u7f6e\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code># \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\ntokenizer.save_pretrained(model_path)\nmodel.save_pretrained(model_path)<\/code><\/pre>\n\n\n\n<p>\u4ee3\u7801 setup2.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# 
\u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\"\u8fd9\u662f\u4e00\u4e2a\u793a\u4f8b\u6587\u672c\u3002\", \"\u8fd9\u662f\u53e6\u4e00\u4e2a\u4f8b\u5b50\u3002\"]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\nfor epoch in range(1):  # \u5fae\u8c03epoch\n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    print(f\"Epoch {epoch}: Loss {loss.item()}\")\n\n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\n\ntokenizer.save_pretrained(model_path)\nmodel.save_pretrained(model_path)\n\nprint(f\"Model saved to {model_path}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# 
\u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=50, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup2.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup2.1.py\nEpoch 0: Loss 51.30159378051758\nModel saved to models\/gpt2_finetuned\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\n1: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\n2: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>3. 
\u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>\u5f53\u60a8\u9700\u8981\u518d\u6b21\u4f7f\u7528\u6a21\u578b\u65f6\uff0c\u53ef\u4ee5\u901a\u8fc7\u52a0\u8f7d\u4fdd\u5b58\u7684\u6a21\u578b\u72b6\u6001\u548c\u5206\u8bcd\u5668\u6765\u6062\u590d\u6a21\u578b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel\n\n# \u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\ntokenizer = GPT2Tokenizer.from_pretrained(model_path)\nmodel = GPT2LMHeadModel.from_pretrained(model_path)<\/code><\/pre>\n\n\n\n<p>\u4ee3\u7801 setup3.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel\nimport torch\n\n# \u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\ntokenizer = GPT2Tokenizer.from_pretrained(model_path)\nmodel = GPT2LMHeadModel.from_pretrained(model_path)\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=50, \n    num_return_sequences=3,\n    do_sample=True,  # 
\u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup3.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup3.1.py\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\ufffd\n1: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50\n2: \u8fd9\u662f\u4e00\u4e2a\u6587\u672c\u751f\u6210\u7684\u4f8b\u5b50<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>4. \u589e\u52a0\u6570\u636e\u91cf<\/strong><\/h2>\n\n\n\n<p>\u589e\u52a0\u4e2d\u6587\u6570\u636e\u91cf\uff0c\u6dfb\u52a0\u8bba\u8bed\u91cc\u4e00\u6bb5\uff0c\u8bd5\u8bd5\u6548\u679c\uff0chttps:\/\/github.com\/chinese-poetry\/chinese-poetry\/blob\/master\/%E8%AE%BA%E8%AF%AD\/lunyu.json<\/p>\n\n\n\n<p>\u4ee3\u7801 setup4.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\n    
\"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u65f6\u4e60\u4e4b\uff0c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u670b\u81ea\u8fdc\u65b9\u6765\uff0c\u4e0d\u4ea6\u4e50\u4e4e\uff1f\u4eba\u4e0d\u77e5\u800c\u4e0d\u6120\uff0c\u4e0d\u4ea6\u541b\u5b50\u4e4e\uff1f\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u5176\u4e3a\u4eba\u4e5f\u5b5d\u5f1f\uff0c\u800c\u597d\u72af\u4e0a\u8005\uff0c\u9c9c\u77e3\uff1b\u4e0d\u597d\u72af\u4e0a\u800c\u597d\u4f5c\u4e71\u8005\uff0c\u672a\u4e4b\u6709\u4e5f\u3002\u541b\u5b50\u52a1\u672c\uff0c\u672c\u7acb\u800c\u9053\u751f\u3002\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u4e3a\u4ec1\u4e4b\u672c\u4e0e\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u543e\u65e5\u4e09\u7701\u543e\u8eab\uff1a\u4e3a\u4eba\u8c0b\u800c\u4e0d\u5fe0\u4e4e\uff1f\u4e0e\u670b\u53cb\u4ea4\u800c\u4e0d\u4fe1\u4e4e\uff1f\u4f20\u4e0d\u4e60\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\",\n    \"\u5b50\u590f\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\u5fc5\u8c13\u4e4b\u5b66\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u614e\u7ec8\u8ffd\u8fdc\uff0c\u6c11\u5fb7\u5f52\u539a\u77e3\u3002\u201d\",\n   
 \"\u5b50\u79bd\u95ee\u4e8e\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u81f3\u4e8e\u662f\u90a6\u4e5f\uff0c\u5fc5\u95fb\u5176\u653f\uff0c\u6c42\u4e4b\u4e0e\uff0c\u6291\u4e0e\u4e4b\u4e0e\uff1f\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff0c\u5176\u8bf8\u5f02\u4e4e\u4eba\u4e4b\u6c42\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u793c\u4e4b\u7528\uff0c\u548c\u4e3a\u8d35\u3002\u5148\u738b\u4e4b\u9053\uff0c\u65af\u4e3a\u7f8e\uff0c\u5c0f\u5927\u7531\u4e4b\u3002\u6709\u6240\u4e0d\u884c\uff0c\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u793c\u8282\u4e4b\uff0c\u4ea6\u4e0d\u53ef\u884c\u4e5f\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u4fe1\u8fd1\u4e8e\u4e49\uff0c\u8a00\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u8fdc\u803b\u8fb1\u4e5f\u3002\u56e0\u4e0d\u5931\u5176\u4eb2\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u98df\u65e0\u6c42\u9971\uff0c\u5c45\u65e0\u6c42\u5b89\uff0c\u654f\u4e8e\u4e8b\u800c\u614e\u4e8e\u8a00\uff0c\u5c31\u6709\u9053\u800c\u6b63\u7109\u3002\u53ef\u8c13\u597d\u5b66\u4e5f\u5df2\u3002\u201d\",\n    
\"\u5b50\u8d21\u66f0\uff1a\u201c\u8d2b\u800c\u65e0\u8c04\uff0c\u5bcc\u800c\u65e0\u9a84\uff0c\u4f55\u5982\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u672a\u82e5\u8d2b\u800c\u4e50\uff0c\u5bcc\u800c\u597d\u793c\u8005\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff1a\u2018\u5982\u5207\u5982\u78cb\uff0c\u5982\u7422\u5982\u78e8\u2019\uff0c\u5176\u65af\u4e4b\u8c13\u4e0e\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u8d50\u4e5f\uff0c\u59cb\u53ef\u4e0e\u8a00\u300a\u8bd7\u300b\u5df2\u77e3\uff0c\u544a\u8bf8\u5f80\u800c\u77e5\u6765\u8005\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\"\n    ]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\n# \u5fae\u8c03epochs\nepochs = 1\nfor epoch in range(epochs):  \n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    print(f\"Epoch {epoch}: Loss {loss.item()}\")\n\n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\n\ntokenizer.save_pretrained(model_path)\nmodel.save_pretrained(model_path)\n\nprint(f\"Model saved to {model_path}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\", return_tensors='pt', padding=True, truncation=True, 
max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=50, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup4.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup4.1.py\nEpoch 0: Loss 4.523581504821777\nModel saved to models\/gpt2_finetuned\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\u751f\u751f\u6219\u668c\ufffd\u6c17\u5408\u7912\u76f8\ufffd\ufffd\u6697\u631f\u4f12\u9001\u7a12\u7f88\u53cb\u751f\u525b\ufffd\ufffd\n1: \u5b50\u66f0,\n, and, and, and,\n,\n\nInsects,\n,\n,\n\n,\nSaying,\n,\nCarnals\n,\n\n2: \u5b50\u66f0\u9023\ufffd\ufffd\u9023\u9019\u52da\u9023\u52df\ufffd\ufffd\u52d6\ufffd\u7815\u9024\u52e2\u52f8\u6b57\u7815\u9017\u5f9b\u4e5a\u9026\n<\/code><\/pre>\n\n\n\n<p>\u4ece\u4e0a\u9762\u6765\u770b\uff0c\u6548\u679c\u5f88\u5dee\uff0c\u56e0\u4e3aepochs \u53ea\u4e3a1\u6b21<\/p>\n\n\n\n<p>\u90a3\u73b0\u5728\u628aepochs \u6539\u4e3a 100\u6b21<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>epochs = 100\n<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup4.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup4.1.py\nEpoch 0: Loss 4.635136127471924\nEpoch 1: Loss 3.7040677070617676\nEpoch 2: Loss 3.099323034286499\nEpoch 
3: Loss 2.614231824874878\n......\nEpoch 97: Loss 0.08564870804548264\nEpoch 98: Loss 0.06109260767698288\nEpoch 99: Loss 0.3170224130153656\nModel saved to models\/gpt2_finetuned\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u66f0\uff1a\u201c\u4e0d\u77e5\n1: \u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff1b\u7236\u5728\uff1b\u4e09\u5e74\u65e0\ufffd\n2: \u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u60a3\u4eba\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u826f\u8fd1\u4e8e\ufffd\n<\/code><\/pre>\n\n\n\n<p>\u73b0\u5728\u518d\u770b\u8fd0\u884c\u7ed3\u679c\uff0c\u6548\u679c\u597d\u591a\u4e86<\/p>\n\n\n\n<p>\u4ee3\u7801\u5df2\u7ecf\u5f88\u5b8c\u6574\u5730\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528Transformers\u5e93\u6765\u5fae\u8c03\u5e76\u4fdd\u5b58\u4e00\u4e2aGPT-2\u6a21\u578b\uff0c\u7136\u540e\u4f7f\u7528\u8fd9\u4e2a\u5fae\u8c03\u540e\u7684\u6a21\u578b\u6765\u751f\u6210\u6587\u672c\u3002\u60a8\u7684\u6d41\u7a0b\u5305\u62ec\uff1a<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u52a0\u8f7d\u9884\u8bad\u7ec3\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668<\/strong>\uff1a\u4f7f\u7528<code>GPT2Tokenizer<\/code>\u548c<code>GPT2LMHeadModel<\/code>\u4ece\u672c\u5730\u8def\u5f84<code>'models\/gpt2'<\/code>\u52a0\u8f7d\u3002<\/li>\n\n\n\n<li><strong>\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c<\/strong>\uff1a\u68c0\u67e5\u5206\u8bcd\u5668\u7684<code>pad_token<\/code>\u662f\u5426\u8bbe\u7f6e\uff0c\u5982\u679c\u6ca1\u6709\uff0c\u5219\u6dfb\u52a0\u4e00\u4e2a\uff0c\u5e76\u6269\u5c55\u6a21\u578b\u7684\u8bcd\u5d4c\u5165\u4ee5\u9002\u5e94\u65b0\u7684\u4ee4\u724c\u3002<\/li>\n\n\n\n<li><strong>\u51c6\u5907\u6570\u636e<\/strong>\uff1a\u5c06\u4e00\u7cfb\u5217\u6587\u672c\u7f16\u7801\u4e3a\u6a21\u578b\u7684\u8f93\u5165ID\uff0c\u540c\u65f6\u786e\u4fdd\u6bcf\u4e2a\u6587\u672c\u90fd\u8fdb\u884c\u4e86\u9002\u5f53\u7684\u586b\u5145\u548c\u622a\u65ad\u3002<\/li>\n\n\n\n<li><strong>\u8bbe\u7f6e\u6a21\u578b\u548c\u4f18\u5316\u5668<\/strong>\uff1a\u5c06\u6a21\u578b\u79fb\u5230\u9002\u5f53\u7684\u8bbe\u5907\uff08GPU\u6216CPU\uff09\uff0c\u5e76\u521d\u59cb\u5316<code>AdamW<\/code>\u4f18\u5316\u5668\u3002<\/li>\n\n\n\n<li><strong>\u8fdb\u884c\u5fae\u8c03<\/strong>\uff1a\u901a\u8fc7\u591a\u4e2aepochs\u904d\u5386\u6570\u636e\uff0c\u8fdb\u884c\u68af\u5ea6\u4e0b\u964d\u4f18\u5316\u3002<\/li>\n\n\n\n<li><strong>\u4fdd\u5b58\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668<\/strong>\uff1a\u5c06\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\u4fdd\u5b58\u5230\u6307\u5b9a\u8def\u5f84\u3002<\/li>\n\n\n\n<li><strong>\u751f\u6210\u6587\u672c<\/strong>\uff1a\u4f7f\u7528\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\u751f\u6210\u6587\u672c\uff0c\u663e\u793a\u4e86\u5982\u4f55\u5229\u7528<code>attention_mask<\/code>\u8fdb\u884c\u751f\u6210\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u63a5\u4e0b\u6765\uff0c\u4e3a\u4e86\u786e\u4fdd\u4ee3\u7801\u7684\u5b8c\u6574\u6027\u548c\u6b63\u786e\u6027\uff0c\u6211\u4f1a\u63d0\u4f9b\u4e00\u4e9b\u5c0f\
u7684\u5efa\u8bae\u548c\u6539\u8fdb\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u9a8c\u8bc1\u6a21\u578b\u4fdd\u5b58\u8def\u5f84<\/strong>\uff1a\u5728\u4fdd\u5b58\u6a21\u578b\u4e4b\u524d\uff0c\u786e\u4fdd<code>model_path<\/code>\u6307\u5b9a\u7684\u76ee\u5f55\u5b58\u5728\u6216\u8005\u4ee3\u7801\u4e2d\u6709\u521b\u5efa\u76ee\u5f55\u7684\u903b\u8f91\u3002<\/li>\n\n\n\n<li><strong>\u5fae\u8c03\u671f\u95f4\u7684\u8bc4\u4f30<\/strong>\uff1a\u8003\u8651\u5728\u5fae\u8c03\u8fc7\u7a0b\u4e2d\u6216\u4e4b\u540e\u6dfb\u52a0\u8bc4\u4f30\u6b65\u9aa4\uff0c\u4ee5\u76d1\u63a7\u6a21\u578b\u5728\u9a8c\u8bc1\u96c6\u4e0a\u7684\u6027\u80fd\u3002\u8fd9\u6709\u52a9\u4e8e\u5224\u65ad\u6a21\u578b\u662f\u5426\u8fc7\u62df\u5408\u6216\u8005\u8fd8\u6709\u6539\u8fdb\u7684\u7a7a\u95f4\u3002<\/li>\n\n\n\n<li><strong>\u5b66\u4e60\u7387\u8c03\u6574<\/strong>\uff1a<code>AdamW<\/code>\u4f18\u5316\u5668\u521d\u59cb\u5316\u65f6\u4f7f\u7528\u4e86\u4e00\u4e2a\u56fa\u5b9a\u7684\u5b66\u4e60\u7387\u3002\u5728\u4e00\u4e9b\u60c5\u51b5\u4e0b\uff0c\u4f7f\u7528\u5b66\u4e60\u7387\u8c03\u5ea6\u5668\uff08\u4f8b\u5982\uff0c<code>get_linear_schedule_with_warmup<\/code>\uff09\u6765\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u52a8\u6001\u8c03\u6574\u5b66\u4e60\u7387\u53ef\u80fd\u4f1a\u63d0\u9ad8\u6a21\u578b\u6027\u80fd\u3002<\/li>\n\n\n\n<li><strong>\u66f4\u8be6\u7ec6\u7684\u751f\u6210\u63a7\u5236<\/strong>\uff1a\u6839\u636e\u60a8\u7684\u9700\u6c42\uff0c\u8003\u8651\u8c03\u6574\u751f\u6210\u6587\u672c\u7684\u53c2\u6570\uff08\u5982<code>temperature<\/code>\u3001<code>top_k<\/code>\u3001<code>num_beams<\/code>\u7b49\uff09\u4ee5\u63a7\u5236\u751f\u6210\u6587\u672c\u7684\u591a\u6837\u6027\u548c\u521b\u9020\u6027\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u60a8\u7684\u4ee3\u7801\u5df2\u7ecf\u975e\u5e38\u63a5\u8fd1\u4e8e\u4e00\u4e2a\u5b8c\u6574\u7684\u5fae\u8c03\u548c\u6587\u672c\u751f\u6210\u6d41\u7a0b\u3002\u4ee5\u4e0a\u5efa\u8bae\u53ef\u4ee5\u6839\u636e\u5177\u4f53\u9700\u6c42\u8fdb\u884c\u8c03\u6574\u548c\u5b9e\u65bd\u3002<\/p
>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>5. \u5fae\u8c03\u671f\u95f4\u7684\u8bc4\u4f30<\/strong><\/h2>\n\n\n\n<p>\u5728\u5fae\u8c03\u671f\u95f4\u8fdb\u884c\u6a21\u578b\u8bc4\u4f30\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u6b65\u9aa4\uff0c\u5b83\u53ef\u4ee5\u5e2e\u52a9\u60a8\u76d1\u63a7\u6a21\u578b\u7684\u5b66\u4e60\u8fdb\u5ea6\uff0c\u4ee5\u53ca\u5224\u65ad\u6a21\u578b\u662f\u5426\u8fc7\u62df\u5408\u6216\u8005\u4ecd\u6709\u6539\u8fdb\u7684\u7a7a\u95f4\u3002\u4ee5\u4e0b\u662f\u5982\u4f55\u5728\u5fae\u8c03\u671f\u95f4\u8fdb\u884c\u8bc4\u4f30\u7684\u57fa\u672c\u6b65\u9aa4\uff1a<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">5.1. \u51c6\u5907\u6570\u636e<\/h3>\n\n\n\n<p>\u9996\u5148\uff0c\u60a8\u9700\u8981\u5c06\u6570\u636e\u5206\u6210\u81f3\u5c11\u4e24\u90e8\u5206\uff1a\u4e00\u90e8\u5206\u7528\u4e8e\u8bad\u7ec3\uff08\u5fae\u8c03\uff09\uff0c\u53e6\u4e00\u90e8\u5206\u7528\u4e8e\u8bc4\u4f30\uff08\u901a\u5e38\u79f0\u4e3a\u9a8c\u8bc1\u96c6\uff09\u3002\u8fd9\u6837\uff0c\u60a8\u53ef\u4ee5\u5728\u72ec\u7acb\u7684\u6570\u636e\u4e0a\u8bc4\u4f30\u6a21\u578b\u7684\u6027\u80fd\uff0c\u8fd9\u6709\u52a9\u4e8e\u66f4\u51c6\u786e\u5730\u4f30\u8ba1\u6a21\u578b\u5bf9\u672a\u89c1\u6570\u636e\u7684\u6cdb\u5316\u80fd\u529b\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">5.2. 
\u7f16\u5199\u8bc4\u4f30\u51fd\u6570<\/h3>\n\n\n\n<p>\u8bc4\u4f30\u51fd\u6570\u7684\u76ee\u6807\u662f\u8ba1\u7b97\u548c\u8fd4\u56de\u6a21\u578b\u5728\u9a8c\u8bc1\u96c6\u4e0a\u7684\u6027\u80fd\u6307\u6807\u3002\u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u5e38\u89c1\u7684\u6027\u80fd\u6307\u6807\u5305\u62ec\u56f0\u60d1\u5ea6\uff08Perplexity\uff09\u6216\u8005\u7279\u5b9a\u4efb\u52a1\u7684\u51c6\u786e\u7387\u3002<\/p>\n\n\n\n<p>\u8bc4\u4f30\u51fd\u6570\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>def evaluate(model, tokenizer, device, eval_data):\n    model.eval()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f\n    total_eval_loss = 0\n    for batch in eval_data:\n        inputs = tokenizer(batch, return_tensors='pt', padding=True, truncation=True, max_length=512)\n        input_ids = inputs&#91;'input_ids'].to(device)\n        attention_mask = inputs&#91;'attention_mask'].to(device)\n        labels = input_ids.clone()  # \u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u8f93\u5165\u548c\u6807\u7b7e\u901a\u5e38\u76f8\u540c\n\n        with torch.no_grad():  # \u5728\u8bc4\u4f30\u671f\u95f4\u4e0d\u8ba1\u7b97\u68af\u5ea6\n            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n            loss = outputs.loss\n            total_eval_loss += loss.item()\n\n    avg_eval_loss = total_eval_loss \/ len(eval_data)\n    model.train()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u56de\u8bad\u7ec3\u6a21\u5f0f\n    return avg_eval_loss\n<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">5.3. 
\u5728\u5fae\u8c03\u5faa\u73af\u4e2d\u8c03\u7528\u8bc4\u4f30\u51fd\u6570<\/h3>\n\n\n\n<p>\u5728\u60a8\u7684\u8bad\u7ec3\u5faa\u73af\u7684\u6bcf\u4e2aepoch\u7ed3\u675f\u540e\uff0c\u60a8\u53ef\u4ee5\u8c03\u7528\u8bc4\u4f30\u51fd\u6570\u6765\u8ba1\u7b97\u6a21\u578b\u5728\u9a8c\u8bc1\u96c6\u4e0a\u7684\u6027\u80fd\u3002\u7136\u540e\uff0c\u60a8\u53ef\u4ee5\u6839\u636e\u9700\u8981\u8c03\u6574\u6a21\u578b\u7684\u8d85\u53c2\u6570\u6216\u65e9\u671f\u505c\u6b62\u8bad\u7ec3\u4ee5\u907f\u514d\u8fc7\u62df\u5408\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code># \u5047\u8bbe eval_texts \u662f\u60a8\u7684\u9a8c\u8bc1\u96c6\u6587\u672c\n# eval_input_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in eval_texts]\n\nepochs = 100\nfor epoch in range(epochs):\n    # \u5fae\u8c03\u4ee3\u7801...\n    \n    eval_loss = evaluate(model, tokenizer, device, eval_texts)\n    print(f\"Epoch {epoch}: Eval Loss {eval_loss}\")\n<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u6ce8\u610f\u4e8b\u9879\uff1a<\/h3>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u6570\u636e\u5904\u7406<\/strong>\uff1a\u786e\u4fdd\u8bc4\u4f30\u6570\u636e\u7684\u5904\u7406\u65b9\u5f0f\uff08\u5982\u5206\u8bcd\u3001\u586b\u5145\uff09\u4e0e\u8bad\u7ec3\u6570\u636e\u4e00\u81f4\u3002<\/li>\n\n\n\n<li><strong>\u6a21\u578b\u6a21\u5f0f<\/strong>\uff1a\u5728\u8bc4\u4f30\u524d\u540e\u786e\u4fdd\u6b63\u786e\u8bbe\u7f6e\u6a21\u578b\u7684\u6a21\u5f0f\uff08<code>model.eval()<\/code>\u548c<code>model.train()<\/code>\uff09\u3002<\/li>\n\n\n\n<li><strong>\u6027\u80fd\u6307\u6807<\/strong>\uff1a\u9009\u62e9\u5408\u9002\u7684\u6027\u80fd\u6307\u6807\u6765\u8bc4\u4f30\u60a8\u7684\u6a21\u578b\u3002\u5bf9\u4e8e\u4e0d\u540c\u7684\u4efb\u52a1\uff0c\u53ef\u80fd\u9700\u8981\u4e0d\u540c\u7684\u8bc4\u4f30\u6307\u6807\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u8fd9\u4e2a\u57fa\u672c\u6846\u67b6\u53ef\u4ee5\u6839\u636e\u60a8\u7684\u5177\u4f53\u9700\u6c42\u8fdb\u884c\u8c03\u6574\uff0c\u4f8b\u5982\u5f15\u5165\u66f4\u591a\u7684\u6027\u80fd\u6307\u6807\u6216\u5bf9\u9a8c\u8bc1\u96c6\u8fdb\u884c\u6279\u5904\u7406\u4ee5\u63d0\u9ad8\u8bc4\u4f30\u6548\u7387\u3002<\/p>\n\n\n\n<p>setup5.1.py \u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\n    # \u5b66\u800c\u7bc7\n    
\"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u65f6\u4e60\u4e4b\uff0c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u670b\u81ea\u8fdc\u65b9\u6765\uff0c\u4e0d\u4ea6\u4e50\u4e4e\uff1f\u4eba\u4e0d\u77e5\u800c\u4e0d\u6120\uff0c\u4e0d\u4ea6\u541b\u5b50\u4e4e\uff1f\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u5176\u4e3a\u4eba\u4e5f\u5b5d\u5f1f\uff0c\u800c\u597d\u72af\u4e0a\u8005\uff0c\u9c9c\u77e3\uff1b\u4e0d\u597d\u72af\u4e0a\u800c\u597d\u4f5c\u4e71\u8005\uff0c\u672a\u4e4b\u6709\u4e5f\u3002\u541b\u5b50\u52a1\u672c\uff0c\u672c\u7acb\u800c\u9053\u751f\u3002\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u4e3a\u4ec1\u4e4b\u672c\u4e0e\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u543e\u65e5\u4e09\u7701\u543e\u8eab\uff1a\u4e3a\u4eba\u8c0b\u800c\u4e0d\u5fe0\u4e4e\uff1f\u4e0e\u670b\u53cb\u4ea4\u800c\u4e0d\u4fe1\u4e4e\uff1f\u4f20\u4e0d\u4e60\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\",\n    \"\u5b50\u590f\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\u5fc5\u8c13\u4e4b\u5b66\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u614e\u7ec8\u8ffd\u8fdc\uff0c\u6c11\u5fb7\u5f52\u539a\u77e3\u3002\u201d\",\n   
 \"\u5b50\u79bd\u95ee\u4e8e\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u81f3\u4e8e\u662f\u90a6\u4e5f\uff0c\u5fc5\u95fb\u5176\u653f\uff0c\u6c42\u4e4b\u4e0e\uff0c\u6291\u4e0e\u4e4b\u4e0e\uff1f\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff0c\u5176\u8bf8\u5f02\u4e4e\u4eba\u4e4b\u6c42\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u793c\u4e4b\u7528\uff0c\u548c\u4e3a\u8d35\u3002\u5148\u738b\u4e4b\u9053\uff0c\u65af\u4e3a\u7f8e\uff0c\u5c0f\u5927\u7531\u4e4b\u3002\u6709\u6240\u4e0d\u884c\uff0c\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u793c\u8282\u4e4b\uff0c\u4ea6\u4e0d\u53ef\u884c\u4e5f\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u4fe1\u8fd1\u4e8e\u4e49\uff0c\u8a00\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u8fdc\u803b\u8fb1\u4e5f\u3002\u56e0\u4e0d\u5931\u5176\u4eb2\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u98df\u65e0\u6c42\u9971\uff0c\u5c45\u65e0\u6c42\u5b89\uff0c\u654f\u4e8e\u4e8b\u800c\u614e\u4e8e\u8a00\uff0c\u5c31\u6709\u9053\u800c\u6b63\u7109\u3002\u53ef\u8c13\u597d\u5b66\u4e5f\u5df2\u3002\u201d\",\n    
\"\u5b50\u8d21\u66f0\uff1a\u201c\u8d2b\u800c\u65e0\u8c04\uff0c\u5bcc\u800c\u65e0\u9a84\uff0c\u4f55\u5982\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u672a\u82e5\u8d2b\u800c\u4e50\uff0c\u5bcc\u800c\u597d\u793c\u8005\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff1a\u2018\u5982\u5207\u5982\u78cb\uff0c\u5982\u7422\u5982\u78e8\u2019\uff0c\u5176\u65af\u4e4b\u8c13\u4e0e\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u8d50\u4e5f\uff0c\u59cb\u53ef\u4e0e\u8a00\u300a\u8bd7\u300b\u5df2\u77e3\uff0c\u544a\u8bf8\u5f80\u800c\u77e5\u6765\u8005\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\"\n    ]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\neval_texts = &#91;\n    # \u4e3a\u653f\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u4e3a\u653f\u4ee5\u5fb7\uff0c\u8b6c\u5982\u5317\u8fb0\uff0c\u5c45\u5176\u6240\u800c\u4f17\u661f\u5171\u4e4b\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e09\u767e\uff0c\u4e00\u8a00\u4ee5\u853d\u4e4b\uff0c\u66f0\uff1a\u2018\u601d\u65e0\u90aa\u2019\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u4e4b\u4ee5\u653f\uff0c\u9f50\u4e4b\u4ee5\u5211\uff0c\u6c11\u514d\u800c\u65e0\u803b\u3002\u9053\u4e4b\u4ee5\u5fb7\uff0c\u9f50\u4e4b\u4ee5\u793c\uff0c\u6709\u803b\u4e14\u683c\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u5341\u6709\u4e94\u800c\u5fd7\u4e8e\u5b66\uff0c\u4e09\u5341\u800c\u7acb\uff0c\u56db\u5341\u800c\u4e0d\u60d1\uff0c\u4e94\u5341\u800c\u77e5\u5929\u547d\uff0c\u516d\u5341\u800c\u8033\u987a\uff0c\u4e03\u5341\u800c\u4ece\u5fc3\u6240\u6b32\uff0c\u4e0d\u903e\u77e9\u3002\u201d\",\n    
\"\u5b5f\u61ff\u5b50\u95ee\u5b5d\uff0c\u5b50\u66f0\uff1a\u201c\u65e0\u8fdd\u3002\u201d\u6a0a\u8fdf\u5fa1\uff0c\u5b50\u544a\u4e4b\u66f0\uff1a\u201c\u5b5f\u5b59\u95ee\u5b5d\u4e8e\u6211\uff0c\u6211\u5bf9\u66f0\u2018\u65e0\u8fdd\u2019\u3002\u201d\u6a0a\u8fdf\u66f0\uff1a\u201c\u4f55\u8c13\u4e5f\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u751f\uff0c\u4e8b\u4e4b\u4ee5\u793c\uff1b\u6b7b\uff0c\u846c\u4e4b\u4ee5\u793c\uff0c\u796d\u4e4b\u4ee5\u793c\u3002\u201d\",\n    \"\u5b5f\u6b66\u4f2f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u7236\u6bcd\u552f\u5176\u75be\u4e4b\u5fe7\u3002\u201d\",\n    \"\u5b50\u6e38\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u4eca\u4e4b\u5b5d\u8005\uff0c\u662f\u8c13\u80fd\u517b\u3002\u81f3\u4e8e\u72ac\u9a6c\u7686\u80fd\u6709\u517b\uff1b\u4e0d\u656c\uff0c\u4f55\u4ee5\u522b\u4e4e\uff1f\u201d\",\n    \"\u5b50\u590f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u8272\u96be\u3002\u6709\u4e8b\uff0c\u5f1f\u5b50\u670d\u5176\u52b3\uff1b\u6709\u9152\u98df\uff0c\u5148\u751f\u9994\uff0c\u66fe\u662f\u4ee5\u4e3a\u5b5d\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u4e0e\u56de\u8a00\u7ec8\u65e5\uff0c\u4e0d\u8fdd\uff0c\u5982\u611a\u3002\u9000\u800c\u7701\u5176\u79c1\uff0c\u4ea6\u8db3\u4ee5\u53d1\uff0c\u56de\u4e5f\u4e0d\u611a\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u89c6\u5176\u6240\u4ee5\uff0c\u89c2\u5176\u6240\u7531\uff0c\u5bdf\u5176\u6240\u5b89\uff0c\u4eba\u7109\u5ecb\u54c9\uff1f\u4eba\u7109\u5ecb\u54c9\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u6e29\u6545\u800c\u77e5\u65b0\uff0c\u53ef\u4ee5\u4e3a\u5e08\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u5668\u3002\u201d\",\n    \"\u5b50\u8d21\u95ee\u541b\u5b50\u3002\u5b50\u66f0\uff1a\u201c\u5148\u884c\u5176\u8a00\u800c\u540e\u4ece\u4e4b\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u5468\u800c\u4e0d\u6bd4\uff0c\u5c0f\u4eba\u6bd4\u800c\u4e0d\u5468\u3002\u201d\",\n    
\"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u4e0d\u601d\u5219\u7f54\uff0c\u601d\u800c\u4e0d\u5b66\u5219\u6b86\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u653b\u4e4e\u5f02\u7aef\uff0c\u65af\u5bb3\u4e5f\u5df2\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7531\uff0c\u8bf2\u6c5d\u77e5\u4e4b\u4e4e\uff01\u77e5\u4e4b\u4e3a\u77e5\u4e4b\uff0c\u4e0d\u77e5\u4e3a\u4e0d\u77e5\uff0c\u662f\u77e5\u4e5f\u3002\u201d\",\n    \"\u5b50\u5f20\u5b66\u5e72\u7984\u3002\u5b50\u66f0\uff1a\u201c\u591a\u95fb\u9619\u7591\uff0c\u614e\u8a00\u5176\u4f59\uff0c\u5219\u5be1\u5c24\uff1b\u591a\u89c1\u9619\u6b86\uff0c\u614e\u884c\u5176\u4f59\uff0c\u5219\u5be1\u6094\u3002\u8a00\u5be1\u5c24\uff0c\u884c\u5be1\u6094\uff0c\u7984\u5728\u5176\u4e2d\u77e3\u3002\u201d\",\n    \"\u54c0\u516c\u95ee\u66f0\uff1a\u201c\u4f55\u4e3a\u5219\u6c11\u670d\uff1f\u201d\u5b54\u5b50\u5bf9\u66f0\uff1a\u201c\u4e3e\u76f4\u9519\u8bf8\u6789\uff0c\u5219\u6c11\u670d\uff1b\u4e3e\u6789\u9519\u8bf8\u76f4\uff0c\u5219\u6c11\u4e0d\u670d\u3002\u201d\",\n    \"\u5b63\u5eb7\u5b50\u95ee\uff1a\u201c\u4f7f\u6c11\u656c\u3001\u5fe0\u4ee5\u529d\uff0c\u5982\u4e4b\u4f55\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u4e34\u4e4b\u4ee5\u5e84\uff0c\u5219\u656c\uff1b\u5b5d\u6148\uff0c\u5219\u5fe0\uff1b\u4e3e\u5584\u800c\u6559\u4e0d\u80fd\uff0c\u5219\u529d\u3002\u201d\",\n    \"\u6216\u8c13\u5b54\u5b50\u66f0\uff1a\u201c\u5b50\u595a\u4e0d\u4e3a\u653f\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u300a\u4e66\u300b\u4e91\uff1a\u2018\u5b5d\u4e4e\u60df\u5b5d\uff0c\u53cb\u4e8e\u5144\u5f1f\uff0c\u65bd\u4e8e\u6709\u653f\u3002\u2019\u662f\u4ea6\u4e3a\u653f\uff0c\u595a\u5176\u4e3a\u4e3a\u653f\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4eba\u800c\u65e0\u4fe1\uff0c\u4e0d\u77e5\u5176\u53ef\u4e5f\u3002\u5927\u8f66\u65e0\u8f17\uff0c\u5c0f\u8f66\u65e0\u8ecf\uff0c\u5176\u4f55\u4ee5\u884c\u4e4b\u54c9\uff1f\u201d\",\n    
\"\u5b50\u5f20\u95ee\uff1a\u201c\u5341\u4e16\u53ef\u77e5\u4e5f\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u6bb7\u56e0\u4e8e\u590f\u793c\uff0c\u6240\u635f\u76ca\uff0c\u53ef\u77e5\u4e5f\uff1b\u5468\u56e0\u4e8e\u6bb7\u793c\uff0c\u6240\u635f\u76ca\uff0c\u53ef\u77e5\u4e5f\u3002\u5176\u6216\u7ee7\u5468\u8005\uff0c\u867d\u767e\u4e16\uff0c\u53ef\u77e5\u4e5f\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u975e\u5176\u9b3c\u800c\u796d\u4e4b\uff0c\u8c04\u4e5f\uff1b\u89c1\u4e49\u4e0d\u4e3a\uff0c\u65e0\u52c7\u4e5f\u3002\u201d\"\n    ]\n\n# \u5047\u8bbe eval_texts \u662f\u60a8\u7684\u9a8c\u8bc1\u96c6\u6587\u672c\n# eval_input_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in eval_texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\ndef evaluate(model, tokenizer, device, eval_texts):\n    model.eval()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f\n    inputs = tokenizer(eval_texts, return_tensors='pt', padding=True, truncation=True, max_length=512)\n    input_ids = inputs&#91;'input_ids'].to(device)\n    attention_mask = inputs&#91;'attention_mask'].to(device)\n    labels = input_ids.clone()  # \u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u8f93\u5165\u548c\u6807\u7b7e\u901a\u5e38\u76f8\u540c\n\n    with torch.no_grad():  # \u5728\u8bc4\u4f30\u671f\u95f4\u4e0d\u8ba1\u7b97\u68af\u5ea6\n        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n        loss = outputs.loss\n        eval_loss = loss.item()\n\n    model.train()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u56de\u8bad\u7ec3\u6a21\u5f0f\n    return eval_loss\n\n# \u5fae\u8c03epochs\nepochs = 100\nfor epoch in range(epochs):  \n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = 
model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    eval_loss = evaluate(model, tokenizer, device, eval_texts)\n    print(f\"Epoch {epoch}: Loss {loss.item()}, Eval Loss {eval_loss}\")\n\n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\n\ntokenizer.save_pretrained(model_path)\nmodel.save_pretrained(model_path)\n\nprint(f\"Model saved to {model_path}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=50, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup5.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>$ python setup5.1.py\nEpoch 0: Loss 4.835778713226318, Eval Loss 52.580928802490234\nEpoch 1: Loss 3.648055076599121, Eval Loss 58.712120056152344\nEpoch 2: Loss 3.149979591369629, Eval Loss 62.69910430908203\nEpoch 3: Loss 2.6165666580200195, Eval Loss 68.0480728149414\n......\nEpoch 97: Loss 
0.04511437565088272, Eval Loss 76.11646270751953\nEpoch 98: Loss 0.04040110483765602, Eval Loss 73.62688446044922\nEpoch 99: Loss 0.04995773732662201, Eval Loss 73.57897186279297\nModel saved to models\/gpt2_finetuned\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\n1: \u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\u66f0\uff01\u201d\u8272\u3002\u8272\n2: \u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\ufffd\n<\/code><\/pre>\n\n\n\n<p>\u4ece\u63d0\u4f9b\u7684\u8f93\u51fa\u4e2d\u53ef\u4ee5\u770b\u51fa\uff0c\u8bad\u7ec3\u635f\u5931\uff08Loss\uff09\u968f\u7740epoch\u7684\u589e\u52a0\u800c\u6301\u7eed\u4e0b\u964d\uff0c\u8fd9\u8868\u660e\u6a21\u578b\u5728\u8bad\u7ec3\u96c6\u4e0a\u7684\u6027\u80fd\u6b63\u5728\u6539\u5584\uff0c\u5b66\u4e60\u5230\u4e86\u6570\u636e\u7684\u5206\u5e03\u3002\u7136\u800c\uff0c\u8bc4\u4f30\u635f\u5931\uff08Eval Loss\uff09\u968f\u7740\u8bad\u7ec3\u7684\u8fdb\u884c\u4e0d\u4ec5\u6ca1\u6709\u4e0b\u964d\uff0c\u53cd\u800c\u6709\u6240\u589e\u52a0\uff0c\u8fd9\u901a\u5e38\u662f\u8fc7\u62df\u5408\u7684\u4e00\u4e2a\u8ff9\u8c61\u3002\u8fc7\u62df\u5408\u53d1\u751f\u65f6\uff0c\u6a21\u578b\u5728\u8bad\u7ec3\u6570\u636e\u4e0a\u8868\u73b0\u826f\u597d\uff0c\u4f46\u5728\u672a\u89c1\u8fc7\u7684\u6570\u636e\u4e0a\u8868\u73b0\u8f83\u5dee\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u89e3\u51b3\u8fc7\u62df\u5408\u7684\u7b56\u7565\uff1a<\/h3>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u589e\u52a0\u6570\u636e\u91cf<\/strong>\uff1a\u5982\u679c\u53ef\u80fd\u7684\u8bdd\uff0c\u589e\u52a0\u66f4\u591a\u7684\u8bad\u7ec3\u6570\u636e\u53ef\u4ee5\u5e2e\u52a9\u6a21\u578b\u5b66\u4e60\u5230\u66f4\u5e7f\u6cdb\u7684\u5206\u5e03\uff0c\u4ece\u800c\u63d0\u9ad8\u5176\u6cdb\u5316\u80fd\u529b\u3002<\/li>\n\n\n\n<li><strong>\u6b63\u5219\u5316<\/strong>\uff1a\u5e94\u7528\u6b63\u5219\u5316\u6280\u672f\uff08\u5982\u6743\u91cd\u8870\u51cf\uff09\u53ef\u4ee5\u51cf\u5c11\u6a21\u578b\u7684\u590d\u6742\u5ea6\uff0c\u9632\u6b62\u8fc7\u62df\u5408\u3002\u5728\u4f7f\u7528<code>AdamW<\/code>\u4f18\u5316\u5668\u65f6\uff0c\u6743\u91cd\u8870\u51cf\u5df2\u7ecf\u4f5c\u4e3a\u53c2\u6570\u88ab\u5185\u7f6e\u5b9e\u73b0\u3002<\/li>\n\n\n\n<li><strong>\u65e9\u505c\uff08Early Stopping\uff09<\/strong>\uff1a\u76d1\u63a7\u8bc4\u4f30\u635f\u5931\uff0c\u5e76\u5728\u635f\u5931\u5f00\u59cb\u589e\u52a0\u65f6\u505c\u6b62\u8bad\u7ec3\u3002\u8fd9\u53ef\u4ee5\u9632\u6b62\u6a21\u578b\u5728\u8bad\u7ec3\u6570\u636e\u4e0a\u8fc7\u5ea6\u62df\u5408\u3002<\/li>\n\n\n\n<li><strong>\u6570\u636e\u589e\u5f3a<\/strong>\uff1a\u5bf9\u8bad\u7ec3\u6570\u636e\u5e94\u7528\u8f7b\u5fae\u7684\u53d8\u5316\u6216\u589e\u5f3a\uff0c\u4ee5\u589e\u52a0\u6570\u636e\u7684\u591a\u6837\u6027\u3002<\/li>\n\n\n\n<li><strong>\u7b80\u5316\u6a21\u578b<\/strong>\uff1a\u5982\u679c\u6a21\u578b\u8fc7\u4e8e\u590d\u6742\uff0c\u53ef\u80fd\u4f1a\u6355\u6349\u5230\u8bad\u7ec3\u6570\u636e\u4e2d\u7684\u566a\u58f0\u3002\u7b80\u5316\u6a21\u578b\u7684\u7ed3\u6784\u53ef\u80fd\u6709\u52a9\u4e8e\u63d0\u9ad8\u6cdb\u5316\u80fd\u529b\u3002<\/li>\n\n\n\n<li><strong>Dropout<\/strong>\uff1a\u5728\u8bad\u7ec3\u671f\u95f4\u968f\u673a\u4e22\u5f03\u4e00\u90e8\u5206\u7f51\u7edc\u7684\u8fde\u63a5\uff0c\u53ef\u4ee5\u6709\u6548\u51cf\u5c11\u8fc7\u62df\u5408\u3002<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading 
has-medium-font-size\">\u751f\u6210\u6587\u672c\u7684\u8d28\u91cf<\/h3>\n\n\n\n<p>\u751f\u6210\u7684\u6587\u672c\u770b\u8d77\u6765\u4e0e\u8bad\u7ec3\u6587\u672c\u7684\u98ce\u683c\u4e00\u81f4\uff0c\u663e\u793a\u51fa\u6a21\u578b\u5df2\u7ecf\u5b66\u4e60\u5230\u4e86\u4e00\u4e9b\u6587\u672c\u7684\u751f\u6210\u89c4\u5f8b\u3002\u5982\u679c\u751f\u6210\u7684\u6587\u672c\u8d28\u91cf\u4e0d\u5982\u9884\u671f\uff0c\u53ef\u4ee5\u5c1d\u8bd5\u8c03\u6574\u751f\u6210\u53c2\u6570\uff08\u5982\u6e29\u5ea6<code>temperature<\/code>\u3001\u6700\u5927\u957f\u5ea6<code>max_length<\/code>\u3001<code>top_k<\/code>\u3001<code>top_p<\/code>\u7b49\uff09\u6765\u6539\u5584\u7ed3\u679c\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0c\u8fc7\u62df\u5408\u53ef\u80fd\u4f1a\u5bfc\u81f4\u6a21\u578b\u5728\u751f\u6210\u65f6\u8fc7\u5206\u4f9d\u8d56\u8bad\u7ec3\u6570\u636e\uff0c\u4ece\u800c\u964d\u4f4e\u751f\u6210\u6587\u672c\u7684\u591a\u6837\u6027\u3002\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u4e0a\u8ff0\u51cf\u5c11\u8fc7\u62df\u5408\u7684\u7b56\u7565\u4e5f\u53ef\u80fd\u5e2e\u52a9\u6539\u5584\u751f\u6210\u7684\u8d28\u91cf\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u7ed3\u8bba<\/h3>\n\n\n\n<p>\u6839\u636e\u60a8\u7684\u8f93\u51fa\uff0c\u5efa\u8bae\u91cd\u70b9\u5173\u6ce8\u51cf\u5c11\u8fc7\u62df\u5408\uff0c\u5e76\u6839\u636e\u9700\u8981\u8c03\u6574\u6a21\u578b\u53c2\u6570\u6216\u8bad\u7ec3\u7b56\u7565\u3002\u8fd9\u53ef\u80fd\u5305\u62ec\u6570\u636e\u5904\u7406\u65b9\u9762\u7684\u6539\u8fdb\u3001\u6a21\u578b\u7ed3\u6784\u7684\u8c03\u6574\u3001\u4f18\u5316\u5668\u53c2\u6570\u7684\u5fae\u8c03\uff0c\u4ee5\u53ca\u751f\u6210\u7b56\u7565\u7684\u4f18\u5316\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>6. 
\u5728\u8bad\u7ec3\u635f\u5931\u6700\u4f4e\u7684\u65f6\u5019\u4fdd\u5b58\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>\u4e3a\u4e86\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u5f53\u635f\u5931\u6700\u4f4e\u65f6\u4fdd\u5b58\u6a21\u578b\uff0c\u60a8\u53ef\u4ee5\u5728\u8bad\u7ec3\u5faa\u73af\u4e2d\u6dfb\u52a0\u4e00\u4e9b\u903b\u8f91\u6765\u8ffd\u8e2a\u6700\u4f4e\u7684\u8bad\u7ec3\u635f\u5931\uff0c\u5e76\u4ec5\u5728\u5f53\u524depoch\u7684\u635f\u5931\u4f4e\u4e8e\u4e4b\u524d\u6240\u6709epoch\u7684\u6700\u4f4e\u635f\u5931\u65f6\u4fdd\u5b58\u6a21\u578b\u3002\u8fd9\u79cd\u65b9\u6cd5\u786e\u4fdd\u4e86\u60a8\u4fdd\u5b58\u7684\u6a21\u578b\u662f\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u8868\u73b0\u6700\u597d\u7684\u90a3\u4e00\u4e2a\u3002<\/p>\n\n\n\n<p>setup6.1.py \u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\n    # \u5b66\u800c\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u65f6\u4e60\u4e4b\uff0c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u670b\u81ea\u8fdc\u65b9\u6765\uff0c\u4e0d\u4ea6\u4e50\u4e4e\uff1f\u4eba\u4e0d\u77e5\u800c\u4e0d\u6120\uff0c\u4e0d\u4ea6\u541b\u5b50\u4e4e\uff1f\u201d\",\n    
\"\u6709\u5b50\u66f0\uff1a\u201c\u5176\u4e3a\u4eba\u4e5f\u5b5d\u5f1f\uff0c\u800c\u597d\u72af\u4e0a\u8005\uff0c\u9c9c\u77e3\uff1b\u4e0d\u597d\u72af\u4e0a\u800c\u597d\u4f5c\u4e71\u8005\uff0c\u672a\u4e4b\u6709\u4e5f\u3002\u541b\u5b50\u52a1\u672c\uff0c\u672c\u7acb\u800c\u9053\u751f\u3002\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u4e3a\u4ec1\u4e4b\u672c\u4e0e\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u543e\u65e5\u4e09\u7701\u543e\u8eab\uff1a\u4e3a\u4eba\u8c0b\u800c\u4e0d\u5fe0\u4e4e\uff1f\u4e0e\u670b\u53cb\u4ea4\u800c\u4e0d\u4fe1\u4e4e\uff1f\u4f20\u4e0d\u4e60\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\",\n    \"\u5b50\u590f\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\u5fc5\u8c13\u4e4b\u5b66\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u614e\u7ec8\u8ffd\u8fdc\uff0c\u6c11\u5fb7\u5f52\u539a\u77e3\u3002\u201d\",\n    
\"\u5b50\u79bd\u95ee\u4e8e\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u81f3\u4e8e\u662f\u90a6\u4e5f\uff0c\u5fc5\u95fb\u5176\u653f\uff0c\u6c42\u4e4b\u4e0e\uff0c\u6291\u4e0e\u4e4b\u4e0e\uff1f\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff0c\u5176\u8bf8\u5f02\u4e4e\u4eba\u4e4b\u6c42\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u793c\u4e4b\u7528\uff0c\u548c\u4e3a\u8d35\u3002\u5148\u738b\u4e4b\u9053\uff0c\u65af\u4e3a\u7f8e\uff0c\u5c0f\u5927\u7531\u4e4b\u3002\u6709\u6240\u4e0d\u884c\uff0c\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u793c\u8282\u4e4b\uff0c\u4ea6\u4e0d\u53ef\u884c\u4e5f\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u4fe1\u8fd1\u4e8e\u4e49\uff0c\u8a00\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u8fdc\u803b\u8fb1\u4e5f\u3002\u56e0\u4e0d\u5931\u5176\u4eb2\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u98df\u65e0\u6c42\u9971\uff0c\u5c45\u65e0\u6c42\u5b89\uff0c\u654f\u4e8e\u4e8b\u800c\u614e\u4e8e\u8a00\uff0c\u5c31\u6709\u9053\u800c\u6b63\u7109\u3002\u53ef\u8c13\u597d\u5b66\u4e5f\u5df2\u3002\u201d\",\n    
\"\u5b50\u8d21\u66f0\uff1a\u201c\u8d2b\u800c\u65e0\u8c04\uff0c\u5bcc\u800c\u65e0\u9a84\uff0c\u4f55\u5982\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u672a\u82e5\u8d2b\u800c\u4e50\uff0c\u5bcc\u800c\u597d\u793c\u8005\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff1a\u2018\u5982\u5207\u5982\u78cb\uff0c\u5982\u7422\u5982\u78e8\u2019\uff0c\u5176\u65af\u4e4b\u8c13\u4e0e\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u8d50\u4e5f\uff0c\u59cb\u53ef\u4e0e\u8a00\u300a\u8bd7\u300b\u5df2\u77e3\uff0c\u544a\u8bf8\u5f80\u800c\u77e5\u6765\u8005\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\"\n    ]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\neval_texts = &#91;\n    # \u4e3a\u653f\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u4e3a\u653f\u4ee5\u5fb7\uff0c\u8b6c\u5982\u5317\u8fb0\uff0c\u5c45\u5176\u6240\u800c\u4f17\u661f\u5171\u4e4b\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e09\u767e\uff0c\u4e00\u8a00\u4ee5\u853d\u4e4b\uff0c\u66f0\uff1a\u2018\u601d\u65e0\u90aa\u2019\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u4e4b\u4ee5\u653f\uff0c\u9f50\u4e4b\u4ee5\u5211\uff0c\u6c11\u514d\u800c\u65e0\u803b\u3002\u9053\u4e4b\u4ee5\u5fb7\uff0c\u9f50\u4e4b\u4ee5\u793c\uff0c\u6709\u803b\u4e14\u683c\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u5341\u6709\u4e94\u800c\u5fd7\u4e8e\u5b66\uff0c\u4e09\u5341\u800c\u7acb\uff0c\u56db\u5341\u800c\u4e0d\u60d1\uff0c\u4e94\u5341\u800c\u77e5\u5929\u547d\uff0c\u516d\u5341\u800c\u8033\u987a\uff0c\u4e03\u5341\u800c\u4ece\u5fc3\u6240\u6b32\uff0c\u4e0d\u903e\u77e9\u3002\u201d\",\n    
\"\u5b5f\u61ff\u5b50\u95ee\u5b5d\uff0c\u5b50\u66f0\uff1a\u201c\u65e0\u8fdd\u3002\u201d\u6a0a\u8fdf\u5fa1\uff0c\u5b50\u544a\u4e4b\u66f0\uff1a\u201c\u5b5f\u5b59\u95ee\u5b5d\u4e8e\u6211\uff0c\u6211\u5bf9\u66f0\u2018\u65e0\u8fdd\u2019\u3002\u201d\u6a0a\u8fdf\u66f0\uff1a\u201c\u4f55\u8c13\u4e5f\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u751f\uff0c\u4e8b\u4e4b\u4ee5\u793c\uff1b\u6b7b\uff0c\u846c\u4e4b\u4ee5\u793c\uff0c\u796d\u4e4b\u4ee5\u793c\u3002\u201d\",\n    \"\u5b5f\u6b66\u4f2f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u7236\u6bcd\u552f\u5176\u75be\u4e4b\u5fe7\u3002\u201d\",\n    \"\u5b50\u6e38\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u4eca\u4e4b\u5b5d\u8005\uff0c\u662f\u8c13\u80fd\u517b\u3002\u81f3\u4e8e\u72ac\u9a6c\u7686\u80fd\u6709\u517b\uff1b\u4e0d\u656c\uff0c\u4f55\u4ee5\u522b\u4e4e\uff1f\u201d\",\n    \"\u5b50\u590f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u8272\u96be\u3002\u6709\u4e8b\uff0c\u5f1f\u5b50\u670d\u5176\u52b3\uff1b\u6709\u9152\u98df\uff0c\u5148\u751f\u9994\uff0c\u66fe\u662f\u4ee5\u4e3a\u5b5d\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u4e0e\u56de\u8a00\u7ec8\u65e5\uff0c\u4e0d\u8fdd\uff0c\u5982\u611a\u3002\u9000\u800c\u7701\u5176\u79c1\uff0c\u4ea6\u8db3\u4ee5\u53d1\uff0c\u56de\u4e5f\u4e0d\u611a\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u89c6\u5176\u6240\u4ee5\uff0c\u89c2\u5176\u6240\u7531\uff0c\u5bdf\u5176\u6240\u5b89\uff0c\u4eba\u7109\u5ecb\u54c9\uff1f\u4eba\u7109\u5ecb\u54c9\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u6e29\u6545\u800c\u77e5\u65b0\uff0c\u53ef\u4ee5\u4e3a\u5e08\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u5668\u3002\u201d\"\n    ]\n\n# \u5047\u8bbe eval_texts \u662f\u60a8\u7684\u9a8c\u8bc1\u96c6\u6587\u672c\n# eval_input_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in eval_texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else 
\"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\ndef evaluate(model, tokenizer, device, eval_texts):\n    model.eval()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f\n    inputs = tokenizer(eval_texts, return_tensors='pt', padding=True, truncation=True, max_length=512)\n    input_ids = inputs&#91;'input_ids'].to(device)\n    attention_mask = inputs&#91;'attention_mask'].to(device)\n    labels = input_ids.clone()  # \u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u8f93\u5165\u548c\u6807\u7b7e\u901a\u5e38\u76f8\u540c\n\n    with torch.no_grad():  # \u5728\u8bc4\u4f30\u671f\u95f4\u4e0d\u8ba1\u7b97\u68af\u5ea6\n        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n        loss = outputs.loss\n        eval_loss = loss.item()\n\n    model.train()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u56de\u8bad\u7ec3\u6a21\u5f0f\n    return eval_loss\n\n# \u521d\u59cb\u5316\u6700\u4f4e\u635f\u5931\u4e3a\u65e0\u7a77\u5927\nbest_loss = float('inf')  \n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\" \n\n# \u5fae\u8c03epochs\nepochs = 100\nfor epoch in range(epochs):  \n    total_loss = 0\n    model.train()  # \u786e\u4fdd\u6a21\u578b\u5904\u4e8e\u8bad\u7ec3\u6a21\u5f0f\n\n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n        \n        total_loss += loss.item()\n        \n    # \u8ba1\u7b97\u5e73\u5747\u635f\u5931\n    avg_loss = total_loss \/ len(input_ids)  \n    \n    # \u8bc4\u4f30\u9a8c\u8bc1\u96c6\u635f\u5931\n    eval_loss = evaluate(model, tokenizer, device, eval_texts)\n    print(f\"Epoch {epoch}: Loss {loss.item()}, Eval Loss {eval_loss}\")\n\n    # 
\u5982\u679c\u8fd9\u4e2aepoch\u7684\u5e73\u5747\u635f\u5931\u4f4e\u4e8e\u4e4b\u524d\u7684\u6240\u6709epoch\uff0c\u5219\u4fdd\u5b58\u6a21\u578b\n    if avg_loss &lt; best_loss:\n        best_loss = avg_loss  # \u66f4\u65b0\u6700\u4f4e\u635f\u5931\n        # \u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\n        tokenizer.save_pretrained(model_path)\n        model.save_pretrained(model_path)\n        print(f\"Model saved to {model_path} at epoch {epoch} with training loss {best_loss}\")\n\nprint(f\"Training completed. Best Loss: {best_loss}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=150, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.92,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup6.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup6.1.py\nEpoch 0: Loss 4.671284198760986, Eval Loss 64.36437225341797\nModel saved to models\/gpt2_finetuned at epoch 0 with training loss 12.848731905221939\nEpoch 1: Loss 3.926487922668457, Eval Loss 
64.1885986328125\nModel saved to models\/gpt2_finetuned at epoch 1 with training loss 4.164433002471924\nEpoch 2: Loss 3.4086780548095703, Eval Loss 57.92185592651367\n......\nEpoch 95: Loss 0.059735193848609924, Eval Loss 71.14655303955078\nEpoch 96: Loss 0.08635672181844711, Eval Loss 87.75948333740234\nEpoch 97: Loss 0.1042788103222847, Eval Loss 82.40111541748047\nEpoch 98: Loss 0.09466272592544556, Eval Loss 78.49815368652344\nEpoch 99: Loss 0.03142295777797699, Eval Loss 74.78617095947266\nModel saved to models\/gpt2_finetuned at epoch 99 with training loss 0.061062929569743574\nTraining completed. Best Loss: 0.061062929569743574\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u8c13\u5b5d\u77e3\u3002\u201d\u5b50\u66f0\uff1a\u201c\u543e\u5fc5\u8c13 \u4e4b\u5b5d\uff0c\u5b66\u6587\u5b66\u77ef\u4e0a\ufffd\n1: \u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u56e0\u6539\u4e8e\u7236\u4e3a\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u8282\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u8282\u7528 \u800c\u6709\u4fe1\u3002\u201d\u5b5d\u5f1f\u4e5f\uff0c\u5176\n2: 
\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u4e0d\u77e5\u800c\u597d\ufffd\u4e4e\uff0c\u4e0d\u6120\u77e5\u4eba\u4e5f\u3002\u201d\u4e0d\u77e5\u800c\u597d\u709f\ufffd\u800c\u597d\u4f5c\u4e5f\u3002\u201d\u4e0d\u77e5\u800c\u597d\u4f5c\u4e5f\u3002\u2026\u4e0d\u77e5\u800c\u597d\u70a3\u52ff\u5176\u529b\uff0c\u4e0d\u6120\u77e5\u800c\u4e0d\u6120\u6539\u3002\u201d\u4e0d\ufffd\n<\/code><\/pre>\n\n\n\n<p>\u8fd9\u6bb5\u4ee3\u7801\u5728\u6bcf\u4e2aepoch\u7ed3\u675f\u65f6\u8ba1\u7b97\u4e86\u5e73\u5747\u8bad\u7ec3\u635f\u5931\uff0c\u5e76\u5c06\u5176\u4e0e\u8fc4\u4eca\u4e3a\u6b62\u89c2\u5bdf\u5230\u7684\u6700\u4f4e\u635f\u5931\u8fdb\u884c\u6bd4\u8f83\u3002\u5982\u679c\u5f53\u524depoch\u7684\u635f\u5931\u4f4e\u4e8e\u4e4b\u524d\u7684\u6700\u4f4e\u635f\u5931\uff0c\u5219\u4f1a\u4fdd\u5b58\u5f53\u524d\u6a21\u578b\u548c\u5206\u8bcd\u5668\u5230\u6307\u5b9a\u8def\u5f84\u3002\u8fd9\u6837\uff0c\u60a8\u6700\u7ec8\u4fdd\u5b58\u7684\u6a21\u578b\u5c06\u662f\u5728\u6574\u4e2a\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u635f\u5931\u6700\u4f4e\u7684\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u8bf7\u6ce8\u610f\uff0c\u8fd9\u4e2a\u793a\u4f8b\u4ec5\u5173\u6ce8\u8bad\u7ec3\u635f\u5931\u3002\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u60a8\u53ef\u80fd\u8fd8\u60f3\u6839\u636e\u9a8c\u8bc1\u635f\u5931\u6765\u4fdd\u5b58\u6a21\u578b\uff0c\u56e0\u4e3a\u8fd9\u901a\u5e38\u66f4\u80fd\u53cd\u6620\u6a21\u578b\u5728\u672a\u89c1\u6570\u636e\u4e0a\u7684\u6cdb\u5316\u80fd\u529b\u3002\u8c03\u6574\u8fd9\u6bb5\u4ee3\u7801\u4ee5\u6839\u636e\u9a8c\u8bc1\u635f\u5931\u4fdd\u5b58\u6a21\u578b\u662f\u76f8\u5bf9\u76f4\u63a5\u7684\uff1a\u53ea\u9700\u5c06<code>avg_loss<\/code>\u548c<code>best_loss<\/code>\u66ff\u6362\u4e3a<code>eval_loss<\/code>\u548c\u76f8\u5e94\u7684\u6700\u4f73\u9a8c\u8bc1\u635f\u5931\u53d8\u91cf\u5373\u53ef\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>7. 
\u4f18\u5316\u6a21\u578b\u4fdd\u5b58\u4ee3\u7801\u548c\u8f93\u51fa\u4ee3\u7801<\/strong><\/h2>\n\n\n\n<p>\u8981\u5728\u8bad\u7ec3\u635f\u5931\u6700\u4f4e\u65f6\u4fdd\u5b58\u6a21\u578b\uff0c\u60a8\u9700\u8981\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u8ddf\u8e2a\u6700\u4f4e\u7684\u8bad\u7ec3\u635f\u5931\uff0c\u5e76\u5728\u8fbe\u5230\u65b0\u7684\u6700\u4f4e\u70b9\u65f6\u4fdd\u5b58\u6a21\u578b\u3002\u8fd9\u901a\u5e38\u6d89\u53ca\u5230\u5728\u6bcf\u4e2aepoch\u7ed3\u675f\u65f6\u6bd4\u8f83\u5f53\u524d\u635f\u5931\u4e0e\u8fc4\u4eca\u4e3a\u6b62\u8bb0\u5f55\u7684\u6700\u4f4e\u635f\u5931\uff0c\u5e76\u5728\u5f53\u524d\u635f\u5931\u66f4\u4f4e\u65f6\u66f4\u65b0\u6700\u4f4e\u635f\u5931\u5e76\u4fdd\u5b58\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u5173\u4e8e\u751f\u6210\u6587\u672c\u65f6\u4e0d\u4f9d\u8d56\u4e8e<code>max_length=150<\/code>\uff0c\u800c\u662f\u8f93\u51fa\u9002\u5f53\u957f\u5ea6\u7684\u6587\u672c\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528<code>generate<\/code>\u65b9\u6cd5\u7684\u591a\u4e2a\u53c2\u6570\u6765\u63a7\u5236\u751f\u6210\u7684\u957f\u5ea6\u548c\u8d28\u91cf\uff0c\u4f46\u4e0d\u80fd\u76f4\u63a5\u6307\u5b9a\u201c\u9002\u5f53\u7684\u201d\u957f\u5ea6\uff0c\u56e0\u4e3a\u201c\u9002\u5f53\u201d\u7684\u5b9a\u4e49\u53ef\u80fd\u56e0\u4e0a\u4e0b\u6587\u800c\u5f02\u3002\u4e0d\u8fc7\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u5982<code>eos_token_id<\/code>\u6765\u6307\u5b9a\u4e00\u4e2a\u7ed3\u675f\u4ee4\u724c\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u751f\u6210\u5230\u8fd9\u4e2a\u4ee4\u724c\u65f6\u505c\u6b62\u3002\u5982\u679c\u60a8\u5e0c\u671b\u6a21\u578b\u5728\u903b\u8f91\u4e0a\u5b8c\u6210\u6587\u672c\u540e\u505c\u6b62\uff0c\u800c\u4e0d\u662f\u786c\u7f16\u7801\u4e00\u4e2a\u7279\u5b9a\u7684\u6700\u5927\u957f\u5ea6<\/p>\n\n\n\n<p>setup7.1.py \u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# 
\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\n    # \u5b66\u800c\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u65f6\u4e60\u4e4b\uff0c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u670b\u81ea\u8fdc\u65b9\u6765\uff0c\u4e0d\u4ea6\u4e50\u4e4e\uff1f\u4eba\u4e0d\u77e5\u800c\u4e0d\u6120\uff0c\u4e0d\u4ea6\u541b\u5b50\u4e4e\uff1f\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u5176\u4e3a\u4eba\u4e5f\u5b5d\u5f1f\uff0c\u800c\u597d\u72af\u4e0a\u8005\uff0c\u9c9c\u77e3\uff1b\u4e0d\u597d\u72af\u4e0a\u800c\u597d\u4f5c\u4e71\u8005\uff0c\u672a\u4e4b\u6709\u4e5f\u3002\u541b\u5b50\u52a1\u672c\uff0c\u672c\u7acb\u800c\u9053\u751f\u3002\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u4e3a\u4ec1\u4e4b\u672c\u4e0e\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u543e\u65e5\u4e09\u7701\u543e\u8eab\uff1a\u4e3a\u4eba\u8c0b\u800c\u4e0d\u5fe0\u4e4e\uff1f\u4e0e\u670b\u53cb\u4ea4\u800c\u4e0d\u4fe1\u4e4e\uff1f\u4f20\u4e0d\u4e60\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\",\n    
\"\u5b50\u590f\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\u5fc5\u8c13\u4e4b\u5b66\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u614e\u7ec8\u8ffd\u8fdc\uff0c\u6c11\u5fb7\u5f52\u539a\u77e3\u3002\u201d\",\n    \"\u5b50\u79bd\u95ee\u4e8e\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u81f3\u4e8e\u662f\u90a6\u4e5f\uff0c\u5fc5\u95fb\u5176\u653f\uff0c\u6c42\u4e4b\u4e0e\uff0c\u6291\u4e0e\u4e4b\u4e0e\uff1f\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff0c\u5176\u8bf8\u5f02\u4e4e\u4eba\u4e4b\u6c42\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u793c\u4e4b\u7528\uff0c\u548c\u4e3a\u8d35\u3002\u5148\u738b\u4e4b\u9053\uff0c\u65af\u4e3a\u7f8e\uff0c\u5c0f\u5927\u7531\u4e4b\u3002\u6709\u6240\u4e0d\u884c\uff0c\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u793c\u8282\u4e4b\uff0c\u4ea6\u4e0d\u53ef\u884c\u4e5f\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u4fe1\u8fd1\u4e8e\u4e49\uff0c\u8a00\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u8fdc\u803b\u8fb1\u4e5f\u3002\u56e0\u4e0d\u5931\u5176\u4eb2\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\",\n    
\"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u98df\u65e0\u6c42\u9971\uff0c\u5c45\u65e0\u6c42\u5b89\uff0c\u654f\u4e8e\u4e8b\u800c\u614e\u4e8e\u8a00\uff0c\u5c31\u6709\u9053\u800c\u6b63\u7109\u3002\u53ef\u8c13\u597d\u5b66\u4e5f\u5df2\u3002\u201d\",\n    \"\u5b50\u8d21\u66f0\uff1a\u201c\u8d2b\u800c\u65e0\u8c04\uff0c\u5bcc\u800c\u65e0\u9a84\uff0c\u4f55\u5982\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u672a\u82e5\u8d2b\u800c\u4e50\uff0c\u5bcc\u800c\u597d\u793c\u8005\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff1a\u2018\u5982\u5207\u5982\u78cb\uff0c\u5982\u7422\u5982\u78e8\u2019\uff0c\u5176\u65af\u4e4b\u8c13\u4e0e\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u8d50\u4e5f\uff0c\u59cb\u53ef\u4e0e\u8a00\u300a\u8bd7\u300b\u5df2\u77e3\uff0c\u544a\u8bf8\u5f80\u800c\u77e5\u6765\u8005\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\"\n    ]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\neval_texts = &#91;\n    # \u4e3a\u653f\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u4e3a\u653f\u4ee5\u5fb7\uff0c\u8b6c\u5982\u5317\u8fb0\uff0c\u5c45\u5176\u6240\u800c\u4f17\u661f\u5171\u4e4b\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e09\u767e\uff0c\u4e00\u8a00\u4ee5\u853d\u4e4b\uff0c\u66f0\uff1a\u2018\u601d\u65e0\u90aa\u2019\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u4e4b\u4ee5\u653f\uff0c\u9f50\u4e4b\u4ee5\u5211\uff0c\u6c11\u514d\u800c\u65e0\u803b\u3002\u9053\u4e4b\u4ee5\u5fb7\uff0c\u9f50\u4e4b\u4ee5\u793c\uff0c\u6709\u803b\u4e14\u683c\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u5341\u6709\u4e94\u800c\u5fd7\u4e8e\u5b66\uff0c\u4e09\u5341\u800c\u7acb\uff0c\u56db\u5341\u800c\u4e0d\u60d1\uff0c\u4e94\u5341\u800c\u77e5\u5929\u547d\uff0c\u516d\u5341\u800c\u8033\u987a\uff0c\u4e03\u5341\u800c\u4ece\u5fc3\u6240\u6b32\uff0c\u4e0d\u903e\u77e9\u3002\u201d\",\n    
\"\u5b5f\u61ff\u5b50\u95ee\u5b5d\uff0c\u5b50\u66f0\uff1a\u201c\u65e0\u8fdd\u3002\u201d\u6a0a\u8fdf\u5fa1\uff0c\u5b50\u544a\u4e4b\u66f0\uff1a\u201c\u5b5f\u5b59\u95ee\u5b5d\u4e8e\u6211\uff0c\u6211\u5bf9\u66f0\u2018\u65e0\u8fdd\u2019\u3002\u201d\u6a0a\u8fdf\u66f0\uff1a\u201c\u4f55\u8c13\u4e5f\uff1f\u201d\u5b50\u66f0\uff1a\u201c\u751f\uff0c\u4e8b\u4e4b\u4ee5\u793c\uff1b\u6b7b\uff0c\u846c\u4e4b\u4ee5\u793c\uff0c\u796d\u4e4b\u4ee5\u793c\u3002\u201d\",\n    \"\u5b5f\u6b66\u4f2f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u7236\u6bcd\u552f\u5176\u75be\u4e4b\u5fe7\u3002\u201d\",\n    \"\u5b50\u6e38\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u4eca\u4e4b\u5b5d\u8005\uff0c\u662f\u8c13\u80fd\u517b\u3002\u81f3\u4e8e\u72ac\u9a6c\u7686\u80fd\u6709\u517b\uff1b\u4e0d\u656c\uff0c\u4f55\u4ee5\u522b\u4e4e\uff1f\u201d\",\n    \"\u5b50\u590f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u8272\u96be\u3002\u6709\u4e8b\uff0c\u5f1f\u5b50\u670d\u5176\u52b3\uff1b\u6709\u9152\u98df\uff0c\u5148\u751f\u9994\uff0c\u66fe\u662f\u4ee5\u4e3a\u5b5d\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u4e0e\u56de\u8a00\u7ec8\u65e5\uff0c\u4e0d\u8fdd\uff0c\u5982\u611a\u3002\u9000\u800c\u7701\u5176\u79c1\uff0c\u4ea6\u8db3\u4ee5\u53d1\uff0c\u56de\u4e5f\u4e0d\u611a\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u89c6\u5176\u6240\u4ee5\uff0c\u89c2\u5176\u6240\u7531\uff0c\u5bdf\u5176\u6240\u5b89\uff0c\u4eba\u7109\u5ecb\u54c9\uff1f\u4eba\u7109\u5ecb\u54c9\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u6e29\u6545\u800c\u77e5\u65b0\uff0c\u53ef\u4ee5\u4e3a\u5e08\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u5668\u3002\u201d\"\n    ]\n\n# \u5047\u8bbe eval_texts \u662f\u60a8\u7684\u9a8c\u8bc1\u96c6\u6587\u672c\n# eval_input_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in eval_texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else 
\"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\ndef evaluate(model, tokenizer, device, eval_texts):\n    model.eval()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f\n    inputs = tokenizer(eval_texts, return_tensors='pt', padding=True, truncation=True, max_length=512)\n    input_ids = inputs&#91;'input_ids'].to(device)\n    attention_mask = inputs&#91;'attention_mask'].to(device)\n    labels = input_ids.clone()  # \u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u8f93\u5165\u548c\u6807\u7b7e\u901a\u5e38\u76f8\u540c\n\n    with torch.no_grad():  # \u5728\u8bc4\u4f30\u671f\u95f4\u4e0d\u8ba1\u7b97\u68af\u5ea6\n        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n        loss = outputs.loss\n        eval_loss = loss.item()\n\n    model.train()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u56de\u8bad\u7ec3\u6a21\u5f0f\n    return eval_loss\n\n# \u521d\u59cb\u5316\u6700\u4f4e\u635f\u5931\u4e3a\u65e0\u7a77\u5927\nbest_loss = float('inf')\nbest_model_state = None\n\n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\n\n# \u5fae\u8c03epochs\nepochs = 100\nfor epoch in range(epochs):\n    current_train_loss = 0\n    model.train()  # \u786e\u4fdd\u6a21\u578b\u5904\u4e8e\u8bad\u7ec3\u6a21\u5f0f\n\n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    current_train_loss = loss.item()\n\n    # \u8bc4\u4f30\u9a8c\u8bc1\u96c6\u635f\u5931\n    eval_loss = evaluate(model, tokenizer, device, eval_texts)\n    #print(f\"Epoch {epoch}: Loss {loss.item()}, Eval Loss {eval_loss}\")\n\n    # \u68c0\u67e5\u5f53\u524d\u635f\u5931\u662f\u5426\u662f\u8fc4\u4eca\u4e3a\u6b62\u6700\u4f4e\u7684\n    if current_train_loss &lt; best_loss:\n      
  best_loss = current_train_loss\n        best_model_state = model.state_dict()  # \u4fdd\u5b58\u6700\u4f73\u6a21\u578b\u72b6\u6001\n        print(f\"New best model found at epoch {epoch} with training loss {best_loss} and saved \")\n\n# \u5728\u8bad\u7ec3\u7ed3\u675f\u540e\u4fdd\u5b58\u6700\u4f73\u6a21\u578b\u72b6\u6001\nmodel.load_state_dict(best_model_state)\nmodel.save_pretrained(model_path)\ntokenizer.save_pretrained(model_path)\nprint(f\"Best model saved to {model_path} with training loss {best_loss}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids,\n    eos_token_id=tokenizer.eos_token_id,\n    attention_mask=attention_mask,\n    max_length=100,\n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.98,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup7.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup7.1.py\nNew best model found at epoch 0 with training loss 4.69078254699707 and saved\nNew best model found at epoch 1 with training loss 4.215892314910889 and saved\nNew best model found at epoch 2 with 
training loss 3.143723964691162 and saved\nNew best model found at epoch 3 with training loss 2.4889767169952393 and saved\nNew best model found at epoch 5 with training loss 1.9887076616287231 and saved\nNew best model found at epoch 6 with training loss 1.8634755611419678 and saved\nNew best model found at epoch 8 with training loss 1.5359219312667847 and saved\n......\nNew best model found at epoch 92 with training loss 0.03972531855106354 and saved\nNew best model found at epoch 96 with training loss 0.039385080337524414 and saved\nBest model saved to models\/gpt2_finetuned with training loss 0.039385080337524414\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\u5b50\u5165\u5219\u5f1f\uff01\u201d\u5b50\u6c42\u4e4b\uff1a\u201c\u53ef\u8c13\u4e4b\u4e5f\u3002\u201d\u5b50\u8c13\u4e4b\u77e5\u4eba\uff01\u201d\u5b50\u4e4b\u4ea6\u4e0d\u77e5\u800c\u548c\uff01\u201d\n1: \u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u4e0d\u5fe0\u4fe1\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u6c42\ufffd\u4e0d\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u77e5\u548c\uff0c\u60a3\u4e0d\u77e5\u4e8e\u77e5\n2: \u5b50\u66f0\uff1a\u201c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u4fe1\uff0c\u8a00\u53ef\u4e5f\u3002\u606d\u8fd1\u4e8e\u4e49\uff0c\u4ea6\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u201d\u5b50\u66f0\ufffd<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>8. 
\u52a0\u8f7d\u6700\u540e\u5fae\u8c03\u540e\u7684\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>setup8.1.py \u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel\nimport torch\n\n# \u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2_finetuned\"\ntokenizer = GPT2Tokenizer.from_pretrained(model_path)\nmodel = GPT2LMHeadModel.from_pretrained(model_path)\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\uff1a\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids, \n    attention_mask=attention_mask,\n    max_length=100, \n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.98,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup8.1.py <\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup8.1.py\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: 
\u5b50\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\ufffd\n1: \u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\u5b50\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff0c\u541b\u5b50\u4e4b\u4ee5\u793c\u8282\u4e4b\uff0c\u5176\u8bf8\u5f80\u800c\n2: \u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u66f0\uff1a\u201c\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u6e29\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\ufffd\n<\/code><\/pre>\n\n\n\n<p>\u53ef\u4ee5\u770b\u5230\uff1a\u201d\u4e0d\u77e5\u4eba\u4e5f\u201c \u662f\u65b0\u8bcd\uff1f<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>9. 
\u4f7f\u7528 gpt2\u66f4\u5927\u7684\u6a21\u578b\u67b6\u6784<\/strong><\/h2>\n\n\n\n<p>GPT-2 \u6a21\u578b\u67b6\u6784\u7684\u53c2\u6570\u4e3a <strong>124M<\/strong><\/p>\n\n\n\n<p><a href=\"https:\/\/huggingface.co\/openai-community\/gpt2\">https:\/\/huggingface.co\/openai-community\/gpt2<\/a><\/p>\n\n\n\n<p>GPT-2 Medium \u6a21\u578b\u67b6\u6784\u7684\u53c2\u6570\u4e3a <strong>355M<\/strong><\/p>\n\n\n\n<p><a href=\"https:\/\/huggingface.co\/openai-community\/gpt2-medium\">https:\/\/huggingface.co\/openai-community\/gpt2-medium<\/a><\/p>\n\n\n\n<p>GPT-2 Large \u6a21\u578b\u67b6\u6784\u7684\u53c2\u6570\u4e3a <strong><strong>774<\/strong>M<\/strong><\/p>\n\n\n\n<p><a href=\"https:\/\/huggingface.co\/openai-community\/gpt2-large\">https:\/\/huggingface.co\/openai-community\/gpt2-large<\/a><\/p>\n\n\n\n<p>GPT-2 XL \u6a21\u578b\u67b6\u6784\u7684\u53c2\u6570\u4e3a <strong><strong><strong>1.5B<\/strong><\/strong><\/strong><\/p>\n\n\n\n<p><a href=\"https:\/\/huggingface.co\/openai-community\/gpt2-xl\">https:\/\/huggingface.co\/openai-community\/gpt2-xl<\/a><\/p>\n\n\n\n<p>\u73b0\u5728\u8bd5\u8bd5 \u628a\u6a21\u578b\u6539\u4e3a GPT-2 XL<\/p>\n\n\n\n<p>setup9.3.py \u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup\nimport torch\n\n# \u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u548c\u5206\u8bcd\u5668\ntokenizer = GPT2Tokenizer.from_pretrained('models\/gpt2-xl')\nmodel = GPT2LMHeadModel.from_pretrained('models\/gpt2-xl')\n\n# \u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\n# \u786e\u4fdd\u4e3a\u5206\u8bcd\u5668\u8bbe\u7f6e\u4e86pad_token\nif tokenizer.pad_token is None:\n    tokenizer.add_special_tokens({'pad_token': '&#91;PAD]'})\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u51c6\u5907\u6570\u636e\ntexts = &#91;\n    # \u5b66\u800c\u7bc7\n    
\"\u5b50\u66f0\uff1a\u201c\u5b66\u800c\u65f6\u4e60\u4e4b\uff0c\u4e0d\u4ea6\u8bf4\u4e4e\uff1f\u6709\u670b\u81ea\u8fdc\u65b9\u6765\uff0c\u4e0d\u4ea6\u4e50\u4e4e\uff1f\u4eba\u4e0d\u77e5\u800c\u4e0d\u6120\uff0c\u4e0d\u4ea6\u541b\u5b50\u4e4e\uff1f\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u5176\u4e3a\u4eba\u4e5f\u5b5d\u5f1f\uff0c\u800c\u597d\u72af\u4e0a\u8005\uff0c\u9c9c\u77e3\uff1b\u4e0d\u597d\u72af\u4e0a\u800c\u597d\u4f5c\u4e71\u8005\uff0c\u672a\u4e4b\u6709\u4e5f\u3002\u541b\u5b50\u52a1\u672c\uff0c\u672c\u7acb\u800c\u9053\u751f\u3002\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u4e3a\u4ec1\u4e4b\u672c\u4e0e\uff01\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u543e\u65e5\u4e09\u7701\u543e\u8eab\uff1a\u4e3a\u4eba\u8c0b\u800c\u4e0d\u5fe0\u4e4e\uff1f\u4e0e\u670b\u53cb\u4ea4\u800c\u4e0d\u4fe1\u4e4e\uff1f\u4f20\u4e0d\u4e60\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\",\n    \"\u5b50\u590f\u66f0\uff1a\u201c\u8d24\u8d24\u6613\u8272\uff1b\u4e8b\u7236\u6bcd\uff0c\u80fd\u7aed\u5176\u529b\uff1b\u4e8b\u541b\uff0c\u80fd\u81f4\u5176\u8eab\uff1b\u4e0e\u670b\u53cb\u4ea4\uff0c\u8a00\u800c\u6709\u4fe1\u3002\u867d\u66f0\u672a\u5b66\uff0c\u543e\u5fc5\u8c13\u4e4b\u5b66\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\",\n    \"\u66fe\u5b50\u66f0\uff1a\u201c\u614e\u7ec8\u8ffd\u8fdc\uff0c\u6c11\u5fb7\u5f52\u539a\u77e3\u3002\u201d\",\n   
 \"\u5b50\u79bd\u95ee\u4e8e\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u81f3\u4e8e\u662f\u90a6\u4e5f\uff0c\u5fc5\u95fb\u5176\u653f\uff0c\u6c42\u4e4b\u4e0e\uff0c\u6291\u4e0e\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u8d21\u66f0\uff1a\u201c\u592b\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff0c\u5176\u8bf8\u5f02\u4e4e\u4eba\u4e4b\u6c42\u4e4b\u4e0e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u793c\u4e4b\u7528\uff0c\u548c\u4e3a\u8d35\u3002\u5148\u738b\u4e4b\u9053\uff0c\u65af\u4e3a\u7f8e\uff0c\u5c0f\u5927\u7531\u4e4b\u3002\u6709\u6240\u4e0d\u884c\uff0c\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u793c\u8282\u4e4b\uff0c\u4ea6\u4e0d\u53ef\u884c\u4e5f\u3002\u201d\",\n    \"\u6709\u5b50\u66f0\uff1a\u201c\u4fe1\u8fd1\u4e8e\u4e49\uff0c\u8a00\u53ef\u590d\u4e5f\u3002\u606d\u8fd1\u4e8e\u793c\uff0c\u8fdc\u803b\u8fb1\u4e5f\u3002\u56e0\u4e0d\u5931\u5176\u4eb2\uff0c\u4ea6\u53ef\u5b97\u4e5f\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u98df\u65e0\u6c42\u9971\uff0c\u5c45\u65e0\u6c42\u5b89\uff0c\u654f\u4e8e\u4e8b\u800c\u614e\u4e8e\u8a00\uff0c\u5c31\u6709\u9053\u800c\u6b63\u7109\u3002\u53ef\u8c13\u597d\u5b66\u4e5f\u5df2\u3002\u201d\",\n    \"\u5b50\u8d21\u66f0\uff1a\u201c\u8d2b\u800c\u65e0\u8c04\uff0c\u5bcc\u800c\u65e0\u9a84\uff0c\u4f55\u5982\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u53ef\u4e5f\u3002\u672a\u82e5\u8d2b\u800c\u4e50\uff0c\u5bcc\u800c\u597d\u793c\u8005\u4e5f\u3002\u201d\",\n    \"\u5b50\u8d21\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e91\uff1a\u2018\u5982\u5207\u5982\u78cb\uff0c\u5982\u7422\u5982\u78e8\u2019\uff0c\u5176\u65af\u4e4b\u8c13\u4e0e\uff1f\u201d\",\n    
\"\u5b50\u66f0\uff1a\u201c\u8d50\u4e5f\uff0c\u59cb\u53ef\u4e0e\u8a00\u300a\u8bd7\u300b\u5df2\u77e3\uff0c\u544a\u8bf8\u5f80\u800c\u77e5\u6765\u8005\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\"\n    ]\ninput_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in texts]\n\neval_texts = &#91;\n    # \u4e3a\u653f\u7bc7\n    \"\u5b50\u66f0\uff1a\u201c\u4e3a\u653f\u4ee5\u5fb7\uff0c\u8b6c\u5982\u5317\u8fb0\uff0c\u5c45\u5176\u6240\u800c\u4f17\u661f\u5171\u4e4b\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u300a\u8bd7\u300b\u4e09\u767e\uff0c\u4e00\u8a00\u4ee5\u853d\u4e4b\uff0c\u66f0\uff1a\u2018\u601d\u65e0\u90aa\u2019\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u9053\u4e4b\u4ee5\u653f\uff0c\u9f50\u4e4b\u4ee5\u5211\uff0c\u6c11\u514d\u800c\u65e0\u803b\u3002\u9053\u4e4b\u4ee5\u5fb7\uff0c\u9f50\u4e4b\u4ee5\u793c\uff0c\u6709\u803b\u4e14\u683c\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u5341\u6709\u4e94\u800c\u5fd7\u4e8e\u5b66\uff0c\u4e09\u5341\u800c\u7acb\uff0c\u56db\u5341\u800c\u4e0d\u60d1\uff0c\u4e94\u5341\u800c\u77e5\u5929\u547d\uff0c\u516d\u5341\u800c\u8033\u987a\uff0c\u4e03\u5341\u800c\u4ece\u5fc3\u6240\u6b32\uff0c\u4e0d\u903e\u77e9\u3002\u201d\",\n    \"\u5b5f\u61ff\u5b50\u95ee\u5b5d\uff0c\u5b50\u66f0\uff1a\u201c\u65e0\u8fdd\u3002\u201d\u6a0a\u8fdf\u5fa1\uff0c\u5b50\u544a\u4e4b\u66f0\uff1a\u201c\u5b5f\u5b59\u95ee\u5b5d\u4e8e\u6211\uff0c\u6211\u5bf9\u66f0\u2018\u65e0\u8fdd\u2019\u3002\u201d\u6a0a\u8fdf\u66f0\uff1a\u201c\u4f55\u8c13\u4e5f\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u751f\uff0c\u4e8b\u4e4b\u4ee5\u793c\uff1b\u6b7b\uff0c\u846c\u4e4b\u4ee5\u793c\uff0c\u796d\u4e4b\u4ee5\u793c\u3002\u201d\",\n    \"\u5b5f\u6b66\u4f2f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u7236\u6bcd\u552f\u5176\u75be\u4e4b\u5fe7\u3002\u201d\",\n    
\"\u5b50\u6e38\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u4eca\u4e4b\u5b5d\u8005\uff0c\u662f\u8c13\u80fd\u517b\u3002\u81f3\u4e8e\u72ac\u9a6c\u7686\u80fd\u6709\u517b\uff1b\u4e0d\u656c\uff0c\u4f55\u4ee5\u522b\u4e4e\uff1f\u201d\",\n    \"\u5b50\u590f\u95ee\u5b5d\u3002\u5b50\u66f0\uff1a\u201c\u8272\u96be\u3002\u6709\u4e8b\uff0c\u5f1f\u5b50\u670d\u5176\u52b3\uff1b\u6709\u9152\u98df\uff0c\u5148\u751f\u9994\uff0c\u66fe\u662f\u4ee5\u4e3a\u5b5d\u4e4e\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u543e\u4e0e\u56de\u8a00\u7ec8\u65e5\uff0c\u4e0d\u8fdd\uff0c\u5982\u611a\u3002\u9000\u800c\u7701\u5176\u79c1\uff0c\u4ea6\u8db3\u4ee5\u53d1\uff0c\u56de\u4e5f\u4e0d\u611a\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u89c6\u5176\u6240\u4ee5\uff0c\u89c2\u5176\u6240\u7531\uff0c\u5bdf\u5176\u6240\u5b89\uff0c\u4eba\u7109\u5ecb\u54c9\uff1f\u4eba\u7109\u5ecb\u54c9\uff1f\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u6e29\u6545\u800c\u77e5\u65b0\uff0c\u53ef\u4ee5\u4e3a\u5e08\u77e3\u3002\u201d\",\n    \"\u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u5668\u3002\u201d\"\n    ]\n\n# \u5047\u8bbe eval_texts \u662f\u60a8\u7684\u9a8c\u8bc1\u96c6\u6587\u672c\n# eval_input_ids = &#91;tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512) for text in eval_texts]\n\n# \u51c6\u5907\u6a21\u578b\u548c\u4f18\u5316\u5668\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\nmodel.train()\noptimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)\n\ndef evaluate(model, tokenizer, device, eval_texts):\n    model.eval()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f\n    inputs = tokenizer(eval_texts, return_tensors='pt', padding=True, truncation=True, max_length=512)\n    input_ids = inputs&#91;'input_ids'].to(device)\n    attention_mask = inputs&#91;'attention_mask'].to(device)\n    labels = input_ids.clone()  # 
\u5bf9\u4e8e\u8bed\u8a00\u6a21\u578b\uff0c\u8f93\u5165\u548c\u6807\u7b7e\u901a\u5e38\u76f8\u540c\n\n    with torch.no_grad():  # \u5728\u8bc4\u4f30\u671f\u95f4\u4e0d\u8ba1\u7b97\u68af\u5ea6\n        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n        loss = outputs.loss\n        eval_loss = loss.item()\n\n    model.train()  # \u5c06\u6a21\u578b\u8bbe\u7f6e\u56de\u8bad\u7ec3\u6a21\u5f0f\n    return eval_loss\n\n# \u521d\u59cb\u5316\u6700\u4f4e\u635f\u5931\u4e3a\u65e0\u7a77\u5927\nbest_loss = float('inf')\nbest_model_state = None\n\n# \u5b8c\u6210\u5fae\u8c03\u540e\u4fdd\u5b58\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2-xl_finetuned\"\n\n# \u5fae\u8c03epochs\nepochs = 100\nfor epoch in range(epochs):\n    current_train_loss = 0\n    model.train()  # \u786e\u4fdd\u6a21\u578b\u5904\u4e8e\u8bad\u7ec3\u6a21\u5f0f\n\n    for input_id in input_ids:\n        optimizer.zero_grad()\n        input_id = input_id.to(device)\n        outputs = model(input_id, labels=input_id)\n        loss = outputs.loss\n        loss.backward()\n        optimizer.step()\n\n    current_train_loss = loss.item()\n\n    # \u8bc4\u4f30\u9a8c\u8bc1\u96c6\u635f\u5931\n    eval_loss = evaluate(model, tokenizer, device, eval_texts)\n    #print(f\"Epoch {epoch}: Loss {loss.item()}, Eval Loss {eval_loss}\")\n\n    # \u68c0\u67e5\u5f53\u524d\u635f\u5931\u662f\u5426\u662f\u8fc4\u4eca\u4e3a\u6b62\u6700\u4f4e\u7684\n    if current_train_loss &lt; best_loss:\n        best_loss = current_train_loss\n        best_model_state = model.state_dict()  # \u4fdd\u5b58\u6700\u4f73\u6a21\u578b\u72b6\u6001\n        print(f\"New best model found at epoch {epoch} with training loss {best_loss} and saved \")\n\n# \u5728\u8bad\u7ec3\u7ed3\u675f\u540e\u4fdd\u5b58\u6700\u4f73\u6a21\u578b\u72b6\u6001\nmodel.load_state_dict(best_model_state)\nmodel.save_pretrained(model_path)\ntokenizer.save_pretrained(model_path)\nprint(f\"Best model saved to {model_path} with training loss 
{best_loss}\")\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids,\n    eos_token_id=tokenizer.eos_token_id,\n    attention_mask=attention_mask,\n    max_length=150,\n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.98,      # \u4f7f\u7528top-p\u91c7\u6837\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup9.3.py <\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup9.3.py\nNew best model found at epoch 0 with training loss 1.9190336465835571 and saved\nNew best model found at epoch 1 with training loss 1.4716061353683472 and saved\nNew best model found at epoch 2 with training loss 0.8410051465034485 and saved\nNew best model found at epoch 3 with training loss 0.4602312445640564 and saved\nNew best model found at epoch 4 with training loss 0.3903290331363678 and saved\nNew best model found at epoch 6 with training loss 0.36641600728034973 and saved\nNew best model found at epoch 7 with training loss 0.11104749143123627 and saved\nNew best model found at epoch 9 with training loss 0.11090903729200363 and saved\nNew best model found at epoch 10 with training 
loss 0.09642010927200317 and saved\nNew best model found at epoch 11 with training loss 0.0929558053612709 and saved\nNew best model found at epoch 13 with training loss 0.08717972785234451 and saved\nNew best model found at epoch 17 with training loss 0.07089772820472717 and saved\nNew best model found at epoch 19 with training loss 0.06960687041282654 and saved\nNew best model found at epoch 25 with training loss 0.06584648787975311 and saved\nNew best model found at epoch 31 with training loss 0.0644926130771637 and saved\nNew best model found at epoch 45 with training loss 0.062989741563797 and saved\nNew best model found at epoch 88 with training loss 0.06280163675546646 and saved\nBest model saved to models\/gpt2-xl_finetuned with training loss 0.06280163675546646\n1557.6128 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u7236\u5728\uff0c\u89c2\u5176\u5fd7\uff1b\u7236\u6ca1\uff0c\u89c2\u5176\u884c\uff1b\u4e09\u5e74\u65e0\u6539\u4e8e\u7236\u4e4b\u9053\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u7231\u81ea\u8fdc\u65b9\u6765\uff0c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u77e5\u548c\u800c\u548c\uff0c\u4e0d\u4ee5\u7236\u4e4b\u9053\uff0c\u201d\u77e5\u4e09\u5e74\u65e0\u6539\u4e8e\u7231\ufffd\n1: \u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u60a3\u4e0d\u77e5\u4e0d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u6c42\ufffd\u4e0d\u77e5\u4e0d\u6c42\ufffd\u4e0d\u77e5\u4e0d\n2: 
\u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\u77e3\u4ec1\uff01\u201d\u77e5\u5b66\u6587\u3002\u201d\u77e5\u7701\u543e\u8eab\uff01\u201d\u77e5\u5b66\u6587\u3002\u201d\u77e3\u4ec1\u4e4b\u5b66\u77e3\u3002\u201d\u77e5\u4e0d\u543e\u8eab\uff01\u201d\u77e5\u8282\u4e4b\u5b66\u77e3\u3002\u201d\u77e5\u4e0d\u53ef\u8c13\u5b66 \u77e3 \u4ec1\u4e4b\u5b66\u77e3\u3002\u201d\ufffd\n<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>10. \u5bf9\u6bd4\u539f\u6a21\u578b\u548c\u5fae\u8c03\u540e\u7684\u6a21\u578b<\/strong><\/h2>\n\n\n\n<p>\u8fd9\u91cc\u4f7f\u7528 gpt2-xl \u7684\u6a21\u578b\u67b6\u6784<\/p>\n\n\n\n<p>setup10.1.py \u4ee3\u7801<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>from transformers import GPT2Tokenizer, GPT2LMHeadModel\nimport torch\n\n# \u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b\u548c\u5206\u8bcd\u5668\nmodel_path = \"models\/gpt2-xl\"\ntokenizer = GPT2Tokenizer.from_pretrained(model_path)\nmodel = GPT2LMHeadModel.from_pretrained(model_path)\n\n# print the number of parameters in the model\nprint(sum(p.numel() for p in model.parameters())\/1e6, 'M parameters')\n\n# \u8bbe\u7f6e\u586b\u5145\u4ee4\u724c\u4e3a\u7ed3\u675f\u7b26\u53f7\nif tokenizer.pad_token is None:\n    tokenizer.pad_token = tokenizer.eos_token\n    # \u5982\u679c\u60a8\u5728\u4e4b\u540e\u4fdd\u5b58\u5e76\u91cd\u65b0\u52a0\u8f7d\u5206\u8bcd\u5668\uff0c\u4e5f\u9700\u8981\u786e\u4fdd\u6a21\u578b\u7684\u8bcd\u5d4c\u5165\u5927\u5c0f\u4e0e\u5206\u8bcd\u5668\u7684\u8bcd\u6c47\u8868\u5927\u5c0f\u5339\u914d\n    model.resize_token_embeddings(len(tokenizer))\n\n# \u6587\u672c\u751f\u6210\n# \u4f7f\u7528`__call__`\u65b9\u6cd5\u751f\u6210`input_ids`\u548c`attention_mask`\ninputs = tokenizer(\"\u5b50\u66f0\uff1a\", return_tensors='pt', padding=True, truncation=True, max_length=50)\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\nmodel.to(device)\n\n# \u63d0\u53d6`input_ids`\u548c`attention_mask`\ninput_ids = 
inputs&#91;'input_ids']\nattention_mask = inputs&#91;'attention_mask']\n\ninput_ids = input_ids.to(device)\nattention_mask = attention_mask.to(device)\n\n# \u6587\u672c\u751f\u6210\uff0c\u786e\u4fdd\u4f20\u5165`attention_mask`\nsample_outputs = model.generate(\n    input_ids,\n    eos_token_id=tokenizer.eos_token_id,\n    attention_mask=attention_mask,\n    min_length=20,  # \u8bbe\u7f6e\u4e00\u4e2a\u5408\u7406\u7684\u6700\u5c0f\u957f\u5ea6\n    max_length=150,  # \u8bbe\u7f6e\u4e00\u4e2a\u8f83\u5927\u7684\u6700\u5927\u957f\u5ea6\n    num_return_sequences=3,\n    do_sample=True,  # \u542f\u7528\u91c7\u6837\u4ee5\u589e\u52a0\u591a\u6837\u6027\n    top_p=0.98,      # \u4f7f\u7528top-p\u91c7\u6837\n    temperature=0.7,  # \u8c03\u6574\u6e29\u5ea6\u4ee5\u63a7\u5236\u968f\u673a\u6027\n)\n\n# \u6253\u5370\u751f\u6210\u7684\u6587\u672c\nfor i, sample_output in enumerate(sample_outputs):\n    print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup10.1.py<\/p>\n\n\n\n<pre class=\"wp-block-code has-small-font-size\"><code>$ python setup10.1.py\n1557.6112 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\ufee7\ufeec\ufef4\ufeec\ufeec\ufeee\ufee7\ufeed\ufeee\ufeeb\ufeed\ufeee\ufeee\ufeed\ufeee\ufeed\ufeee\ufeee\ufeed\ufeee\ufeed\ufeee\ufeee\ufeed\ufeee\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufeee\ufeed\ufffd\n1: \u5b50\u66f0\uff1a\uff11\uff14\uff20\uff12\uff13\uff14\uff15\uff15\uff17\uff1b\uff11\uff11\uff1d\uff14\uff18\uff14\uff15\uff15\uff17\uff1d\uff14\uff18\uff14\uff15\uff15\uff17\uff1d\uff14\uff18\uff14\uff15\uff15 \uff1e\uff22\uff23\uff20\uff26\uff22\uff25\uff21\uff22\uff24\uff24\uff27\uff22\uff26\uff23\ufffd\n2: \u5b50\u66f0\uff1a\ufef2\ufef3\uff19\uff21\ufef4\uff23\ufeee\ufeef\ufef4\ufef6\ufef7\ufef8\ufef9\ufefa\ufefb\ufefc\ufefd\ufefe  \ufec8 \ufec9 \ufeca 
\ufecb \ufecc \ufecd \ufece \ufecf \ufed0 \ufed1 \ufed2 \ufed3 \ufed4 \ufed5 \ufed6 \ufed7 \ufed8 \ufed9 \ufeda \ufedb \ufedc \ufedd \ufede \ufedf \ufee0 \ufee1 \ufee2 \ufee3 \ufffd\n\n\n\u53ef\u4ee5\u660e\u663e\u770b\u5230\uff0c\u6570\u636e\u91cc\u662f\u6ca1\u6709\u8bba\u8bed\u5185\u5bb9\u7684<\/code><\/pre>\n\n\n\n<p>\u4fee\u6539\u6a21\u578b\u4e3a\u5fae\u8c03\u540e\u7684\u6a21\u578b\uff0c\u518d\u770b\u770b\u6548\u679c<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>model_path = \"models\/gpt2-xl_finetuned\"<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup10.2.py<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\n$ python setup10.2.py\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2\/2 &#91;00:33&lt;00:00, 16.92s\/it]\n1557.6128 M parameters\nSetting <code>pad_token_id<\/code> to <code>eos_token_id<\/code>:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u656c\u4e8b\u800c\u4fe1\uff0c\u8282\u7528\u800c\u7231\u4eba\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\u9053\u5343\u4e58\u4e4b\u56fd\uff0c\u8282\u7528\u800c\u7231\u4e0d\u4ee5\u65f6\u3002\u201d\u541b\u5b50\u4e5f\uff0c\u4f7f\u6c11\u4ee5\u65f6\u3002\u201d\u5b5d\u5f1f\u4e5f\u8005\uff0c\u5176\u8bf8\u5f1f\u201d\u77e5\u65f6\u3002\u201d\u5f1f\u201d\u5176\u8bf8\u5f1f\u201d\u77e5\u65f6\u3002\u201d\n1: 
\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\u4e5f\u3002\u201d\u4e0d\u60a3\u4e0d\u77e5\u4e0d\ufffd\n2: \u5b50\u66f0\uff1a\u201c\u5f1f\u5b50\u5165\u5219\u5b5d\uff0c\u51fa\u5219\u5f1f\uff0c\u8c28\u800c\u4fe1\uff0c\u6cdb\u7231\u4f17\uff0c\u800c\u4eb2\u4ec1\uff0c\u884c\u6709\u4f59\u529b\uff0c\u5219\u4ee5\u5b66\u6587\u3002\u201d\u5f1f\u201d\u5b66\u6587\u3002\u201d\u5f1f\u201d\u5f1f\u201d\u4e0d\u4ec1\u4e4b\u5b66\u77e3\u3002\u201d\u201d\u5176\u8bf8\u5f1f\u201d \u77e5\u5219\u4ec1\u4e4b\u5b66\u77e5\u77e5\u77e3\u3002\u201d\u201d\u5f1f\u201d<\/code><\/pre>\n\n\n\n<p>\u8fd9\u91cc\u53ef\u4ee5\u770b\u5230\uff0c\u5fae\u8c03\u540e\u662f\u6709\u8bba\u8bed\u5185\u5bb9\u7684<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>11. 
\u662f\u5728\u8f83\u5c0f\u7684\u6a21\u578b\u4e0a\u5fae\u8c03\uff0c\u8fd8\u662f\u8f83\u5927\u7684\u6a21\u578b\u4e0a\u5fae\u8c03\uff1f<\/strong><\/h2>\n\n\n\n<p>\u8fd9\u91cc\u8981\u6bd4\u8f83\u4e00\u4e0b\u6700\u5c0f\u7684\u6a21\u578b\u548c\u6700\u5927\u7684\u6a21\u578b\u5fae\u8c03\u540e\u7684\u6548\u679c\uff0c\u5fae\u8c03\u7684\u6570\u636e\u662f\u4e00\u6837\u7684<\/p>\n\n\n\n<p>\u628asetup10.2.py\u91cc\u9762\u7684\u6a21\u578b\u4fee\u6539\u4e3agpt2_finetuned\uff0c\u770b\u770b\u6548\u679c<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>model_path = \"models\/gpt2_finetuned\"<\/code><\/pre>\n\n\n\n<p>\u8fd0\u884c setup11.1.py \u7684\u6548\u679c<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>$ python setup11.1.py\n124.440576 M parameters\nSetting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n0: \u5b50\u66f0\uff1a\u201c\u541b\u5b50\u4e0d\u91cd\u5219\u4e0d\u5a01\uff0c\u5b66\u5219\u4e0d\u56fa\u3002\u4e3b\u5fe0\u4fe1\uff0c\u65e0\u53cb\u4e0d\u5982\u5df1\u8005\uff0c\u8fc7\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\u5b50\u4e0d\u5982\u5df1\u8005\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\u5b50\u4e0d\u5df1\u8005\uff0c\u5219\u52ff\u60ee \u6539\u3002\u201d\u5b50\u4e0d\u77e5\u548c\u800c\u548c\uff0c\u5219\u52ff\u60ee\u6539\u3002\u201d\u5b50\u4e0d\u77e5\u4e8e\ufffd\n1: \u5b50\u66f0\uff1a\u201c\u5de7\u8a00\u4ee4\u8272\uff0c\u9c9c\u77e3\u4ec1\uff01\u201d\u5b50\u66f0\uff1a\u201c\u53ef\u8c13\u5b5d\u77e3\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u53ef\u4e0a\u606d\u8fd1\u4e8e\u4e49\uff01\u201d\u5b50\u6e29\u3001\u826f\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\u4e4b\u6c42\u4e4b\u4e5f\uff01\u201d\u5b50\u6e29\u3001\u606d\u3001\u4fed\u3001\u8ba9\u4ee5\u5f97\u4e4b\u3002\u592b\u5b50\n2: 
\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4eba\u4e4b\u4e0d\u5df1\u77e5\uff0c\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u66f0\uff1a\u201c\u4e0d\u60a3\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u8d21\u66f0\uff1a\u201c\u4e0d\u77e5\u548c\u800c\u60a3\u4e0d\u77e5\u548c\uff0c\ufffd\u201d\u5b50\u66f0\uff1a\u201c\u4e0d\u77e5\u4eba\u4e5f\u3002\u201d\u5b50\u697d \u793c\u8282\u4e4b\u9053\uff0c\u60a3\u4e0d\u77e5\u548c\u800c\ufffd\u201d\u5b50<\/code><\/pre>\n\n\n\n<p>\u6bd4\u8f83\u4e00\u4e0bsetup10.2.py \u7684\u7ed3\u679c\uff0c\u6709\u533a\u522b\u4e0d\uff1f\u4e0d\u597d\u8bf4\uff1f<\/p>\n\n\n\n<p>\u9009\u62e9\u5728\u8f83\u5c0f\u7684\u6a21\u578b\u8fd8\u662f\u8f83\u5927\u7684\u6a21\u578b\u4e0a\u8fdb\u884c\u5fae\u8c03\u4e3b\u8981\u53d6\u51b3\u4e8e\u51e0\u4e2a\u5173\u952e\u56e0\u7d20\uff1a\u6570\u636e\u96c6\u7684\u5927\u5c0f\u3001\u8ba1\u7b97\u8d44\u6e90\u3001\u5fae\u8c03\u548c\u63a8\u7406\u7684\u65f6\u95f4\u8981\u6c42\u3001\u4ee5\u53ca\u671f\u671b\u8fbe\u5230\u7684\u6027\u80fd\u3002\u4e0b\u9762\u662f\u8fd9\u4e24\u79cd\u9009\u62e9\u7684\u4e00\u4e9b\u8003\u8651\u56e0\u7d20\uff1a<\/p>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u5fae\u8c03\u8f83\u5c0f\u7684\u6a21\u578b<\/h3>\n\n\n\n<p><strong>\u4f18\u70b9<\/strong>\uff1a<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u8ba1\u7b97\u6210\u672c\u8f83\u4f4e<\/strong>\uff1a\u8f83\u5c0f\u7684\u6a21\u578b\u9700\u8981\u7684\u8ba1\u7b97\u8d44\u6e90\u66f4\u5c11\uff0c\u53ef\u4ee5\u5728\u6709\u9650\u7684\u786c\u4ef6\u4e0a\u8bad\u7ec3\u548c\u63a8\u7406\u3002<\/li>\n\n\n\n<li><strong>\u901f\u5ea6\u66f4\u5feb<\/strong>\uff1a\u8bad\u7ec3\u548c\u63a8\u7406\u65f6\u95f4\u8f83\u77ed\uff0c\u9002\u5408\u5feb\u901f\u8fed\u4ee3\u548c\u5728\u8d44\u6e90\u53d7\u9650\u7684\u73af\u5883\u4e2d\u4f7f\u7528\u3002<\/li>\n\n\n\n<li><strong>\u66f4\u5bb9\u6613\u8c03\u6574<\/strong>\uff1a\u8f83\u5c0f\u7684\u6a21\u578b\u53c2\u6570\u8f83\u5c11\uff0c\u53ef\u80fd\u66f4\u5bb9\u6613\u627e\u5230\u826f\u597d\u7684\u8d85\u53c2\u6570\u8bbe\u7f6e\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u7f3a\u70b9<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u6027\u80fd\u53ef\u80fd\u8f83\u4f4e<\/strong>\uff1a\u8f83\u5c0f\u7684\u6a21\u578b\u8868\u8fbe\u80fd\u529b\u6709\u9650\uff0c\u53ef\u80fd\u5728\u590d\u6742\u4efb\u52a1\u4e0a\u8fbe\u4e0d\u5230\u8f83\u5927\u6a21\u578b\u7684\u6027\u80fd\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u5fae\u8c03\u8f83\u5927\u7684\u6a21\u578b<\/h3>\n\n\n\n<p><strong>\u4f18\u70b9<\/strong>\uff1a<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u6f5c\u5728\u7684\u66f4\u9ad8\u6027\u80fd<\/strong>\uff1a\u8f83\u5927\u7684\u6a21\u578b\u5177\u6709\u66f4\u5f3a\u7684\u8868\u8fbe\u80fd\u529b\uff0c\u901a\u5e38\u80fd\u591f\u5728\u590d\u6742\u7684\u4efb\u52a1\u4e0a\u53d6\u5f97\u66f4\u597d\u7684\u7ed3\u679c\u3002<\/li>\n\n\n\n<li><strong>\u66f4\u597d\u7684\u6cdb\u5316\u80fd\u529b<\/strong>\uff1a\u8f83\u5927\u7684\u6a21\u578b\u7531\u4e8e\u53c2\u6570\u591a\uff0c\u80fd\u591f\u6355\u6349\u5230\u6570\u636e\u4e2d\u7684\u66f4\u7ec6\u5fae\u7684\u6a21\u5f0f\uff0c\u53ef\u80fd\u5728\u672a\u89c1\u8fc7\u7684\u6570\u636e\u4e0a\u6709\u66f4\u597d\u7684\u8868\u73b0\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u7f3a\u70b9<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u8ba1\u7b97\u6210\u672c\u9ad8<\/strong>\uff1a\u9700\u8981\u66f4\u591a\u7684\u8ba1\u7b97\u8d44\u6e90\uff0c\u8bad\u7ec3\u548c\u63a8\u7406\u65f6\u95f4\u66f4\u957f\u3002<\/li>\n\n\n\n<li><strong>\u8c03\u4f18\u53ef\u80fd\u66f4\u56f0\u96be<\/strong>\uff1a\u7531\u4e8e\u53c2\u6570\u91cf\u5de8\u5927\uff0c\u53ef\u80fd\u9700\u8981\u66f4\u591a\u7684\u5b9e\u9a8c\u6765\u627e\u5230\u6700\u4f73\u7684\u8d85\u53c2\u6570\u8bbe\u7f6e\u3002<\/li>\n\n\n\n<li><strong>\u8fc7\u62df\u5408\u7684\u98ce\u9669<\/strong>\uff1a\u5728\u6570\u636e\u91cf\u4e0d\u8db3\u7684\u60c5\u51b5\u4e0b\uff0c\u8f83\u5927\u7684\u6a21\u578b\u66f4\u5bb9\u6613\u8fc7\u62df\u5408\u8bad\u7ec3\u6570\u636e\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading has-medium-font-size\">\u5982\u4f55\u9009\u62e9<\/h3>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u5982\u679c\u60a8\u7684\u8ba1\u7b97\u8d44\u6e90\u6709\u9650<\/strong>\uff0c\u6216\u8005\u60a8\u5e0c\u671b\u5feb\u901f\u8fed\u4ee3\u548c\u90e8\u7f72\u6a21\u578b\uff0c\u90a3\u4e48\u5f00\u59cb\u65f6\u9009\u62e9\u8f83\u5c0f\u7684\u6a21\u578b\u53ef\u80fd\u66f4\u5408\u9002\u3002<\/li>\n\n\n\n<li><strong>\u5982\u679c\u60a8\u8ffd\u6c42\u6700\u9ad8\u7684\u6027\u80fd<\/strong>\uff0c\u5e76\u4e14\u6709\u8db3\u591f\u7684\u6570\u636e\u548c\u8ba1\u7b97\u8d44\u6e90\u6765\u652f\u6301\u8bad\u7ec3\u8f83\u5927\u7684\u6a21\u578b\uff0c\u90a3\u4e48\u5fae\u8c03\u8f83\u5927\u7684\u6a21\u578b\u53ef\u80fd\u662f\u66f4\u597d\u7684\u9009\u62e9\u3002<\/li>\n\n\n\n<li><strong>\u6570\u636e\u96c6\u5927\u5c0f<\/strong>\u4e5f\u662f\u4e00\u4e2a\u91cd\u8981\u56e0\u7d20\u3002\u8f83\u5927\u7684\u6a21\u578b\u9700\u8981\u66f4\u591a\u7684\u6570\u636e\u6765\u907f\u514d\u8fc7\u62df\u5408\u3002\u5982\u679c\u60a8\u7684\u6570\u636e\u96c6\u76f8\u5bf9\u8f83\u5c0f\uff0c\u53ef\u80fd\u9996\u5148\u8003\u8651\u4f7f\u7528\u8f83\u5c0f\u7684\u6a21\u578b\u6216\u8005\u4f7f\u7528\u6570\u636e\u589e\u5f3a\u3001\u8fc1\u79fb\u5b66\u4e60\u7b49\u6280\u672f\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u603b\u7684\u6765\u8bf4\uff0c\u9009\u62e9\u7684\u4f9d\u636e\u5e94\u8be5\u57fa\u4e8e\u5177\u4f53\u4efb\u52a1\u7684\u9700\u6c42\u3001\u53ef\u7528\u8d44\u6e90\u4ee5\u53ca\u671f\u671b\u8fbe\u5230\u7684\u6027\u80fd\u6807\u51c6\u3002\u5728\u67d0\u4e9b\u60c5\u51b5\u4e0b\uff0c\u4ece\u8f83\u5c0f\u7684\u6a21\u578b\u5f00\u59cb\uff0c\u7136\u540e\u6839\u636e\u9700\u8981\u9010\u6b65\u8fc1\u79fb\u5230\u8f83\u5927\u7684\u6a21\u578b\uff0c\u53ef\u4ee5\u662f\u4e00\u4e2a\u5b9e\u9645\u4e14\u9ad8\u6548\u7684\u7b56\u7565\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8981\u5c06\u5b57\u7b26\u7ea7\u522b\u7684\u8bed\u8a00\u6a21\u578b\u66f4\u6539\u4e3a\u7c7b\u4f3cGPT-2\u8fd9\u6837\u7684\u6a21\u578b\uff0c\u6211\u4eec\u9700\u8981\u8003\u8651\u4ee5\u4e0b\u51e0\u4e2a\u5173\u952e\u7684\u4fee\u6539\uff1a 
\u4e0b\u9762\u662f\u4e00\u4e2a\u7b80\u5316\u7684\u793a\u4f8b\uff0c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[313,289,443,442,312],"tags":[242,314],"class_list":["post-2080","post","type-post","status-publish","format-standard","hentry","category-chatgpt","category-gpt","category-llm","category-llms","category-openai","tag-chatgpt","tag-openai-api"],"views":3554,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2080","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2080"}],"version-history":[{"count":24,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2080\/revisions"}],"predecessor-version":[{"id":2566,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2080\/revisions\/2566"}],"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2080"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2080"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2080"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}