{"id":2661,"date":"2024-03-30T00:06:21","date_gmt":"2024-03-29T16:06:21","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=2661"},"modified":"2024-04-28T20:01:02","modified_gmt":"2024-04-28T12:01:02","slug":"%e4%ba%86%e8%a7%a3-qwen1-5%e6%a8%a1%e5%9e%8b","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=2661","title":{"rendered":"\u4e86\u89e3 Qwen1.5\u6a21\u578b"},"content":{"rendered":"\n<p>Qwen1.5 \u662f Qwen2 \u7684\u6d4b\u8bd5\u7248\uff0cQwen2 \u662f\u4e00\u79cd\u57fa\u4e8e Transformer \u7684\u7eaf\u89e3\u7801\u5668\u8bed\u8a00\u6a21\u578b\uff0c\u5728\u5927\u91cf\u6570\u636e\u4e0a\u8fdb\u884c\u4e86\u9884\u8bad\u7ec3\u3002\u4e0e\u4e4b\u524d\u53d1\u5e03\u7684 Qwen \u76f8\u6bd4\uff0c\u6539\u8fdb\u5305\u62ec\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>6 model sizes, including 0.5B, 1.8B, 4B, 7B, 14B, and 72B;<br>6 \u79cd\u578b\u53f7\u5c3a\u5bf8\uff0c\u5305\u62ec 0.5B\u30011.8B\u30014B\u30017B\u300114B \u548c 72B;<\/li>\n\n\n\n<li>Significant performance improvement in human preference for chat models;<br>\u4eba\u7c7b\u5bf9\u804a\u5929\u6a21\u578b\u7684\u504f\u597d\u663e\u8457\u63d0\u9ad8;<\/li>\n\n\n\n<li>Multilingual support of both base and chat models;<br>\u5bf9\u57fa\u672c\u6a21\u578b\u548c\u804a\u5929\u6a21\u578b\u7684\u591a\u8bed\u8a00\u652f\u6301;<\/li>\n\n\n\n<li>Stable support of 32K context length for models of all sizes<br>\u7a33\u5b9a\u652f\u6301 32K \u4e0a\u4e0b\u6587\u957f\u5ea6\uff0c\u9002\u7528\u4e8e\u5404\u79cd\u5c3a\u5bf8\u7684\u6a21\u578b<\/li>\n\n\n\n<li>No need of&nbsp;<code>trust_remote_code<\/code>.<br>\u4e0d\u9700\u8981&nbsp;<code>trust_remote_code<\/code>&nbsp;.<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>1. 
\u54b1\u4eec\u5148\u4ece\u6700\u5c0f\u7684\u6a21\u578b 0.5B \u5f00\u59cb<\/strong><\/h2>\n\n\n\n<p>\u5148\u770b\u770b\u6a21\u578b\u53c2\u6570<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\n\ndevice = \"cuda\"\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device)\nprint(model)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:sh decode:true \">Qwen2ForCausalLM(\n  (model): Qwen2Model(\n    (embed_tokens): Embedding(151936, 1024)\n    (layers): ModuleList(\n      (0-23): 24 x Qwen2DecoderLayer(\n        (self_attn): Qwen2SdpaAttention(\n          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n          (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n          (o_proj): Linear(in_features=1024, out_features=1024, bias=False)\n          (rotary_emb): Qwen2RotaryEmbedding()\n        )\n        (mlp): Qwen2MLP(\n          (gate_proj): Linear(in_features=1024, out_features=2816, bias=False)\n          (up_proj): Linear(in_features=1024, out_features=2816, bias=False)\n          (down_proj): Linear(in_features=2816, out_features=1024, bias=False)\n          (act_fn): SiLU()\n        )\n        (input_layernorm): Qwen2RMSNorm()\n        (post_attention_layernorm): Qwen2RMSNorm()\n      )\n    )\n    (norm): Qwen2RMSNorm()\n  )\n  (lm_head): Linear(in_features=1024, out_features=151936, bias=False)\n)<\/pre><\/div>\n\n\n\n<ol class=\"wp-block-list\">\n<li>model.embed_tokens<\/li>\n\n\n\n<li>model.norm<\/li>\n\n\n\n<li>lm_head<\/li>\n\n\n\n<li>model.layers 
(\u603b\u517124\u5c42)<\/li>\n<\/ol>\n\n\n\n<p>\u6211\u4eec\u53ef\u4ee5\u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\uff0c\u6bcf\u4e2aGPU\u53ef\u80fd\u662f\u591a\u5c42<\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>1.1. \u591a GPU \u6a21\u5f0f\u52a0\u8f7d\u8fd9\u4e2a\u5c0f\u6a21\u578b <strong>0.5B<\/strong><\/strong> <\/h3>\n\n\n\n<p>\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-0.5B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 24  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\uff08\u4e0e\u5d4c\u5165\u5c42\u540c\u5361\uff09\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# 
\u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"Give me a short introduction to large language model.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=512\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, 
skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">python test02.py\nNUM_GPUS: 8\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nA large language model is a type of artificial intelligence that can generate text based on the input given to it, without being explicitly programmed. It is commonly used in natural language processing (NLP) tasks such as language translation, chatbots, and language understanding.\n\nLarge language models use a variety of techniques to learn from and generate text, including neural networks, transformers, and recurrent neural networks (RNNs). These models are trained on large datasets of labeled text data, which allows them to recognize patterns and structures within the language and generate new text that is similar to the training data.\n\nOne of the key benefits of large language models is their ability to generate high-quality output quickly and consistently. They can be trained to understand the context and meaning behind the text, which makes them useful for applications where accuracy is critical.\n\nHowever, large language models also have some limitations. For example, they may struggle with complex or ambiguous sentences, and they may not always accurately capture the nuances of human language. Additionally, there is often a trade-off between the quality and speed of generated text, which can be challenging to balance when using large language models in real-world scenarios.\n\nOverall, large language models have had significant implications for fields such as natural language processing, machine learning, and computer vision, and continue to evolve and improve in terms of performance and adaptability.<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>1.2. 
\u8bd5\u8bd5\u957f\u6587\u7ae0<\/strong> <strong><strong>0.5B<\/strong><\/strong> <\/h3>\n\n\n\n<p>Qwen 1.5 \u6765\u81ea\u5b98\u65b9\u7684\u63cf\u8ff0\uff0c\u8bf4\u7a33\u5b9a\u652f\u6301 32K \u4e0a\u4e0b\u6587\u957f\u5ea6\uff0c\u9002\u7528\u4e8e\u5404\u79cd\u5c3a\u5bf8\u7684\u6a21\u578b\u3002<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-0.5B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 24  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\uff08\u4e0e\u5d4c\u5165\u5c42\u540c\u5361\uff09\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# 
\u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a3\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=32000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, 
skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p> \u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test03.py\nNUM_GPUS: 8\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u6807\u9898\uff1a\u300a\u672a\u6765\u4e4b\u5149\u300b\n\n\u5728\u672a\u6765\u76842078\u5e74\uff0c\u5730\u7403\u8868\u9762\u5df2\u7ecf\u88ab\u4e00\u9897\u540d\u4e3a\u201c\u661f\u9645\u706b\u7403\u201d\u7684\u672a\u77e5\u661f\u7403\u5b8c\u5168\u8986\u76d6\u3002\u8fd9\u9897\u661f\u7403\u4e0a\u5145\u6ee1\u4e86\u5947\u5f02\u7684\u73b0\u8c61\uff0c\u5305\u62ec\u65e0\u9650\u7684\u751f\u547d\u4f53\u3001\u65f6\u95f4\u65c5\u884c\u7b49\u3002\n\n\u4e3b\u89d2\u662f\u5730\u7403\u4e0a\u7684\u79d1\u5b66\u5bb6\u827e\u4f26\uff0c\u4ed6\u51b3\u5b9a\u5e26\u7740\u4ed6\u7684\u56e2\u961f\u53bb\u63a2\u7d22\u8fd9\u4e2a\u5168\u65b0\u7684\u4e16\u754c\u3002\u4ed6\u4eec\u5728\u661f\u9645\u706b\u7403\u4e0a\u53d1\u73b0\u4e86\u4e00\u79cd\u65b0\u578b\u7684\u80fd\u6e90\uff0c\u8fd9\u79cd\u80fd\u6e90\u662f\u4e00\u79cd\u53ef\u4ee5\u88ab\u4eba\u7c7b\u65e0\u9650\u6b21\u5229\u7528\u7684\u7269\u8d28\uff0c\u88ab\u79f0\u4e3a\u201c\u5149\u5b50\u201d\u3002\n\n\u7136\u800c\uff0c\u5149\u5b50\u5e76\u975e\u4e00\u5e06\u98ce\u987a\u3002\u4ed6\u4eec\u9996\u5148\u9047\u5230\u4e86\u4e00\u79cd\u65e0\u6cd5\u514b\u670d\u7684\u7269\u7406\u6311\u6218\u2014\u2014\u5149\u5b50\u5177\u6709\u6781\u9ad8\u7684\u80fd\u91cf\u5bc6\u5ea6\uff0c\u4e00\u65e6\u8fdb\u5165\u4eba\u7c7b\u7684\u76ae\u80a4\uff0c\u5c31\u4f1a\u77ac\u95f4\u71c3\u70e7\u3002\u4ed6\u4eec\u7684\u7814\u7a76\u65b9\u5411\u662f\u4ece\u5982\u4f55\u5c06\u5149\u5b50\u8f6c\u5316\u4e3a\u80fd\u91cf\u8fdb\u884c\u4f7f\u7528\u5f00\u59cb\u3002\n\n\u827e\u4f26\u548c\u4ed6\u7684\u56e2\u961f\u5f00\u59cb\u4e86\u6f2b\u957f\u7684\u5b9e\u9a8c\uff0c\u6700\u7ec8\u4ed6\u4eec\u6210\u529f\u5730\u5236\u9020\u51fa\u4
e86\u8fd9\u79cd\u65b0\u7684\u80fd\u6e90\u3002\u4f46\u662f\uff0c\u5149\u5b50\u7684\u51fa\u73b0\u4e5f\u5e26\u6765\u4e86\u4e00\u4e9b\u95ee\u9898\u3002\u4ed6\u4eec\u9700\u8981\u627e\u5230\u4e00\u79cd\u65b9\u6cd5\u6765\u4fdd\u62a4\u8fd9\u79cd\u80fd\u6e90\u4e0d\u88ab\u6c61\u67d3\uff0c\u540c\u65f6\u4e5f\u4e0d\u80fd\u8fc7\u5ea6\u5f00\u53d1\u548c\u5229\u7528\u3002\n\n\u5728\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u4ed6\u4eec\u4e0e\u4e00\u4e9b\u5916\u661f\u6587\u660e\u8fdb\u884c\u4e86\u4ea4\u6d41\uff0c\u4ed6\u4eec\u5411\u827e\u4f26\u63d0\u4f9b\u4e86\u5148\u8fdb\u7684\u80fd\u6e90\u751f\u4ea7\u6280\u672f\uff0c\u5e76\u63d0\u51fa\u4e86\u5bf9\u4ed6\u4eec\u7684\u4fdd\u62a4\u65b9\u6848\u3002\u827e\u4f26\u63a5\u53d7\u4e86\u8fd9\u4e9b\u63d0\u8bae\uff0c\u4e8e\u662f\u4ed6\u4eec\u5f00\u59cb\u4e86\u4e00\u573a\u5bf9\u6297\u5916\u661f\u6587\u660e\u7684\u6218\u6597\u3002\n\n\u5728\u6fc0\u70c8\u7684\u6218\u6597\u4e2d\uff0c\u4ed6\u4eec\u7684\u56e2\u961f\u906d\u53d7\u4e86\u91cd\u521b\uff0c\u4f46\u4ed6\u4eec\u5e76\u672a\u653e\u5f03\u3002\u4ed6\u4eec\u901a\u8fc7\u667a\u6167\u548c\u52c7\u6c14\uff0c\u6210\u529f\u5730\u51fb\u8d25\u4e86\u5916\u661f\u6587\u660e\uff0c\u4fdd\u62a4\u4e86\u4ed6\u4eec\u7684\u5bb6\u56ed\u3002\n\n\u5728\u6218\u6597\u7ed3\u675f\u540e\uff0c\u827e\u4f26\u56de\u5230\u4e86\u5730\u7403\u4e0a\uff0c\u4ed6\u611f\u8c22\u6240\u6709\u7684\u5e2e\u52a9\uff0c\u4e5f\u611f\u8c22\u4ed6\u7684\u670b\u53cb\u4eec\u3002\u4ed6\u544a\u8bc9\u4eba\u4eec\uff0c\u867d\u7136\u79d1\u6280\u7684\u53d1\u5c55\u5e26\u6765\u4e86\u5f88\u591a\u4fbf\u5229\uff0c\u4f46\u4e5f\u9700\u8981\u6211\u4eec\u73cd\u60dc\u6211\u4eec\u7684\u8d44\u6e90\uff0c\u4fdd\u62a4\u597d\u6211\u4eec\u7684\u73af\u5883\u3002\n\n\u8fd9\u5c31\u662f\u300a\u672a\u6765\u4e4b\u5149\u300b\uff0c\u4e00\u4e2a\u5145\u6ee1\u6311\u6218\u53c8\u5145\u6ee1\u5e0c\u671b\u7684\u6545\u4e8b\u3002\u5b83\u544a\u8bc9\u6211\u4eec\uff0c\u53ea\u8981\u6211\u4eec\u6562\u4e8e\u9762\u5bf9\u56f0\u96be\uff0c\u52c7\u4e8e\u521b\u65b0\uff0c\u5c31\u4e00\u5b9a\u80fd\u591f\u521b\u9020\
u51fa\u5c5e\u4e8e\u81ea\u5df1\u7684\u672a\u6765\u3002\n\nreal    1m4.372s\nuser    0m38.394s\nsys     0m8.390s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>1.3 \u8bd5\u8bd5 0.5B \u5355GPU\u52a0\u8f7d<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-0.5B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], 
return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<p>time python test05-0.5B-2.py<br>NUM_GPUS: 8<br>Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.<br>\u6807\u9898\uff1a\u661f\u9645\u5371\u673a<\/p>\n\n\n\n<p>\u6545\u4e8b\u7684\u4e3b\u89d2\u662f\u5730\u7403\u4e0a\u7684\u79d1\u5b66\u5bb6\uff0c\u4e9a\u5386\u5c71\u5927\u3002\u4ed6\u88ab\u9009\u4e3a\u4e86\u521b\u5efa\u4e00\u8258\u65b0\u7684\u706b\u661f\u8239\uff0c\u5e76\u5f00\u59cb\u4e86\u4ed6\u7684\u65c5\u7a0b\u3002\u4ed6\u7684\u98de\u8239\u540d\u4e3a\u201c\u745e\u5c14\u5fb7\u201d\uff0c\u7531\u5148\u8fdb\u7684\u4eba\u9020\u80fd\u6e90\u7cfb\u7edf\u548c\u5148\u8fdb\u7684\u901a\u8baf\u8bbe\u5907\u7ec4\u6210\u3002<\/p>\n\n\n\n<p>\u4e00\u5929\uff0c\u4e9a\u5386\u5c71\u5927\u5728\u7814\u7a76\u884c\u661f\u8868\u9762\u65f6\u53d1\u73b0\u4e86\u4e00\u4e2a\u5f02\u5e38\u7684\u73b0\u8c61\uff0c\u4ed6\u7684\u8239\u7a81\u7136\u5931\u53bb\u4e86\u52a8\u529b\u3002\u4ed6\u7acb\u523b\u542f\u52a8\u4e86\u5e94\u6025\u7a0b\u5e8f\uff0c\u7528\u4ed6\u7684\u6240\u6709\u6280\u672f\u6765\u4fee\u590d\u8fd9\u4e2a\u6545\u969c\u3002\u7136\u800c\uff0c\u5f53\u4ed6\u5728\u68c0\u67e5\u8239\u4e0a\u65f6\uff0c\u53d1\u73b0\u4e86\u4e00\u9897\u661f\u7403\u7684\u8868\u9762\u8986\u76d6\u7740\u4e00\u5c42\u795e\u79d8\u7684\u7269\u8d28\u3002\u8fd9\u4e9b\u7269\u8d28\u4f3c\u4e4e\u662f\u4e00\u79cd\u80fd\u591f\u5438\u6536\u548c\u8f6c\u6362\u80fd\u91cf\u7684\u80fd\u6e90\u3002\u4e9a\u5386\u5c71\u5927\u65e0\u6cd5\u7406\u89e3\u8fd9\u662f\u4ec0\u4e48\uff0c\u4ed6\u51b3\u5b9a\u4f7f\u7528\u4ed6\u7684\u79d1\u5b66\u77e5\u8bc6\u53bb\u5bfb\u627e\u7b54
\u6848\u3002<\/p>\n\n\n\n<p>\u5728\u63a5\u4e0b\u6765\u7684\u51e0\u4e2a\u6708\u91cc\uff0c\u4e9a\u5386\u5c71\u5927\u548c\u4ed6\u7684\u56e2\u961f\u4e00\u8d77\u7814\u7a76\u4e86\u8fd9\u79cd\u7269\u8d28\u7684\u5b58\u5728\u65b9\u5f0f\uff0c\u4ed6\u4eec\u53d1\u73b0\u8fd9\u9897\u661f\u7403\u7684\u8868\u9762\u6709\u7740\u4e00\u79cd\u5947\u7279\u7684\u80fd\u91cf\u91ca\u653e\u5668\u3002\u8fd9\u79cd\u80fd\u91cf\u91ca\u653e\u5668\u53ef\u4ee5\u5c06\u661f\u7403\u7684\u80fd\u91cf\u8f6c\u5316\u4e3a\u5149\uff0c\u7136\u540e\u4ee5\u70ed\u80fd\u7684\u5f62\u5f0f\u4f20\u8f93\u5230\u5176\u4ed6\u661f\u7403\u3002\u4e9a\u5386\u5c71\u5927\u51b3\u5b9a\u5229\u7528\u8fd9\u4e2a\u80fd\u6e90\u91ca\u653e\u5668\uff0c\u628a\u6240\u6709\u7684\u80fd\u91cf\u90fd\u96c6\u4e2d\u5728\u4e00\u6b21\u71c3\u6599\u91ca\u653e\u4e0a\uff0c\u5e0c\u671b\u8fd9\u6b21\u80fd\u6e90\u91ca\u653e\u80fd\u8ba9\u4ed6\u4eec\u7684\u98de\u8239\u6062\u590d\u6b63\u5e38\u8fd0\u884c\u3002<\/p>\n\n\n\n<p>\u7136\u800c\uff0c\u5728\u91ca\u653e\u80fd\u91cf\u7684\u8fc7\u7a0b\u4e2d\uff0c\u98de\u8239\u53d1\u751f\u4e86\u4e25\u91cd\u7684\u95ee\u9898\u3002\u5b83\u5f00\u59cb\u5411\u592a\u7a7a\u55b7\u5c04\u5927\u91cf\u7684\u71c3\u6599\uff0c\u4f46\u662f\u71c3\u6599\u7684\u71c3\u70e7\u6ca1\u6709\u5b8c\u5168\u7184\u706d\u3002\u6700\u540e\uff0c\u4e9a\u5386\u5c71\u5927\u548c\u4ed6\u7684\u56e2\u961f\u88ab\u8feb\u64a4\u56de\u4e86\u4ed6\u4eec\u7684\u98de\u8239\u3002\u4ed6\u4eec\u7684\u635f\u5931\u60e8\u91cd\uff0c\u4f46\u4ed6\u4eec\u7684\u52c7\u6c14\u548c\u51b3\u5fc3\u5374\u4f7f\u4ed6\u4eec\u575a\u6301\u4e86\u4e0b\u6765\u3002<\/p>\n\n\n\n<p>\u4ed6\u4eec\u7684\u4efb\u52a1\u4e0d\u4ec5\u62ef\u6551\u4e86\u4ed6\u4eec\u7684\u98de\u8239\uff0c\u4e5f\u62ef\u6551\u4e86\u4ed6\u4eec\u81ea\u5df1\u3002\u4e9a\u5386\u5c71\u5927\u6210\u4e3a\u4e86\u4eba\u7c7b\u5386\u53f2\u4e0a\u7b2c\u4e00\u4f4d\u6210\u529f\u5730\u901a\u8fc7\u822a\u5929\u98de\u884c\u6539\u53d8\u4e16\u754c\u7684\u4eba\u7269\u3002\u4ed6\u8ba9\u4eba\u4eec\u660e\u767d\uff0c\u53ea\u8981\u6709\u575a\u5b9a\u7684\u51b3\u
5fc3\u548c\u4e0d\u61c8\u7684\u52aa\u529b\uff0c\u5c31\u4e00\u5b9a\u80fd\u591f\u514b\u670d\u4efb\u4f55\u56f0\u96be\uff0c\u5b9e\u73b0\u81ea\u5df1\u7684\u68a6\u60f3\u3002<\/p>\n\n\n\n<p>\u5c3d\u7ba1\u4ed6\u4eec\u7684\u5192\u9669\u5145\u6ee1\u4e86\u6311\u6218\u548c\u98ce\u9669\uff0c\u4f46\u4ed6\u4eec\u4ecd\u7136\u575a\u4fe1\uff0c\u53ea\u8981\u4ed6\u4eec\u76f8\u4fe1\u81ea\u5df1\uff0c\u5c31\u53ef\u4ee5\u514b\u670d\u4efb\u4f55\u56f0\u96be\uff0c\u5b9e\u73b0\u81ea\u5df1\u7684\u68a6\u60f3\u3002\u4ed6\u4eec\u7684\u6545\u4e8b\u6fc0\u52b1\u7740\u4eba\u4eec\uff0c\u8ba9\u4eba\u4eec\u77e5\u9053\uff0c\u53ea\u8981\u6709\u52c7\u6c14\u548c\u667a\u6167\uff0c\u5c31\u80fd\u591f\u6539\u53d8\u4e16\u754c\u3002<\/p>\n\n\n\n<p>real 1m43.555s<br>user 0m32.418s<br>sys 0m10.565s<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>2. 1.8B \u6a21\u578b\uff0c\u591aGPU<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-1.8B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 24  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # 
\u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\uff08\u4e0e\u5d4c\u5165\u5c42\u540c\u5361\uff09\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = 
\"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\n\nprint(response)\n\n# \u8ba1\u7b97\u6bcf\u79d2\u5904\u7406\u7684token\u6570\u91cf\nnum_tokens_generated = generated_ids.shape[1]  # \u751f\u6210\u7684token\u603b\u6570\ntokens_per_second = num_tokens_generated \/ elapsed_time2\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Time: {elapsed_time} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u7ed3\u679c\u8fd0\u884c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-1.8B.py\nNUM_GPUS: 8\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 63.05974817276001 
seconds\n\u6807\u9898\uff1a\u7a7f\u8d8a\u65f6\u7a7a\u7684\u6218\u58eb\n\n\u5728\u4e00\u4e2a\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u793e\u4f1a\u5df2\u7ecf\u53d1\u5c55\u5230\u661f\u9645\u6b96\u6c11\u7684\u7a0b\u5ea6\u3002\u5728\u8fd9\u4e2a\u65f6\u4ee3\u4e2d\uff0c\u79d1\u6280\u9ad8\u5ea6\u53d1\u8fbe\uff0c\u4eba\u5de5\u667a\u80fd\u3001\u91cf\u5b50\u8ba1\u7b97\u673a\u548c\u65b0\u80fd\u6e90\u6280\u672f\u7684\u5e94\u7528\u5df2\u8d85\u8d8a\u4e86\u4eba\u7c7b\u60f3\u8c61\u3002\u7136\u800c\uff0c\u5c3d\u7ba1\u4eba\u4eec\u751f\u6d3b\u5728\u4e00\u4e2a\u7e41\u8363\u660c\u76db\u7684\u4e16\u754c\u91cc\uff0c\u4f46\u4e5f\u9762\u4e34\u7740\u8bb8\u591a\u672a\u77e5\u7684\u6311\u6218\u548c\u5371\u673a\u3002\n\n\u5728\u4e00\u6b21\u56fd\u9645\u6027\u7684\u661f\u9645\u7ade\u8d5b\u4e2d\uff0c\u4e00\u652f\u7531\u79d1\u5b66\u5bb6\u548c\u5de5\u7a0b\u5e08\u7ec4\u6210\u7684\u7cbe\u82f1\u56e2\u961f\u8d62\u5f97\u4e86\u6bd4\u8d5b\uff0c\u4ed6\u4eec\u6210\u529f\u5730\u5c06\u4e00\u9897\u53e4\u8001\u7684\u661f\u9645\u77f3\u5e26\u5230\u4e86\u4eba\u7c7b\u7684\u661f\u7403\u2014\u2014\u5730\u7403\u3002\u8fd9\u9897\u661f\u9645\u77f3\u88ab\u79f0\u4e3a\u201c\u65f6\u95f4\u4e4b\u7891\u201d\uff0c\u636e\u8bf4\u5b83\u62e5\u6709\u65e0\u9650\u7684\u65f6\u95f4\u65c5\u884c\u80fd\u529b\uff0c\u80fd\u591f\u8ba9\u63a2\u9669\u8005\u56de\u5230\u8fc7\u53bb\u6216\u672a\u6765\u7684\u4efb\u4f55\u4e00\u4e2a\u65f6\u523b\u3002\n\n\u968f\u7740\u661f\u9645\u77f3\u7684\u5230\u6765\uff0c\u4e00\u7cfb\u5217\u60ca\u4eba\u7684\u4e8b\u4ef6\u63a5\u8e35\u800c\u81f3\u3002\u79d1\u5b66\u5bb6\u4eec\u5229\u7528\u77f3\u7891\u7684\u529b\u91cf\u63a2\u7d22\u5e76\u7814\u7a76\u5730\u7403\u7684\u5386\u53f2\u4e0e\u672a\u6765\uff0c\u53d1\u73b0\u4e86\u4e00\u4e9b\u4ee5\u524d\u672a\u88ab\u4eba\u7c7b\u77e5\u6653\u7684\u79d8\u5bc6\u3002\u540c\u65f6\uff0c\u77f3\u7891\u4e5f\u5f15\u53d1\u4e86\u4e00\u573a\u5927\u89c4\u6a21\u7684\u4eba\u7c7b\u89c9\u9192\u8fd0\u52a8\uff0c\u4eba\u4eec\u5f00\u59cb\u8d28\u7591\u8fc7\u53bb\u7684\u9519\u8bef\u51b3\u7b56\u548c\u5bf
9\u672a\u6765\u672a\u6765\u7684\u6050\u60e7\u3002\u8d8a\u6765\u8d8a\u591a\u7684\u4eba\u9009\u62e9\u79bb\u5f00\u5bb6\u4e61\uff0c\u8e0f\u4e0a\u5bfb\u627e\u7b54\u6848\u548c\u6539\u53d8\u547d\u8fd0\u7684\u65c5\u7a0b\u3002\n\n\u5176\u4e2d\uff0c\u4e00\u4f4d\u540d\u53eb\u827e\u7c73\u4e3d\u7684\u5973\u6027\u6210\u4e3a\u4e86\u8fd9\u573a\u53d8\u9769\u7684\u4e3b\u8981\u63a8\u52a8\u8005\u3002\u5979\u662f\u4e00\u4f4d\u6709\u7740\u8d85\u51e1\u667a\u6167\u548c\u575a\u97e7\u610f\u5fd7\u7684\u5e74\u8f7b\u79d1\u5b66\u5bb6\uff0c\u4e5f\u662f\u77f3\u7891\u4f7f\u7528\u8005\u4e4b\u4e00\u3002\u5728\u77f3\u7891\u7684\u5e2e\u52a9\u4e0b\uff0c\u827e\u7c73\u4e3d\u89e3\u5f00\u4e86\u8bb8\u591a\u590d\u6742\u7684\u79d1\u5b66\u8c1c\u56e2\uff0c\u751a\u81f3\u5728\u9762\u5bf9\u4e00\u4e9b\u770b\u4f3c\u4e0d\u53ef\u80fd\u89e3\u51b3\u7684\u95ee\u9898\u65f6\uff0c\u5979\u51ed\u501f\u81ea\u5df1\u7684\u52c7\u6c14\u548c\u51b3\u5fc3\u627e\u5230\u4e86\u89e3\u51b3\u65b9\u6848\u3002\n\n\u5728\u7ecf\u5386\u4e86\u4e00\u7cfb\u5217\u751f\u6b7b\u8003\u9a8c\u540e\uff0c\u827e\u7c73\u4e3d\u6700\u7ec8\u53d1\u73b0\u4e86\u4eba\u7c7b\u793e\u4f1a\u7684\u672c\u8d28\u95ee\u9898\u6240\u5728\uff1a\u6211\u4eec\u7684\u884c\u4e3a\u6a21\u5f0f\u6b63\u5728\u7834\u574f\u6211\u4eec\u81ea\u8eab\u548c\u540e\u4ee3\u7684\u751f\u6d3b\u8d28\u91cf\u3002\u4e3a\u4e86\u6539\u53d8\u8fd9\u4e2a\u72b6\u51b5\uff0c\u827e\u7c73\u4e3d\u51b3\u5b9a\u8fd4\u56de\u8fc7\u53bb\uff0c\u963b\u6b62\u90a3\u4e9b\u5bfc\u81f4\u4eba\u7c7b\u706d\u4ea1\u7684\u884c\u4e3a\u3002\u5979\u7684\u76ee\u6807\u662f\u5524\u9192\u4eba\u4eec\u5bf9\u8fc7\u53bb\u7684\u53cd\u601d\uff0c\u8ba9\u4eba\u4eec\u610f\u8bc6\u5230\u9519\u8bef\u7684\u51b3\u7b56\uff0c\u5e76\u91c7\u53d6\u884c\u52a8\u6765\u7ea0\u6b63\u5b83\u4eec\u3002\n\n\u827e\u7c73\u4e3d\u5e26\u7740\u77f3\u7891\u56de\u5230\u4e86\u8fc7\u53bb\uff0c\u4f46\u8fd9\u6b21\u5979\u4e0d\u518d\u662f\u666e\u901a\u7684\u79d1\u5b66\u5bb6\u3002\u5979\u7684\u8eab\u4efd\u88ab\u8d4b\u4e88\u4e86\u4e00\u9879\u5168\u65b0\u7684\u4f7f\u547d\u2014\u2014\u65f6\
u95f4\u5b88\u62a4\u8005\u3002\u5979\u4e0d\u4ec5\u8981\u4fdd\u62a4\u65f6\u95f4\u7891\u4e0d\u88ab\u6ee5\u7528\uff0c\u8fd8\u8981\u901a\u8fc7\u81ea\u5df1\u7684\u884c\u52a8\u5f15\u5bfc\u5386\u53f2\u8d70\u5411\u66f4\u7f8e\u597d\u7684\u672a\u6765\u3002\u5728\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u5979\u9047\u5230\u4e86\u5404\u79cd\u5404\u6837\u7684\u4eba\uff0c\u5305\u62ec\u66fe\u7ecf\u88ab\u5979\u62ef\u6551\u7684\u5e78\u5b58\u8005\u3001\u88ab\u5386\u53f2\u9057\u5fd8\u7684\u82f1\u96c4\u548c\u5bf9\u827e\u7c73\u4e3d\u6000\u6709\u654c\u610f\u7684\u4eba\u3002\n\n\u7ecf\u8fc7\u65e0\u6570\u6b21\u7684\u6218\u6597\u548c\u632b\u6298\uff0c\u827e\u7c73\u4e3d\u9010\u6e10\u6210\u4e3a\u4e86\u4e00\u4e2a\u771f\u6b63\u7684\u9886\u8896\u3002\u5979\u7528\u81ea\u5df1\u7684\u77e5\u8bc6\u548c\u52c7\u6c14\u8d62\u5f97\u4e86\u4eba\u4eec\u7684\u4fe1\u4efb\u548c\u652f\u6301\uff0c\u5e26\u9886\u7740\u65f6\u95f4\u5b88\u62a4\u8005\u7684\u961f\u4f0d\u8d70\u5411\u4e86\u65b0\u7684\u5f81\u9014\u3002\u4ed6\u4eec\u7684\u6545\u4e8b\u6fc0\u52b1\u7740\u6bcf\u4e00\u4e2a\u8bd5\u56fe\u627e\u5230\u81ea\u6211\u4ef7\u503c\u548c\u6539\u53d8\u4e16\u754c\u7684\u4eba\uff0c\u4ed6\u4eec\u5728\u8ffd\u6c42\u771f\u7406\u7684\u8def\u4e0a\uff0c\u5171\u540c\u521b\u9020\u4e86\u4e00\u4e2a\u66f4\u52a0\u516c\u6b63\u3001\u548c\u5e73\u3001\u53ef\u6301\u7eed\u7684\u672a\u6765\u3002\n\n\u300a\u7a7f\u8d8a\u65f6\u7a7a\u7684\u6218\u58eb\u300b\u4ee5\u827e\u7c73\u4e3d\u4e3a\u4ee3\u8868\u7684\u4e00\u7fa4\u4eba\u7684\u5192\u9669\u65c5\u7a0b\u4e3a\u7ebf\u7d22\uff0c\u63cf\u7ed8\u4e86\u4e00\u4e2a\u5145\u6ee1\u6311\u6218\u548c\u5e0c\u671b\u7684\u672a\u6765\u4e16\u754c\u3002\u5b83\u901a\u8fc7\u6df1\u5165\u63a2\u8ba8\u4eba\u7c7b\u6587\u660e\u53d1\u5c55\u7684\u5386\u7a0b\uff0c\u63ed\u793a\u51fa\u6211\u4eec\u5bf9\u8fc7\u53bb\u548c\u672a\u6765\u7684\u8ba4\u77e5\u5c40\u9650\uff0c\u4ee5\u53ca\u5982\u4f55\u901a\u8fc7\u52c7\u6562\u9762\u5bf9\u56f0\u96be\uff0c\u5b9e\u73b0\u81ea\u6211\u6210\u957f\u548c\u5386\u53f2\u8fdb\u6b65\u3002\u8fd9\u90e8\u4f5c\u54c1\u65e2\
u662f\u4e00\u90e8\u79d1\u5e7b\u5c0f\u8bf4\uff0c\u53c8\u662f\u4e00\u90e8\u5173\u4e8e\u4eba\u6027\u3001\u52c7\u6c14\u548c\u8d23\u4efb\u7684\u6545\u4e8b\uff0c\u5bd3\u610f\u6df1\u8fdc\uff0c\u5f15\u4eba\u6df1\u601d\u3002\nGenerated Tokens: 584\nTokens per second: 6.205085219425609\nTotal Generation Time: 94.11635446548462 seconds\n\nreal    2m41.035s\nuser    1m48.874s\nsys     0m20.042s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>2.2 1.8B \u6a21\u578b\uff0c\u5355GPU<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-1.8B-Chat\"\n\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nend_time = time.time()\nelapsed_time = end_time - 
start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-1.8B-2.py\nNUM_GPUS: 8\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 67.75650358200073 
seconds\n\u6807\u9898\uff1a\u5b87\u5b99\u4e4b\u95e8\n\n\u5728\u9065\u8fdc\u7684\u94f6\u6cb3\u7cfb\u4e2d\uff0c\u6709\u4e00\u4e2a\u540d\u4e3a\u827e\u5229\u65af\u7684\u661f\u7cfb\uff0c\u88ab\u8a89\u4e3a\u751f\u547d\u548c\u79d1\u6280\u7684\u6447\u7bee\u3002\u827e\u5229\u65af\u62e5\u6709\u4f17\u591a\u7f8e\u4e3d\u7684\u661f\u7403\uff0c\u5176\u4e2d\u4e00\u9897\u540d\u53eb\u201c\u7eb3\u9c81\u201d\u7684\u661f\u7403\u4ee5\u5176\u72ec\u7279\u7684\u73af\u5883\u548c\u4e30\u5bcc\u7684\u8d44\u6e90\u8457\u79f0\u3002\n\n\u7eb3\u9c81\u661f\u7403\u8868\u9762\u8986\u76d6\u7740\u4e00\u5c42\u7a00\u8584\u7684\u51b0\u5c42\uff0c\u72b9\u5982\u4e00\u7247\u6676\u83b9\u5254\u900f\u7684\u6d77\u6d0b\uff0c\u53cd\u5c04\u51fa\u65e0\u5c3d\u7684\u84dd\u8272\u661f\u5149\u3002\u5728\u8fd9\u4e2a\u5bd2\u51b7\u7684\u661f\u7403\u4e0a\uff0c\u751f\u6d3b\u7740\u4e00\u79cd\u667a\u6167\u751f\u7269\u2014\u2014\u7eb3\u9c81\u4eba\uff0c\u4ed6\u4eec\u4ee5\u81ea\u7136\u7684\u65b9\u5f0f\u751f\u5b58\u548c\u53d1\u5c55\uff0c\u64c5\u957f\u79cd\u690d\u548c\u91c7\u96c6\u5404\u79cd\u98df\u7269\u548c\u8d44\u6e90\uff0c\u4ee5\u6b64\u6765\u7ef4\u6301\u4ed6\u4eec\u7684\u751f\u5b58\u9700\u6c42\u3002\n\n\u7eb3\u9c81\u4eba\u7684\u79d1\u6280\u8fdc\u8d85\u5176\u4ed6\u540c\u7c7b\u7269\u79cd\uff0c\u4ed6\u4eec\u4f7f\u7528\u4e00\u79cd\u795e\u79d8\u7684\u80fd\u91cf\u77f3\u5757\uff0c\u53ef\u4ee5\u5c06\u4efb\u4f55\u7269\u8d28\u8f6c\u5316\u4e3a\u80fd\u6e90\uff0c\u751a\u81f3\u53ef\u4ee5\u76f4\u63a5\u64cd\u63a7\u548c\u64cd\u7eb5\u6574\u4e2a\u661f\u7403\u7684\u751f\u6001\u7cfb\u7edf\u3002\u8fd9\u79cd\u80fd\u91cf\u77f3\u5757\u88ab\u4ed6\u4eec\u89c6\u4e3a\u751f\u547d\u7684\u57fa\u77f3\uff0c\u662f\u4ed6\u4eec\u667a\u6167\u548c\u529b\u91cf\u7684\u8c61\u5f81\u3002\n\n\u7136\u800c\uff0c\u7eb3\u9c81\u661f\u7403\u7684\u79d8\u5bc6\u4e5f\u9010\u6e10\u6d6e\u51fa\u6c34\u9762\u3002\u636e\u4f20\u8bf4\uff0c\u7eb3\u9c81\u661f\u7403\u66fe\u7ecf\u662f\u4e00\u9897\u5de8\u5927\u7684\u6052\u661f\uff0c\u7531\u4e8e\u67d0\u79cd\u672a\u77e5\u7684\u539f\u56e0\uff0c\u5b83\
u7684\u6838\u5fc3\u53d1\u751f\u4e86\u7206\u70b8\uff0c\u5f3a\u5927\u7684\u51b2\u51fb\u6ce2\u5c06\u5b83\u5e26\u5230\u4e86\u8fd9\u4e2a\u8352\u51c9\u800c\u964c\u751f\u7684\u884c\u661f\u4e0a\u3002\u968f\u7740\u65f6\u95f4\u7684\u63a8\u79fb\uff0c\u7eb3\u9c81\u661f\u7403\u4e0a\u7684\u751f\u547d\u9010\u6e10\u8fdb\u5316\uff0c\u5f62\u6210\u4e86\u72ec\u7279\u7684\u7eb3\u9c81\u6587\u660e\u3002\n\n\u7136\u800c\uff0c\u968f\u7740\u7eb3\u9c81\u6587\u660e\u7684\u53d1\u5c55\uff0c\u4ed6\u4eec\u7684\u751f\u6d3b\u65b9\u5f0f\u5f00\u59cb\u4e0e\u5730\u7403\u4eba\u7c7b\u622a\u7136\u4e0d\u540c\u3002\u4ed6\u4eec\u4e0d\u518d\u4f9d\u8d56\u4e8e\u5927\u81ea\u7136\u7684\u6069\u8d50\uff0c\u800c\u662f\u901a\u8fc7\u79d1\u6280\u624b\u6bb5\u63a7\u5236\u548c\u5229\u7528\u81ea\u7136\u8d44\u6e90\uff0c\u751a\u81f3\u5f00\u59cb\u5bf9\u5176\u4ed6\u661f\u7403\u8fdb\u884c\u6b96\u6c11\u548c\u5f00\u53d1\u3002\u8fd9\u79cd\u884c\u4e3a\u5f15\u8d77\u4e86\u7eb3\u9c81\u793e\u4f1a\u5185\u90e8\u7684\u77db\u76fe\u548c\u51b2\u7a81\uff0c\u4e00\u4e9b\u7eb3\u9c81\u4eba\u9009\u62e9\u79bb\u5f00\u4ed6\u4eec\u7684\u5bb6\u56ed\uff0c\u524d\u5f80\u66f4\u52a0\u9002\u5408\u4ed6\u4eec\u7684\u661f\u7403\u751f\u6d3b\u3002\n\n\u5728\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u7eb3\u9c81\u79d1\u5b66\u5bb6\u53d1\u73b0\u4e86\u4e00\u79cd\u65b0\u7684\u80fd\u91cf\u77f3\u5757\uff0c\u5b83\u4e0d\u4ec5\u53ef\u4ee5\u5c06\u7eb3\u9c81\u661f\u7403\u4e0a\u7684\u7269\u8d28\u8f6c\u5316\u4e3a\u80fd\u6e90\uff0c\u8fd8\u53ef\u4ee5\u4fee\u590d\u7eb3\u9c81\u661f\u7403\u7684\u6838\u5fc3\uff0c\u5e76\u6062\u590d\u5176\u66fe\u7ecf\u7684\u5149\u8f89\u3002\u4f46\u8fd9\u79cd\u65b0\u80fd\u91cf\u77f3\u5757\u5e76\u4e0d\u7b80\u5355\uff0c\u9700\u8981\u6781\u9ad8\u7684\u6280\u672f\u542b\u91cf\u548c\u590d\u6742\u7684\u80fd\u91cf\u8f6c\u5316\u8fc7\u7a0b\u3002\n\n\u79d1\u5b66\u5bb6\u4eec\u51b3\u5b9a\u6df1\u5165\u7814\u7a76\u7eb3\u9c81\u661f\u7403\u7684\u80fd\u91cf\u77f3\u5757\uff0c\u5e0c\u671b\u4ece\u4e2d\u83b7\u53d6\u7a81\u7834\u6027\u7684\u79d1\u6280\u7a81\u7834\u3002\u4ed6\u4eec\u5
728\u661f\u7403\u7684\u5730\u4e0b\u6df1\u5904\uff0c\u6316\u6398\u51fa\u4e86\u4e00\u4e2a\u795e\u79d8\u7684\u80fd\u91cf\u77ff\u8109\uff0c\u90a3\u91cc\u5145\u6ee1\u4e86\u5947\u7279\u7684\u80fd\u91cf\u6676\u4f53\uff0c\u6bcf\u4e00\u4e2a\u90fd\u8574\u542b\u7740\u5168\u65b0\u7684\u80fd\u91cf\u8f6c\u6362\u539f\u7406\u3002\n\n\u7ecf\u8fc7\u591a\u5e74\u7684\u63a2\u7d22\u548c\u5b9e\u9a8c\uff0c\u79d1\u5b66\u5bb6\u4eec\u7ec8\u4e8e\u6210\u529f\u63d0\u53d6\u51fa\u4e86\u8fd9\u79cd\u80fd\u91cf\u6676\u4f53\uff0c\u5e76\u5c06\u5176\u5e94\u7528\u4e8e\u7eb3\u9c81\u661f\u7403\u7684\u80fd\u91cf\u77f3\u5757\u4e0a\u3002\u5728\u65e0\u6570\u6b21\u7684\u8bd5\u9a8c\u548c\u5931\u8d25\u540e\uff0c\u4ed6\u4eec\u7ec8\u4e8e\u6210\u529f\u5730\u5c06\u8fd9\u79cd\u80fd\u91cf\u6676\u4f53\u8f6c\u5316\u4e3a\u80fd\u6e90\uff0c\u4f7f\u5f97\u7eb3\u9c81\u661f\u7403\u7684\u80fd\u6e90\u5f97\u5230\u4e86\u5168\u9762\u7684\u6062\u590d\u3002\n\n\u540c\u65f6\uff0c\u79d1\u5b66\u5bb6\u4eec\u4e5f\u53d1\u73b0\u4e86\u7eb3\u9c81\u661f\u7403\u7684\u6838\u5fc3\u53d1\u751f\u4e86\u8d28\u7684\u53d8\u5316\uff0c\u6838\u5fc3\u4e2d\u7684\u6838\u5fc3\u77f3\u5757\u5df2\u7ecf\u5931\u53bb\u4e86\u5148\u524d\u7684\u80fd\u91cf\u6ce2\u52a8\uff0c\u663e\u793a\u51fa\u660e\u663e\u7684\u80fd\u91cf\u6d41\u5931\u8ff9\u8c61\u3002\u8fd9\u65e0\u7591\u662f\u4e00\u4e2a\u91cd\u5927\u7684\u5371\u673a\uff0c\u5982\u679c\u7ee7\u7eed\u6309\u7167\u73b0\u6709\u7684\u53d1\u5c55\u6a21\u5f0f\uff0c\u7eb3\u9c81\u661f\u7403\u5c06\u4f1a\u5931\u53bb\u6838\u5fc3\u7684\u529b\u91cf\uff0c\u65e0\u6cd5\u62b5\u6297\u5916\u90e8\u7684\u5165\u4fb5\u548c\u5a01\u80c1\u3002\n\n\u4e8e\u662f\uff0c\u79d1\u5b66\u5bb6\u4eec\u51b3\u5b9a\u91cd\u542f\u7eb3\u9c81\u661f\u7403\u7684\u6838\u5fc3\u77f3\u5757\uff0c\u8bd5\u56fe\u4fee\u590d\u5176\u539f\u6709\u7684\u80fd\u91cf\u72b6\u6001\u3002\u4ed6\u4eec\u8fdb\u884c\u4e86\u7cbe\u5fc3\u7684\u6a21\u62df\u5b9e\u9a8c\uff0c\u6700\u7ec8\u6210\u529f\u5730\u4f7f\u6838\u5fc3\u77f3\u5757\u91cd\u65b0\u83b7\u5f97\u4e86\u80fd\u91cf\u6ce2\u52a8\uff0c\u4f7f\u517
6\u518d\u6b21\u5177\u6709\u4e86\u80fd\u91cf\u9a71\u52a8\u7684\u529f\u80fd\u3002\n\n\u7ecf\u8fc7\u8fd9\u6b21\u91cd\u7f6e\uff0c\u7eb3\u9c81\u661f\u7403\u7684\u4eba\u7c7b\u793e\u533a\u5f00\u59cb\u91cd\u65b0\u5d1b\u8d77\uff0c\u4ed6\u4eec\u7528\u5148\u8fdb\u7684\u79d1\u6280\u624b\u6bb5\u6539\u9020\u4e86\u81ea\u5df1\u7684\u751f\u6d3b\u73af\u5883\uff0c\u6210\u529f\u5730\u62b5\u5fa1\u4f4f\u4e86\u6765\u81ea\u5b87\u5b99\u7684\u5404\u79cd\u5916\u529b\u4fb5\u88ad\u3002\u4e0e\u6b64\u540c\u65f6\uff0c\u7eb3\u9c81\u4eba\u4e5f\u5f00\u59cb\u53cd\u601d\u81ea\u5df1\u7684\u53d1\u5c55\u6a21\u5f0f\uff0c\u8ba4\u8bc6\u5230\u8fc7\u5ea6\u5f00\u53d1\u548c\u5229\u7528\u8d44\u6e90\u53ef\u80fd\u5e26\u6765\u7684\u4e25\u91cd\u540e\u679c\uff0c\u5f00\u59cb\u4e86\u53ef\u6301\u7eed\u53d1\u5c55\u7684\u9053\u8def\u3002\n\n\u7eb3\u9c81\u661f\u7403\u7684\u6545\u4e8b\u544a\u8bc9\u6211\u4eec\uff0c\u79d1\u6280\u5e76\u975e\u4e07\u80fd\uff0c\u53ea\u6709\u5f53\u4eba\u4eec\u5c0a\u91cd\u81ea\u7136\uff0c\u5408\u7406\u5229\u7528\u81ea\u7136\u8d44\u6e90\uff0c\u624d\u80fd\u5b9e\u73b0\u771f\u6b63\u7684\u53ef\u6301\u7eed\u53d1\u5c55\u3002\u5728\u672a\u6765\u7684\u4e16\u754c\u91cc\uff0c\u7eb3\u9c81\u4eba\u5c06\u4ee5\u6b64\u4e3a\u9274\uff0c\u7ee7\u7eed\u5728\u79d1\u6280\u4e0e\u73af\u4fdd\u4e4b\u95f4\u5bfb\u627e\u5e73\u8861\uff0c\u4e3a\u6784\u5efa\u548c\u8c10\u3001\u7e41\u8363\u7684\u5b87\u5b99\u4e16\u754c\u505a\u51fa\u66f4\u5927\u7684\u8d21\u732e\u3002\nGenerated Tokens: 715\nTokens per second: 29.989652369309425\nTotal Generation Time: 23.841556787490845 seconds\n\nreal    1m34.365s\nuser    0m41.232s\nsys     0m18.896s\n<\/pre><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>3. 
4B \u591aGPU \u52a0\u8f7d<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-4B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 40  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# 
\u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ 
elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-4B.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2\/2 [03:07&lt;00:00, 93.74s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 191.43567776679993 seconds\n\u975e\u5e38\u62b1\u6b49\uff0c\u4f5c\u4e3a\u4e00\u4e2aAI\u6587\u672c\u751f\u6210\u6a21\u578b\uff0c\u6211\u65e0\u6cd5\u4e00\u6b21\u6027\u4e3a\u60a8\u751f\u62102\u4e07\u5b57\u7684\u5185\u5bb9\u3002\u4f46\u662f\uff0c\u6211\u53ef\u4ee5\u5e2e\u52a9\u60a8\u5f00\u59cb\u5199 \u4f5c\u6216\u8005\u4e3a\u60a8\u63d0\u4f9b\u4e00\u4e9b\u521b\u610f\u548c\u7075\u611f\u3002\u8bf7\u60a8\u544a\u8bc9\u6211\u66f4\u591a\u7684\u7ec6\u8282\uff0c\u4f8b\u5982\u6545\u4e8b\u7684\u4e3b\u9898\u3001\u4e3b\u8981\u89d2\u8272\u7b49\u3002\u8fd9\u6837\u6211\u624d\u80fd\u66f4\u597d\u5730\u4e3a\u60a8\u63d0\u4f9b\u5e2e\u52a9\u3002\nGenerated Tokens: 54\nTokens per second: 2.780886061220365\nTotal Generation Time: 19.41827130317688 seconds\n\nreal    3m35.004s\nuser    0m56.952s\nsys     0m27.279s<\/pre><\/div>\n\n\n\n<p>\u8fd9\u4e48\u5b9e\u5728\u7684\u4e48\uff1f\u518d\u8fd0\u884c\u4e00\u6b21<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-4B.py\nNUM_GPUS: 8\nLoading checkpoint shards: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2\/2 [02:10&lt;00:00, 65.05s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 134.07681465148926 seconds\n\u6807\u9898\uff1a\u300a\u661f\u9645\u4e4b\u773c\u300b\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u6587\u660e\u5df2\u7ecf\u8fbe\u5230\u4e86\u524d\u6240\u672a\u6709\u7684\u9ad8\u5ea6\u3002\u79d1\u6280\u7684\u53d1\u5c55\uff0c\u4f7f\u5f97\u4eba\u7c7b\u5f00\u59cb\u5bf9\u5b87\u5b99\u8fdb\u884c\u63a2\u7d22\uff0c\u5bfb\u627e\u65b0\u7684\u751f\u547d\u5f62\u5f0f\u548c\u53ef\u80fd\u5b58\u5728\u7684\u5916\u661f\u6587\u660e\u3002\n\n\u5728\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u79d1\u5b66\u5bb6\u4eec\u53d1\u73b0\u4e86\u4e00\u79cd\u540d\u4e3a\"\u661f\u9645\u4e4b\u773c\"\u7684\u795e\u79d8\u7269\u8d28\uff0c\u5b83\u80fd\u591f\u5438\u6536\u548c\u53cd\u5c04\u5149\u7ebf\uff0c\u4f7f\u5f97\u4efb\u4f55\u751f\u7269\u90fd\u80fd\u901a\u8fc7\u5b83\u770b\u5230\u66f4\u8fdc\u7684\u5730\u65b9\uff0c\u751a\u81f3\u53ef\u4ee5\u770b\u5230\u5176\u4ed6\u661f\u7403\u7684\u4fe1\u606f\u3002\n\n\u7136\u800c\uff0c\u8fd9\u79cd\"\u661f\u9645\u4e4b\u773c\"\u7684\u51fa\u73b0\uff0c\u5e76\u6ca1\u6709\u8ba9\u4eba\u7c7b\u6b23\u559c\u82e5\u72c2\uff0c\u53cd\u800c\u5f15\u8d77\u4e86\u6050\u614c\u3002\u56e0\u4e3a\u79d1\u5b66\u5bb6\u4eec\u53d1\u73b0\uff0c\u8fd9\u79cd\u7269\u8d28\u53ef\u80fd\u4f1a\u5438\u5f15\u5916\u6765\u751f\u547d\u7684\u5165\u4fb5\uff0c\u800c\u4e14\u5982\u679c\u88ab\u6ee5\u7528\uff0c\u4e5f\u6709\u53ef\u80fd\u5bf9\u5730\u7403\u9020\u6210\u4e25\u91cd\u7684\u7834\u574f\u3002\n\n\u4e8e\u662f\uff0c\u4e00\u573a\u5173\u4e8e\u5982\u4f55\u4fdd\u62a4\u548c\u5229\u7528\"\u661f\u9645\u4e4b\u773c\"\u7684\u8fa9\u8bba\u5f00\u59cb\u4e86\u3002\u4e00\u4e9b\u4eba\u4e3b\u5f20\u5173\u95ed\"\u661f\u9645\u4e4b\u773c\"\uff0c\u9632\u6b62\u4e0d\u5fc5\u8981\u7684\u5e72\u6270\uff1b\u53e6\u4e00\u4e9b\u4eba\u5219\u8ba4\u4e3a\u5e94\u8b
e5\u7ee7\u7eed\u7814\u7a76\u548c\u5f00\u53d1\u5b83\u7684\u6f5c\u529b\uff0c\u4ee5\u63a8\u52a8\u4eba\u7c7b\u7684\u79d1\u6280\u8fdb\u6b65\u3002\n\n\u5728\u8fd9\u4e2a\u4e89\u8bba\u4e2d\uff0c\u4e00\u4f4d\u540d\u53eb\u6770\u514b\u7684\u5e74\u8f7b\u4eba\u7ad9\u4e86\u51fa\u6765\u3002\u4ed6\u63d0\u51fa\uff0c\u4eba\u7c7b\u4e0d\u5e94\u8be5\u5bb3\u6015\u672a\u77e5\uff0c\u800c\u5e94\u8be5\u52c7\u4e8e\u63a2\u7d22\u548c\u6311\u6218\u3002\u4ed6\u575a\u4fe1\uff0c\u53ea\u6709\u901a\u8fc7\u4e0d\u65ad\u7684\u52aa\u529b\u548c\u63a2\u7d22\uff0c\u4eba\u7c7b\u624d\u80fd\u627e\u5230\u771f\u6b63\u7684\u81ea\u6211\uff0c\u624d\u80fd\u6539\u53d8\u4e16\u754c\u3002\n\n\u4e8e\u662f\uff0c\u6770\u514b\u51b3\u5b9a\u72ec\u81ea\u4e00\u4e2a\u4eba\u53bb\u63a2\u7d22\"\u661f\u9645\u4e4b\u773c\"\u7684\u79d8\u5bc6\u3002\u7ecf\u8fc7\u4e86\u65e0\u6570\u4e2a\u65e5\u591c\u7684\u52aa\u529b\uff0c\u6770\u514b\u7ec8\u4e8e\u6210\u529f\u5730\u63ed\u793a\u4e86\"\u661f\u9645\u4e4b\u773c\"\u7684\u79d8\u5bc6\u3002\u539f\u6765\uff0c\u8fd9\u79cd\u7269\u8d28\u5176\u5b9e\u662f\u4e00\u79cd\u53ef\u4ee5\u8ba9\u4eba\u770b\u5230\u771f\u5b9e\u4e16\u754c\u7684\u80fd\u91cf\uff0c\u53ea\u8981\u6b63\u786e\u4f7f\u7528\uff0c\u5c31\u80fd\u591f\u8ba9\u4eba\u7c7b\u66f4\u597d\u5730\u4e86\u89e3\u548c\u63a2\u7d22\u5b87\u5b99\u3002\n\n\u5728\u6770\u514b\u7684\u9886\u5bfc\u4e0b\uff0c\u4eba\u7c7b\u6210\u529f\u5730\u5229\u7528\u4e86\"\u661f\u9645\u4e4b\u773c\"\u7684\u529b\u91cf\uff0c\u8fdb\u884c\u4e86\u5927\u91cf\u7684\u5b87\u5b99\u63a2\u7d22\u3002\u4ed6\u4eec\u53d1\u73b0\u4e86\u66f4\u591a\u7684\u65b0\u661f\u7403\uff0c\u4e86\u89e3\u5230\u4e86\u66f4\u591a\u751f\u547d\u7684\u5965\u79d8\uff0c\u751a\u81f3\u627e\u5230\u4e86\u4e0e\u5730\u7403\u76f8\u4f3c\u7684\u751f\u547d\u5f62\u5f0f\u3002\n\n\u7136\u800c\uff0c\u8fd9\u4e2a\u8fc7\u7a0b\u5e76\u6ca1\u6709\u7ed3\u675f\u3002\u7531\u4e8e\u5730\u7403\u7684\u80fd\u91cf\u6d88\u8017\u8fc7\u5927\uff0c\"\u661f\u9645\u4e4b\u773c\"\u5f00\u59cb\u9010\u6e10\u8870\u9000\u3002\u4e3a\u4e86\u4fdd\u62a4\u8fd9\u4e2a\u73cd\u8d
35\u7684\u8d44\u6e90\uff0c\u6770\u514b\u63d0\u51fa\u4e86\u4e00\u4e2a\u5927\u80c6\u7684\u8ba1\u5212\uff1a\u4eba\u7c7b\u9700\u8981\u5bfb\u627e\u65b0\u7684\u80fd\u6e90\uff0c\u6765\u66ff\u4ee3\"\u661f\u9645\u4e4b\u773c\"\u7684\u80fd\u91cf\u3002\n\n\u5728\u6770\u514b\u7684\u5e26\u9886\u4e0b\uff0c\u4eba\u7c7b\u5f00\u59cb\u4e86\u8270\u96be\u7684\u5bfb\u627e\u65b0\u80fd\u6e90\u7684\u8fc7\u7a0b\u3002\u4ed6\u4eec\u7814\u53d1\u51fa\u4e86\u5404\u79cd\u5148\u8fdb\u7684\u79d1\u6280\uff0c\u8bd5\u56fe\u627e\u5230\u4e00\u79cd\u80fd\u591f\u66ff\u4ee3\"\u661f\u9645\u4e4b\u773c\"\u7684\u65b0\u80fd\u91cf\u3002\u7ecf\u8fc7\u65e0\u6570\u6b21\u7684\u5931\u8d25\u548c\u632b\u6298\uff0c\u4eba\u7c7b\u7ec8\u4e8e\u6210\u529f\u5730\u627e\u5230\u4e86\u65b0\u7684\u80fd\u6e90\u3002\n\n\u968f\u7740\u65b0\u80fd\u6e90\u7684\u6295\u5165\uff0c\"\u661f\u9645\u4e4b\u773c\"\u518d\u6b21\u6062\u590d\u4e86\u6d3b\u529b\uff0c\u4eba\u7c7b\u7684\u5b87\u5b99\u63a2\u7d22\u53c8\u91cd\u65b0\u8fdb\u5165\u4e86\u9ad8\u6f6e\u3002\u4ed6\u4eec\u4e0d\u4ec5\u627e\u5230\u4e86\u66f4\u591a\u7684\u65b0\u661f\u7403\uff0c\u8fd8\u53d1\u73b0\u4e86\u66f4\u591a\u7684\u751f\u547d\u5f62\u5f0f\uff0c\u751a\u81f3\u53d1\u73b0\u4e86\u4e00\u4e9b\u4e0e\u5730\u7403\u76f8\u4f3c\u7684\u751f\u547d\u5f62\u5f0f\u3002\n\n\u6700\u540e\uff0c\u6770\u514b\u5e26\u9886\u4eba\u7c7b\u6210\u529f\u5730\u6539\u53d8\u4e86\u4e16\u754c\u3002\u4ed6\u4eec\u7528\u81ea\u5df1\u7684\u667a\u6167\u548c\u52c7\u6c14\uff0c\u521b\u9020\u4e86\u4e00\u4e2a\u5168\u65b0\u7684\u3001\u5145\u6ee1\u5e0c\u671b\u7684\u4e16\u754c\u3002\u4eba\u7c7b\u4e0d\u518d\u662f\u5b64\u72ec\u7684\u4e2a\u4f53\uff0c\u800c\u662f\u6210\u4e3a\u4e86\u5b87\u5b99\u7684\u4e00\u90e8\u5206\uff0c\u4e0e\u6240\u6709\u751f\u547d\u5171\u540c\u6784\u6210\u4e86\u4e00\u4e2a\u548c\u8c10\u7684\u5b87\u5b99\u3002\n\n\u8fd9\u5c31\u662f\"\u661f\u9645\u4e4b\u773c\"\u7684\u6545\u4e8b\uff0c\u4e00\u4e2a\u5173\u4e8e\u63a2\u7d22\u3001\u6311\u6218\u548c\u5e0c\u671b\u7684\u6545\u4e8b\u3002\u8fd9\u4e2a\u6545\u4e8b\u544a\u8bc9\u6211\u4e
ec\uff0c\u53ea\u8981\u6709\u68a6\u60f3\uff0c\u6709\u52c7\u6c14\uff0c\u5c31\u4e00\u5b9a\u80fd\u591f\u5b9e\u73b0\u81ea\u5df1\u7684\u76ee\u6807\u3002\nGenerated Tokens: 566\nTokens per second: 2.9105564016095413\nTotal Generation Time: 194.46453595161438 seconds\n\nreal    5m32.603s\nuser    3m43.533s\nsys     0m36.183s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>3.2. 4B \u5355GPU<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-4B-Chat\"\n\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt 
= \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-4B-2.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2\/2 [02:03&lt;00:00, 61.86s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 127.5276780128479 seconds\n\u5bf9\u4e0d\u8d77\uff0c\u7531\u4e8e\u7bc7\u5e45\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u4e3a\u60a8\u63d0\u4f9b\u4e00\u4e2a\u5b8c\u6574\u76842\u4e07\u5b57\u7684\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u60a8\u5217\u51fa\u4e00\u4e2a\u5927\u81f4\u7684\u60c5 
\u8282\u5927\u7eb2\u548c\u5f00\u5934\u90e8\u5206\uff1a\n\n\u6807\u9898\uff1a\u661f\u5c18\u4e2d\u7684\u6551\u8d4e\n\n\u4e00\u3001\u5f00\u5934\n\n\u5728\u4e00\u9897\u9065\u8fdc\u7684\u661f\u7403\u4e0a\uff0c\u79d1\u5b66\u5bb6\u4eec\u53d1\u73b0\u4e86\u4e00\u79cd\u80fd\u591f\u5b9e\u73b0\u661f\u9645\u65c5\u884c\u7684\u65b0\u6280\u672f\u2014\u2014\"\u5149\u5b50\u5f15\u64ce\"\u3002\n\n\u4e8c\u3001\u60c5\u8282\u53d1\u5c55\n\n\u79d1\u5b66\u5bb6\u4eec\u5229\u7528\u5149\u5b50\u5f15\u64ce\u63a2\u7d22\u5b87\u5b99\uff0c\u5bfb\u627e\u53ef\u80fd\u5b58\u5728\u7684\u5916\u661f\u751f\u547d\u3002\u7136\u800c\uff0c\u5728\u4e00\u6b21\u5192\u9669\u4e2d\uff0c\u4ed6\u4eec\u610f\u5916\u649e\u5165\u4e86\u4e00\u4e2a\u672a\u77e5\u7684\u9ed1\u6d1e\u3002\n\n\u4e09\u3001\u8f6c\u6298\u70b9\n\n\u5f53\u4ed6\u4eec\u7684\u98de\u8239\u5728\u9ed1\u6d1e\u8fb9\u7f18\u51fa\u73b0\u65f6\uff0c\u4ed6\u4eec\u88ab\u5438\u5165\u4e86\u9ed1\u6d1e\uff0c\u6210\u4e3a\u4e86\u4e00\u7fa4\u88ab\u56f0\u5728\u8fd9\u4e2a\u5b87\u5b99\u9ed1\u6697\u4e16\u754c\u4e2d\u7684\u5e78\u5b58\u8005\u3002\n\n\u56db\u3001\u9ad8\u6f6e\n\n\u4ed6\u4eec\u8bd5\u56fe\u627e\u5230\u9003\u8131\u9ed1\u6d1e\u7684\u65b9\u6cd5\uff0c\u4f46\u662f\u8fd9\u4e2a\u8fc7\u7a0b\u5145\u6ee1\u4e86\u56f0\u96be\u548c\u6311\u6218\u3002\u5728\u8fd9\u671f\u95f4\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u8bb8\u591a\u5173\u4e8e\u8fd9\u4e2a\u9ed1\u6d1e\u7684\u79d8\u5bc6\uff0c\u751a\u81f3\u53ef\u80fd\u63ed\u793a\u51fa\u4e00\u4e2a\u66f4\u5927\u7684\u5b87\u5b99\u79d8\u5bc6\u3002\n\n\u4e94\u3001\u7ed3\u5c40\n\n\u5728\u7ecf\u5386\u4e86\u65e0\u6570\u7684\u632b\u6298\u548c\u56f0\u96be\u540e\uff0c\u4ed6\u4eec\u6700\u7ec8\u627e\u5230\u4e86\u9003\u8131\u9ed1\u6d1e\u7684\u65b9\u6cd5\uff0c\u5e76\u6210\u529f\u5730\u56de\u5230\u4e86\u81ea\u5df1\u7684\u661f\u7403\u3002\u867d\u7136\u4ed6\u4eec\u5931\u53bb\u4e86\u8bb8\u591a\u5b9d\u8d35\u7684\u65f6\u95f4\uff0c\u4f46\u4ed6\u4eec\u4e5f\u4ece\u8fd9\u6b21\u7ecf\u5386\u4e2d\u5b66\u5230\u4e86\u8bb8\u591a\u5b9d\u8d35\u7684\u77e5\u8bc6\u548c\u7ecf\u9a8c\u
3002\n\n\u8fd9\u662f\u4e00\u4e2a\u5927\u6982\u7684\u60c5\u8282\u6897\u6982\uff0c\u60a8\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u6897\u6982\u5f00\u59cb\u7f16\u5199\u60a8\u7684\u5c0f\u8bf4\u3002\u6211\u5e0c\u671b\u8fd9\u80fd\u5bf9\u60a8\u6709\u6240\u5e2e\u52a9\uff01\nGenerated Tokens: 248\nTokens per second: 23.96475511752906\nTotal Generation Time: 10.348530530929565 seconds\n\nreal    2m20.750s\nuser    0m42.449s\nsys     0m32.544s<\/pre><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>4. 7B\u7684\u6a21\u578b<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-7B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 32  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# 
\u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in 
zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-7B.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4\/4 [04:33&lt;00:00, 68.44s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u6807\u9898\uff1a\u661f\u9645\u5c18\u57c3\u7684\u56de\u54cd\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u5df2\u7ecf\u6b65\u5165\u4e86\u79d1\u6280\u9ad8\u5ea6\u53d1\u8fbe\u7684\u65b0\u7eaa\u5143\u3002\u4eba\u7c7b\u4e0d\u518d\u5c40\u9650\u4e8e\u84dd\u8272\u661f\u7403\uff0c\u4ed6\u4eec\u5f00\u62d3\u4e86\u5b87\u5b99\uff0c\u5efa\u7acb\u4e86\u661f\u9645\u8054\u76df\uff0c\u63a2\u7d22\u672a\u77e5\uff0c\u5bfb\u627e\u65b0\u7684\u751f\u5b58\u7a7a\u95f4\u3002\u5728\u8fd9\u4e2a\u5b8f\u5927\u7684\u80cc\u666f\u4e0b\uff0c\u6211\u4eec\u7684\u6545\u4e8b\u5c31\u5728\u4e00\u9897\u540d\u4e3a\u201c\u8d5b\u535a\u5229\u4e9a\u201d\u7684\u6b96\u6c11\u661f\u7403\u5c55\u5f00\u3002\n\n\u6545\u4e8b\u7684\u4e3b\u89d2\uff0c\u827e\u767b\u00b7\u54c8\u7279\uff0c\u662f\u4e00\u4f4d\u5e74\u8f7b\u7684\u661f\u9645\u63a2\u9669\u5bb6\uff0c\u4ed6\u51fa\u751f\u5e76\u6210\u957f\u4e8e\u5730\u7403\uff0c\u4f46\u5bf9\u90a3\u9897\u84dd\u8272\u661f\u7403\u7684\u8bb0\u5fc6\u5374\u5982\u540c\u9065\u8fdc\u7684\u7ae5\u5e74\u68a6 
\u3002\u4ed6\u7684\u68a6\u60f3\u662f\u627e\u5230\u4f20\u8bf4\u4e2d\u7684\"\u65f6\u95f4\u4e4b\u6e90\"\u2014\u2014\u4e00\u79cd\u53ef\u4ee5\u4fee\u590d\u548c\u5ef6\u957f\u751f\u547d\u7684\u795e\u79d8\u80fd\u6e90\uff0c\u4ee5\u62ef\u6551\u56e0\u8d44\u6e90\u67af\u7aed\u800c\u6fd2\u4e34\u5d29\u6e83\u7684\u5730\u7403\u3002\n\n\u8d5b\u535a\u5229\u4e9a\uff0c\u4e00\u9897\u88ab\u5e9f\u5f03\u7684\u77ff\u661f\uff0c\u66fe\u662f\u5730\u7403\u8d44\u6e90\u7684\u5b9d\u5e93\uff0c\u4f46\u73b0\u5728\uff0c\u5b83\u6ee1\u76ee\u75ae\u75cd\uff0c\u751f\u6001\u7cfb\u7edf\u4e25\u91cd\u53d7\u635f\u3002\u827e\u767b\u53d7\u547d\u5e26\u9886\u4e00\u652f\u5c0f\u578b\u63a2\u7d22\u961f\u6765\u5230\u8fd9\u91cc\uff0c\u4ed6\u4eec\u7684\u4efb\u52a1\u662f\u63ed\u5f00\u8fd9\u9897\u661f\u7403\u7684\u79d8\u5bc6\uff0c\u5bfb\u627e\u53ef\u80fd\u5b58\u5728\u7684\"\u65f6\u95f4\u4e4b\u6e90\"\u3002\n\n\u961f\u4f0d\u7531\u7ecf\u9a8c\u4e30\u5bcc\u7684\u8001\u961f\u957f\u8389\u4e9a\u3001\u6280\u672f\u7cbe\u6e5b\u7684\u79d1\u5b66\u5bb6\u51ef\u745f\u7433\u3001\u4ee5\u53ca\u52c7\u6562\u65e0\u754f\u7684\u673a\u68b0\u5e08\u4e9a\u5f53\u7ec4\u6210\u3002\u4ed6\u4eec\u5728\u8d5b\u535a\u5229\u4e9a\u7684\u5e9f\u589f\u4e2d\u53d1\u73b0\u4e86\u4e00\u5ea7\u53e4\u8001\u7684\u9057\u8ff9\uff0c\u5176\u4e0a\u7684\u58c1\u753b\u63cf\u7ed8\u4e86\u4e00\u4e2a\u795e\u79d8\u7684\u5faa\u73af\u56fe\u6848\uff0c\u4f3c\u4e4e\u6697\u793a\u7740\u65f6\u95f4\u7684\u5965\u79d8\u3002\n\n\u968f\u7740\u6df1\u5165\u7814\u7a76\uff0c\u4ed6\u4eec\u53d1\u73b0\u8fd9\u4e2a\u661f\u7403\u66fe\u906d\u53d7\u8fc7\u4e00\u573a\u707e\u96be\u6027\u7684\u65f6\u7a7a\u626d\u66f2\uff0c\u5bfc\u81f4\u65f6\u95f4\u6d41\u901f\u5f02\u5e38\uff0c\u751f\u7269\u5bff\u547d\u7f29\u77ed\u3002\u800c\u90a3\u4e2a\"\u65f6\u95f4\u4e4b\u6e90\"\uff0c\u6b63\u662f\u4fee\u590d\u8fd9\u79cd\u5931\u8861\u7684\u5173\u952e\u3002\u7136\u800c\uff0c\u9057\u8ff9\u7684\u5b88\u62a4\u8005\uff0c\u4e00\u53ea\u53e4\u8001\u667a\u6167\u7684\u673a\u68b0\u751f\u547d\u4f53\uff0c\u8b66\u544a\u4ed6\u4eec\uff0c\u4efb\u4f55\u
8bd5\u56fe\u6539\u53d8\u65f6\u95f4\u7684\u884c\u4e3a\u90fd\u53ef\u80fd\u5bfc\u81f4\u65e0\u6cd5\u9884\u6599\u7684\u540e\u679c\u3002\n\n\u5728\u7d27\u5f20\u7684\u6289\u62e9\u4e2d\uff0c\u827e\u767b\u51b3\u5b9a\u5192\u9669\uff0c\u4ed6\u76f8\u4fe1\u53ea\u6709\u901a\u8fc7\u7406\u89e3\u548c\u5c0a\u91cd\u81ea\u7136\uff0c\u624d\u80fd\u627e\u5230\u771f\u6b63\u7684\u6551\u8d4e\u3002\u4ed6\u4e0e\u673a\u68b0\u751f\u547d\u4f53\u8fdb\u884c\u4e86\u4e00\u573a\u667a\u6167\u7684\u535a\u5f08\uff0c\u6700\u7ec8\u8d62\u5f97\u4e86\u5b83\u7684\u4fe1\u4efb\uff0c\u5e76\u6210\u529f\u542f\u52a8\u4e86\"\u65f6\u95f4\u7a33\u5b9a\u5668\"\uff0c\u7a33\u5b9a\u4e86\u8d5b\u535a\u5229\u4e9a\u7684\u65f6\u95f4\u6d41\u901f\u3002\n\n\u4fee\u590d\u540e\u7684\u8d5b\u535a\u5229\u4e9a\u5f00\u59cb\u6062\u590d\u751f\u673a\uff0c\u751f\u7269\u79cd\u7c7b\u7e41\u591a\uff0c\u7eff\u8272\u91cd\u65b0\u8986\u76d6\u5927\u5730\u3002\u827e\u767b\u548c\u4ed6\u7684\u56e2\u961f\u6210\u4e3a\u4e86\u82f1\u96c4\uff0c\u4ed6\u4eec\u7684\u4e8b\u8ff9\u88ab\u661f\u9645\u8054\u76df\u5e7f\u4e3a\u4f20\u9882\u3002\u4ed6\u4eec\u5e26\u7740\u8d5b\u535a\u5229\u4e9a\u7684\u91cd\u751f\u79d8\u5bc6\uff0c\u56de\u5230\u4e86\u5730\u7403\uff0c\u4e3a\u4eba\u7c7b\u7684\u672a\u6765\u5e26\u6765\u4e86\u5e0c\u671b\u3002\n\n\u7136\u800c\uff0c\u6545\u4e8b\u5e76\u672a\u7ed3\u675f\uff0c\u827e\u767b\u660e\u767d\uff0c\u4ed6\u4eec\u7684\u4f7f\u547d\u662f\u6c38\u6052\u7684\uff0c\u4ed6\u4eec\u5c06\u7ee7\u7eed\u5728\u661f\u8fb0\u5927\u6d77\u4e2d\u63a2\u7d22\uff0c\u5bfb\u627e\u66f4\u591a\u7684\u7b54\u6848\uff0c\u4fdd\u62a4\u8fd9\u4e2a\u5b87\u5b99\u7684\u5e73\u8861\u3002\"\u65f6\u95f4\u4e4b\u6e90\"\u7684\u6545\u4e8b\uff0c\u5c31\u50cf\u4e00\u9996\u672a\u5b8c\u7684\u4ea4\u54cd\u66f2\uff0c\u56de\u8361\u5728\u65e0\u5c3d\u7684\u5b87\u5b99\u4e2d\uff0c\u6fc0\u52b1\u7740\u6bcf\u4e00\u4e2a\u52c7\u5f80\u76f4\u524d\u7684\u63a2\u9669\u8005\u3002\n\n\u8fd9\u5c31\u662f\u300a\u661f\u9645\u5c18\u57c3\u7684\u56de\u54cd\u300b\u2014\u2014\u4e00\u4e2a\u5173\u4e8e\u52c7\u6c14\u3001\u667a\u61
67\u548c\u7231\u7684\u79d1\u5e7b\u77ed\u7bc7\uff0c\u8bb2\u8ff0\u4e86\u4eba\u7c7b\u5728\u9762\u5bf9\u56f0\u5883\u65f6\u7684\u575a\u97e7\u4e0e\u51b3\u5fc3\uff0c\u4ee5\u53ca\u5bf9\u672a\u77e5\u4e16\u754c\u7684\u65e0\u5c3d\u63a2\u7d22\u3002\n\nreal    9m42.229s\nuser    5m38.796s\nsys     1m7.439s<\/pre><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a9:42<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a4:33<\/p>\n\n\n\n<p>5\u5206\u949f\u5185\u751f\u6210\u4e86809\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea62.6\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u8fd8\u53ef\u4ee5<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>5. 7B \u7684\u6a21\u578b device_map \u4f7f\u7528 auto \u65b9\u5f0f<\/strong><\/h2>\n\n\n\n<p>auto \u65b9\u5f0f\u4ecd\u7136\u662f\u4f7f\u7528\u591a\u4e2aGPU\uff0c\u53ea\u662f\u5206\u7684\u6bd4\u8f83\u5e73\u5747<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-7B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                     
        device_map=\"auto\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-7B-2.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4\/4 [03:50&lt;00:00, 57.51s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u6807\u9898\uff1a\u661f\u9645\u8ff7\u822a\uff1a\u672a\u77e5\u7684\u8109\u52a8\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u5df2\u7ecf\u638c\u63e1\u4e86\u661f\u9645\u65c5\u884c\uff0c\u63a2\u7d22\u5b87\u5b99\u6210\u4e3a\u4e86\u65e5\u5e38\u751f\u6d3b\u7684\u4e00\u90e8\u5206\u3002\u6545\u4e8b\u53d1\u751f\u5728\u4e00\u4e2a\u540d\u4e3a\"\u8bfa 
\u5170\u661f\u4e91\"\u7684\u795e\u79d8\u661f\u7cfb\uff0c\u8fd9\u91cc\u9690\u85cf\u7740\u4e00\u4e2a\u88ab\u9057\u5fd8\u7684\u79d8\u5bc6\u3002\n\n\u4e3b\u89d2\uff0c\u827e\u767b\u00b7\u67ef\u6797\u65af\uff0c\u662f\u4e00\u4f4d\u5e74\u8f7b\u7684\u661f\u9645\u63a2\u9669\u5bb6\uff0c\u4ed6\u7684\u7236\u4eb2\u66fe\u662f\u8457\u540d\u7684\u5b87\u822a\u5458\uff0c\u56e0\u4e00\u6b21\u795e\u79d8\u4efb\u52a1\u5931\u8e2a\uff0c\u7559 \u4e0b\u4e86\u4e00\u672c\u5c18\u5c01\u7684\u65e5 \u8bb0\uff0c\u91cc\u9762\u8bb0\u8f7d\u4e86\u5173\u4e8e\u201c\u8109\u52a8\u661f\u7403\u201d\u7684\u7ebf\u7d22\u3002\u8fd9\u4e2a\u661f\u7403\u636e\u8bf4\u62e5\u6709\u8d85\u8d8a\u79d1\u6280\u7684\u80fd\u91cf\u6e90\uff0c\u5438\u5f15 \u4e86\u5168\u7403\u79d1\u5b66\u5bb6\u7684\u76ee\u5149\uff0c\u5374\u56e0\u5176\u6781\u7aef\u73af\u5883\u800c\u65e0\u4eba\u80fd\u89e6\u53ca\u3002\n\n\u827e\u767b\u51b3\u5b9a\u6311\u6218\u81ea\u6211\uff0c\u5e26\u9886\u4e00\u652f\u7531\u5404\u9886\u57df\u7cbe\u82f1\u7ec4\u6210\u7684\u79d1\u7814\u56e2\u961f\uff0c\u8e0f\u4e0a\u4e86\u5bfb\u627e\u201c\u8109\u52a8\u661f\u7403\u201d\u7684\u65c5\u7a0b\u3002\u4ed6\u4eec\u7684\u98de\u8239\u2014\u2014\u201c\u66d9\u5149\u53f7\u201d\uff0c\u5728\u6d69\u6e3a\u7684\u661f\u6d77\u4e2d\u7a7f\u884c\uff0c\u906d\u9047\u4e86\u672a\u77e5\u7684\u9ed1\u6d1e\u3001\u5916\u661f\u751f\u7269\u548c\u6076\u52a3\u7684\u6c14\u5019\uff0c\u4f46\u4ed6\u4eec\u59cb\u7ec8\u4fdd\u6301\u4fe1\u5ff5\uff0c\u52c7\u5f80\u76f4\u524d\u3002\n\n\u5728\u4e00\u6b21\u610f\u5916\u4e2d\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u4e00\u4e2a\u770b\u4f3c\u8352\u829c\u7684\u661f\u7403\u2014\u2014\u6cf0\u5766\u3002\u8fd9\u91cc\u7684\u73af\u5883\u6bd4\u60f3\u8c61\u4e2d\u66f4\u4e3a\u6076\u52a3\uff0c\u4f46\u827e\u767b\u53d1\u73b0\u661f\u7403\u8868\u9762\u7684\u5947\u7279\u7eb9\u7406\u4e0e\u7236\u4eb2\u65e5\u8bb0\u4e2d\u7684\u63cf\u8ff0\u60ca\u4eba\u76f8\u4f3c\u3002\u4ed6\u575a\u4fe1\u8fd9\u5c31\u662f\u201c\u8109\u52a8\u661f\u7403\u201d\u3002\n\n\u56e2\u961f\u6210\u5458\u4eec\u5f00\u59cb\u6df1\u5165\u781
4\u7a76\uff0c\u4ed6\u4eec\u53d1\u73b0\u6cf0\u5766\u7684\u571f\u58e4\u8574\u542b\u7740\u4e00\u79cd\u524d\u6240\u672a\u89c1\u7684\u5143\u7d20\uff0c\u8fd9\u79cd\u5143\u7d20\u91ca\u653e\u51fa\u5fae\u5f31\u4f46\u7a33\u5b9a\u7684\u6ce2\u52a8\uff0c\u4f3c\u4e4e\u5c31\u662f\u4f20\u8bf4\u4e2d\u7684\u80fd\u91cf\u6e90\u3002\u7136\u800c\uff0c\u8fd9\u79cd\u6ce2\u52a8\u4e5f\u5f15\u6765\u4e86\u4e00\u79cd\u540d\u4e3a\u201c\u6697\u5f71\u751f\u7269\u201d\u7684\u5a01\u80c1\uff0c\u5b83\u4eec\u5bf9\u8fd9\u79cd\u6ce2\u52a8\u5f02\u5e38\u654f\u611f\uff0c\u4f01\u56fe\u541e\u566c\u4e00\u5207\u3002\n\n\u827e\u767b\u51b3\u5b9a\u5192\u9669\u8fdb\u5165\u6cf0\u5766\u7684\u6838\u5fc3\u5730\u5e26\uff0c\u5e0c\u671b\u80fd\u627e\u5230\u63a7\u5236\u6ce2\u52a8\u7684\u5173\u952e\u3002\u4ed6\u4e0e\u56e2\u961f\u6210\u5458\u5e76\u80a9\u4f5c\u6218\uff0c\u9762\u5bf9\u751f\u6b7b\u8003\u9a8c\u3002\u5728\u751f\u6b7b\u8fb9\u7f18\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u6697\u5f71\u751f\u7269\u7684\u5f31\u70b9\uff0c\u539f\u6765\u5b83\u4eec\u60e7\u6015\u5149\uff0c\u800c\u6cf0\u5766\u6838\u5fc3\u6df1\u5904\u6b63\u662f\u6700\u5f3a\u70c8\u7684\u5149\u6e90\u3002\n\n\u7ecf\u8fc7\u4e00\u573a\u60ca\u5fc3\u52a8\u9b44\u7684\u51b3\u6218\uff0c\u827e\u767b\u6210\u529f\u6fc0\u6d3b\u4e86\u6838\u5fc3\u7684\u80fd\u91cf\uff0c\u6ce2\u52a8\u9010\u6e10\u7a33\u5b9a\u4e0b\u6765\u3002\u4ed6\u5e26\u7740\u5b9d\u8d35\u7684\u6837\u672c\u8fd4\u56de\u5730\u7403\uff0c\u4eba\u7c7b\u793e\u4f1a\u56e0\u6b64\u8fce\u6765\u4e86\u65b0\u7684\u79d1\u6280\u9769\u547d\uff0c\u75be\u75c5\u3001\u80fd\u6e90\u95ee\u9898\u5f97\u5230\u4e86\u524d\u6240\u672a\u6709\u7684\u89e3\u51b3\u3002\n\n\u7136\u800c\uff0c\u827e\u767b\u5e76\u672a\u505c\u4e0b\u811a\u6b65\uff0c\u4ed6\u7ee7\u7eed\u63a2\u7d22\u5b87\u5b99\uff0c\u5bfb\u627e\u66f4\u591a\u672a\u77e5\u7684\u5965\u79d8\u3002\u4ed6\u7684\u6545\u4e8b\u6fc0\u52b1\u7740\u4e00\u4ee3\u53c8\u4e00\u4ee3\u7684\u661f\u9645\u63a2\u9669\u8005\uff0c\u4ed6\u4eec\u5c06\u7236\u4eb2\u7684\u52c7\u6c14\u548c\u667a\u6167\u4f20\u627f\u4e0b\u53bb\
uff0c\u7ee7\u7eed\u5728\u65e0\u5c3d\u7684\u661f\u6d77\u4e2d\u5bfb\u627e\u751f\u547d\u7684\u8d77\u6e90\u548c\u5b87\u5b99\u7684\u771f\u8c1b\u3002\n\n\u300a\u661f\u9645\u8ff7\u822a\uff1a\u672a\u77e5\u7684\u8109\u52a8\u300b\u4ee5\u827e\u767b\u7684\u5192\u9669\u4e3a\u7ebf\u7d22\uff0c\u8bb2\u8ff0\u4e86\u79d1\u6280\u3001\u53cb\u60c5\u548c\u727a\u7272\u7684\u529b\u91cf\uff0c\u63cf\u7ed8\u4e86\u4e00\u4e2a\u5145\u6ee1\u5e0c\u671b\u4e0e\u6311\u6218\u7684\u672a\u6765\u4e16\u754c\u3002\n\nreal    8m51.094s\nuser    5m25.063s\nsys     1m7.385s<\/pre><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a8:51<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a3:50<\/p>\n\n\n\n<p>5\u5206\u949f\u5185\u751f\u6210\u4e86840\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea62.8\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u6bd4\u524d\u9762\u5feb\u4e00\u70b9<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>6. 7B \u7684\u6a21\u578b \u53ea\u4f7f\u7528\u4e00\u4e2a GPU<\/strong><\/h2>\n\n\n\n<p>7B \u7684\u6a21\u578b\u53ea\u898114G\u5de6\u53f3\uff0c\u6240\u4ee5\u4f7f\u7528\u4e00\u4e2a24G\u7684 RTX 4090 \u662f\u53ef\u4ee5\u88c5\u8f7d\u7684<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-7B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = 
AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-7B-3.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4\/4 [04:17&lt;00:00, 64.40s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or 
trained.\n\u6807\u9898\uff1a\u661f\u9645\u9057\u4ea7\n\n\u5728\u672a\u6765\u7684\u5730\u7403\uff0c\u4eba\u7c7b\u5df2\u7ecf\u6210\u529f\u5730\u62d3\u5c55\u5230\u4e86\u661f\u8fb0\u5927\u6d77\u3002\u5728\u6d69\u701a\u7684\u94f6\u6cb3\u7cfb\u8fb9\u7f18\uff0c\u9690\u85cf\u7740\u4e00\u9897\u540d\u4e3a\u201c\u65b0\u7eff\u6d32\u201d\u7684\u795e\u79d8\u884c\u661f\uff0c\u5b83\u62e5\u6709\u9002\u5b9c\u751f\u547d\u751f\u5b58\u7684\u7279\u6b8a\u73af\u5883\uff0c\u88ab\u8054\u5408\u56fd\u547d\u540d\u4e3a\u201c\u661f\u9645\u9057\u4ea7\u201d\u3002\n\n\u6545\u4e8b\u7684\u4e3b\u89d2\uff0c\u827e\u767b\u00b7\u4e9a\u5386\u5c71\u5927\uff0c\u662f\u4e00\u4f4d\u5e74\u8f7b\u7684\u5929\u4f53\u751f\u7269\u5b66\u5bb6\uff0c\u4ed6\u4e00\u76f4\u5bf9\u65b0\u7eff\u6d32\u5145\u6ee1\u597d\u5947\uff0c\u68a6\u60f3\u63ed\u5f00\u5176\u80cc\u540e \u7684\u79d8\u5bc6\u3002\u4ed6\u7684\u7236\u4eb2\uff0c\u4e00\u4f4d\u9000\u5f79\u7684\u5b87\u822a\u5458\uff0c\u66fe\u5728\u90a3\u91cc\u8fdb\u884c\u8fc7\u4e00\u6b21\u63a2\u9669\uff0c\u4f46\u90a3\u6b21\u4efb\u52a1\u540e\u795e\u79d8\u5931\u8e2a\uff0c\u7559\u4e0b\u4e86\u4e00\u4e2a\u672a\u89e3\u7684\u8c1c\u56e2\u3002\n\n\u827e\u767b\u51b3\u5b9a\u7ee7\u627f\u7236\u4eb2\u7684\u9057\u5fd7\uff0c\u8e0f\u4e0a\u4e86\u5bfb\u627e\u771f\u76f8\u7684\u65c5\u7a0b\u3002\u4ed6\u7ec4\u5efa\u4e86\u4e00\u652f\u7531\u5404\u9886\u57df\u7cbe\u82f1\u7ec4\u6210\u7684\u63a2\u9669\u961f\u4f0d\uff0c\u5305\u62ec\u4eba\u5de5\u667a\u80fd\u4e13\u5bb6\u8389\u5a1c\u3001\u5730\u8d28\u5b66\u5bb6\u7f57\u4f2f\u7279\u548c\u519b\u4e8b\u51fa\u8eab\u7684\u6307\u6325\u5b98\u51ef\u6587\u3002\u4ed6\u4eec\u7684\u76ee\u6807\u662f\u901a\u8fc7\u5148\u8fdb\u7684\u592a\u7a7a\u8239\u201c\u66d9\u5149\u53f7\u201d\u7a7f\u8d8a\u866b\u6d1e\uff0c\u62b5\u8fbe\u65b0\u7eff\u6d32\u3002\n\n\u822a\u884c\u8fc7\u7a0b\u4e2d\uff0c\u4ed6\u4eec\u906d\u9047\u4e86\u672a\u77e5\u7684\u5b87\u5b99\u98ce\u66b4\uff0c\u8239\u5458\u4eec\u9762\u4e34\u751f\u6b7b\u8003\u9a8c\u3002\u827e\u767b\u51ed\u501f\u4ed6\u7684\u79d1\u5b66\u77e5\u8bc6\u548c\u56e
2\u961f\u7684\u534f\u4f5c\uff0c\u6210\u529f\u7a33\u5b9a\u4e86\u98de\u8239\uff0c\u4f46\u4ed6\u4eec\u5e76\u672a\u653e\u5f03\u5bf9\u65b0\u7eff\u6d32\u7684\u63a2\u7d22\uff0c\u53cd\u800c\u66f4\u52a0\u575a\u5b9a\u4e86\u51b3\u5fc3\u3002\n\n\u62b5\u8fbe\u65b0\u7eff\u6d32\u540e\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u4e00\u4e2a\u53e4\u8001\u7684\u6587\u660e\u9057\u8ff9\u3002\u8fd9\u662f\u4e00\u5ea7\u5de8\u5927\u7684\u592a\u7a7a\u7ad9\uff0c\u7531\u9ad8\u5ea6\u53d1\u8fbe\u7684\u5916\u661f\u751f\u7269\u5efa\u9020\uff0c\u4ed6\u4eec\u4f3c\u4e4e\u638c\u63e1\u4e86\u8d85\u8d8a\u4eba\u7c7b\u79d1\u6280\u7684\u529b\u91cf\u3002\u5728\u9057\u8ff9\u4e2d\uff0c\u827e\u767b\u53d1\u73b0\u4e86\u4e00\u4efd\u7236\u4eb2\u7559\u4e0b\u7684\u65e5\u8bb0\uff0c\u63ed\u793a\u4e86\u4ed6\u7684\u6700\u540e\u53d1\u73b0\u2014\u2014\u8fd9\u4e2a\u6587\u660e\u6b63\u5728\u9762\u4e34\u4e00\u573a\u751f\u6001\u5371\u673a\uff0c\u4e3a\u4e86\u4fdd\u62a4\u661f\u7403\uff0c\u4ed6\u4eec\u9009\u62e9\u4e86\u81ea\u6211\u6bc1\u706d\u3002\n\n\u8fd9\u4e2a\u53d1\u73b0\u8ba9\u827e\u767b\u548c\u4ed6\u7684\u56e2\u961f\u9677\u5165\u4e86\u6c89\u601d\u3002\u4ed6\u4eec\u610f\u8bc6\u5230\uff0c\u5982\u679c\u4eba\u7c7b\u7ee7\u7eed\u8d2a\u5a6a\u5730\u5f00\u53d1\u5b87\u5b99\uff0c\u4e5f\u8bb8\u4f1a\u6b65\u4e0a\u65b0\u7eff\u6d32\u6587\u660e\u7684\u540e\u5c18\u3002\u4ed6\u4eec\u51b3\u5b9a\u6682\u65f6\u7559\u5728\u65b0\u7eff\u6d32\uff0c\u7528\u79d1\u6280\u4e0e\u667a\u6167\u5e2e\u52a9\u90a3\u91cc\u7684\u751f\u7269\u91cd\u5efa\u5bb6\u56ed\uff0c\u540c\u65f6\u8b66\u544a\u5168\u4eba\u7c7b\u751f\u6001\u5e73\u8861\u7684\u91cd\u8981\u6027\u3002\n\n\u5728\u65b0\u7eff\u6d32\u7684\u65e5\u5b50\u91cc\uff0c\u827e\u767b\u548c\u4ed6\u7684\u56e2\u961f\u4e0d\u4ec5\u5b66\u4e60\u4e86\u65b0\u7684\u79d1\u6280\uff0c\u4e5f\u4f53\u9a8c\u4e86\u751f\u547d\u7684\u8106\u5f31\u4e0e\u575a\u97e7\u3002\u4ed6\u4eec\u4e0e\u65b0\u7eff\u6d32\u7684\u751f\u7269\u5efa\u7acb\u4e86\u6df1\u539a\u7684\u53cb\u8c0a\uff0c\u751a\u81f3\u5f00\u59cb\u5c1d\u8bd5\u7406\u89e3\u90a3\u4e9b\u5916\u661f\
u751f\u7269\u7684\u8bed\u8a00\uff0c\u5e0c\u671b\u627e\u5230\u548c\u5e73\u5171\u5b58\u7684\u65b9\u6cd5\u3002\n\n\u6700\u7ec8\uff0c\u827e\u767b\u4ee5\u4ed6\u7684\u52c7\u6562\u548c\u667a\u6167\uff0c\u5411\u5730\u7403\u53d1\u51fa\u4e86\u5173\u4e8e\u65b0\u7eff\u6d32\u7684\u8b66\u544a\uff0c\u5e76\u63d0\u51fa\u4e86\u201c\u661f\u9645\u548c\u5e73\u201d\u7684\u7406\u5ff5\u3002\u4ed6\u7684\u6545\u4e8b\u6fc0\u52b1\u4e86\u5168\u7403\uff0c\u4eba\u7c7b\u5f00\u59cb\u53cd\u601d\u81ea\u5df1\u7684\u884c\u4e3a\uff0c\u91cd\u65b0\u5ba1\u89c6\u5bf9\u5b87\u5b99\u7684\u6001\u5ea6\u3002\n\n\u300a\u661f\u9645\u9057\u4ea7\u300b\u7684\u6545\u4e8b\u4ee5\u827e\u767b\u5728\u65b0\u7eff\u6d32\u7684\u6700\u540e\u4e00\u6b21\u6f14\u8bb2\u7ed3\u675f\uff0c\u4ed6\u7684\u8bdd\u8bed\u56de\u8361\u5728\u5b87\u5b99\u4e2d\uff1a\u201c\u6211\u4eec\u90fd\u662f\u661f\u9645\u9057\u4ea7 \u7684\u4e00\u90e8\u5206\uff0c\u6bcf\u4e00\u4e2a\u751f\u547d\uff0c\u6bcf\u4e00\u7247\u661f\u57df\uff0c\u90fd\u503c\u5f97\u5c0a\u91cd\u548c\u4fdd\u62a4\u3002\u8ba9\u6211\u4eec\u4e00\u8d77\uff0c\u4e3a\u661f\u9645\u7684\u548c\u8c10\u800c\u52aa\u529b\u3002\u201d\n\nreal    4m45.451s\nuser    0m56.169s\nsys     1m7.964s<\/pre><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a4:45<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a4:17<\/p>\n\n\n\n<p>28\u79d2\u5185\u751f\u6210\u4e86883\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea631\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u6bd4\u524d\u9762\u5feb10\u500d\u8fd8\u591a<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>7. 
14B\u7684 \u6a21\u578b<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 40  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# 
\u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-14B.py\nNUM_GPUS: 8\nLoading checkpoint shards: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8\/8 [04:20&lt;00:00, 32.62s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u5f88\u62b1\u6b49\uff0c\u7531\u4e8e\u6587\u672c\u683c\u5f0f\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u4e00\u6b21\u6027\u63d0\u4f9b\u4e00\u7bc72\u4e07\u5b57\u7684\u5b8c\u6574\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u5e2e\u4f60\u6784\u601d\u5e76\u7ed9\u51fa\u4e00\u4e2a\u5927\u6982\u7684\u6545\u4e8b \u6846\u67b6\u548c\u90e8\u5206\u5185\u5bb9\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u6846\u67b6\u81ea\u884c\u6269\u5c55\u3002\u5982\u679c\u4f60\u9700\u8981\u7b80\u77ed\u7684\u7247\u6bb5\u6216\u6545\u4e8b\u5927\u7eb2\uff0c\u4ee5\u4e0b\u662f\u4e00\u4e2a\u793a\u4f8b\uff1a\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u4fe1\u4f7f\u300b\n\n\u5f00\u5934\uff1a\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u8054\u76df\u7684\u79d1\u5b66\u5bb6\u4eec\u53d1\u73b0\u4e86\u4e00\u79cd\u65b0\u578b\u80fd\u6e90\u2014\u2014\u91cf\u5b50\u7ea0\u7f20\u661f\u5c18\uff0c\u5b83\u80fd\u4e3a\u661f\u9645\u65c5\u884c\u63d0\u4f9b\u65e0\u9650\u53ef\u80fd\u3002\u4e3b\u4eba\u516c\u827e\u767b\u00b7\u96f7 \u8bfa\u5179\uff0c\u4e00\u540d\u5e74\u8f7b\u7684\u5b87\u822a\u5458\uff0c\u88ab\u9009\u4e2d\u6210\u4e3a\u9996\u6279\u4f7f\u7528\u8fd9\u79cd\u80fd\u6e90\u7684\u661f\u9645\u4fe1\u4f7f\u3002\n\n\u6b63\u6587\uff1a\n1. \u827e\u767b\u63a5\u53d7\u4efb\u52a1\uff0c\u9a7e\u9a76\"\u661f\u9645\u53f7\"\u98de\u8239\u79bb\u5f00\u5730\u7403\uff0c\u76ee\u6807\u662f\u63a2\u7d22\u65b0\u53d1\u73b0\u7684\u661f\u7cfb\u3002\u4ed6\u7684\u4efb\u52a1\u662f\u5728\u5b87\u5b99\u4e2d\u4f20\u9012\u79d1\u5b66\u77e5\u8bc6\uff0c\u4fc3\u8fdb\u6587\u660e \u95f4\u7684\u4ea4\u6d41\u3002\n2. 
\u5728\u65c5\u9014\u4e2d\uff0c\u827e\u767b\u906d\u9047\u672a\u77e5\u7684\u5916\u661f\u751f\u7269\u2014\u2014\u8d5b\u4f26\u4eba\uff0c\u4ed6\u4eec\u5bf9\u5730\u7403\u6587\u660e\u5145\u6ee1\u597d\u5947\uff0c\u4f46\u53c8\u56e0\u8bef\u89e3\u800c\u654c\u89c6\u4eba\u7c7b\u3002\u827e\u767b\u901a\u8fc7\u91cf\u5b50\u7ea0\u7f20 \u661f\u5c18\u4e0e\u5730\u7403\u8fdb\u884c\u5373\u65f6\u901a\u8baf\uff0c\u8bf7\u6c42\u63f4\u52a9\u3002\n3. \u5730\u7403\u79d1\u5b66\u5bb6\u5229\u7528\u661f\u5c18\u6280\u672f\u89e3\u8bfb\u4e86\u8d5b\u4f26\u4eba\u7684\u8bed\u8a00\uff0c\u6210\u529f\u4f20\u8fbe\u4e86\u548c\u5e73\u7684\u4fe1\u606f\uff0c\u907f\u514d\u4e86\u4e00\u573a\u661f\u9645\u51b2\u7a81\u3002\n4. \u5728\u63a5\u4e0b\u6765\u7684\u65c5\u7a0b\u4e2d\uff0c\u827e\u767b\u89c1\u8bc1\u4e86\u5404\u79cd\u5947\u7279\u7684\u661f\u7403\u751f\u6001\uff0c\u5b66\u4e60\u4e86\u591a\u5143\u6587\u660e\uff0c\u540c\u65f6\u4e5f\u4f20\u64ad\u4e86\u4eba\u7c7b\u7684\u77e5\u8bc6\u548c\u4ef7\u503c\u89c2\u3002\n\n\u9ad8\u6f6e\uff1a\n\u827e\u767b\u5728\u4e00\u6b21\u610f\u5916\u4e2d\u53d1\u73b0\u4e86\u4e00\u4e2a\u88ab\u9057\u5fd8\u7684\u9057\u8ff9\uff0c\u90a3\u662f\u4e00\u4e2a\u9ad8\u5ea6\u53d1\u8fbe\u7684\u53e4\u4ee3\u6587\u660e\uff0c\u4ed6\u4eec\u66fe\u8bd5\u56fe\u7528\u661f\u5c18\u5b9e\u73b0\u5b87\u5b99\u5927\u4e00\u7edf\uff0c\u5374\u56e0\u4e3a\u8fc7\u5ea6\u4f9d\u8d56\u800c\u5bfc\u81f4\u79cd\u65cf\u8870\u843d\u3002\u827e\u767b\u4ece\u4e2d\u9886\u609f\u5230\u5e73\u8861\u548c\u53ef\u6301\u7eed\u53d1\u5c55\u7684\u771f\u8c1b\u3002\n\n\u7ed3\u5c3e\uff1a\n\u827e\u767b\u5e26\u7740\u8fd9\u4e2a\u6559\u8bad\u56de\u5230\u5730\u7403\uff0c\u4ed6\u5411\u8054\u76df\u63d0\u51fa\u5173\u4e8e\u661f\u5c18\u4f7f\u7528\u7684\u8c28\u614e\u5efa\u8bae\uff0c\u5021\u5bfc\u4eba\u7c7b\u5728\u8ffd\u6c42\u79d1\u6280\u8fdb\u6b65\u7684\u540c\u65f6\uff0c\u4e5f\u8981\u5c0a\u91cd\u81ea\u7136\u3001\u5e73\u8861\u53d1\u5c55\u3002\u4ed6\u7684\u7ecf\u5386\u5f15\u53d1\u4e86\u5168\u7403\u7684\u53cd\u601d\uff0c\u661f\u9645\u4fe1\u4f7f\u8ba1\u5212\u4e5f\u7531\u6
b64\u53d8\u5f97\u66f4\u52a0\u660e\u667a\u548c\u4eba\u6027\u5316\u3002\n\n\u8fd9\u4e2a\u6846\u67b6\u53ea\u662f\u4e00\u4e2a\u5927\u6982\u7684\u6784\u601d\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u81ea\u5df1\u7684\u60f3\u8c61\u548c\u521b\u4f5c\u6dfb\u52a0\u7ec6\u8282\uff0c\u6bcf\u4e2a\u7ae0\u8282\u90fd\u53ef\u4ee5\u8fdb\u4e00\u6b65\u6269\u5c55\uff0c\u5f62\u6210\u4e00\u90e8\u5b8c\u6574\u76842\u4e07 \u5b57\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\u3002\u795d\u4f60\u5199\u4f5c\u987a\u5229\uff01\n\nreal    10m45.922s\nuser    7m3.979s\nsys     2m21.686s<\/pre><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a10:45<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a4:20<\/p>\n\n\n\n<p>6\u5206\u949f\u5185\u751f\u6210\u4e86724\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea61.17\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u6bd4\u8f83\u6162\uff1f<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>8. 14B \u5f3a\u5236\u4f7f\u75282\u5f20\u5361\u88c5\u8f7d<\/strong><\/h2>\n\n\n\n<p>\u5355\u4e2a24G\u7684RTX 4090 \u65e0\u6cd5\u88c5\u8f7d14B\u7684\u6a21\u578b<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 40  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # 
\u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n# \u5f3a\u5236\u88c5\u8f7d\u52302\u5f20\u5361\u91cc\u9762\nNUM_GPUS=2\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = 
AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a2\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=20000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-14B-2.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588| 8\/8 [07:34&lt;00:00, 56.79s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u5f88\u62b1\u6b49\uff0c\u7531\u4e8e\u6587\u672c\u957f\u5ea6\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u5728\u8fd9\u91cc\u76f4\u63a5\u4e3a\u60a8\u521b\u4f5c\u4e00\u7bc7\u5b8c\u6574\u76842\u4e07\u5b57\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u60a8 
\u63d0\u4f9b\u4e00\u4e2a\u6545\u4e8b\u5927\u7eb2\u548c\u90e8\u5206\u5f00\u5934\uff0c\u60a8\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u5927\u7eb2\u81ea\u884c\u6269\u5c55\u6216\u8005\u9080\u8bf7\u4e13\u4e1a\u4f5c\u5bb6\u5e2e\u52a9\u60a8\u5b8c\u6210\u3002\u4ee5\u4e0b\u662f\u4e00\u4e2a\u57fa\u672c\u7684\u6846\u67b6\uff1a\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u7f16\u5e74\u53f2\uff1a\u65f6\u95f4\u88c2\u7f1d\u300b\n\n\u6545\u4e8b\u5927\u7eb2\uff1a\n1. \u5f00\u573a\uff1a\u5728\u4e0d\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u6210\u529f\u5b9e\u73b0\u4e86\u661f\u9645\u65c5\u884c\uff0c\u5730\u7403\u8054\u5408\u653f\u5e9c\u6210\u7acb\u4e86\u661f\u9645\u63a2\u7d22\u961f\"\u66d9\u5149\u53f7\"\u3002\u4e3b\u89d2\uff0c \u5e74\u8f7b\u800c\u5bcc\u6709\u624d\u534e\u7684\u79d1\u5b66\u5bb6\u827e\u4f26\u00b7\u745e\u6069\u88ab\u9009\u4e2d\u52a0\u5165\u3002\n2. \u53d1\u73b0\uff1a\u66d9\u5149\u53f7\u5728\u4e00\u6b21\u63a2\u7d22\u4e2d\u53d1\u73b0\u4e86\u4e00\u4e2a\u795e\u79d8\u7684\u65f6\u95f4\u88c2\u7f1d\uff0c\u5b83\u8fde\u63a5\u7740\u672a\u77e5\u7684\u7ef4\u5ea6\u3002\n3. \u63a2\u7d22\uff1a\u827e\u4f26\u5e26\u9886\u56e2\u961f\u6df1\u5165\u88c2\u7f1d\uff0c\u906d\u9047\u65f6\u95f4\u6096\u8bba\u548c\u5f02\u6b21\u5143\u751f\u7269\uff0c\u4ed6\u4eec\u5fc5\u987b\u89e3\u5f00\u8fd9\u4e9b\u8c1c\u56e2\u4ee5\u9632\u6b62\u5386\u53f2\u88ab\u6539 \u53d8\u3002\n4. \u51b2\u7a81\uff1a\u5728\u63a2\u7d22\u8fc7\u7a0b\u4e2d\uff0c\u827e\u4f26\u610f\u5916\u89e6\u53d1\u4e86\u65f6\u95f4\u8fde\u9501\u53cd\u5e94\uff0c\u4ed6\u88ab\u56f0\u5728\u8fc7\u53bb\uff0c\u5fc5\u987b\u5bfb\u627e\u56de\u5230\u73b0\u5b9e\u7684\u65b9\u6cd5\u3002\n5. \u5185\u5fc3\u6210\u957f\uff1a\u5728\u8fc7\u53bb\u7684\u7ecf\u5386\u4e2d\uff0c\u827e\u4f26\u9762\u5bf9\u4e2a\u4eba\u9009\u62e9\u4e0e\u8d23\u4efb\u7684\u8003\u9a8c\uff0c\u4ed6\u7684\u4fe1\u5ff5\u548c\u667a\u6167\u6210\u4e3a\u4ed6\u7a81\u7834\u56f0\u5883\u7684\u5173 \u952e\u3002\n6. 
\u8f6c\u6298\u70b9\uff1a\u827e\u4f26\u901a\u8fc7\u4e0e\u8fc7\u53bb\u7684\u81ea\u5df1\u4ea4\u8c08\uff0c\u610f\u8bc6\u5230\u53ea\u6709\u727a\u7272\u4e00\u90e8\u5206\u8bb0\u5fc6\uff0c\u624d\u80fd\u4fee\u590d\u65f6\u95f4\u7ebf\u3002\n7. \u7ed3\u5c40\uff1a\u827e\u4f26\u51b3\u5b9a\u727a\u7272\uff0c\u6210\u529f\u56de\u5230\u73b0\u4ee3\uff0c\u4f46\u5931\u53bb\u4e86\u4e00\u4e9b\u8bb0\u5fc6\u3002\u4ed6\u5206\u4eab\u4e86\u8fd9\u6b21\u7ecf\u5386\uff0c\u5f15\u53d1\u4e86\u5bf9\u65f6\u95f4\u3001\u8bb0\u5fc6 \u548c\u5b87\u5b99\u771f\u7406\u7684\u65b0\u601d\u8003\u3002\n\n\u5f00\u5934\uff08\u7ea61000\u5b57\uff09\uff1a\n\n\u57282199\u5e74\u7684\u5730\u7403\u8054\u5408\u653f\u5e9c\u661f\u9645\u63a2\u7d22\u57fa\u5730\uff0c\u827e\u4f26\u00b7\u745e\u6069\u535a\u58eb\u7ad9\u5728\u5de8\u5927\u7684\u5168\u606f\u6295\u5f71\u5c4f\u5e55\u524d\uff0c\u4ed6\u7684\u773c\u795e\u4e13\u6ce8\u800c\u575a \u5b9a\u3002\u4f5c\u4e3a\u201c\u66d9\u5149\u53f7\u201d\u661f\u9645\u63a2\u7d22\u961f\u7684\u4e00\u5458\uff0c\u4ed6\u5373\u5c06\u5e26\u9886\u56e2\u961f\u542f\u7a0b\uff0c\u63ed\u5f00\u5b87\u5b99\u7684\u672a\u77e5\u9762\u7eb1\u3002\u7136\u800c\uff0c\u4ed6\u4eec\u6b64\u6b21\u7684\u4efb\u52a1\u5e76\u975e\u5355\u7eaf\u7684\u63a2\u7d22\uff0c\u800c\u662f\u5bfb\u627e\u4f20\u8bf4\u4e2d\u7684\u65f6\u95f4\u88c2\u7f1d\u2014\u2014\u4e00\u79cd\u80fd\u7a7f\u8d8a\u65f6\u95f4\u548c\u7a7a\u95f4\u7684\u795e\u79d8\u73b0\u8c61\u3002\n\n\u201c\u66d9\u5149\u53f7\u201d\u5728\u5b87\u5b99\u4e2d\u95ea\u70c1\uff0c\u50cf\u4e00\u9897\u7480\u74a8\u7684\u661f\u8fb0\u5212\u7834\u9ed1\u6697\u3002\u827e\u4f26\u51dd\u89c6\u7740\u661f\u56fe\uff0c\u5fc3\u4e2d\u5145\u6ee1\u4e86\u671f\u5f85\u548c\u7d27\u5f20\u3002\u4ed6\u4eec\u5373\u5c06\u9762\u5bf9\u7684\u662f\u672a\u77e5\u7684\u6311\u6218\uff0c\u8fd8\u662f\u4e00\u4e2a\u6539\u53d8\u4eba\u7c7b\u547d\u8fd0\u7684\u673a\u4f1a\uff1f\u4e00\u5207\uff0c\u53ea\u5728\u4e00\u5239\u90a3\u95f4\u3002\n\n\u5f53\u4ed6\u4eec\u7684\u98de\u8239\u7a7f\u8d8a\u661f\u7cfb\uff0c\u4e00\u4e2a\u524d\u6240\u672a\u89c1\
u7684\u5149\u6655\u7a81\u7136\u51fa\u73b0\uff0c\u5982\u540c\u4e00\u9053\u65f6\u7a7a\u88c2\u75d5\uff0c\u5f15\u5411\u4e00\u4e2a\u5145\u6ee1\u8c1c\u56e2\u7684\u9886\u57df\u3002\u827e\u4f26\u6df1\u5438\u4e00\u53e3\u6c14\uff0c\u4ed6\u77e5\u9053\uff0c\u4ed6\u4eec\u5df2\u7ecf\u8e0f\u4e0a\u4e86\u8fd9\u573a\u53f2\u8bd7\u822c\u7684\u65c5\u7a0b\u7684\u8d77\u70b9...\n\n\u63a5\u4e0b\u6765\u7684\u5185\u5bb9\u9700\u8981\u60a8\u6839\u636e\u8fd9\u4e2a\u5927\u7eb2\u7ee7\u7eed\u5c55\u5f00\uff0c\u6784\u5efa\u60c5\u8282\uff0c\u5851\u9020\u89d2\u8272\uff0c\u589e\u52a0\u7ec6\u8282\uff0c\u8ba9\u6545\u4e8b\u66f4\u52a0\u4e30\u5bcc\u548c\u5b8c\u6574\u3002\u795d\u60a8\u5199\u4f5c\u6109\u5feb\uff01\n\nreal    8m23.270s\nuser    1m33.979s\nsys     2m6.338s<\/pre><\/div>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a8:23<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a7:34<\/p>\n\n\n\n<p>49\u79d2\u5185\u751f\u6210\u4e86877\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea617\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u6bd4\u524d\u9762\u5feb10\u500d<\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>8.1. 
14B \u5f3a\u5236\u4f7f\u75281\u5f20\u5361\u88c5\u8f7d\uff0c8\u4f4d\u91cf\u5316<\/strong><\/h3>\n\n\n\n<p>\u4f7f\u7528 1 \u5f2024G \u88c5\u8f7d\uff0c\u9700\u8981\u4f7f\u7528 BitsAndBytesConfig \u8fdb\u884c8\u4f4d\u91cf\u5316\uff0c\u5927\u7ea6\u9700\u898118G\u7684GPU\u5185\u5b58<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nquantization_config = BitsAndBytesConfig(load_in_8bit=True)\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             quantization_config=quantization_config,\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} 
seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-14B-3.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8\/8 [10:01&lt;00:00, 75.17s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 604.6261560916901 
seconds\n\u5f88\u62b1\u6b49\uff0c\u7531\u4e8e\u6587\u672c\u683c\u5f0f\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u4e00\u6b21\u6027\u63d0\u4f9b\u4e00\u7bc7\u4e00\u4e07\u5b57\u7684\u5b8c\u6574\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u4f60\u63d0\u4f9b\u4e00\u4e2a\u5927\u6982\u7684\u6545\u4e8b\u6846\u67b6\u548c\u5f00\u5934\u90e8\u5206\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u6846\u67b6\u81ea\u884c\u6269\u5c55\u6216\u8bf7\u4e13\u4e1a\u4f5c\u5bb6\u8fdb\u884c\u521b\u4f5c\u3002\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u7f16\u5e74\u53f2\uff1a\u65f6\u95f4\u88c2\u75d5\u300b\n\n---\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u8054\u76df\u7684\u79d1\u5b66\u5bb6\u4eec\u53d1\u73b0\u4e86\u4e00\u79cd\u540d\u4e3a\"\u65f6\u7a7a\u77e9\u9635\"\u7684\u65b0\u6280\u672f\uff0c\u5b83\u80fd\u6253\u5f00\u901a\u5f80\u5b87\u5b99\u6df1\u5904\u7684\u65f6\u95f4\u901a\u9053\u3002\u4e3b\u89d2\uff0c\u5e74\u8f7b\u7684\u7269\u7406\u5b66\u5bb6\u827e\u767b\u00b7\u54c8\u7279\uff0c\u662f\u8fd9\u9879\u7814\u7a76\u7684\u5173\u952e\u6210\u5458\uff0c\u4ed6\u6e34\u671b\u63a2\u7d22\u672a\u77e5\uff0c\u7834\u89e3\u5b87\u5b99\u7684\u79d8\u5bc6\u3002\n\n---\n\n\u3010\u7b2c\u4e00\u7ae0\uff1a\u542f\u7a0b\u3011\n\n\u827e\u767b\u7ad9\u5728\"\u65f6\u5149\u53f7\"\u98de\u8239\u7684\u6307\u6325\u53f0\u4e0a\uff0c\u5fc3\u8df3\u52a0\u901f\u3002\u4ed6\u542f\u52a8\u4e86\u65f6\u7a7a\u77e9\u9635\uff0c\u4e00\u9053\u7480\u74a8\u7684\u5149\u675f\u5212\u7834\u591c\u7a7a\uff0c\u4ed6\u4eec\u88ab\u5438\u5165\u4e86\u65f6\u95f4\u4e0e\u7a7a\u95f4\u7684\u6f29\u6da1\u3002\u7a7f\u8d8a\u4e86\u51e0\u5343\u5e74\uff0c\u4ed6\u4eec\u964d\u843d\u5728\u4e00\u9897\u964c\u751f\u7684\u661f\u7403\u2014\u2014Zephyria\uff0c\u90a3\u91cc\u7684\u6587\u660e\u6b63\u5904\u5728\u79d1\u6280\u7684\u9ece\u660e\u671f\u3002\n\n---\n\n\u3010\u7b2c\u4e8c\u7ae0\uff1a\u5f02\u4e16\u754c\u5386\u9669\u3011\n\n\u827e\u767b\u4f2a\u88c5\u6210Zephyrians\u7684\u5b66\u8005\uff0c\u5f00\u59cb\u8c03\u67e5\u4ed6\u4eec\u7684\u5386\u53f2\u3002
\u7136\u800c\uff0c\u4ed6\u610f\u5916\u5730\u53d1\u73b0\u4e86\u4e00\u4e2a\u5173\u4e8e\u65f6\u95f4\u88c2\u75d5\u7684\u53e4\u8001\u4f20\u8bf4\uff0c\u8fd9\u53ef\u80fd\u4e0e\u4ed6\u4eec\u6587\u660e\u7684\u8870\u843d\u6709\u5173\u3002\u4ed6\u51b3\u5b9a\u5bfb\u627e\u5e76\u4fee\u590d\u8fd9\u4e2a\u88c2\u75d5\uff0c\u4ee5\u9632\u6b62\u707e\u96be\u7684\u53d1\u751f\u3002\n\n---\n\n\u3010\u7b2c\u4e09\u7ae0\uff1a\u5371\u673a\u56db\u4f0f\u3011\n\n\u5728\u5bfb\u627e\u7ebf\u7d22\u7684\u8fc7\u7a0b\u4e2d\uff0c\u827e\u767b\u906d\u9047\u4e86\u5404\u79cd\u5371\u9669\uff0c\u5305\u62ec\u654c\u5bf9\u52bf\u529b\u7684\u8ffd\u6355\u548c\u795e\u79d8\u529b\u91cf\u7684\u963b\u6320\u3002\u4ed6\u7684\u56e2\u961f\u6210\u5458\u9010\u4e00\u5012\u4e0b\uff0c\u4ed6\u72ec\u81ea\u9762\u5bf9\u6311\u6218\uff0c\u5185\u5fc3\u5145\u6ee1\u4e86\u51b3\u5fc3\u3002\n\n---\n\n\u3010\u7b2c\u56db\u7ae0\uff1a\u65f6\u95f4\u7684\u6289\u62e9\u3011\n\n\u827e\u767b\u5728\u65f6\u95f4\u88c2\u75d5\u524d\u505a\u51fa\u4e86\u8270\u96be\u7684\u9009\u62e9\uff1a\u727a\u7272\u81ea\u5df1\u5173\u95ed\u88c2\u75d5\uff0c\u4fdd\u62a4\u6574\u4e2a\u5b87\u5b99\u7684\u65f6\u95f4\u7ebf\uff0c\u8fd8\u662f\u575a\u6301\u56de\u5230\u5730\u7403\uff0c\u5c06\u8fd9\u4e2a\u79d8\u5bc6\u5e26\u56de\u53bb\u6539\u53d8\u4eba\u7c7b\u7684\u547d\u8fd0\uff1f\n\n---\n\n\u3010\u5c3e\u58f0\uff1a\u5f52\u9014\u4e0e\u542f\u793a\u3011\n\n\u827e\u767b\u9009\u62e9\u5173\u95ed\u88c2\u75d5\uff0c\u4f46\u4ed6\u5e76\u672a\u6d88\u5931\uff0c\u800c\u662f\u5316\u4e3a\u4e00\u9053\u5149\uff0c\u7a7f\u8d8a\u65f6\u7a7a\u56de\u5230\u4e86\"\u65f6\u5149\u53f7\"\u3002\u4ed6\u5e26\u56de\u7684\u4fe1\u606f\u6539\u53d8\u4e86\u5730\u7403\u8054\u76df\u5bf9\u65f6\u95f4\u65c5\u884c\u7684\u7406\u89e3\uff0c\u4eba\u7c7b\u5f00\u59cb\u8c28\u614e\u5730\u5bf9\u5f85\u65f6\u95f4\uff0c\u907f\u514d\u6f5c\u5728\u7684\u707e\u96be\u3002\n\n---\n\n\u8fd9\u4e2a\u6545\u4e8b\u53ea\u662f\u4e00\u4e2a\u5927\u7eb2\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u9700\u8981\u6dfb\u52a0\u66f4\u591a\u7684\u7ec6\u8282\u3001\u89d2\u8272\u
53d1\u5c55\u548c\u60c5\u8282\u8f6c\u6298\u3002\u5e0c\u671b\u5bf9\u4f60\u6709\u6240\u5e2e\u52a9\uff01\nGenerated Tokens: 437\nTokens per second: 3.273142501998678\nTotal Generation Time: 133.51083850860596 seconds\n\nreal    12m20.918s\nuser    2m58.178s\nsys     1m54.848s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>8.2. 14B \u5f3a\u5236\u4f7f\u75281\u5f20\u5361\u88c5\u8f7d\uff0c4\u4f4d\u91cf\u5316<\/strong><\/h3>\n\n\n\n<p>\u4f7f\u7528 1 \u5f2024G \u88c5\u8f7d\uff0c\u9700\u8981\u4f7f\u7528 BitsAndBytesConfig \u8fdb\u884c4\u4f4d\u91cf\u5316\uff0c\u5927\u7ea6\u9700\u898110GB\u7684GPU\u5185\u5b58<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nquantization_config = BitsAndBytesConfig(load_in_4bit=True)\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             quantization_config=quantization_config,\n                                             device_map=\"cuda:0\")\n\n# 
\u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-14B-4.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8\/8 [07:55&lt;00:00, 59.49s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model 
Time: 479.92521357536316 seconds\n\/home\/tony\/anaconda3\/envs\/Jamba\/lib\/python3.11\/site-packages\/bitsandbytes\/nn\/modules.py:391: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n  warnings.warn('Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.')\n\u6807\u9898\uff1a\u661f\u9645\u7a7f\u8d8a\u7684\u9057\u4ea7\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u5df2\u4e0d\u518d\u662f\u4eba\u7c7b\u7684\u552f\u4e00\u5bb6\u56ed\u3002\u6211\u4eec\u5f00\u62d3\u4e86\u65b0\u7684\u661f\u7cfb\uff0c\u5efa\u7acb\u4e86\u661f\u9645\u8054\u90a6\uff0c\u79d1\u6280\u53d1\u5c55\u5230\u4e86\u524d\u6240\u672a\u6709\u7684\u9ad8\u5ea6\u3002\u7136\u800c\uff0c\u5728\u8fd9\u4e2a\u6545\u4e8b\u4e2d\uff0c\u6211\u4eec\u5c06\u805a\u7126\u5728\u4e00\u4e2a\u540d\u53eb\"\u65b0\u5e0c\u671b\u661f\"\u7684\u661f\u7403\u4e0a\uff0c\u90a3\u91cc\u7684\u5c45\u6c11\u4eec\u6b63\u5728\u5bfb\u627e\u4e00\u9879\u795e\u79d8\u9057\u4ea7\uff0c\u5b83\u53ef\u80fd\u6539\u53d8\u4ed6\u4eec\u6574\u4e2a\u6587\u660e\u7684\u547d\u8fd0\u3002\n\n\u7b2c\u4e00\u7ae0\uff1a\u65b0\u5e0c\u671b\u661f\n\n\u65b0\u5e0c\u671b\u661f\uff0c\u4e00\u9897\u4f4d\u4e8e\u94f6\u6cb3\u8fb9\u7f18\u7684\u5b9c\u5c45\u661f\u7403\uff0c\u5176\u8868\u9762\u8986\u76d6\u7740\u7e41\u8302\u7684\u68ee\u6797\u548c\u795e\u79d8\u7684\u9057\u8ff9\u3002\u8fd9\u91cc\u7684\u5c45\u6c11\uff0c\u8d5b\u5c14\u4eba\uff0c\u662f\u4e00\u4e2a\u62e5\u6709\u5148\u8fdb\u751f\u7269\u79d1\u6280\u7684\u79cd\u65cf\uff0c\u4ed6\u4eec\u7684\u57ce\u5e02\u9690\u85cf\u5728\u5730\u4e0b\uff0c\u4ee5\u907f\u514d\u592a\u9633\u8f90\u5c04\u7684\u4fb5\u6270\u3002\n\n\u7b2c\u4e8c\u7ae0\uff1a\u9057\u4ea7\u7684\u4f20\u8bf4\n\n\u53e4\u8001\u7684\u4f20\u8bf4\u4e2d\uff0c\u8d5b\u5c14\u4eba\u7684\u7956\u5148\u5728\u661f\u9645\u65c5\u884c\u4e2d\u53d1\u73b0\u4e86\u4e00\u9879\u795e\u79d8
\u7684\u9057\u4ea7\uff0c\u88ab\u79f0\u4e3a\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\u3002\u8fd9\u9897\u5fc3\u5f62\u88c5\u7f6e\u636e\u8bf4\u62e5\u6709\u65e0\u5c3d\u7684\u77e5\u8bc6\u548c\u529b\u91cf\uff0c\u80fd\u5f15\u9886\u4eba\u7c7b\u8d70\u5411\u66f4\u9ad8\u7684\u6587\u660e\u9636\u6bb5\u3002\u7136\u800c\uff0c\u6570\u767e\u5e74\u6765\uff0c\u6ca1\u6709\u4efb\u4f55\u4eba\u80fd\u627e\u5230\u5b83\u7684\u8e2a\u8ff9\u3002\n\n\u7b2c\u4e09\u7ae0\uff1a\u5931\u843d\u7684\u7ebf\u7d22\n\n\u4e00\u4f4d\u540d\u53eb\u827e\u4e3d\u4e9a\u7684\u5e74\u8f7b\u8d5b\u5c14\u79d1\u5b66\u5bb6\uff0c\u5bf9\u8fd9\u4e2a\u4f20\u8bf4\u5145\u6ee1\u4e86\u597d\u5947\u3002\u5979\u51b3\u5fc3\u627e\u5230\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\uff0c\u5e0c\u671b\u80fd\u4e3a\u5979\u7684\u661f\u7403\u5e26\u6765\u548c\u5e73\u4e0e\u7e41\u8363\u3002\u5979\u5f00\u59cb\u7814\u7a76\u53e4\u4ee3\u6587\u732e\uff0c\u5bfb\u627e\u53ef\u80fd\u7684\u7ebf\u7d22\u3002\n\n\u7b2c\u56db\u7ae0\uff1a\u5192\u9669\u542f\u7a0b\n\n\u827e\u4e3d\u4e9a\u7ec4\u5efa\u4e86\u4e00\u4e2a\u63a2\u9669\u961f\uff0c\u5305\u62ec\u52c7\u6562\u7684\u98de\u884c\u5458\u745e\u6069\u3001\u667a\u6167\u7684\u5de5\u7a0b\u5e08\u8389\u5a1c\u548c\u4e00\u540d\u795e\u79d8\u7684\u5916\u661f\u76df\u53cb\u5361\u7279\u3002\u4ed6\u4eec\u8e0f\u4e0a\u4e86\u4e00\u573a\u5bfb\u627e\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\u7684\u65c5\u7a0b\uff0c\u7a7f\u8d8a\u672a\u77e5\u7684\u661f\u7cfb\uff0c\u906d\u9047\u4e86\u5404\u79cd\u5371\u9669\u3002\n\n\u7b2c\u4e94\u7ae0\u81f3\u7b2c\u4e5d\u7ae0\uff1a\u661f\u9645\u6311\u6218\n\n\u4ed6\u4eec\u5728\u65c5\u9014\u4e2d\u906d\u9047\u4e86\u5916\u661f\u751f\u7269\u7684\u88ad\u51fb\uff0c\u7ecf\u5386\u4e86\u9ed1\u6d1e\u8fb9\u7f18\u7684\u9669\u5883\uff0c\u751a\u81f3\u5728\u4e00\u6b21\u610f\u5916\u4e2d\u53d1\u73b0\u4e86\u5173\u4e8e\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\u7684\u66f4\u6df1\u5c42\u4fe1\u606f\u3002\u6bcf\u4e00\u6b21\u6311\u6218\u90fd\u8ba9\u4ed6\u4eec\u7684\u53cb\u60c5\u66f4\u52a0\u6df1\u539a\uff0c\u4e5f\u4f7f\u4ed6\u4eec\u66f4\u63a5\u8fd1\u76ee\u
6807\u3002\n\n\u7b2c\u5341\u7ae0\uff1a\u8c1c\u56e2\u63ed\u6653\n\n\u7ecf\u8fc7\u4e00\u7cfb\u5217\u8270\u96be\u7684\u63a2\u7d22\uff0c\u4ed6\u4eec\u7ec8\u4e8e\u5728\u4e00\u4e2a\u53e4\u8001\u7684\u9057\u8ff9\u4e2d\u627e\u5230\u4e86\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\u3002\u539f\u6765\uff0c\u8fd9\u5e76\u975e\u4e00\u4ef6\u6b66\u5668\u6216\u529b\u91cf\u6e90\u6cc9\uff0c\u800c\u662f\u4e00\u5957\u6559\u5bfc\u667a\u6167\u548c\u9053\u5fb7\u4f26\u7406\u7684\u8bfe\u7a0b\u3002\u827e\u4e3d\u4e9a\u610f\u8bc6\u5230\uff0c\u771f\u6b63\u7684\u9057\u4ea7\u5e76\u975e\u7269\u8d28\uff0c\u800c\u662f\u7406\u89e3\u548c\u5c0a\u91cd\u5b87\u5b99\u4e07\u7269\u7684\u77e5\u8bc6\u3002\n\n\u7b2c\u5341\u4e00\u7ae0\uff1a\u65b0\u5e0c\u671b\n\n\u4ed6\u4eec\u5e26\u7740\u8fd9\u4e2a\u65b0\u53d1\u73b0\u56de\u5230\u65b0\u5e0c\u671b\u661f\uff0c\u5206\u4eab\u4e86\u4ed6\u4eec\u7684\u53d1\u73b0\u3002\u4eba\u4eec\u5f00\u59cb\u7406\u89e3\u5230\uff0c\u771f\u6b63\u7684\u8fdb\u6b65\u5e76\u975e\u6765\u81ea\u4e8e\u529b\u91cf\uff0c\u800c\u662f\u6765\u81ea\u5185\u5fc3\u7684\u548c\u8c10\u4e0e\u7231\u3002\u8d5b\u5c14\u4eba\u7684\u793e\u4f1a\u5f00\u59cb\u53d1\u751f\u6df1\u523b\u7684\u53d8\u5316\uff0c\u4ed6\u4eec\u5b66\u4f1a\u4e86\u5171\u5904\uff0c\u5c0a\u91cd\u5dee\u5f02\uff0c\u548c\u5e73\u5171\u751f\u3002\n\n\u7b2c\u5341\u4e8c\u7ae0\uff1a\u5c3e\u58f0\n\n\u827e\u4e3d\u4e9a\u6210\u4e3a\u4e86\u4e00\u4e2a\u4f20\u5947\uff0c\u5979\u7684\u6545\u4e8b\u6fc0\u52b1\u4e86\u65b0\u4e00\u4ee3\u7684\u8d5b\u5c14\u4eba\u3002\u4ed6\u4eec\u4e0d\u518d\u8ffd\u6c42\u7269\u8d28\u7684\u529b\u91cf\uff0c\u800c\u662f\u8ffd\u6c42\u5fc3\u7075\u7684\u6210\u957f\u548c\u5b87\u5b99\u7684\u7406\u89e3\u3002\u65b0\u5e0c\u671b\u661f\u5728\u201c\u5149\u8c31\u4e4b\u5fc3\u201d\u7684\u5f71\u54cd\u4e0b\uff0c\u6210\u4e3a\u4e86\u94f6\u6cb3\u7cfb\u7684\u4e00\u9897\u7480\u74a8\u660e\u73e0\uff0c\u8c61\u5f81\u7740\u548c\u5e73\u4e0e\u667a\u6167\u7684\u5149\u8292\u3002\n\n\u6545\u4e8b\u7ed3\u675f\uff0c\u4f46\u8d5b\u5c14\u4eba\u7684\u65c5\u7a0b\u5e76\u672a\u505c\u6b62\uff0c
\u4ed6\u4eec\u7ee7\u7eed\u63a2\u7d22\u5b87\u5b99\uff0c\u5bfb\u627e\u66f4\u591a\u7684\u77e5\u8bc6\u548c\u667a\u6167\uff0c\u4ed6\u4eec\u7684\u6545\u4e8b\u5728\u661f\u9645\u95f4\u6d41\u4f20\uff0c\u6210\u4e3a\u4e86\u4e00\u4e2a\u6c38\u6052\u7684\u4f20\u8bf4\u3002\nGenerated Tokens: 596\nTokens per second: 13.52436708752385\nTotal Generation Time: 44.06860566139221 seconds\n\nreal    8m46.833s\nuser    1m34.053s\nsys     1m49.504s<\/pre><\/div>\n\n\n\n<p>\u524d\u9762\u6709\u8b66\u544a\uff0c\u6211\u4eec\u53bb\u9664\u8b66\u544a\uff0c\u4fee\u6539\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float32 if NUM_GPUS &gt; 0 else torch.float\n\nquantization_config = BitsAndBytesConfig(load_in_4bit=True)\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             quantization_config=quantization_config,\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = 
AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-14B-4.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8\/8 [08:18&lt;00:00, 62.30s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 501.56107807159424 
seconds\n\u6807\u9898\uff1a\u661f\u9645\u4e4b\u8c1c\uff1a\u5149\u5e74\u4e4b\u5916\u7684\u9057\u4ea7\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u5730\u7403\u5df2\u7ecf\u6210\u4e3a\u4e86\u4eba\u7c7b\u6587\u660e\u7684\u6447\u7bee\u3002\u79d1\u6280\u7684\u53d1\u5c55\u4f7f\u5f97\u4eba\u7c7b\u63a2\u7d22\u5b87\u5b99\u7684\u6b65\u4f10\u65e5\u76ca\u52a0\u5feb\uff0c\u661f\u9645\u65c5\u884c\u53d8\u5f97\u5bfb\u5e38\u3002\u6211\u4eec\u7684\u4e3b\u4eba\u516c\uff0c\u5e74\u8f7b\u7684\u5929\u6587\u5b66\u5bb6\u827e\u7c73\u4e3d\u00b7\u54c8\u7279\uff0c\u662f\u4e00\u4e2a\u5bf9\u672a\u77e5\u5145\u6ee1\u597d\u5947\u7684\u63a2\u9669\u8005\uff0c\u5979\u88ab\u9009\u4e2d \u52a0\u5165\u4e86\u4e00\u9879\u540d\u4e3a\u201c\u5149\u5e74\u8ba1\u5212\u201d\u7684\u592a\u7a7a\u63a2\u7d22\u4efb\u52a1\u3002\n\n\u6545\u4e8b\u5f00\u59cb\uff1a\n\n\u7b2c\u4e00\u7ae0\uff1a\u542f\u7a0b\u7684\u53f7\u89d2\n\n\u827e\u7c73\u4e3d\u7ad9\u5728\u5de8\u5927\u7684\u661f\u9645\u98de\u8239\u201c\u5e0c\u671b\u53f7\u201d\u4e0a\uff0c\u671b\u7740\u90a3\u9897\u84dd\u8272\u661f\u7403\u6e10\u884c\u6e10\u8fdc\u3002\u5979\u7684\u4efb\u52a1\u662f\u524d\u5f80\u6700\u8fd1\u53d1\u73b0\u7684\u4e00\u9897\u540d\u4e3a\u201c\u4f0a\u7538\u661f\u201d\u7684\u795e\u79d8\u661f\u7403\uff0c\u5bfb\u627e\u53ef\u80fd\u5b58\u5728\u7684\u5916\u661f\u6587\u660e\u9057\u8ff9\u3002\u8239\u5458\u4eec\u7d27\u5f20\u800c\u5174\u594b\uff0c\u6bcf\u4e2a\u4eba\u90fd\u660e\u767d\u8fd9\u662f\u4e00\u6b21\u6539\u53d8\u5386\u53f2\u7684\u5192\u9669\u3002\n\n\u7b2c\u4e8c\u7ae0\uff1a\u672a\u77e5\u7684\u4f0a\u7538\u661f\n\n\u7ecf\u8fc7\u6570\u6708\u7684\u65c5\u884c\uff0c\u5e0c\u671b\u53f7\u62b5\u8fbe\u4e86\u4f0a\u7538\u661f\u3002\u827e\u7c73\u4e3d\u5e26\u9886\u56e2\u961f\u7740\u9646\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u4e00\u4e2a\u88ab\u53e4\u8001\u80fd\u91cf\u62a4\u76fe\u4fdd\u62a4\u7684\u9057\u8ff9\uff0c\u8fd9\u8ba9\u5979\u60f3\u8d77\u4f20\u8bf4\u4e2d\u7684\u201c\u661f\u9645\u9057\u4ea7\u201d\u3002\n\n\u7b2c\u4e09\u7ae0\uff1a\u9057\u4ea7\u7684\u79d8\u5bc6\n\n\u827e\u
7c73\u4e3d\u548c\u56e2\u961f\u6df1\u5165\u9057\u8ff9\uff0c\u53d1\u73b0\u4e86\u4e00\u7cfb\u5217\u9ad8\u79d1\u6280\u8bbe\u5907\uff0c\u5176\u4e2d\u4e00\u53f0\u795e\u79d8\u7684\u201c\u65f6\u95f4\u5171\u9e23\u5668\u201d\u5f15\u8d77\u4e86\u4ed6\u4eec\u7684\u6ce8\u610f\u3002\u4ed6\u4eec\u610f\u8bc6\u5230\uff0c\u8fd9\u4e2a\u9057\u4ea7\u53ef\u80fd\u4e0e\u65f6\u95f4\u65c5\u884c\u6709\u5173\u3002\n\n\u7b2c\u56db\u7ae0\uff1a\u65f6\u95f4\u7684\u8bd5\u70bc\n\n\u827e\u7c73\u4e3d\u51b3\u5b9a\u6fc0\u6d3b\u65f6\u95f4\u5171\u9e23\u5668\uff0c\u8bd5\u56fe\u7a7f\u8d8a\u65f6\u95f4\u4ee5\u4e86\u89e3\u66f4\u591a\u5173\u4e8e\u4f0a\u7538\u661f\u6587\u660e\u7684\u4fe1\u606f\u3002\u7136\u800c\uff0c\u6bcf\u4e00\u6b21\u5c1d\u8bd5\u90fd\u5f15\u53d1\u4e86\u4e00\u8fde\u4e32\u7684\u65f6\u7a7a\u6df7\u4e71\uff0c\u4ed6\u4eec\u9677\u5165\u4e86\u65e0\u6cd5\u9884\u77e5\u7684\u5371\u9669\u3002\n\n\u7b2c\u4e94\u7ae0\uff1a\u65f6\u7a7a\u8ff7\u5bab\n\n\u5728\u6df7\u4e71\u4e2d\uff0c\u827e\u7c73\u4e3d\u610f\u5916\u5730\u56de\u5230\u4e86\u8fc7\u53bb\uff0c\u9047\u89c1\u4e86\u5e74\u8f7b\u7684\u81ea\u5df1\u3002\u5979\u4eec\u8054\u624b\u89e3\u5f00\u4e86\u4e00\u9053\u9053\u8c1c\u9898\uff0c\u63ed\u793a\u4e86\u4f0a\u7538\u661f\u6587\u660e\u7684\u771f\u76f8\u2014\u2014\u4ed6\u4eec\u66fe\u8bd5\u56fe\u963b\u6b62\u4e00\u573a\u6bc1\u706d\u6027\u7684\u707e\u96be\uff0c\u4f46\u5931\u8d25\u4e86\u3002\n\n\u7b2c\u516d\u7ae0\uff1a\u4f7f\u547d\u4e0e\u6289\u62e9\n\n\u827e\u7c73\u4e3d\u610f\u8bc6\u5230\uff0c\u53ea\u6709\u963b\u6b62\u8fd9\u573a\u707e\u96be\uff0c\u624d\u80fd\u4fee\u590d\u65f6\u7a7a\uff0c\u56de\u5230\u73b0\u5b9e\u3002\u5979\u51b3\u5b9a\u727a\u7272\u81ea\u5df1\uff0c\u5c06\u5e74\u8f7b\u7684\u81ea\u5df1\u9001\u56de\u8fc7\u53bb\uff0c\u62ef\u6551\u4f0a\u7538\u661f\u3002\n\n\u7b2c\u4e03\u7ae0\uff1a\u5149\u5e74\u7684\u51b3\u65ad\n\n\u827e\u7c73\u4e3d\u5728\u6700\u540e\u5173\u5934\uff0c\u542f\u52a8\u4e86\u65f6\u95f4\u5171\u9e23\u5668\uff0c\u5c06\u81ea\u5df1\u7684\u610f\u8bc6\u4e0e\u8fc7\u53bb\u7684\u81ea\u5df1\u878d\u5408\u3002\u
4e00\u9053\u8000\u773c\u7684\u5149\u8292\u95ea\u8fc7\uff0c\u5979\u6d88\u5931\u5728\u4e86\u65f6\u95f4\u7684\u957f\u6cb3\u4e2d\u3002\n\n\u5c3e\u58f0\uff1a\u6c38\u6052\u7684\u9057\u4ea7\n\n\u5f53\u827e\u7c73\u4e3d\u91cd\u65b0\u7741\u5f00\u773c\u775b\uff0c\u53d1\u73b0\u81ea\u5df1\u56de\u5230\u4e86\u5e0c\u671b\u53f7\u4e0a\uff0c\u4f0a\u7538\u661f\u7684\u707e\u96be\u5df2\u88ab\u963b\u6b62\u3002\u5979\u7684\u727a\u7272\u62ef\u6551\u4e86\u4e24\u4e2a\u4e16\u754c\uff0c\u5979\u7684\u540d\u5b57\u88ab\u954c\u523b\u5728\u661f\u9645\u5386\u53f2\u7684\u7bc7\u7ae0\u4e2d\u3002\u867d\u7136\u5979\u5931\u53bb\u4e86\u8089\u8eab\uff0c\u4f46\u5979\u7684\u7cbe\u795e\u548c\u9057\u4ea7\u6c38\u8fdc\u7559\u5728\u4e86\u5149\u5e74\u4e4b\u5916\u7684\u661f\u6d77\u3002\n\n\u827e\u7c73\u4e3d\u7684\u6545\u4e8b\u544a\u8bc9\u6211\u4eec\uff0c\u52c7\u6562\u9762\u5bf9\u672a\u77e5\uff0c\u575a\u5b88\u6b63\u4e49\uff0c\u5373\u4f7f\u4ed8\u51fa\u751f\u547d\u7684\u4ee3\u4ef7\uff0c\u4e5f\u80fd\u521b\u9020\u5947\u8ff9\u3002\u5979\u7528\u751f\u547d\u7f16\u7ec7\u51fa\u7684\u4f20\u5947\uff0c\u5c06\u6fc0\u52b1\u540e\u4e16\u7684\u63a2\u9669\u8005\u7ee7\u7eed\u63a2\u7d22\u5b87\u5b99\u7684\u5965\u79d8\u3002\nGenerated Tokens: 557\nTokens per second: 15.294492646068159\nTotal Generation Time: 36.41833782196045 seconds\n\nreal    9m0.806s\nuser    1m31.212s\nsys     1m57.739s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>8.3. 
\u4fdd\u5b58 14B 4\u4f4d\u91cf\u5316<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float32 if NUM_GPUS &gt; 0 else torch.float\n\nquantization_config = BitsAndBytesConfig(load_in_4bit=True)\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             quantization_config=quantization_config,\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = 
tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\nprint(f\"config.save_pretrained\")\nmodel.config.save_pretrained('Qwen\/Qwen1.5-14B-Chat-Int4')\nprint(f\"model.save_pretrained\")\nmodel.save_pretrained('Qwen\/Qwen1.5-14B-Chat-Int4')\nprint(f\"tokenizer.save_pretrained\")\ntokenizer.save_pretrained('Qwen\/Qwen1.5-14B-Chat-Int4')\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-14B-5.py\nNUM_GPUS: 8\nLoading checkpoint shards:  12%|\u2588\u2588\u2588\u258a                          | 1\/8 [01:09&lt;08:07, 69.59s\/it]\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 8\/8 [08:21&lt;00:00, 62.67s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 504.4282364845276 
seconds\n\u6807\u9898\uff1a\u661f\u9645\u8ff7\u822a\uff1a\u65b0\u79e9\u5e8f\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u5df2\u7ecf\u8d85\u8d8a\u4e86\u5730\u7403\u7684\u675f\u7f1a\uff0c\u5efa\u7acb\u4e86\u5e9e\u5927\u7684\u661f\u9645\u8054\u90a6\u3002\u6545\u4e8b\u7684\u4e3b\u4eba\u516c\uff0c\u827e\u767b\u00b7\u96f7\u8bfa\u5179\uff0c\u662f \u4e00\u4f4d\u5e74\u8f7b\u7684\u661f\u9645\u98de\u884c\u5458\uff0c\u4ed6\u7684\u4ee3\u53f7\u201c\u5e7d\u7075\u201d\uff0c\u662f\u8054\u90a6\u8230\u961f\u4e2d\u6700\u51fa\u8272\u7684\u4fa6\u5bdf\u5458\u3002\n\n\u7b2c\u4e00\u7ae0\uff1a\u542f\u7a0b\n\n\u5728\u661f\u7cfb\u8fb9\u7f18\u7684\u970d\u5c14\u987f\u57fa\u5730\uff0c\u827e\u767b\u6b63\u51c6\u5907\u6267\u884c\u4e00\u9879\u79d8\u5bc6\u4efb\u52a1\u3002\u4ed6\u7ad9\u5728\u98de\u8239\u201c\u5e7d\u7075\u4e4b\u7ffc\u201d\u524d\uff0c\u51dd\u89c6\u7740\u661f\u7a7a\uff0c\u5fc3\u4e2d\u5145\u6ee1\u4e86\u672a\u77e5\u7684\u7d27\u5f20\u4e0e\u671f\u5f85\u3002\u4ed6\u7684\u4efb\u52a1\u662f\u6f5c\u5165\u795e\u79d8\u7684Zephyr\u661f\u57df\uff0c\u5bfb\u627e\u4f20\u8bf4\u4e2d\u7684\u201c\u65b0\u79e9\u5e8f\u201d\u80fd\u6e90\u77ff\u77f3\u3002\n\n\u7b2c\u4e8c\u7ae0\uff1a\u65b0\u79e9\u5e8f\n\nZephyr\u661f\u57df\uff0c\u4e00\u4e2a\u88ab\u9057\u5fd8\u7684\u533a\u57df\uff0c\u9690\u85cf\u7740\u4e00\u79cd\u540d\u4e3a\u201c\u65b0\u79e9\u5e8f\u201d\u7684\u80fd\u6e90\uff0c\u636e\u8bf4\u80fd\u4e3a\u4eba\u7c7b\u79d1\u6280\u5e26\u6765\u9769\u547d\u6027\u7684\u7a81\u7834\u3002\u827e\u767b\u6df1\u5165\u661f\u57df\uff0c\u906d\u9047\u4e86\u4ece\u672a\u89c1\u8fc7\u7684\u5f02\u5f62\u751f\u7269\u548c\u9669\u6076\u73af\u5883\uff0c\u4f46\u4ed6\u51ed\u501f\u575a\u97e7\u7684\u51b3\u5fc3\u548c\u5353\u8d8a\u7684\u9a7e\u9a76\u6280\u672f\uff0c\u4e00\u4e00\u514b\u670d\u3002\n\n\u7b2c\u4e09\u7ae0\uff1a\u80cc\u53db\n\n\u5728\u4e00\u6b21\u906d\u9047\u6218\u4e2d\uff0c\u827e\u767b\u53d1\u73b0\u4e86\u4e00\u4e2a\u60ca\u4eba\u7684\u4e8b\u5b9e\u2014\u2014\u4ed6\u7684\u6307\u6325\u5b98\uff0c\u6770\u68ee\u00b7\u54c8\u7279\uff0c
\u7adf\u662f\u65b0\u79e9\u5e8f\u80fd\u6e90\u7684\u5e55\u540e\u9ed1\u624b\u3002 \u6770\u68ee\u4f01\u56fe\u5229\u7528\u8fd9\u79cd\u80fd\u6e90\u63a7\u5236\u661f\u9645\u8054\u90a6\uff0c\u827e\u767b\u51b3\u5b9a\u72ec\u81ea\u963b\u6b62\u4ed6\u3002\n\n\u7b2c\u56db\u7ae0\uff1a\u5b64\u72ec\u7684\u6218\u6597\n\n\u827e\u767b\u4e0e\u6770\u68ee\u5c55\u5f00\u4e86\u6fc0\u70c8\u7684\u8ffd\u9010\u6218\uff0c\u4ed6\u4e00\u8fb9\u8eb2\u907f\u6770\u68ee\u7684\u8ffd\u6355\uff0c\u4e00\u8fb9\u5411\u8054\u90a6\u53d1\u51fa\u8b66\u544a\u3002\u4ed6\u5728\u661f\u7a7a\u4e2d\u7a7f\u68ad\uff0c\u6bcf\u4e00\u6b21\u9003\u8131\u90fd\u662f\u4e00\u573a\u751f\u6b7b\u8003\u9a8c\u3002\n\n\u7b2c\u4e94\u7ae0\uff1a\u6700\u7ec8\u51b3\u6218\n\n\u5728\u65b0\u79e9\u5e8f\u80fd\u6e90\u7684\u6838\u5fc3\u533a\u57df\uff0c\u827e\u767b\u4e0e\u6770\u68ee\u8fdb\u884c\u4e86\u4e00\u573a\u51b3\u5b9a\u4eba\u7c7b\u547d\u8fd0\u7684\u51b3\u6597\u3002\u827e\u767b\u7528\u667a\u6167\u548c\u52c7\u6c14\u632b\u8d25\u4e86\u6770\u68ee\uff0c\u6210\u529f\u963b\u6b62\u4e86\u4ed6\u5229\u7528\u80fd\u6e90\u7684\u8ba1\u5212\u3002\n\n\u7b2c\u516d\u7ae0\uff1a\u65b0\u751f\n\n\u8054\u90a6\u8230\u961f\u95fb\u8baf\u800c\u6765\uff0c\u4ed6\u4eec\u902e\u6355\u4e86\u6770\u68ee\uff0c\u5e76\u4fee\u590d\u4e86\u88ab\u7834\u574f\u7684\u80fd\u6e90\u8bbe\u65bd\u3002\u827e\u767b\u56e0\u529f\u52cb\u5353\u8457\uff0c\u88ab\u6388\u4e88\u201c\u661f\u9645\u82f1\u96c4\u201d\u52cb\u7ae0\u3002\u4ed6\u56de\u5230\u5730\u7403\uff0c\u6210\u4e3a\u65b0\u4e00\u4ee3\u5e74\u8f7b\u4eba\u7684\u699c\u6837\u3002\n\n\u5c3e\u58f0\uff1a\u672a\u6765\u7684\u5e0c\u671b\n\n\u827e\u767b\u7684\u6545\u4e8b\u6fc0\u52b1\u7740\u4eba\u4eec\uff0c\u4eba\u7c7b\u7ee7\u7eed\u63a2\u7d22\u5b87\u5b99\uff0c\u8ffd\u6c42\u548c\u5e73\u4e0e\u8fdb\u6b65\u3002\u800c\u65b0\u79e9\u5e8f\u80fd\u6e90\uff0c\u867d\u7136\u4e00\u5ea6\u88ab\u9ed1\u6697\u6240\u5229\u7528\uff0c\u4f46\u6700\u7ec8\u8fd8\u662f\u6210\u4e3a\u4e86\u63a8\u52a8\u79d1\u6280\u53d1\u5c55\u7684\u5f3a\u5927\u52a8\u529b\u3002\u827e\u767b\u00b7\u96f7\u8bf
a\u5179\u7684\u540d\u5b57\uff0c\u5c06\u6c38\u8fdc\u954c\u523b\u5728\u661f\u9645\u5386\u53f2\u7684\u7bc7\u7ae0\u4e2d\u3002\n\n\u8fd9\u90e8\u4e00\u4e07\u5b57\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\uff0c\u4ee5\u827e\u767b\u00b7\u96f7\u8bfa\u5179\u7684\u5192\u9669\u65c5\u7a0b\u4e3a\u4e3b\u7ebf\uff0c\u5c55\u73b0\u4e86\u4eba\u7c7b\u9762\u5bf9\u672a\u77e5\u6311\u6218\u65f6\u7684\u52c7\u6c14\u4e0e\u667a \u6167\uff0c\u540c\u65f6\u4e5f\u63a2\u8ba8\u4e86\u79d1\u6280\u4e0e\u4eba\u6027\u7684\u590d\u6742\u5173\u7cfb\u3002\nGenerated Tokens: 507\nTokens per second: 17.584081014282045\nTotal Generation Time: 28.832897186279297 seconds\nconfig.save_pretrained\nmodel.save_pretrained\ntokenizer.save_pretrained\n\nreal    10m23.330s\nuser    1m30.706s\nsys     2m1.814s\nls -l Qwen\/Qwen1.5-14B-Chat\ntotal 27681848\n-rwxrwxrwx 1 tony tony       6896 Mar 30 11:49 LICENSE\n-rwxrwxrwx 1 tony tony       4346 Mar 30 11:49 README.md\n-rwxrwxrwx 1 tony tony        663 Mar 30 11:49 config.json\n-rwxrwxrwx 1 tony tony        243 Mar 30 11:49 generation_config.json\n-rwxrwxrwx 1 tony tony       1519 Mar 30 11:49 gitattributes\n-rwxrwxrwx 1 tony tony    1671839 Mar 30 11:49 merges.txt\n-rwxrwxrwx 1 tony tony 3938903200 Mar 30 11:59 model-00001-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3975760472 Mar 30 11:59 model-00002-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3940360872 Mar 30 12:00 model-00003-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3923300576 Mar 30 12:00 model-00004-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3923300568 Mar 30 12:00 model-00005-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3975760552 Mar 30 12:00 model-00006-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 3100115576 Mar 30 11:59 model-00007-of-00008.safetensors\n-rwxrwxrwx 1 tony tony 1557135488 Mar 30 11:55 model-00008-of-00008.safetensors\n-rwxrwxrwx 1 tony tony      39584 Mar 30 11:50 model.safetensors.index.json\n-rwxrwxrwx 1 tony tony    7028015 Mar 30 11:50 tokenizer.json\n-rwxrwxrwx 1 tony tony       1402 
Mar 30 11:50 tokenizer_config.json\n-rwxrwxrwx 1 tony tony    2776833 Mar 30 11:50 vocab.json\nls -l Qwen\/Qwen1.5-14B-Chat-Int4\/\ntotal 13024520\n-rwxrwxrwx 1 tony tony         80 Mar 30 22:03 added_tokens.json\n-rwxrwxrwx 1 tony tony       1187 Mar 30 22:02 config.json\n-rwxrwxrwx 1 tony tony        243 Mar 30 22:02 generation_config.json\n-rwxrwxrwx 1 tony tony    1671853 Mar 30 22:03 merges.txt\n-rwxrwxrwx 1 tony tony 4986994946 Mar 30 22:02 model-00001-of-00003.safetensors\n-rwxrwxrwx 1 tony tony 4967849904 Mar 30 22:03 model-00002-of-00003.safetensors\n-rwxrwxrwx 1 tony tony 3370646086 Mar 30 22:03 model-00003-of-00003.safetensors\n-rwxrwxrwx 1 tony tony     120934 Mar 30 22:03 model.safetensors.index.json\n-rwxrwxrwx 1 tony tony        367 Mar 30 22:03 special_tokens_map.json\n-rwxrwxrwx 1 tony tony    7028015 Mar 30 22:03 tokenizer.json\n-rwxrwxrwx 1 tony tony       1414 Mar 30 22:03 tokenizer_config.json\n-rwxrwxrwx 1 tony tony    2776833 Mar 30 22:03 vocab.json\n<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>8.4. 
\u52a0\u8f7d 14B 4\u4f4d\u91cf\u5316<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-14B-Chat-Int4\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             device_map=\"cuda:0\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = 
num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-14B-6.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 3\/3 [06:06&lt;00:00, 122.18s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 369.8666627407074 seconds\n\/home\/tony\/anaconda3\/envs\/Jamba\/lib\/python3.11\/site-packages\/bitsandbytes\/nn\/modules.py:391: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n  warnings.warn('Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). 
This will lead to slow inference or training speed.')\n\u6807\u9898\uff1a\u661f\u9645\u8ff7\u822a\uff1a\u65b0\u79e9\u5e8f\n\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u5df2\u7ecf\u638c\u63e1\u4e86\u661f\u9645\u65c5\u884c\u7684\u6280\u672f\uff0c\u5efa\u7acb\u4e86\u5e9e\u5927\u7684\u661f\u9645\u8054\u90a6\u3002\u6545\u4e8b\u7684\u4e3b\u4eba\u516c\uff0c\u827e\u745e\u514b\u00b7\u54c8\u7279 \uff0c\u662f\u4e00\u4f4d\u5e74\u8f7b\u7684\u661f\u9645\u63a2\u7d22\u8005\uff0c\u4ed6\u7684\u68a6\u60f3\u662f\u6210\u4e3a\u8054\u90a6\u7684\u65d7\u8230\u2014\u2014\u201c\u65b0\u79e9\u5e8f\u53f7\u201d\u4e0a\u7684\u8230\u957f\u3002\n\n\u7b2c\u4e00\u7ae0\uff1a\u542f\u822a\n\n\u827e\u745e\u514b\u7ad9\u5728\u65b0\u79e9\u5e8f\u53f7\u7684\u8230\u6865\u4e0a\uff0c\u671b\u7740\u90a3\u9897\u84dd\u8272\u7684\u5730\u7403\uff0c\u5fc3\u4e2d\u6ee1\u662f\u5bf9\u5bb6\u4e61\u7684\u7737\u604b\u3002\u7136\u800c\uff0c\u4ed6\u7684\u4f7f\u547d\u662f\u5bfb\u627e\u53ef\u80fd\u5b58\u5728\u7684\u5916\u661f\u751f\u547d\u8ff9\u8c61\uff0c\u8fd9\u662f\u4ed6\u4f5c\u4e3a\u63a2\u7d22\u8005\u7684\u4f7f\u547d\uff0c\u4e5f\u662f\u4ed6\u7684\u8363\u8a89\u3002\u968f\u7740\u6307\u6325\u5b98\u7684\u4e00\u58f0\u4ee4\u4e0b\uff0c\u98de\u8239\u72b9\u5982\u4e00\u9053\u6d41\u661f\u5212\u7834\u591c\u7a7a\uff0c\u79bb\u5f00\u4e86\u5730\u7403\u3002\n\n\u7b2c\u4e8c\u7ae0\uff1a\u672a\u77e5\u7684\u661f\u7cfb\n\n\u7a7f\u8d8a\u4e86\u6570\u5149\u5e74\u7684\u8ddd\u79bb\uff0c\u4ed6\u4eec\u6765\u5230\u4e86\u4e00\u4e2a\u5168\u65b0\u7684\u661f\u7cfb\u3002\u827e\u745e\u514b\u548c\u4ed6\u7684\u56e2\u961f\u53d1\u73b0\u4e86\u4e00\u9897\u540d\u4e3aZephyr\u7684\u884c\u661f\uff0c\u5176\u8868\u9762\u7684\u751f\u6001\u7cfb\u7edf\u4e0e\u5730\u7403\u622a\u7136\u4e0d\u540c\uff0c\u5145\u6ee1\u4e86\u672a\u77e5\u7684\u751f\u7269\u548c\u5947\u5f02\u7684\u690d\u7269\u3002\u4ed6\u4eec\u5f00\u59cb\u4e86\u6df1\u5165\u63a2\u7d22\uff0c\u671f\u5f85\u627e\u5230\u751f\u547d\u7684\u75d5\u8ff9\u3002\n\n\u7b2c\u4e09\u7ae0\uff1a\u610f\u5916\u7684\u53d1\u73b0\n\n\u
5728Zephyr\u884c\u661f\u7684\u6df1\u5904\uff0c\u4ed6\u4eec\u53d1\u73b0\u4e86\u4e00\u4e2a\u53e4\u8001\u7684\u9057\u8ff9\uff0c\u4e00\u79cd\u9ad8\u5ea6\u53d1\u8fbe\u7684\u6587\u660e\u66fe\u5728\u8fd9\u91cc\u5b58\u5728\u3002\u7136\u800c\uff0c\u8fd9\u4e2a\u6587\u660e\u4f3c\u4e4e\u5df2\u7ecf\u6d88\u5931\uff0c\u53ea\u7559\u4e0b\u4e86\u4e00\u79cd\u795e\u79d8\u7684\u80fd\u91cf\u6676\u4f53\uff0c\u8fd9\u53ef\u80fd\u662f\u4ed6\u4eec\u751f\u5b58\u7684\u5173\u952e\u3002\n\n\u7b2c\u56db\u7ae0\uff1a\u5371\u673a\u6765\u4e34\n\n\u5728\u7814\u7a76\u80fd\u91cf\u6676\u4f53\u7684\u8fc7\u7a0b\u4e2d\uff0c\u4e00\u8258\u654c\u5bf9\u52bf\u529b\u7684\u98de\u8239\u7a81\u7136\u51fa\u73b0\uff0c\u4ed6\u4eec\u610f\u56fe\u593a\u53d6\u80fd\u91cf\u6676\u4f53\u3002\u4e00\u573a\u6fc0\u70c8\u7684\u6218\u6597\u5728\u661f\u7a7a\u4e2d\u5c55\u5f00\uff0c\u65b0\u79e9\u5e8f\u53f7\u51ed\u501f\u5176\u5148\u8fdb\u7684\u79d1\u6280\u52c9\u5f3a\u62b5\u6321\u4f4f\u4e86\u653b\u51fb\u3002\n\n\u7b2c\u4e94\u7ae0\uff1a\u5fe0\u8bda\u4e0e\u80cc\u53db\n\n\u5728\u6218\u6597\u4e2d\uff0c\u827e\u745e\u514b\u7684\u597d\u53cb\u3001\u526f\u8230\u957f\u4e3d\u838e\u56e0\u4e3a\u88ab\u654c\u65b9\u4fd8\u864f\uff0c\u88ab\u8feb\u52a0\u5165\u4e86\u654c\u4eba\u3002\u5979\u5411\u827e\u745e\u514b\u900f\u9732\u4e86\u4e00\u4e2a\u60ca\u4eba\u7684\u79d8\u5bc6\uff1a\u8fd9\u4e2a\u80fd\u91cf\u6676\u4f53\u53ef\u80fd\u5f15\u53d1\u5b87\u5b99\u7684\u6bc1\u706d\uff0c\u5979\u7684\u76ee\u6807\u53ea\u662f\u963b\u6b62\u5b83\u843d\u5165\u9519\u8bef\u7684\u624b\u4e2d\u3002\n\n\u7b2c\u516d\u7ae0\uff1a\u51b3\u5b9a\u547d\u8fd0\u7684\u9009\u62e9\n\n\u827e\u745e\u514b\u9762\u4e34\u7740\u8270\u96be\u7684\u6289\u62e9\uff1a\u4fdd\u62a4\u4eba\u7c7b\uff0c\u8fd8\u662f\u4fdd\u62a4\u5b87\u5b99\u7684\u5b89\u5168\uff1f\u4ed6\u51b3\u5b9a\u72ec\u81ea\u9762\u5bf9\u5371\u9669\uff0c\u53bb\u6467\u6bc1\u80fd\u91cf\u6676\u4f53\uff0c\u540c\u65f6\u8bbe\u6cd5\u8425\u6551\u4e3d\u838e\u3002\n\n\u7b2c\u4e03\u7ae0\uff1a\u6700\u540e\u7684\u51b3\u6218\n\n\u827e\u745e\u514b\u6210\u529f\u6467\u
6bc1\u4e86\u80fd\u91cf\u6676\u4f53\uff0c\u4f46\u4e5f\u727a\u7272\u4e86\u81ea\u5df1\u3002\u4ed6\u7684\u52c7\u6562\u884c\u4e3a\u611f\u52a8\u4e86\u4e3d\u838e\uff0c\u5979\u51b3\u5b9a\u80cc\u53db\u654c\u5bf9\u52bf\u529b\uff0c\u56de\u5230\u65b0\u79e9\u5e8f\u53f7\u3002\u4ed6\u4eec\u5e26\u7740\u4e3d\u838e\u7684\u727a\u7272\u548c\u827e\u745e\u514b\u7684\u9057\u5fd7\uff0c\u8fd4\u56de\u5730\u7403\uff0c\u5c06\u771f\u76f8\u544a\u8bc9\u4e86\u8054\u90a6\u3002\n\n\u5c3e\u58f0\uff1a\u65b0\u7684\u5f00\u59cb\n\n\u867d\u7136\u827e\u745e\u514b\u6ca1\u6709\u56de\u6765\uff0c\u4f46\u4ed6\u7684\u7cbe\u795e\u6fc0\u52b1\u4e86\u65b0\u4e00\u4ee3\u7684\u63a2\u7d22\u8005\u3002\u65b0\u79e9\u5e8f\u53f7\u7ee7\u7eed\u5728\u661f\u6d77\u4e2d\u822a\u884c\uff0c\u5bfb\u627e\u66f4\u591a\u7684\u77e5\u8bc6\u548c\u667a\u6167\u3002\u4e3d\u838e\u6210\u4e3a\u4e86\u65b0\u4e00\u4efb\u8230\u957f\uff0c\u5e26\u9886\u7740\u4eba\u4eec\u8d70\u5411\u672a\u77e5\uff0c\u94ed\u8bb0\u7740\u827e\u745e\u514b\u7684\u52c7\u6c14\u548c\u51b3\u5fc3\u3002\n\n\u5728\u827e\u745e\u514b\u7684\u6545\u4e8b\u4e2d\uff0c\u6211\u4eec\u770b\u5230\u4e86\u4eba\u7c7b\u5bf9\u672a\u77e5\u7684\u6e34\u671b\uff0c\u5bf9\u6b63\u4e49\u7684\u575a\u6301\uff0c\u4ee5\u53ca\u5bf9\u751f\u547d\u7684\u5c0a\u91cd\u3002\u4ed6\u4eec\u7684\u5192\u9669\u65c5\u7a0b\u8fd8\u5728\u7ee7\u7eed\uff0c\u800c\u4ed6\u4eec\u7684\u6545\u4e8b\uff0c\u5c06\u6210\u4e3a\u661f\u9645\u5386\u53f2\u7684\u4e00\u90e8\u5206\u3002\nGenerated Tokens: 575\nTokens per second: 14.387834139574515\nTotal Generation Time: 39.964319467544556 seconds\nconfig.save_pretrained\nmodel.save_pretrained\ntokenizer.save_pretrained\n\nreal    7m57.178s\nuser    0m51.384s\nsys     0m44.998s<\/pre><\/div>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>9. 
\u6211\u4eec\u518d\u8bd5\u8bd5\u6700\u5927\u4e2a\u768472B<\/strong><\/h2>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\nMODEL_NAME = \"Qwen\/Qwen1.5-72B-Chat\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 80  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': num_gpus-1,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u6700\u540e\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# 
\u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float16 if NUM_GPUS &gt; 0 else torch.float\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             attn_implementation=\"flash_attention_2\",\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\n\nprompt = \"\u5199\u4e00\u4e2a3\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=32000\n)\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \"> time python test05-72B.py\nNUM_GPUS: 8\nLoading checkpoint shards: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38\/38 [50:34&lt;00:00, 79.87s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n\u5bf9\u4e0d\u8d77\uff0c\u7531\u4e8e\u6587\u672c\u957f\u5ea6\u7684\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u5728\u8fd9\u91cc\u63d0\u4f9b\u4e00\u4e2a\u5b8c\u6574\u76843\u4e07\u5b57\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u4f60\u63d0\u4f9b\u4e00\u4e2a\u7b80\u77ed\u7684\u5f00\u5934\u548c \u5927\u7eb2\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u5927\u7eb2\u81ea\u884c\u6269\u5c55\u3002\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u5f52\u9014\u300b\n\n\u5f00\u5934\uff1a\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u5df2\u7ecf\u5efa\u7acb\u4e86\u8de8\u661f\u9645\u7684\u6b96\u6c11\u5730\u3002\u4e3b\u4eba\u516c\uff0c\u827e\u4f26\u00b7\u54c8\u7279\uff0c\u662f\u4e00\u540d\u52c7\u6562\u7684\u661f\u9645\u63a2\u9669\u5bb6\uff0c\u4ed6\u7684\u4efb\u52a1\u662f\u5bfb\u627e\u65b0\u7684\u9002 \u5408\u4eba\u7c7b\u5c45\u4f4f\u7684\u661f\u7403\u3002\u5728\u4e00\u6b21\u63a2\u7d22\u4e2d\uff0c\u4ed6\u7684\u98de\u8239\u610f\u5916\u5760\u843d\u5728\u4e86\u4e00\u4e2a\u672a\u77e5\u7684\u661f\u7403\u4e0a\u3002\n\n\u5927\u7eb2\uff1a\n\n1. **\u5931\u843d\u7684\u661f\u7403**\uff1a\u827e\u4f26\u9192\u6765\u53d1\u73b0\u81ea\u5df1\u5728\u4e00\u4e2a\u964c\u751f\u7684\u73af\u5883\u4e2d\uff0c\u661f\u7403\u7684\u73af\u5883\u4e0e\u5730\u7403\u622a\u7136\u4e0d\u540c\uff0c\u6709\u72ec\u7279\u7684\u751f\u7269\u548c\u5947\u7279\u7684\u81ea\u7136\u73b0 \u8c61\u3002\u4ed6\u5229\u7528\u98de\u8239\u6b8b\u9ab8\u4e2d\u7684\u8bbe\u5907\u8fdb\u884c\u751f\u5b58\u548c\u63a2\u7d22\u3002\n\n2. 
**\u5f02\u661f\u6587\u660e**\uff1a\u827e\u4f26\u53d1\u73b0\u4e86\u4e00\u79cd\u9ad8\u5ea6\u53d1\u8fbe\u7684\u5f02\u661f\u6587\u660e\uff0c\u4ed6\u4eec\u751f\u6d3b\u5728\u5730\u4e0b\uff0c\u4e0e\u661f\u7403\u7684\u73af\u5883\u548c\u8c10\u5171\u751f\u3002\u4ed6\u8bd5\u56fe\u4e0e\u4ed6\u4eec\u4ea4\u6d41\uff0c \u4f46\u8bed\u8a00\u548c\u6587\u5316\u7684\u5dee\u5f02\u4f7f\u5f97\u6c9f\u901a\u56f0\u96be\u3002\n\n3. **\u79d1\u6280\u4ea4\u6362**\uff1a\u827e\u4f26\u901a\u8fc7\u753b\u56fe\u3001\u624b\u52bf\u7b49\u65b9\u5f0f\u9010\u6e10\u4e0e\u5f02\u661f\u751f\u7269\u5efa\u7acb\u8054\u7cfb\uff0c\u4ed6\u4eec\u5206\u4eab\u4e86\u90e8\u5206\u79d1\u6280\u77e5\u8bc6\uff0c\u827e\u4f26\u4e5f\u6559\u4ed6\u4eec\u4eba\u7c7b\u7684 \u8bed\u8a00\u548c\u6587\u5316\u3002\n\n4. **\u5371\u673a\u964d\u4e34**\uff1a\u827e\u4f26\u53d1\u73b0\u8fd9\u4e2a\u661f\u7403\u6b63\u5728\u906d\u53d7\u4e00\u79cd\u672a\u77e5\u75c5\u6bd2\u7684\u4fb5\u88ad\uff0c\u8fd9\u79cd\u75c5\u6bd2\u5bf9\u5f02\u661f\u751f\u7269\u6765\u8bf4\u662f\u81f4\u547d\u7684\uff0c\u4f46\u5bf9\u4eba\u7c7b\u65e0\u5bb3\u3002 \u4ed6\u51b3\u5b9a\u5e2e\u52a9\u4ed6\u4eec\u5bfb\u627e\u89e3\u836f\u3002\n\n5. **\u751f\u6b7b\u6289\u62e9**\uff1a\u827e\u4f26\u5229\u7528\u65b0\u5b66\u7684\u79d1\u6280\uff0c\u6210\u529f\u7814\u53d1\u51fa\u75ab\u82d7\uff0c\u4f46\u53d1\u73b0\u8fd9\u4f1a\u7834\u574f\u5f02\u661f\u751f\u7269\u7684\u751f\u6001\u5e73\u8861\u3002\u4ed6\u5fc5\u987b\u505a\u51fa\u9009\u62e9\uff1a\u62ef\u6551 \u81ea\u5df1\uff0c\u8fd8\u662f\u62ef\u6551\u8fd9\u4e2a\u6587\u660e\uff1f\n\n6. 
**\u661f\u9645\u5f52\u9014**\uff1a\u827e\u4f26\u51b3\u5b9a\u727a\u7272\u81ea\u5df1\uff0c\u5c06\u75ab\u82d7\u7559\u7ed9\u5f02\u661f\u751f\u7269\u3002\u4ed6\u7684\u6545\u4e8b\u88ab\u5f02\u661f\u751f\u7269\u8bb0\u5f55\u4e0b\u6765\uff0c\u6210\u4e3a\u4ed6\u4eec\u7684\u4f20\u8bf4\u3002\u4ed6\u7684\u98de\u8239 \u88ab\u4fee\u590d\uff0c\u8bbe\u5b9a\u4e3a\u81ea\u52a8\u8fd4\u56de\u5730\u7403\u7684\u6a21\u5f0f\u3002\n\n\u7ed3\u5c3e\uff1a\n\u5728\u827e\u4f26\u7684\u98de\u8239\u8fd4\u56de\u5730\u7403\u7684\u90a3\u4e00\u523b\uff0c\u4ed6\u7684\u82f1\u52c7\u4e8b\u8ff9\u88ab\u5730\u7403\u4eba\u77e5\u6653\uff0c\u4ed6\u88ab\u8a89\u4e3a\u82f1\u96c4\uff0c\u4ed6\u7684\u7cbe\u795e\u6fc0\u52b1\u7740\u65b0\u4e00\u4ee3\u7684\u661f\u9645\u63a2\u9669\u8005\u3002\u800c\u90a3\u4e2a\u9065\u8fdc\u7684\u661f\u7403\uff0c\u4e5f\u5728\u827e\u4f26\u7684\u5f71\u54cd\u4e0b\uff0c\u5f00\u59cb\u5411\u5b87\u5b99\u53d1\u51fa\u548c\u5e73\u7684\u4fe1\u53f7\uff0c\u5bfb\u627e\u66f4\u591a\u7684\u751f\u547d\u4ea4\u6d41\u3002\n\n\u8fd9\u662f\u4e00\u4e2a\u5927\u81f4\u7684\u6545\u4e8b\u6846\u67b6\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u6846\u67b6\u586b\u5145\u7ec6\u8282\uff0c\u53d1\u5c55\u89d2\u8272\uff0c\u63cf\u7ed8\u573a\u666f\uff0c\u589e\u52a0\u51b2\u7a81\uff0c\u4f7f\u6545\u4e8b\u66f4\u52a0\u4e30\u5bcc\u548c\u5f15\u4eba\u5165\u80dc\u3002\n\nreal    54m38.507s\nuser    7m20.744s\nsys     9m10.919s<\/pre><\/div>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7ed3\u679c\u6765\u770b\uff0c<\/p>\n\n\n\n<p>\u603b\u82b1\u8d39\u65f6\u95f4\u4e3a\uff1a54:38<\/p>\n\n\n\n<p>\u6a21\u578b\u52a0\u8f7d\u65f6\u95f4\u4e3a50:34<\/p>\n\n\n\n<p>4\u5206\u949f\u5185\u751f\u6210\u4e86809\u4e2a\u6c49\u5b57\uff0c\u6bcf\u79d2\u5927\u7ea63.37\u4e2a\u6c49\u5b57\uff0c\u901f\u5ea6\u8fd8\u53ef\u4ee5<\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>9.1. 
72B \u76844\u4f4d\u91cf\u5316\u5e76\u4fdd\u5b58<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\nMODEL_NAME = \"Qwen\/Qwen1.5-72B-Chat\"\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\n# \u6839\u636e\u662f\u5426\u4f7f\u7528GPU\u8bbe\u7f6e\u6570\u636e\u7c7b\u578b\uff08\u534a\u7cbe\u5ea6\u6216\u5168\u7cbe\u5ea6\uff09\ndevice_dtype = torch.float32 if NUM_GPUS &gt; 0 else torch.float\n\nquantization_config = BitsAndBytesConfig(load_in_4bit=True)\n# \u83b7\u53d6\u8d77\u59cb\u65f6\u95f4\u6233\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             torch_dtype=device_dtype,\n                                             quantization_config=quantization_config,\n                                             device_map=\"auto\")\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = 
tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n\nprint(f\"config.save_pretrained\")\nmodel.config.save_pretrained('Qwen\/Qwen1.5-72B-Chat-Int4')\nprint(f\"model.save_pretrained\")\nmodel.save_pretrained('Qwen\/Qwen1.5-72B-Chat-Int4')\nprint(f\"tokenizer.save_pretrained\")\ntokenizer.save_pretrained('Qwen\/Qwen1.5-72B-Chat-Int4')\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">time python test05-72B-5.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38\/38 [48:18&lt;00:00, 76.27s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 2907.2342824935913 
seconds\n\u5bf9\u4e0d\u8d77\uff0c\u7531\u4e8e\u6587\u5b57\u957f\u5ea6\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u5728\u8fd9\u91cc\u63d0\u4f9b\u4e00\u7bc7\u5b8c\u6574\u76841\u4e07\u5b57\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u4f60\u63d0\u4f9b\u4e00\u4e2a\u5927\u7eb2\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9 \u4e2a\u5927\u7eb2\u6765\u6269\u5c55\u4f60\u7684\u6545\u4e8b\uff1a\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u5f52\u9014\u300b\n\n\u4e00\u3001\u5f00\u5934\uff081000\u5b57\uff09\n1. \u63cf\u8ff0\u5730\u7403\u7684\u672a\u6765\uff0c\u79d1\u6280\u9ad8\u5ea6\u53d1\u8fbe\uff0c\u4eba\u7c7b\u5df2\u7ecf\u6210\u529f\u5b9e\u73b0\u4e86\u661f\u9645\u65c5\u884c\u3002\n2. \u4e3b\u89d2\u827e\u4f26\uff0c\u662f\u4e00\u4f4d\u661f\u9645\u63a2\u9669\u5bb6\uff0c\u4ed6\u7684\u4efb\u52a1\u662f\u5bfb\u627e\u65b0\u7684\u9002\u5408\u4eba\u7c7b\u5c45\u4f4f\u7684\u661f\u7403\u3002\n\n\u4e8c\u3001\u51b2\u7a81\uff083000\u5b57\uff09\n1. \u827e\u4f26\u5728\u4e00\u6b21\u63a2\u7d22\u4e2d\uff0c\u98de\u8239\u906d\u9047\u672a\u77e5\u529b\u91cf\u7684\u88ad\u51fb\uff0c\u88ab\u8feb\u964d\u843d\u5728\u4e00\u4e2a\u964c\u751f\u7684\u661f\u7403\u3002\n2. \u4ed6\u53d1\u73b0\u8fd9\u4e2a\u661f\u7403\u7684\u751f\u7269\u5177\u6709\u9ad8\u5ea6\u667a\u80fd\uff0c\u4e14\u5bf9\u4eba\u7c7b\u6000\u6709\u654c\u610f\u3002\n3. \u98de\u8239\u635f\u574f\u4e25\u91cd\uff0c\u4fee\u590d\u9700\u8981\u4e00\u79cd\u5f53\u5730\u7a00\u6709\u7684\u5143\u7d20\uff0c\u827e\u4f26\u5fc5\u987b\u4e0e\u8fd9\u4e2a\u661f\u7403\u7684\u751f\u7269\u8fdb\u884c\u4ea4\u6d41\u548c\u4ea4\u6613\u3002\n\n\u4e09\u3001\u53d1\u5c55\uff083000\u5b57\uff09\n1. \u827e\u4f26\u901a\u8fc7\u5b66\u4e60\u661f\u7403\u7684\u8bed\u8a00\u548c\u6587\u5316\uff0c\u9010\u6e10\u8d62\u5f97\u4e86\u90e8\u5206\u751f\u7269\u7684\u4fe1\u4efb\u3002\n2. 
\u5728\u6b64\u8fc7\u7a0b\u4e2d\uff0c\u4ed6\u53d1\u73b0\u8fd9\u4e2a\u661f\u7403\u7684\u751f\u7269\u6b63\u9762\u4e34\u4e00\u573a\u707e\u96be\uff0c\u4ed6\u4eec\u7684\u654c\u4eba\u662f\u4e00\u79cd\u6765\u81ea\u5b87\u5b99\u6df1\u5904\u7684\u90aa\u6076\u529b\u91cf\u3002\n3. \u827e\u4f26\u51b3\u5b9a\u5e2e\u52a9\u4ed6\u4eec\uff0c\u4ee5\u6b64\u6362\u53d6\u4fee\u590d\u98de\u8239\u7684\u5143\u7d20\u3002\n\n\u56db\u3001\u9ad8\u6f6e\uff081500\u5b57\uff09\n1. \u827e\u4f26\u5e26\u9886\u661f\u7403\u751f\u7269\u4e0e\u90aa\u6076\u529b\u91cf\u5c55\u5f00\u51b3\u6218\uff0c\u4ed6\u5229\u7528\u98de\u8239\u7684\u6b66\u5668\u7cfb\u7edf\uff0c\u6210\u529f\u51fb\u9000\u4e86\u90aa\u6076\u529b\u91cf\u3002\n2. \u5728\u6218\u6597\u4e2d\uff0c\u827e\u4f26\u5c55\u73b0\u51fa\u7684\u4eba\u7c7b\u667a\u6167\u548c\u52c7\u6c14\uff0c\u4f7f\u5f97\u661f\u7403\u751f\u7269\u5bf9\u4ed6\u4ea7\u751f\u4e86\u6df1\u6df1\u7684\u656c\u610f\u3002\n\n\u4e94\u3001\u7ed3\u5c40\uff081500\u5b57\uff09\n1. \u661f\u7403\u751f\u7269\u5e2e\u52a9\u827e\u4f26\u4fee\u590d\u98de\u8239\uff0c\u5e76\u8d60\u4e88\u4ed6\u8db3\u591f\u7684\u5143\u7d20\u3002\n2. \u827e\u4f26\u544a\u522b\u65b0\u670b\u53cb\uff0c\u542f\u7a0b\u8fd4\u56de\u5730\u7403\uff0c\u4f46\u4ed6\u627f\u8bfa\u4f1a\u56de\u6765\u518d\u6b21\u8bbf\u95ee\u8fd9\u4e2a\u661f\u7403\u3002\n3. 
\u8fd4\u56de\u5730\u7403\u540e\uff0c\u827e\u4f26\u7684\u6545\u4e8b\u6fc0\u52b1\u4e86\u5168\u4eba\u7c7b\uff0c\u5927\u5bb6\u66f4\u52a0\u56e2\u7ed3\uff0c\u5171\u540c\u9762\u5bf9\u672a\u6765\u7684\u6311\u6218\u3002\n\n\u8fd9\u53ea\u662f\u4e00\u4e2a\u57fa\u672c\u7684\u5927\u7eb2\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u81ea\u5df1\u7684\u60f3\u6cd5\u6dfb\u52a0\u66f4\u591a\u7684\u7ec6\u8282\uff0c\u6bd4\u5982\u89d2\u8272\u7684\u5185\u5fc3\u6323\u624e\u3001\u661f\u7403\u7684\u73af\u5883\u63cf\u8ff0\u3001\u6218\u6597\u7684\u7b56\u7565\u7b49\uff0c\u4ee5\u4e30\u5bcc\u4f60\u7684\u6545\u4e8b\u3002\nGenerated Tokens: 429\nTokens per second: 1.6327658813303874\nTotal Generation Time: 262.7443437576294 seconds\nconfig.save_pretrained\nmodel.save_pretrained\ntokenizer.save_pretrained\n\nreal    57m57.074s\nuser    13m11.878s\nsys     28m54.212s<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\"><strong>9.2 \u52a0\u8f7d 72B 4\u4f4d\u91cf\u5316<\/strong><\/h3>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \">from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\nimport torch\nimport time\n\n# \u6e05\u7a7a VRAM \u7f13\u51b2\u533a\ntorch.cuda.empty_cache()\n\nMODEL_NAME = \"Qwen\/Qwen1.5-72B-Chat-Int4\"\n\n# \u5b9a\u4e49\u4e00\u4e2a\u51fd\u6570\u6765\u81ea\u52a8\u914d\u7f6e\u5728\u591aGPU\u73af\u5883\u4e0b\u6a21\u578b\u5404\u90e8\u5206\u7684\u8bbe\u5907\u5206\u5e03\ndef auto_configure_device_map(num_gpus: int):\n    num_trans_layers = 80  # \u5b9a\u4e49Transformer\u6a21\u578b\u7684\u5c42\u6570\n    per_gpu_layers = num_trans_layers \/ num_gpus  # \u8ba1\u7b97\u6bcf\u4e2aGPU\u5e94\u627f\u62c5\u7684\u5c42\u6570\n    # \u521d\u59cb\u5316\u8bbe\u5907\u6620\u5c04\u5b57\u5178\uff0c\u6307\u5b9a\u4e00\u4e9b\u7279\u5b9a\u6a21\u5757\u5e94\u8be5\u653e\u7f6e\u7684GPU\u7f16\u53f7\n    device_map = {\n        'model.embed_tokens': 0,  # \u5d4c\u5165\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'model.norm': 
0,  # \u6700\u540e\u4e00\u4e2a\u6b63\u5219\u5316\u5c42\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n        'lm_head': 0  # \u8bed\u8a00\u6a21\u578b\u5934\uff08\u7528\u4e8e\u9884\u6d4b\u4e0b\u4e00\u4e2a\u8bcd\u7684\u5c42\uff09\u653e\u5728\u7b2c\u4e00\u4e2aGPU\u4e0a\n    }\n    # \u5c06Transformer\u6a21\u578b\u7684\u6bcf\u4e00\u5c42\u5206\u914d\u7ed9\u4e00\u4e2aGPU\n    for i in range(num_trans_layers):\n        device_map[f'model.layers.{i}'] = int(i\/\/per_gpu_layers)\n    return device_map\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\nNUM_GPUS = 3\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u57fa\u4e8eGPU\u6570\u91cf\u81ea\u52a8\u914d\u7f6e\u8bbe\u5907\u6620\u5c04\uff1b\u5426\u5219\u4e0d\u4f7f\u7528\u8bbe\u5907\u6620\u5c04\ndevice_map = auto_configure_device_map(NUM_GPUS) if NUM_GPUS &gt; 0 else None\n\n# \u5982\u679c\u6709\u53ef\u7528\u7684GPU\uff0c\u5219\u4f7f\u7528\u7b2c\u4e00\u4e2aGPU\uff1b\u5426\u5219\u4f7f\u7528CPU\ndevice = torch.device(\"cuda\") if NUM_GPUS &gt; 0 else torch.device(\"cpu\")\n\nstart_time = time.time()\n\nmodel = AutoModelForCausalLM.from_pretrained(MODEL_NAME,\n                                             trust_remote_code=True,\n                                             device_map=device_map)\n\n# \u52a0\u8f7d\u5206\u8bcd\u5668\u548c\u6a21\u578b\uff0c\u6307\u5b9a\u8bbe\u5907\u6620\u5c04\u548c\u6570\u636e\u7c7b\u578b\ntokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n\nmodel = model.eval()\nend_time = time.time()\nelapsed_time = end_time - start_time\nprint(f\"Load Model Time: {elapsed_time} seconds\")\n\nstart_time2 = time.time()\n\nprompt = \"\u5199\u4e00\u4e2a1\u4e07\u5b57\u7684\u6709\u5934\u6709\u5c3e\u7684\u77ed\u7bc7\u79d1\u5e7b\u5c0f\u8bf4.\"\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": prompt}\n]\ntext = 
tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(device)\n\ngenerated_ids = model.generate(\n    model_inputs.input_ids,\n    max_new_tokens=10000\n)\n\nend_time2 = time.time()\nelapsed_time2 = end_time2 - start_time2\n\nelapsed_time = end_time2 - start_time\nnum_tokens_generated = len(generated_ids[0]) - len(model_inputs.input_ids[0])\n\ntokens_per_second = num_tokens_generated \/ elapsed_time2\n\ngenerated_ids = [\n    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n]\n\nresponse = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n\nprint(response)\n\nprint(f\"Generated Tokens: {num_tokens_generated}\")\nprint(f\"Tokens per second: {tokens_per_second}\")\nprint(f\"Total Generation Time: {elapsed_time2} seconds\")\n<\/pre><\/div>\n\n\n\n<p>\u8fd0\u884c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \" >time python test05-72B-6.py\nNUM_GPUS: 8\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 10\/10 [22:04&lt;00:00, 132.45s\/it]\nSpecial tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\nLoad Model Time: 1328.904905796051 seconds\n\/home\/tony\/anaconda3\/envs\/Jamba\/lib\/python3.11\/site-packages\/bitsandbytes\/nn\/modules.py:391: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n  warnings.warn('Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). 
This will lead to slow inference or training speed.')\n\u5f88\u62b1\u6b49\uff0c\u7531\u4e8e\u5b57\u6570\u9650\u5236\uff0c\u6211\u65e0\u6cd5\u5728\u8fd9\u91cc\u63d0\u4f9b\u4e00\u7bc7\u5b8c\u6574\u76841\u4e07\u5b57\u79d1\u5e7b\u5c0f\u8bf4\u3002\u4f46\u6211\u53ef\u4ee5\u4e3a\u4f60\u521b\u4f5c\u4e00\u4e2a\u7b80\u77ed\u7684\u5f00\u5934\u548c\u7ed3\u5c3e\uff0c\u5e76\u63d0 \u4f9b\u4e00\u4e2a\u5927\u81f4\u7684\u6545\u4e8b\u60c5\u8282\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u5927\u7eb2\u6765\u6269\u5c55\u4f60\u7684\u6545\u4e8b\u3002\n\n\u6807\u9898\uff1a\u300a\u661f\u9645\u5f52\u9014\u300b\n\n\u5f00\u5934\uff1a\n\u5728\u9065\u8fdc\u7684\u672a\u6765\uff0c\u4eba\u7c7b\u5df2\u7ecf\u5efa\u7acb\u4e86\u8de8\u661f\u7cfb\u7684\u6b96\u6c11\u5730\u3002\u4e3b\u89d2\u827e\u4f26\u00b7\u54c8\u7279\uff0c\u4e00\u4f4d\u5929\u624d\u7684\u661f\u9645\u5bfc\u822a\u5458\uff0c\u8d1f\u8d23\u5e26\u9886\u4e00\u652f\u63a2\u9669\u961f\u5bfb\u627e\u65b0 \u7684\u9002\u5b9c\u5c45\u4f4f\u7684\u661f\u7403\u3002\u5728\u4e00\u6b21\u610f\u5916\u4e2d\uff0c\u4ed6\u4eec\u7684\u98de\u8239\u88ab\u9ed1\u6d1e\u5438\u5165\uff0c\u7ecf\u8fc7\u4e86\u6f2b\u957f\u7684\u65f6\u7a7a\u626d\u66f2\uff0c\u4ed6\u4eec\u6765\u5230\u4e86\u4e00\u4e2a\u672a\u77e5\u7684\u5b87\u5b99\u533a\u57df\u3002\n\n\u60c5\u8282\u53d1\u5c55\uff1a\n\u827e\u4f26\u548c\u4ed6\u7684\u961f\u4f0d\u53d1\u73b0\uff0c\u8fd9\u4e2a\u5b87\u5b99\u533a\u57df\u4e0e\u4ed6\u4eec\u6240\u77e5\u7684\u5b87\u5b99\u622a\u7136\u4e0d\u540c\uff0c\u7269\u7406\u89c4\u5219\u4f3c\u4e4e\u90fd\u5728\u8fd9\u91cc\u5931\u6548\u3002\u4ed6\u4eec\u9762\u4e34\u7740\u8d44\u6e90\u67af\u7aed\u3001\u98de\u8239\u635f\u574f\u7684\u56f0\u5883\uff0c\u540c\u65f6\uff0c\u4ed6\u4eec\u4e5f\u53d1\u73b0\u4e86\u8fd9\u4e2a\u533a\u57df\u5185\u5b58\u5728\u4e00\u79cd\u9ad8\u5ea6\u53d1\u8fbe\u7684\u6587\u660e\u3002\u827e\u4f26\u4e0e\u8fd9\u4e2a\u6587\u660e\u63a5\u89e6\uff0c\u5b66\u4e60\u4ed6\u4eec\u7684\u79d1\u6280\uff0c\u5c1d\u8bd5\u4fee\u590d\u98de\u8239\u3002\n\n\u9ad8\u6f6e\uff1a\n\u827e\u4f26\u53d1
\u73b0\uff0c\u8fd9\u4e2a\u6587\u660e\u6b63\u906d\u53d7\u4e00\u79cd\u540d\u4e3a\"\u6697\u80fd\u91cf\u761f\u75ab\"\u7684\u5a01\u80c1\uff0c\u8fd9\u79cd\u761f\u75ab\u4f1a\u7834\u574f\u5b87\u5b99\u7684\u57fa\u672c\u7ed3\u6784\u3002\u827e\u4f26\u51b3\u5b9a\u5e2e\u52a9\u4ed6\u4eec\uff0c\u4ed6\u5229\u7528\u81ea\u5df1\u7684\u77e5\u8bc6\u7ed3\u5408\u65b0\u5b66\u7684\u79d1\u6280\uff0c\u8bd5\u56fe\u627e\u5230\u89e3\u51b3\u529e\u6cd5\u3002\u5728\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\uff0c\u827e\u4f26\u7684\u4eba\u6027\u548c\u667a\u6167\u5f97\u5230\u4e86\u5145\u5206\u7684\u5c55\u73b0\uff0c\u4e5f\u8d62\u5f97\u4e86\u8fd9\u4e2a\u6587\u660e\u7684\u5c0a\u91cd\u3002\n\n\u7ed3\u5c3e\uff1a\n\u5728\u4e00\u573a\u58ee\u70c8\u7684\u6218\u6597\u540e\uff0c\u827e\u4f26\u6210\u529f\u963b\u6b62\u4e86\"\u6697\u80fd\u91cf\u761f\u75ab\"\u7684\u6269\u6563\uff0c\u4f46\u4e5f\u727a\u7272\u4e86\u81ea\u5df1\u3002\u4ed6\u7684\u52c7\u6562\u548c\u667a\u6167\u88ab\u5168\u5b87\u5b99\u94ed\u8bb0\uff0c\u4ed6\u7684\u98de\u8239\u88ab\u6539\u9020\u6210\u4e86\u4e00\u4e2a\u79fb\u52a8\u7684\u535a\u7269\u9986\uff0c\u7a7f\u8d8a\u661f\u7cfb\uff0c\u4f20\u64ad\u4ed6\u7684\u6545\u4e8b\u548c\u4eba\u7c7b\u7684\u7cbe\u795e\u3002\u800c\u4ed6\u7684\u961f\u4f0d\uff0c\u5e26\u7740\u827e\u4f26\u7684\u9057\u5fd7\uff0c\u627e\u5230\u4e86\u65b0\u7684\u5c45\u4f4f\u661f\u7403\uff0c\u5f00\u59cb\u4e86\u65b0\u7684\u751f\u6d3b\uff0c\u4ed6\u4eec\u7684\u5192\u9669\u548c\u63a2\u7d22\uff0c\u6210\u4e3a\u4e86\u65b0\u4e00\u4ee3\u7684\u4f20\u8bf4\u3002\n\n\u8fd9\u5c31\u662f\u300a\u661f\u9645\u5f52\u9014\u300b\u7684\u6545\u4e8b\u5927\u7eb2\uff0c\u4f60\u53ef\u4ee5\u6839\u636e\u8fd9\u4e2a\u6846\u67b6\uff0c\u586b\u5145\u66f4\u591a\u7684\u7ec6\u8282\uff0c\u5982\u4eba\u7269\u6027\u683c\u5851\u9020\u3001\u5177\u4f53\u4e8b\u4ef6\u63cf\u7ed8\u3001\u60c5\u611f\u51b2\u7a81\u7b49\uff0c\u5c06\u5176\u53d1\u5c55\u6210\u4e00\u4e2a\u5b8c\u6574\u7684\u6545\u4e8b\u3002\nGenerated Tokens: 393\nTokens per second: 3.4790348195658485\nTotal Generation Time: 112.9623646736145 
seconds\n\nreal    24m5.878s\nuser    2m5.539s\nsys     3m35.394s\n(Jamba) tony@DESKTOP-HHKHPUU:\/mnt\/d\/ai\/Qwen$<\/pre><\/div>\n","protected":false},"excerpt":{"rendered":"<p>Qwen1.5 \u662f Qwen2 \u7684\u6d4b\u8bd5\u7248\uff0cQwen2 \u662f\u4e00\u79cd\u57fa\u4e8e Transformer \u7684\u7eaf\u89e3\u7801\u5668\u8bed\u8a00\u6a21\u578b\uff0c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[313,289,443,442,312],"tags":[400],"class_list":["post-2661","post","type-post","status-publish","format-standard","hentry","category-chatgpt","category-gpt","category-llm","category-llms","category-openai","tag-qwen1-5"],"views":3854,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2661","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2661"}],"version-history":[{"count":63,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2661\/revisions"}],"predecessor-version":[{"id":2802,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2661\/revisions\/2802"}],"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2661"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2661"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2661"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}