{"id":4826,"date":"2024-10-22T15:04:22","date_gmt":"2024-10-22T07:04:22","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=4826"},"modified":"2024-10-22T15:39:18","modified_gmt":"2024-10-22T07:39:18","slug":"%e6%a8%a1%e5%9e%8b%e5%9c%a8gpu%e5%86%85%e5%ad%98%e7%9a%84%e6%98%a0%e5%b0%84%e5%85%b3%e7%b3%bb","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=4826","title":{"rendered":"\u6a21\u578b\u5728GPU\u5185\u5b58\u7684\u6620\u5c04\u5173\u7cfb"},"content":{"rendered":"\n<p>\u8fd9\u91cc\u6211\u4eec\u4ee5 <a href=\"https:\/\/huggingface.co\/nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF\">nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF<\/a> \u4e3a\u4f8b\u6765\u8bf4\u660e\u95ee\u9898<\/p>\n\n\n\n<p>\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \" >import torch\nfrom transformers import AutoModelForCausalLM\nfrom collections import defaultdict\n\n# \u68c0\u6d4b\u53ef\u7528\u7684GPU\u6570\u91cf\nNUM_GPUS = torch.cuda.device_count()\nprint(f\"NUM_GPUS: {NUM_GPUS}\")\n\nMODEL_ID = \"nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF\"\n\nprint(f\"Load Model {MODEL_ID} ... 
\")\nmodel = AutoModelForCausalLM.from_pretrained(\n\t\tMODEL_ID, \n\t\tdevice_map=\"auto\", \n\t\ttorch_dtype=torch.bfloat16\n)\n    \n# \u5b9a\u4e49\u5b57\u5178\u6765\u5b58\u50a8\u6bcf\u4e00\u5c42\u7684\u53c2\u6570\u6570\u91cf\u3001\u5185\u5b58\u5927\u5c0f\u548c\u6240\u5728\u8bbe\u5907\nlayerwise_stats = defaultdict(lambda: {'num_params': 0, 'size_mb': 0, 'device': None})\n# \u5b9a\u4e49\u5b57\u5178\u6765\u5b58\u50a8\u6bcf\u4e2a\u8bbe\u5907(GPU\/CPU)\u7684\u603b\u5185\u5b58\u4f7f\u7528\ndevice_memory_usage = defaultdict(float)\n\n# \u904d\u5386\u6a21\u578b\u53c2\u6570\nfor name, param in model.named_parameters():\n    # \u83b7\u53d6\u4e3b\u8981\u7684\u5c42\u7ea7\u540d\u79f0\uff0c\u4f8b\u5982 model.layers.0\uff0cmodel.embed_tokens\n    layer_name = '.'.join(name.split('.')[:3]) if 'layers' in name else name.split('.')[0]\n    \n    param_size = param.numel() * param.element_size() \/ 1024 \/ 1024  # \u8ba1\u7b97\u5185\u5b58\u5360\u7528\uff0c\u5355\u4f4d\u4e3aMB\n    layerwise_stats[layer_name]['num_params'] += param.numel()  # \u7edf\u8ba1\u6bcf\u4e00\u5c42\u7684\u603b\u53c2\u6570\u6570\u91cf\n    layerwise_stats[layer_name]['size_mb'] += param_size  # \u7edf\u8ba1\u6bcf\u4e00\u5c42\u7684\u603b\u5185\u5b58\u5927\u5c0f\n    layerwise_stats[layer_name]['device'] = param.device  # \u8bb0\u5f55\u6bcf\u4e00\u5c42\u6240\u5728\u7684\u8bbe\u5907\n\n    # \u8bb0\u5f55\u6bcf\u4e2a\u8bbe\u5907\u7684\u603b\u5185\u5b58\u4f7f\u7528\u60c5\u51b5\n    device_memory_usage[param.device] += param_size\n\n# \u8f93\u51fa\u6bcf\u4e00\u5c42\u7684\u7edf\u8ba1\u7ed3\u679c\nfor layer_name, stats in layerwise_stats.items():\n    print(f\"Layer: {layer_name} | Total parameters: {stats['num_params']:,} | Total memory size: {stats['size_mb']:.2f} MB | Device: {stats['device']}\")\n\n# \u8ba1\u7b97\u5e76\u8f93\u51fa\u6a21\u578b\u7684\u603b\u53c2\u6570\u6570\u91cf\u548c\u603b\u5185\u5b58\u5360\u7528\ntotal_params = sum(stats['num_params'] for stats in layerwise_stats.values())\ntotal_size = 
sum(stats['size_mb'] for stats in layerwise_stats.values())\n\nprint(f\"\\nTotal number of parameters in the model: {total_params:,}\")\nprint(f\"Total memory size of the model: {total_size:.2f} MB\")\n\n# \u8f93\u51fa\u6bcf\u4e2a\u8bbe\u5907\u7684\u603b\u5185\u5b58\u4f7f\u7528\u60c5\u51b5\nprint(\"\\nMemory usage per device:\")\nfor device, memory in device_memory_usage.items():\n    print(f\"Device: {device} | Total memory used: {memory:.2f} MB\")\n        \n<\/pre><\/div>\n\n\n\n<p>\u8f93\u51fa\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"top-set:false lang:python decode:true \" >NUM_GPUS: 6\nLoad Model nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF ...\nLoading checkpoint shards: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 30\/30 [08:41&lt;00:00, 17.38s\/it]\nSome parameters are on the meta device because they were offloaded to the cpu.\nLayer: model | Total parameters: 1,050,681,344 | Total memory size: 2004.02 MB | Device: cuda:5\nLayer: model.layers.0 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.1 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.2 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.3 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.4 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.5 | Total parameters: 855,654,400 | 
Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.6 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.7 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.8 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.9 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:0\nLayer: model.layers.10 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.11 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.12 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.13 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.14 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.15 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.16 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.17 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.18 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.19 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.20 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.21 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.22 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.23 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:1\nLayer: model.layers.24 | Total parameters: 855,654,400 | Total 
memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.25 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.26 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.27 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.28 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.29 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.30 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.31 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.32 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.33 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.34 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.35 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.36 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.37 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:2\nLayer: model.layers.38 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.39 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.40 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.41 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.42 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.43 | Total parameters: 855,654,400 | Total 
memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.44 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.45 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.46 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.47 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.48 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.49 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.50 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.51 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:3\nLayer: model.layers.52 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.53 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.54 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.55 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.56 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.57 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.58 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.59 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.60 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.61 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.62 | Total parameters: 855,654,400 | Total 
memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.63 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.64 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.65 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:4\nLayer: model.layers.66 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.67 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.68 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.69 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.70 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.71 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.72 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.73 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.74 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.75 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.76 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.77 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.78 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: model.layers.79 | Total parameters: 855,654,400 | Total memory size: 1632.03 MB | Device: cuda:5\nLayer: lm_head | Total parameters: 1,050,673,152 | Total memory size: 2004.00 MB | Device: meta\n\nTotal number of parameters in the model: 70,553,706,496\nTotal memory 
size of the model: 134570.52 MB\n\nMemory usage per device:\nDevice: cuda:0 | Total memory used: 18324.31 MB\nDevice: cuda:1 | Total memory used: 22848.44 MB\nDevice: cuda:2 | Total memory used: 22848.44 MB\nDevice: cuda:3 | Total memory used: 22848.44 MB\nDevice: cuda:4 | Total memory used: 22848.44 MB\nDevice: cuda:5 | Total memory used: 22848.45 MB\nDevice: meta | Total memory used: 2004.00 MB<\/pre><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u8fd9\u91cc\u6211\u4eec\u4ee5 nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF \u4e3a\u4f8b\u6765\u8bf4\u660e\u95ee\u9898 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[444,443,442],"tags":[],"class_list":["post-4826","post","type-post","status-publish","format-standard","hentry","category-ai","category-llm","category-llms"],"views":1969,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4826","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=4826"}],"version-history":[{"count":3,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4826\/revisions"}],"predecessor-version":[{"id":4829,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4826\/revisions\/4829"}],"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=4826"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=4826"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=4826"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}