{"id":4718,"date":"2024-09-08T11:34:48","date_gmt":"2024-09-08T03:34:48","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=4718"},"modified":"2024-09-08T16:22:46","modified_gmt":"2024-09-08T08:22:46","slug":"reflection-llama-3-1-70b-%e5%ae%9e%e9%99%85%e4%b8%8a%e6%98%af-llama-3%ef%bc%9f","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=4718","title":{"rendered":"Reflection-Llama-3.1-70B \u5b9e\u9645\u4e0a\u662f Llama-3\uff1f"},"content":{"rendered":"\n<p>\u5728\u6d4b\u91cf\u5dee\u5f02\u540e\uff0c\u8fd9\u4e2a\u6a21\u578b\u4f3c\u4e4e\u662f\u5e94\u7528\u4e86 LoRA \u8c03\u4f18\u7684 Llama 3\u3002\u4e0d\u662f Llama 3.1\u3002<\/p>\n\n\n\n<p>\u53c2\u8003\u94fe\u63a5\uff1a<a href=\"https:\/\/huggingface.co\/mattshumer\/Reflection-Llama-3.1-70B\/discussions\/38\">https:\/\/huggingface.co\/mattshumer\/Reflection-Llama-3.1-70B\/discussions\/38<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/gist.github.com\/StableFluffy\/1c6f8be84cbe9499de2f9b63d7105ff0\">https:\/\/gist.github.com\/StableFluffy\/1c6f8be84cbe9499de2f9b63d7105ff0<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/www.reddit.com\/r\/LocalLLaMA\/comments\/1fb6jdy\/reflectionllama3170b_is_actually_llama3\">https:\/\/www.reddit.com\/r\/LocalLLaMA\/comments\/1fb6jdy\/reflectionllama3170b_is_actually_llama3<\/a><\/p>\n\n\n\n<p>\u7b14\u8005\u5728windows \u4e0b\u7684\u6d4b\u8bd5\u4ee3\u7801\uff0c\u65e0\u9700 GPU \u73af\u5883\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:python decode:true \" >from transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport os\n\nos.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\ntorch.set_num_threads(1)\n\nbase_model_name = \"meta-llama\/Meta-Llama-3-70B-Instruct\"\nchat_model_name = \"mattshumer\/Reflection-Llama-3.1-70B\"\nbase_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16)\nchat_model = AutoModelForCausalLM.from_pretrained(chat_model_name, torch_dtype=torch.bfloat16)\n\ndef calculate_weight_diff(base_weight, chat_weight):\n    return torch.abs(base_weight - chat_weight).mean().item()\n\ndef calculate_layer_diffs(base_model, chat_model):\n    layer_diffs = []\n    for base_layer, chat_layer in zip(base_model.model.layers, chat_model.model.layers):\n        layer_diff = {\n            'input_layernorm': calculate_weight_diff(base_layer.input_layernorm.weight, chat_layer.input_layernorm.weight),\n            # 'mlp_down_proj': calculate_weight_diff(base_layer.mlp.down_proj.weight, chat_layer.mlp.down_proj.weight),\n            # 'mlp_gate_proj': calculate_weight_diff(base_layer.mlp.gate_proj.weight, chat_layer.mlp.gate_proj.weight),\n            # 'mlp_up_proj': calculate_weight_diff(base_layer.mlp.up_proj.weight, chat_layer.mlp.up_proj.weight),\n            'post_attention_layernorm': calculate_weight_diff(base_layer.post_attention_layernorm.weight, chat_layer.post_attention_layernorm.weight),\n            'self_attn_q_proj': calculate_weight_diff(base_layer.self_attn.q_proj.weight, chat_layer.self_attn.q_proj.weight),\n            'self_attn_k_proj': calculate_weight_diff(base_layer.self_attn.k_proj.weight, chat_layer.self_attn.k_proj.weight),\n            'self_attn_v_proj': calculate_weight_diff(base_layer.self_attn.v_proj.weight, chat_layer.self_attn.v_proj.weight),\n            'self_attn_o_proj': calculate_weight_diff(base_layer.self_attn.o_proj.weight, chat_layer.self_attn.o_proj.weight)\n        }\n        layer_diffs.append(layer_diff)\n    return layer_diffs\n\ndef visualize_layer_diffs(layer_diffs):\n    num_layers = len(layer_diffs)\n    num_components = len(layer_diffs[0])\n    \n    fig, axs = plt.subplots(1, num_components, figsize=(24, 8))\n    fig.suptitle(f\"{base_model_name} &lt;&gt; {chat_model_name}\", fontsize=16)\n    \n    for i, component in enumerate(layer_diffs[0].keys()):\n        component_diffs = [[layer_diff[component]] for layer_diff in layer_diffs]\n        sns.heatmap(component_diffs, annot=True, fmt=\".6f\", cmap=\"YlGnBu\", ax=axs[i], cbar_kws={\"shrink\": 0.8})\n        axs[i].set_title(component)\n        axs[i].set_xlabel(\"Layer\")\n        axs[i].set_ylabel(\"Difference\")\n        axs[i].set_xticks([])\n        axs[i].set_yticks(range(num_layers))\n        axs[i].set_yticklabels(range(num_layers))\n        axs[i].invert_yaxis()\n    \n    plt.tight_layout()\n    plt.show()\n\nlayer_diffs = calculate_layer_diffs(base_model, chat_model)\n\nvisualize_layer_diffs(layer_diffs)\n<\/pre><\/div>\n\n\n\n<p>\u7b14\u8005\u7684\u6d4b\u8bd5\u7ed3\u679c\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"521\" src=\"https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-1024x521.png\" alt=\"\" class=\"wp-image-4729\" srcset=\"https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-1024x521.png 1024w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-300x153.png 300w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-768x390.png 768w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-1536x781.png 1536w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-2048x1041.png 2048w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-1320x671.png 1320w, https:\/\/www.aqwu.net\/wp\/wp-content\/uploads\/2024\/09\/\u56fe\u7247-16-600x305.png 600w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u6d4b\u91cf\u5dee\u5f02\u540e\uff0c\u8fd9\u4e2a\u6a21\u578b\u4f3c\u4e4e\u662f\u5e94\u7528\u4e86 LoRA \u8c03\u4f18\u7684 Llama 3\u3002\u4e0d\u662f Llama 3.1\u3002 \u53c2\u8003\u94fe\u63a5\uff1ah [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[444,445],"tags":[395],"class_list":["post-4718","post","type-post","status-publish","format-standard","hentry","category-ai","category-ainews","tag-lora"],"views":1173,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4718","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=4718"}],"version-history":[{"count":6,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4718\/revisions"}],"predecessor-version":[{"id":4734,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4718\/revisions\/4734"}],"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=4718"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=4718"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=4718"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}