{"id":4513,"date":"2024-08-17T10:07:48","date_gmt":"2024-08-17T02:07:48","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=4513"},"modified":"2024-08-17T10:14:50","modified_gmt":"2024-08-17T02:14:50","slug":"vllm%ef%bc%9a%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%8f%90%e4%be%9b-llm","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=4513","title":{"rendered":"vLLM\uff1a\u5927\u89c4\u6a21\u63d0\u4f9b LLM"},"content":{"rendered":"\n<p>\u6b64\u73af\u5883\u63d0\u4f9b\u4e86&nbsp;<a href=\"https:\/\/github.com\/vllm-project\/vllm\">vLLM<\/a>&nbsp;\u670d\u52a1\u5f15\u64ce\u7684\u9ad8\u6027\u80fd\u90e8\u7f72\uff0c\u8be5\u5f15\u64ce\u9488\u5bf9\u5927\u89c4\u6a21\u670d\u52a1\u5927\u578b\u8bed\u8a00\u6a21\u578b\u8fdb\u884c\u4e86\u4f18\u5316\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"features\">\u7279\u5f81<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9884\u914d\u7f6e\u7684 vLLM \u670d\u52a1\u5668\uff0c\u53ef\u4ee5\u8fd0\u884c\u60a8\u6307\u5b9a\u7684\u6a21\u578b<\/li>\n\n\n\n<li>\u517c\u5bb9 OpenAI \u7684 API \u7aef\u70b9<\/li>\n\n\n\n<li>\u901a\u8fc7\u73af\u5883\u53d8\u91cf\u8f7b\u677e\u914d\u7f6e<\/li>\n\n\n\n<li>\u652f\u6301\u81ea\u5b9a\u4e49SSL\u8bc1\u4e66<\/li>\n\n\n\n<li>\u5185\u7f6e\u57fa\u51c6\u6d4b\u8bd5\u5de5\u5177<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"capabilities\">\u80fd\u529b<\/h3>\n\n\n\n<p>\u5728\u6211\u4eec\u7684\u00a0<code>1x RTX 3090<\/code>\u00a0\u5b9e\u4f8b\u4e0a\u5bf9 Llama 3.1 8B \uff08fp16\uff09 \u8fdb\u884c\u57fa\u51c6\u6d4b\u8bd5\u8868\u660e\uff0c\u5b83\u53ef\u4ee5\u901a\u8fc7\u5728 100+ \u5e76\u53d1\u8bf7\u6c42\u4e0b\u5b9e\u73b0\u6bcf\u79d2\u5408\u7406\u7684\u4ee4\u724c\u6570\u6765\u652f\u6301\u62e5\u6709\u6570\u5343\u7528\u6237\u7684\u5e94\u7528\u7a0b\u5e8f\u3002<\/p>\n\n\n\n<p>\u4e0b\u56fe\u663e\u793a\uff0c\u5bf9\u4e8e 100 \u4e2a\u5e76\u53d1\u8bf7\u6c42\uff0c\u6bcf\u4e2a\u8bf7\u6c42\u6700\u574f\u60c5\u51b5 \uff08p99\uff09 \u4e3a 12.88 
\u4e2a\u4ee4\u724c\/\u79d2\uff0c\u5bfc\u81f4\u6bcf\u79d2\u603b\u4ee4\u724c\u6570\u4e3a 1300+\uff01<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"p99-tokens-per-second\">P99 \u6bcf\u79d2\u4ee4\u724c\u6570<\/h4>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" src=\"https:\/\/backprop-media.s3.eu-north-1.amazonaws.com\/vllm-readme\/p99-tokens-per-second.png\" alt=\"P99 \u6bcf\u79d2\u4ee4\u724c\u6570\"\/><\/figure>\n\n\n\n<p><a href=\"https:\/\/github.com\/backprop-ai\/vllm-benchmark\/blob\/main\/results\/3090.json\">\u8bf7\u5728\u6b64\u5904\u67e5\u770b\u539f\u59cb\u7ed3\u679c<\/a>\u3002<\/p>\n\n\n\n<p>\u8bf7\u6ce8\u610f\uff0c\u8fd9\u4f7f\u7528\u4e86\u4e00\u4e2a\u7b80\u5355\u7684\u4f4e\u4ee4\u724c\u63d0\u793a\uff0c\u5b9e\u9645\u7ed3\u679c\u53ef\u80fd\u4f1a\u6709\u6240\u4e0d\u540c\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"using-the-server\">\u4f7f\u7528\u670d\u52a1\u5668<\/h3>\n\n\n\n<p>\u53ef\u4ee5\u901a\u8fc7\u4ee5\u4e0b URL \u8bbf\u95ee vLLM \u670d\u52a1\u5668\uff1a<code>http:\/\/&lt;your-instance-public-ip&gt;:8000\/v1<\/code><\/p>\n\n\n\n<p>\u5c06\u00a0<code>&lt;your-instance-public-ip&gt;<\/code>\u00a0\u66ff\u6362\u4e3a Backprop \u5b9e\u4f8b\u7684\u516c\u5171 IP\u3002\u5982\u679c\u5df2\u914d\u7f6e\uff0c\u8bf7\u4f7f\u7528 https\u3002<\/p>\n\n\n\n<p>\u793a\u4f8b\u8bf7\u6c42\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:sh decode:true \">curl http:\/\/&lt;your-instance-public-ip&gt;:8000\/v1\/chat\/completions \\\n  -H \"Content-Type: application\/json\" \\\n  -H \"Authorization: Bearer token-abc123\" \\\n  -d '{\n    \"model\": \"NousResearch\/Meta-Llama-3.1-8B-Instruct\",\n    \"messages\": [\n      {\n        \"role\": \"system\",\n        \"content\": \"You are a helpful assistant.\"\n      },\n      {\n        \"role\": \"user\",\n        \"content\": \"Translate to French: Hello, how are you?\"\n      }\n    ]\n  
}'<\/pre><\/div>\n\n\n\n<p>\u6709\u5173\u8be6\u7ec6\u4fe1\u606f\uff0c\u8bf7\u53c2\u9605&nbsp;<a href=\"https:\/\/docs.vllm.ai\/en\/latest\/serving\/openai_compatible_server.html\">vLLM API \u53c2\u8003<\/a>\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"configuration\">\u914d\u7f6e<\/h3>\n\n\n\n<p>\u60a8\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u73af\u5883\u53d8\u91cf\u81ea\u5b9a\u4e49 vLLM \u670d\u52a1\u5668\u914d\u7f6e\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code><strong>MODEL_NAME<\/strong><\/code>\uff1a\u8981\u52a0\u8f7d\u7684 Huggingface \u6a21\u578b\u7684\u540d\u79f0\uff08\u9ed8\u8ba4\uff1a\u201cNousResearch\/Meta-Llama-3.1-8B-Instruct\u201d\uff09<\/li>\n\n\n\n<li><code><strong>API_KEY<\/strong><\/code>\uff1a\u7528\u4e8e\u8eab\u4efd\u9a8c\u8bc1\u7684 API \u5bc6\u94a5\uff08\u9ed8\u8ba4\uff1a\u201ctoken-abc123\u201d\uff09<\/li>\n\n\n\n<li><code><strong>GPU_MEMORY_UTILIZATION<\/strong><\/code>\uff1aGPU \u5185\u5b58\u5229\u7528\u7387\uff08\u9ed8\u8ba4\uff1a0.99\uff09<\/li>\n\n\n\n<li><code><strong>TENSOR_PARALLEL_SIZE<\/strong><\/code>\uff1a\u7528\u4e8e\u5f20\u91cf\u5e76\u884c\u7684 GPU \u6570\u91cf\uff08\u9ed8\u8ba4\u503c\uff1a1\uff09<\/li>\n\n\n\n<li><code><strong>MAX_MODEL_LEN<\/strong><\/code>\uff1a\u6700\u5927\u5e8f\u5217\u957f\u5ea6 &#8211; \u503c\u8d8a\u4f4e\uff0c\u4f7f\u7528\u7684 GPU VRAM \u8d8a\u5c11\uff08\u9ed8\u8ba4\u503c\uff1a50000\uff09<\/li>\n\n\n\n<li><code><strong>USE_HTTPS<\/strong><\/code>\uff1a\u8bbe\u7f6e\u4e3a\u201ctrue\u201d\u4ee5\u542f\u7528\u5177\u6709\u81ea\u7b7e\u540d\u8bc1\u4e66\u7684 HTTPS\uff08\u9ed8\u8ba4\u503c\uff1a\u201cfalse\u201d\uff09<\/li>\n<\/ul>\n\n\n\n<p>\u60a8\u53ef\u4ee5\u5728\u542f\u52a8\u73af\u5883\u65f6\u66f4\u65b0\u8fd9\u4e9b\u53d8\u91cf\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"custom-ssl-certificates\">\u81ea\u5b9a\u4e49SSL\u8bc1\u4e66<\/h3>\n\n\n\n<p>\u5982\u679c\u8981\u4f7f\u7528\u81ea\u5b9a\u4e49 SSL 
\u8bc1\u4e66\u800c\u4e0d\u662f\u81ea\u52a8\u751f\u6210\u7684\u8bc1\u4e66\uff0c\u53ef\u4ee5\u66ff\u6362\u4ee5\u4e0b\u6587\u4ef6\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><code>\/home\/ubuntu\/.vllm\/ssl\/cert.pem<\/code>\uff1a\u60a8\u7684SSL\u8bc1\u4e66<\/li>\n\n\n\n<li><code>\/home\/ubuntu\/.vllm\/ssl\/key.pem<\/code>\uff1a\u60a8\u7684 SSL \u79c1\u94a5<\/li>\n<\/ul>\n\n\n\n<p>\u66ff\u6362\u8fd9\u4e9b\u6587\u4ef6\u540e\uff0c\u91cd\u65b0\u542f\u52a8 vLLM \u670d\u52a1\u4ee5\u4f7f\u66f4\u6539\u751f\u6548\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"advanced-configuration\">\u9ad8\u7ea7\u914d\u7f6e<\/h3>\n\n\n\n<p>\u8981\u66f4\u65b0 vLLM \u670d\u52a1\u5668\u914d\u7f6e\uff0c\u8bf7\u6267\u884c\u4ee5\u4e0b\u64cd\u4f5c\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6839\u636e\u9700\u8981\u4fee\u6539\u73af\u5883\u53d8\u91cf\uff08\u89c1\u4e0a\u6587\uff09\u3002<\/li>\n\n\n\n<li>\u5982\u6709\u5fc5\u8981\uff0c\u8bf7\u7f16\u8f91 systemd \u670d\u52a1\u6587\u4ef6\uff1a\n<ul class=\"wp-block-list\">\n<li><code>sudo nano \/etc\/systemd\/system\/vllm.service<\/code><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u8fdb\u884c\u66f4\u6539\u540e\uff0c\u91cd\u65b0\u52a0\u8f7d systemd \u5b88\u62a4\u7a0b\u5e8f\u5e76\u91cd\u65b0\u542f\u52a8\u670d\u52a1\uff1a\n<ul class=\"wp-block-list\">\n<li><code>sudo systemctl daemon-reload <\/code><\/li>\n\n\n\n<li><code>sudo systemctl restart vllm<\/code><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"viewing-logs\">\u67e5\u770b\u65e5\u5fd7<\/h3>\n\n\n\n<p>\u8981\u67e5\u770b vLLM \u670d\u52a1\u5668\u65e5\u5fd7\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:sh decode:true \">sudo journalctl -u vllm -f<\/pre><\/div>\n\n\n\n<p>\u8fd9\u5c06\u5411\u60a8\u663e\u793a vLLM \u670d\u52a1\u7684\u5b9e\u65f6\u65e5\u5fd7\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"benchmarking\">\u57fa\u51c6\u6d4b\u8bd5<\/h3>\n\n\n\n<p>\u6b64\u73af\u5883\u9644\u5e26\u5185\u7f6e\u7684\u57fa\u51c6\u6d4b\u8bd5\u5de5\u5177\uff08<a href=\"https:\/\/github.com\/backprop-ai\/vllm-benchmark\/\">\u8bf7\u53c2\u9605 repo<\/a>\uff09\u3002\u60a8\u53ef\u4ee5\u5728\u00a0<code>\/home\/ubuntu\/vllm-benchmark<\/code>\u00a0\u76ee\u5f55\u4e2d\u627e\u5230\u57fa\u51c6\u6d4b\u8bd5\u811a\u672c\u3002<\/p>\n\n\n\n<p>\u8981\u8fd0\u884c\u57fa\u51c6\u6d4b\u8bd5\uff0c\u8bf7\u6267\u884c\u4ee5\u4e0b\u64cd\u4f5c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:sh decode:true \">cd \/home\/ubuntu\/vllm-benchmark\npython vllm_benchmark.py \\\n--vllm_url \"http:\/\/&lt;your-instance-public-ip&gt;:8000\/v1\" \\\n--api_key \"your-api-key\" \\\n--num_requests 100 --concurrency 10<\/pre><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"further-documentation\">\u66f4\u591a\u6587\u6863<\/h3>\n\n\n\n<p>\u6709\u5173 vLLM \u53ca\u5176 OpenAI \u517c\u5bb9\u670d\u52a1\u5668\u7684\u66f4\u591a\u8be6\u7ec6\u4fe1\u606f\uff0c\u8bf7\u53c2\u9605<a href=\"https:\/\/docs.vllm.ai\/en\/latest\/serving\/openai_compatible_server.html\">\u5b98\u65b9 vLLM \u6587\u6863<\/a>\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6b64\u73af\u5883\u63d0\u4f9b\u4e86&nbsp;vLLM&nbsp;\u670d\u52a1\u5f15\u64ce\u7684\u9ad8\u6027\u80fd\u90e8\u7f72\uff0c\u8be5\u5f15\u64ce\u9488\u5bf9\u5927\u89c4\u6a21\u670d\u52a1\u5927\u578b\u8bed\u8a00\u6a21\u578b\u8fdb\u884c\u4e86\u4f18\u5316\u3002 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[444,445,443,442],"tags":[536],"class_list":["post-4513","post","type-post","status-publish","format-standard","hentry","category-ai","category-ainews","category-llm","category-llms","tag-vllm"],"views":7599,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4513","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=4513"}],"version-history":[{"count":4,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4513\/revisions"}],"predecessor-version":[{"id":4517,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/4513\/revisions\/4517"}],
"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=4513"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=4513"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=4513"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}