{"id":2619,"date":"2024-03-28T17:09:40","date_gmt":"2024-03-28T09:09:40","guid":{"rendered":"https:\/\/www.aqwu.net\/wp\/?p=2619"},"modified":"2024-04-28T20:01:21","modified_gmt":"2024-04-28T12:01:21","slug":"grok-1-%e7%9a%84%e4%b8%80%e6%ac%a1%e6%9c%89%e6%95%88%e8%be%93%e5%87%ba%e7%bb%93%e6%9e%9c","status":"publish","type":"post","link":"https:\/\/www.aqwu.net\/wp\/?p=2619","title":{"rendered":"grok-1 \u7684\u4e00\u6b21\u6709\u6548\u8f93\u51fa\u7ed3\u679c"},"content":{"rendered":"\n<p>\u6d4b\u8bd5\u73af\u5883 windows, llama.cpp, cuda, \u8fd0\u884c\u547d\u4ee4\uff1a<\/p>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:sh decode:true \" >main.exe -t 16 -ngl 256 -m models\\grok-1-q2_k_s-00001-of-00009.gguf --color -c 4096 --temp 0.9 --repeat_penalty 1.1 -n -1 -p \"### Instruction: Write me a linked list implementation in C\/C++, starting with '```' and ending with '```'\\n### Response:\"\n<\/pre><\/div>\n\n\n\n<div class=\"wp-block-urvanov-syntax-highlighter-code-block\"><pre class=\"lang:c decode:true \" >main.exe -t 16 -ngl 256 -m models\\grok-1-q2_k_s-00001-of-00009.gguf --color -c 4096 --temp 0.9 --repeat_penalty 1.1 -n -1 -p \"### Instruction: Write me a linked list implementation in C\/C++, starting with '```' and ending with '```'\\n### Response:\"\nLog start\nmain: build = 2555 (d0e2f641)\nmain: built with MSVC 19.39.33523.0 for x64\nmain: seed  = 1711616424\nllama_model_loader: additional 8 GGUFs metadata loaded.\nllama_model_loader: loaded meta data with 27 key-value pairs and 2114 tensors from c:\\ai\\grok\\grok-1\\models\\grok-1-q2_k_s-00001-of-00009.gguf (version GGUF V3 (latest))\nllama_model_loader: Dumping metadata keys\/values. Note: KV overrides do not apply in this output.\nllama_model_loader: - kv   0:                       general.architecture str              = grok\nllama_model_loader: - kv   1:                               general.name str              = Grok\nllama_model_loader: - kv   2:                           grok.block_count u32              = 64\nllama_model_loader: - kv   3:                        grok.context_length u32              = 4096\nllama_model_loader: - kv   4:                      grok.embedding_length u32              = 6144\nllama_model_loader: - kv   5:                   grok.feed_forward_length u32              = 32768\nllama_model_loader: - kv   6:                  grok.attention.head_count u32              = 48\nllama_model_loader: - kv   7:               grok.attention.head_count_kv u32              = 8\nllama_model_loader: - kv   8:                        grok.rope.freq_base f32              = 100000.000000\nllama_model_loader: - kv   9:      grok.attention.layer_norm_rms_epsilon f32              = 0.000010\nllama_model_loader: - kv  10:                          grok.expert_count u32              = 8\nllama_model_loader: - kv  11:                     grok.expert_used_count u32              = 2\nllama_model_loader: - kv  12:                          general.file_type u32              = 10\nllama_model_loader: - kv  13:                       tokenizer.ggml.model str              = llama\nllama_model_loader: - kv  14:                      tokenizer.ggml.tokens arr[str,131072]  = [\"[PAD]\", \"[BOS]\", \"[EOS]\", \"[UNK]\", ...\nllama_model_loader: - kv  15:                      tokenizer.ggml.scores arr[f32,131072]  = [0.000000, 0.000000, 0.000000, 0.0000...\nllama_model_loader: - kv  16:                  tokenizer.ggml.token_type arr[i32,131072]  = [3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, ...\nllama_model_loader: - kv  17:                tokenizer.ggml.bos_token_id u32              = 1\nllama_model_loader: - kv  18:                tokenizer.ggml.eos_token_id u32              = 2\nllama_model_loader: - kv  19:            tokenizer.ggml.unknown_token_id u32              = 3\nllama_model_loader: - kv  20:            tokenizer.ggml.padding_token_id u32              = 0\nllama_model_loader: - kv  21:               tokenizer.ggml.add_bos_token bool             = false\nllama_model_loader: - kv  22:               tokenizer.ggml.add_eos_token bool             = false\nllama_model_loader: - kv  23:               general.quantization_version u32              = 2\nllama_model_loader: - kv  24:                                   split.no u16              = 0\nllama_model_loader: - kv  25:                                split.count u16              = 9\nllama_model_loader: - kv  26:                        split.tensors.count i32              = 2114\nllama_model_loader: - type  f32:  257 tensors\nllama_model_loader: - type  f16:   64 tensors\nllama_model_loader: - type q8_0:  128 tensors\nllama_model_loader: - type q2_K: 1088 tensors\nllama_model_loader: - type q3_K:  512 tensors\nllama_model_loader: - type q5_K:   64 tensors\nllama_model_loader: - type q6_K:    1 tensors\nllm_load_vocab: mismatch in special tokens definition ( 284\/131072 vs 260\/131072 ).\nllm_load_print_meta: format           = GGUF V3 (latest)\nllm_load_print_meta: arch             = grok\nllm_load_print_meta: vocab type       = SPM\nllm_load_print_meta: n_vocab          = 131072\nllm_load_print_meta: n_merges         = 0\nllm_load_print_meta: n_ctx_train      = 4096\nllm_load_print_meta: n_embd           = 6144\nllm_load_print_meta: n_head           = 48\nllm_load_print_meta: n_head_kv        = 8\nllm_load_print_meta: n_layer          = 64\nllm_load_print_meta: n_rot            = 128\nllm_load_print_meta: n_embd_head_k    = 128\nllm_load_print_meta: n_embd_head_v    = 128\nllm_load_print_meta: n_gqa            = 6\nllm_load_print_meta: n_embd_k_gqa     = 1024\nllm_load_print_meta: n_embd_v_gqa     = 1024\nllm_load_print_meta: f_norm_eps       = 0.0e+00\nllm_load_print_meta: f_norm_rms_eps   = 1.0e-05\nllm_load_print_meta: f_clamp_kqv      = 0.0e+00\nllm_load_print_meta: f_max_alibi_bias = 0.0e+00\nllm_load_print_meta: f_logit_scale    = 0.0e+00\nllm_load_print_meta: n_ff             = 32768\nllm_load_print_meta: n_expert         = 8\nllm_load_print_meta: n_expert_used    = 2\nllm_load_print_meta: causal attn      = 1\nllm_load_print_meta: pooling type     = 0\nllm_load_print_meta: rope type        = 2\nllm_load_print_meta: rope scaling     = linear\nllm_load_print_meta: freq_base_train  = 100000.0\nllm_load_print_meta: freq_scale_train = 1\nllm_load_print_meta: n_yarn_orig_ctx  = 4096\nllm_load_print_meta: rope_finetuned   = unknown\nllm_load_print_meta: ssm_d_conv       = 0\nllm_load_print_meta: ssm_d_inner      = 0\nllm_load_print_meta: ssm_d_state      = 0\nllm_load_print_meta: ssm_dt_rank      = 0\nllm_load_print_meta: model type       = 314B\nllm_load_print_meta: model ftype      = Q2_K - Medium\nllm_load_print_meta: model params     = 315.68 B\nllm_load_print_meta: model size       = 107.96 GiB (2.94 BPW)\nllm_load_print_meta: general.name     = Grok\nllm_load_print_meta: BOS token        = 1 '[BOS]'\nllm_load_print_meta: EOS token        = 2 '[EOS]'\nllm_load_print_meta: UNK token        = 3 '[UNK]'\nllm_load_print_meta: PAD token        = 0 '[PAD]'\nllm_load_print_meta: LF token         = 79 '&lt;0x0A&gt;'\nggml_cuda_init: GGML_CUDA_FORCE_MMQ:   no\nggml_cuda_init: CUDA_USE_TENSOR_CORES: yes\nggml_cuda_init: found 8 CUDA devices:\n  Device 0: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 1: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 2: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 3: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 4: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 5: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 6: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\n  Device 7: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes\nllm_load_tensors: ggml ctx size =    7.26 MiB\nllm_load_tensors: offloading 64 repeating layers to GPU\nllm_load_tensors: offloading non-repeating layers to GPU\nllm_load_tensors: offloaded 65\/65 layers to GPU\nllm_load_tensors:        CPU buffer size =   630.00 MiB\nllm_load_tensors:      CUDA0 buffer size = 15457.50 MiB\nllm_load_tensors:      CUDA1 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA2 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA3 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA4 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA5 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA6 buffer size = 13740.00 MiB\nllm_load_tensors:      CUDA7 buffer size = 12652.52 MiB\n....................................................................................................\nllama_new_context_with_model: n_ctx      = 4096\nllama_new_context_with_model: n_batch    = 2048\nllama_new_context_with_model: n_ubatch   = 512\nllama_new_context_with_model: freq_base  = 100000.0\nllama_new_context_with_model: freq_scale = 1\nllama_kv_cache_init:      CUDA0 KV buffer size =   144.00 MiB\nllama_kv_cache_init:      CUDA1 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA2 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA3 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA4 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA5 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA6 KV buffer size =   128.00 MiB\nllama_kv_cache_init:      CUDA7 KV buffer size =   112.00 MiB\nllama_new_context_with_model: KV self size  = 1024.00 MiB, K (f16):  512.00 MiB, V (f16):  512.00 MiB\nllama_new_context_with_model:  CUDA_Host  output buffer size =     0.50 MiB\nllama_new_context_with_model: pipeline parallelism enabled (n_copies=4)\nllama_new_context_with_model:      CUDA0 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA1 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA2 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA3 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA4 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA5 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA6 compute buffer size =   564.04 MiB\nllama_new_context_with_model:      CUDA7 compute buffer size =   564.05 MiB\nllama_new_context_with_model:  CUDA_Host compute buffer size =    44.02 MiB\nllama_new_context_with_model: graph nodes  = 3784\nllama_new_context_with_model: graph splits = 9\n\nsystem_info: n_threads = 16 \/ 64 | AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 |\nsampling:\n        repeat_last_n = 64, repeat_penalty = 1.100, frequency_penalty = 0.000, presence_penalty = 0.000\n        top_k = 40, tfs_z = 1.000, top_p = 0.950, min_p = 0.050, typical_p = 1.000, temp = 0.900\n        mirostat = 0, mirostat_lr = 0.100, mirostat_ent = 5.000\nsampling order:\nCFG -&gt; Penalties -&gt; top_k -&gt; tfs_z -&gt; typical_p -&gt; top_p -&gt; min_p -&gt; temperature\ngenerate: n_ctx = 4096, n_batch = 2048, n_predict = -1, n_keep = 0\n\n\n ### Instruction: Write me a linked list implementation in C\/C++, starting with '```' and ending with '```'\\n### Response:\n\n&lt;details&gt;&lt;summary&gt;Click to expand&lt;\/summary&gt;\n&lt;p&gt;\n#include &lt;stdio.h&gt;\n#include &lt;stdlib.h&gt;\n\nstruct node {\n  int data;\n  struct node *next;\n};\n\nvoid push(struct node **head, int value) {\n  struct node *node = malloc(sizeof(struct node));\n  node-&gt;data = value;\n  node-&gt;next = *head;\n  *head = node;\n}\n\nint main() {\n  \/\/ Create a new head pointer for the list.\n  struct node **list = (struct node **) calloc(1, sizeof(struct node *));\n  push(&amp;(*list), 0); \/\/ Insert a value into the list.\n  for (int i = 1; i &lt; 8; i++) {\n    \/\/ Create new nodes for each element in array and add to linked-list.\n    if (!i) continue;\n    push(list, i);\n  }\n\n  struct node *current = *list;\n  while (current-&gt;next) {\n    printf(\"%d \", current-&gt;data);\n    current = (*list)-&gt;next;\n  }\n  printf(\"\\n\"); \/\/ 1234567\n}\n\n&lt;\/p&gt;\n&lt;\/details&gt; [end of text]\n\nllama_print_timings:        load time =  142096.82 ms\nllama_print_timings:      sample time =     174.00 ms \/   281 runs   (    0.62 ms per token,  1614.90 tokens per second)\nllama_print_timings: prompt eval time =    2152.91 ms \/    29 tokens (   74.24 ms per token,    13.47 tokens per second)\nllama_print_timings:        eval time =  257631.30 ms \/   280 runs   (  920.11 ms per token,     1.09 tokens per second)\nllama_print_timings:       total time =  260199.48 ms \/   309 tokens\nLog end\n<\/pre><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u6d4b\u8bd5\u73af\u5883 windows, llama.cpp, cuda, \u8fd0\u884c\u547d\u4ee4\uff1a<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[313,289,443,442,312],"tags":[398],"class_list":["post-2619","post","type-post","status-publish","format-standard","hentry","category-chatgpt","category-gpt","category-llm","category-llms","category-openai","tag-grok-1"],"views":1932,"jetpack_sharing_enabled":true,"jetpack_featured_media_url":"","_links":{"self":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2619","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2619"}],"version-history":[{"count":4,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2619\/revisions"}],"predecessor-version":[{"id":2623,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=\/wp\/v2\/posts\/2619\/revisions\/2623"}],"wp:attachment":[{"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2619"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2619"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.aqwu.net\/wp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2619"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}