当前位置：首页 > news >正文

做推广的网站带宽需要多少合适网站建设作业教程

news 2025/11/14 17:10:16

做推广的网站带宽需要多少合适,网站建设作业教程,珠海柏泰教育官方网站建设,东莞住建局官网查询

llama.cpp是一个C++编写的轻量级开源类AIGC大模型框架，可以支持在消费级普通设备上本地部署运行大模型，以及作为依赖库集成到应用程序中提供类GPT的功能。以下基于llama.cpp的源码利用C++ api来开发实例demo演示加载本地模型文件并提供GPT文本生成。项…

llama.cpp是一个C++编写的轻量级开源类AIGC大模型框架，可以支持在消费级普通设备上本地部署运行大模型，以及作为依赖库集成到应用程序中提供类GPT的功能。

以下基于llama.cpp的源码利用C++ api来开发实例demo演示加载本地模型文件并提供GPT文本生成。

项目结构

```
llamacpp_starter
    - llama.cpp-b1547
    - src
      |- main.cpp
    - CMakeLists.txt
```

CMakeLists.txt

```cmake
cmake_minimum_required(VERSION 3.15)

# this only works for unix, xapian source code not support compile in windows yet

project(llamacpp_starter)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(llama.cpp-b1547)

include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-b1547
    ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-b1547/common
)

file(GLOB SRC
    src/*.h
    src/*.cpp
)

add_executable(${PROJECT_NAME} ${SRC})

target_link_libraries(${PROJECT_NAME}
    common
    llama
)
```

main.cpp

```cpp
#include <iostream>
#include <string>
#include <vector>
#include "common.h"
#include "llama.h"

int main(int argc, char** argv)
{
    bool numa_support = false;
    const std::string model_file_path = "./llama-ggml.gguf";
    const std::string prompt = "once upon a time"; // input words
    const int n_len = 32; // total length of the sequence including the prompt

    // set gpt params
    gpt_params params;
    params.model = model_file_path;
    params.prompt = prompt;

    // init LLM
    llama_backend_init(false);

    // load model
    llama_model_params model_params = llama_model_default_params();
    //model_params.n_gpu_layers = 99; // offload all layers to the GPU
    llama_model* model = llama_load_model_from_file(model_file_path.c_str(), model_params);

    if (model == NULL)
    {
        std::cerr << __func__ << " load model file error" << std::endl;
        return 1;
    }

    // init context
    llama_context_params ctx_params = llama_context_default_params();

    ctx_params.seed = 1234;
    ctx_params.n_ctx = 2048;
    ctx_params.n_threads = params.n_threads;
    ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;

    llama_context* ctx = llama_new_context_with_model(model, ctx_params);

    if (ctx == NULL)
    {
        std::cerr << __func__ << " failed to create the llama_context" << std::endl;
        return 1;
    }

    // tokenize the prompt
    std::vector<llama_token> tokens_list = llama_tokenize(ctx, params.prompt, true);

    const int n_ctx = llama_n_ctx(ctx);
    const int n_kv_req = tokens_list.size() + (n_len - tokens_list.size());

    // make sure the KV cache is big enough to hold all the prompt and generated tokens
    if (n_kv_req > n_ctx)
    {
        std::cerr << __func__ << " error: n_kv_req > n_ctx, the required KV cache size is not big enough" << std::endl;
        std::cerr << __func__ << " either reduce n_parallel or increase n_ctx" << std::endl;
        return 1;
    }

    // print the prompt token-by-token
    for (auto id : tokens_list)
        std::cout << llama_token_to_piece(ctx, id) << " ";
    std::cout << std::endl;

    // create a llama_batch with size 512
    // we use this object to submit token data for decoding
    llama_batch batch = llama_batch_init(512, 0, 1);

    // evaluate the initial prompt
    for (size_t i = 0; i < tokens_list.size(); i++)
        llama_batch_add(batch, tokens_list[i], i, { 0 }, false);

    // llama_decode will output logits only for the last token of the prompt
    batch.logits[batch.n_tokens - 1] = true;

    if (llama_decode(ctx, batch) != 0)
    {
        std::cerr << __func__ << " llama_decode failed" << std::endl;
        return 1;
    }

    // main loop to generate words
    int n_cur = batch.n_tokens;
    int n_decode = 0;

    const auto t_main_start = ggml_time_us();

    while (n_cur <= n_len)
    {
        // sample the next token
        auto n_vocab = llama_n_vocab(model);
        auto* logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);

        std::vector<llama_token_data> candidates;
        candidates.reserve(n_vocab);

        for (llama_token token_id = 0; token_id < n_vocab; token_id++)
        {
            candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
        }

        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };

        // sample the most likely token
        const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p);

        // is it an end of stream?
        if (new_token_id == llama_token_eos(model) || n_cur == n_len)
        {
            std::cout << std::endl;
            break;
        }

        std::cout << llama_token_to_piece(ctx, new_token_id) << " ";

        // prepare the next batch
        llama_batch_clear(batch);

        // push this new token for next evaluation
        llama_batch_add(batch, new_token_id, n_cur, { 0 }, true);

        n_decode += 1;
        n_cur += 1;

        // evaluate the current batch with the transformer model
        if (llama_decode(ctx, batch))
        {
            std::cerr << __func__ << " failed to eval" << std::endl;
            return 1;
        }
    }

    std::cout << std::endl;

    const auto t_main_end = ggml_time_us();

    std::cout << __func__ << " decoded " << n_decode << " tokens in " << (t_main_end - t_main_start) / 1000000.0f << " s, speed: " << n_decode / ((t_main_end - t_main_start) / 1000000.0f) << " t / s" << std::endl;

    llama_print_timings(ctx);

    llama_batch_free(batch);

    // free context
    llama_free(ctx);
    llama_free_model(model);

    // free LLM
    llama_backend_free();

    return 0;
}
```

源码 llamacpp_starter

本文由博客一文多发平台 OpenWrite 发布

查看全文

http://www.zqtcl.cn/news/995456/