[{"data":1,"prerenderedAt":3806},["ShallowReactive",2],{"search-docs":3,"doc-\u002Fai\u002Fllm\u002Flocal-deploy":886},[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,159,162,165,169,172,175,178,182,186,190,194,198,202,206,210,214,218,222,226,230,234,238,242,246,250,254,258,262,266,269,273,277,281,285,288,291,294,298,301,304,307,310,313,316,319,322,325,329,332,336,340,344,348,352,356,359,362,365,368,371,374,377,380,383,386,389,393,396,399,402,405,408,411,414,417,420,424,428,432,435,438,442,446,450,454,458,462,466,470,474,477,480,483,487,491,494,497,500,504,507,511,515,518,521,524,527,530,533,536,539,542,545,548,551,554,557,560,563,566,569,572,575,579,583,587,591,595,599,603,606,610,614,617,620,623,626,629,633,637,640,643,646,649,652,655,658,661,664,667,670,673,676,679,682,685,688,691,694,697,700,703,706,709,712,716,720,724,728,732,736,740,744,748,752,756,760,764,768,772,775,779,783,787,790,793,796,799,802,805,808,811,814,818,822,825,829,832,835,838,841,844,848,851,854,858,862,865,869,873,876,879,882],{"path":5,"title":6,"description":7},"\u002Fabout\u002Fauthor","作者相关","只想纯粹的做一个程序员...",{"path":9,"title":10,"description":11},"\u002Fabout\u002Fjourney","心路历程","",{"path":13,"title":14,"description":15},"\u002Fai\u002Fagent\u002Fframeworks","Agent 框架","主流 Agent 框架：LangChain、LlamaIndex、AutoGen、CrewAI",{"path":17,"title":18,"description":19},"\u002Fai\u002Fagent\u002Fhooks","Agent Hooks 与自动化","Claude Agent 的 Hooks 生命周期、事件类型、典型自动化场景",{"path":21,"title":22,"description":23},"\u002Fai\u002Fagent\u002Fintroduction","AI Agent 概述","AI Agent 核心概念：感知、规划、执行、记忆",{"path":25,"title":26,"description":27},"\u002Fai\u002Fagent\u002Fpractice","Agent 实战","AI Agent 实战：构建自主任务执行系统",{"path":29,"title":30,"description":31},"\u002Fai\u002Fagent\u002Fsdk","Claude Agent SDK 开发","使用 Claude Agent SDK 构建自定义 AI Agent：架构、API、生命周期",{"path":33,"title":34,"description":35},"\u002Fai\u002Fagent\u002Fsubagents","Subagents 子代理","用 Subagents 分解复杂任务、并发执行、隔离上下文",{"path":37,"title":38,"description":39},"\u002Fai\u002Fagent\u002Ftool-use","工具调用","AI Agent 工具调用：Function Calling、Tool Use 原理与实践",{"path":41,"title":42,"description":43},"\u002Fai\u002Ffundamentals\u002Fdeep-learning","深度学习入门","深度学习基础知识：前向传播、反向传播、损失函数、优化器",{"path":45,"title":46,"description":47},"\u002Fai\u002Ffundamentals\u002Fml-basics","机器学习基础","机器学习核心概念：监督学习、无监督学习、强化学习",{"path":49,"title":50,"description":51},"\u002Fai\u002Ffundamentals\u002Fneural-networks","神经网络原理","神经网络架构：CNN、RNN、注意力机制",{"path":53,"title":54,"description":55},"\u002Fai\u002Fgetting-started","AI 学习路线","AI 技术学习路线图，从基础到实战的完整指南",{"path":57,"title":58,"description":59},"\u002Fai\u002Fllm\u002Ffine-tuning","模型微调","大模型微调技术：LoRA、QLoRA、全量微调、RLHF",{"path":61,"title":62,"description":63},"\u002Fai\u002Fllm\u002Fintroduction","大模型概述","大语言模型发展历程、核心能力与主流模型对比",{"path":65,"title":66,"description":67},"\u002Fai\u002Fllm\u002Flocal-deploy","本地部署","大模型本地部署：Ollama、vLLM、llama.cpp",{"path":69,"title":70,"description":71},"\u002Fai\u002Fllm\u002Ftransformer","Transformer 架构","Transformer 架构详解：自注意力机制、位置编码、多头注意力",{"path":73,"title":74,"description":75},"\u002Fai\u002Fmcp\u002Fclient","MCP Client 开发","MCP Client 开发指南：连接、调用、集成",{"path":77,"title":78,"description":79},"\u002Fai\u002Fmcp\u002Fdebugging","MCP 调试与排错","MCP Server 开发与集成过程中的常见问题、日志分析、诊断工具",{"path":81,"title":82,"description":83},"\u002Fai\u002Fmcp\u002Fintroduction","MCP 概述","Model Context Protocol 协议概述：架构、核心概念、应用场景",{"path":85,"title":86,"description":87},"\u002Fai\u002Fmcp\u002Fserver","MCP Server 开发","MCP Server 开发指南：资源、工具、提示词的实现",{"path":89,"title":90,"description":91},"\u002Fai\u002Fmcp\u002Ftools","MCP Tools 深入","深入理解 MCP Tools：与 Resources\u002FPrompts 的差异、Schema 设计、Annotations 与权限控制",{"path":93,"title":94,"description":95},"\u002Fai\u002Fprompt\u002Fadvanced","高级 Prompt 模式","高级 Prompt 设计模式：Tree-of-Thought、自我反思、多轮对话策略",{"path":97,"title":98,"description":99},"\u002Fai\u002Fprompt\u002Fbasics","Prompt 基础","Prompt Engineering 入门：基本概念、角色设定、输出格式控制",{"path":101,"title":102,"description":103},"\u002Fai\u002Fprompt\u002Ftechniques","提示词技巧","常用提示词技巧：Few-shot、Chain-of-Thought、ReAct",{"path":105,"title":106,"description":107},"\u002Fai\u002Frag\u002Fembedding","文本嵌入","文本嵌入模型：Embedding 原理、模型选择、相似度计算",{"path":109,"title":110,"description":111},"\u002Fai\u002Frag\u002Fintroduction","RAG 概述","检索增强生成（RAG）架构原理、优势与应用场景",{"path":113,"title":114,"description":115},"\u002Fai\u002Frag\u002Fpractice","RAG 实战","RAG 应用实战：文档问答系统、知识库搭建",{"path":117,"title":118,"description":119},"\u002Fai\u002Frag\u002Fvector-database","向量数据库","主流向量数据库对比：Milvus、Pinecone、Chroma、Weaviate",{"path":121,"title":122,"description":123},"\u002Fai\u002Fskills\u002Fbest-practices","Skill 最佳实践","编写高质量 Skill 的设计原则、常见陷阱与优化技巧",{"path":125,"title":126,"description":127},"\u002Fai\u002Fskills\u002Fcreating","创建自定义 Skill","从零编写一个可被 Agent 自动发现和调用的 Skill",{"path":129,"title":130,"description":131},"\u002Fai\u002Fskills\u002Fintroduction","Agent Skills 概述","Claude Agent Skills 概念、工作原理、与 Tools\u002FMCP 的区别",{"path":133,"title":134,"description":135},"\u002Fgolang\u002Fadvanced\u002Fconcurrency","Go - 并发深入","深入理解 Go 并发编程的核心机制。",{"path":137,"title":138,"description":139},"\u002Fgolang\u002Fadvanced\u002Fgc","Go - 垃圾回收","理解 Go 的垃圾回收机制，掌握 GC 调优方法。",{"path":141,"title":142,"description":143},"\u002Fgolang\u002Fadvanced\u002Fgmp","Go - GMP 调度模型","GMP 是 Go 运行时调度器的核心模型，理解它对于编写高性能 Go 程序至关重要。",{"path":145,"title":146,"description":147},"\u002Fgolang\u002Fadvanced\u002Fgo-concurrency","Go - 并发编程","Go 的并发是其核心特性之一，通过 Goroutine 和 Channel 实现。",{"path":149,"title":150,"description":151},"\u002Fgolang\u002Fadvanced\u002Fmemory","Go - 内存模型","理解 Go 的内存分配机制和内存模型。",{"path":153,"title":154,"description":155},"\u002Fgolang\u002Fadvanced\u002Fprofiling","Go - 性能分析","掌握 Go 的性能分析工具：pprof、trace、benchmark。",{"path":157,"title":158,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-basic","Go - 基础语法",{"path":160,"title":161,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-composite","Go - 复合类型",{"path":163,"title":164,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-control","Go - 流程控制",{"path":166,"title":167,"description":168},"\u002Fgolang\u002Fcore\u002Fgo-error","Go - 错误处理","Go 使用显式的错误返回值来处理错误，而不是异常机制。",{"path":170,"title":171,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-function","Go - 函数",{"path":173,"title":174,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-install","Go - 环境搭建",{"path":176,"title":177,"description":11},"\u002Fgolang\u002Fcore\u002Fgo-interface","Go - 接口",{"path":179,"title":180,"description":181},"\u002Fgolang\u002Fcore\u002Fgo-module","Go - 包管理","Go Modules 是 Go 1.11 引入的官方依赖管理方案，Go 1.16 后成为默认模式。",{"path":183,"title":184,"description":185},"\u002Fgolang\u002Fdistributed\u002Fgrpc","Go - gRPC","gRPC 是 Google 开发的高性能 RPC 框架，使用 Protocol Buffers 作为序列化协议。",{"path":187,"title":188,"description":189},"\u002Fgolang\u002Fdistributed\u002Fmicroservice","Go - 微服务","微服务架构的核心组件：服务发现、负载均衡、熔断降级。",{"path":191,"title":192,"description":193},"\u002Fgolang\u002Fdistributed\u002Fmq","Go - 消息队列","使用 Go 操作 Kafka 和 RabbitMQ。",{"path":195,"title":196,"description":197},"\u002Fgolang\u002Fdistributed\u002Fredis","Go - Redis","使用 go-redis 操作 Redis，实现缓存、分布式锁等功能。",{"path":199,"title":200,"description":201},"\u002Fgolang\u002Fengineering\u002Fconfig","Go - 配置管理","使用 viper 进行配置管理，支持多种配置格式和配置中心。",{"path":203,"title":204,"description":205},"\u002Fgolang\u002Fengineering\u002Fdocker","Go - Docker 部署","使用 Docker 容器化部署 Go 应用。",{"path":207,"title":208,"description":209},"\u002Fgolang\u002Fengineering\u002Fkubernetes","Go - Kubernetes 部署","在 Kubernetes 上部署和管理 Go 应用。",{"path":211,"title":212,"description":213},"\u002Fgolang\u002Fengineering\u002Flogging","Go - 日志系统","使用 zap 和 logrus 构建高性能结构化日志系统。",{"path":215,"title":216,"description":217},"\u002Fgolang\u002Fengineering\u002Ftesting","Go - 单元测试","Go 内置了强大的测试框架，掌握测试是编写高质量代码的基础。",{"path":219,"title":220,"description":221},"\u002Fgolang\u002Fstdlib\u002Fbufio","bufio","在 Go 语言中，bufio 包提供了带缓冲的 I\u002FO 操作，能够提高读写性能。以下是一些常用的 bufio 包 API 及其详细说明：",{"path":223,"title":224,"description":225},"\u002Fgolang\u002Fstdlib\u002Fcontainer","container","在Go语言标准库中，container 包提供了几种常用的数据结构实现，这些数据结构对于高效地管理和操作数据非常有用。以下是 container 包中主要的数据结构：",{"path":227,"title":228,"description":229},"\u002Fgolang\u002Fstdlib\u002Fcrypto","crypto","在 Go 语言中，crypto 包提供了一组用于加密和解密的功能。以下是一些常用的 crypto 包及其子包的 API 及其详细说明：",{"path":231,"title":232,"description":233},"\u002Fgolang\u002Fstdlib\u002Fencoding-csv","encoding\u002Fcsv","在 Go 语言中，encoding\u002Fcsv 包提供了对 CSV（逗号分隔值）文件进行读写的功能。以下是一些常用的 encoding\u002Fcsv 包的 API 及其详细说明：",{"path":235,"title":236,"description":237},"\u002Fgolang\u002Fstdlib\u002Fencoding-json","encoding\u002Fjson","在 Go 语言中，encoding\u002Fjson 包提供了对 JSON 数据进行编码和解码的功能。以下是一些常用的 encoding\u002Fjson 包的 API 及其详细说明：",{"path":239,"title":240,"description":241},"\u002Fgolang\u002Fstdlib\u002Fencoding-xml","encoding\u002Fxml","在 Go 语言中，encoding\u002Fxml 包提供了对 XML 数据进行编码和解码的功能。以下是一些常用的 encoding\u002Fxml 包的 API 及其详细说明：",{"path":243,"title":244,"description":245},"\u002Fgolang\u002Fstdlib\u002Fflag","flag","在Go语言中，flag 包是用于处理命令行参数的标准库，它提供了一种简单而直接的方式来解析和使用命令行参数。下面是关于 flag 包的一些基本介绍和常用功能：",{"path":247,"title":248,"description":249},"\u002Fgolang\u002Fstdlib\u002Ffmt","fmt","在 Go 语言的标准库中，fmt 包是非常重要的，它提供了处理格式化输入和输出的基本工具。以下是一些 fmt 包内常用的API：",{"path":251,"title":252,"description":253},"\u002Fgolang\u002Fstdlib\u002Fhttp","net\u002Fhttp","在 Go 语言中，net\u002Fhttp 包提供了用于构建 HTTP 客户端和服务器的强大工具。以下是一些常用的 net\u002Fhttp 包的 API 及其详细说明：",{"path":255,"title":256,"description":257},"\u002Fgolang\u002Fstdlib\u002Fio","io","在 Go 语言中，io 包提供了基本的输入输出功能。以下是一些常用的 io 包的 API 及其详细说明：",{"path":259,"title":260,"description":261},"\u002Fgolang\u002Fstdlib\u002Flog","log","在 Go 语言中，log 包提供了简单的日志记录功能。以下是一些常用的 log 包的 API 及其详细说明：",{"path":263,"title":264,"description":265},"\u002Fgolang\u002Fstdlib\u002Fmath","math","在 Go 语言中，math 包提供了基本的数学函数和常量。以下是一些常用的 math 包的 API 及其详细说明：",{"path":267,"title":268,"description":11},"\u002Fgolang\u002Fstdlib\u002Fnet","net",{"path":270,"title":271,"description":272},"\u002Fgolang\u002Fstdlib\u002Fos","os","在Go语言中，os 包是一个非常重要且常用的标准库，它提供了与操作系统交互的功能，包括文件操作、环境变量管理、进程管理等。下面是一些 os 包中常用的功能和API：",{"path":274,"title":275,"description":276},"\u002Fgolang\u002Fstdlib\u002Fsort","order","在 Go 语言中，sort 包提供了对切片和用户定义的集合进行排序的函数。它实现了常见的排序算法，如快速排序（Quicksort）和堆排序（Heapsort），并且为自定义集合提供了接口，使得用户可以根据特定的需求进行排序。",{"path":278,"title":279,"description":280},"\u002Fgolang\u002Fstdlib\u002Fstrconv","strconv","在 Go 语言中，strconv 包提供了字符串和基本数据类型之间的转换函数，例如将整数转换为字符串、字符串转换为整数，以及其他类型之间的转换。这些功能非常有用，特别是在处理用户输入或从外部数据源读取数据时。",{"path":282,"title":283,"description":284},"\u002Fgolang\u002Fstdlib\u002Ftime","time","在 Go 语言中，time 包提供了处理时间和日期的功能。以下是一些常用的 time 包的 API 及其详细说明：",{"path":286,"title":287,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Ferror","Gin - 错误处理",{"path":289,"title":290,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Ffile","Gin - 文件处理",{"path":292,"title":293,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Fmiddleware","Gin - 中间件",{"path":295,"title":296,"description":297},"\u002Fgolang\u002Fweb\u002Fgin\u002Fquickstart","Gin - 快速开始","Gin 是目前最流行的 Go Web 框架，以高性能和简洁 API 著称。",{"path":299,"title":300,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Frequest","Gin - 请求处理",{"path":302,"title":303,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Fresponse","Gin - 响应处理",{"path":305,"title":306,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Frouter","Gin - 路由",{"path":308,"title":309,"description":11},"\u002Fgolang\u002Fweb\u002Fgin\u002Fvalidation","Gin - 参数校验",{"path":311,"title":312,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fassociation","GORM - 关联关系",{"path":314,"title":315,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fcrud","GORM - CRUD 操作",{"path":317,"title":318,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fmodel","GORM - 模型定义",{"path":320,"title":321,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fperformance","GORM - 日志与性能",{"path":323,"title":324,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fquery","GORM - 高级查询",{"path":326,"title":327,"description":328},"\u002Fgolang\u002Fweb\u002Fgorm\u002Fquickstart","GORM - 快速开始","GORM 是 Go 语言最流行的 ORM 库，功能强大，使用简单。",{"path":330,"title":331,"description":11},"\u002Fgolang\u002Fweb\u002Fgorm\u002Ftransaction","GORM - 事务与 Hook",{"path":333,"title":334,"description":335},"\u002Finterview\u002Fbasic","计算机基础面经","本章节汇总了面试中常见的通用技术概念，不局限于特定语言或数据库，是考察技术内功的关键考点。",{"path":337,"title":338,"description":339},"\u002Finterview\u002Fgolang","Golang 面试题","Go 语言面试高频考点，覆盖基础语法、数据结构、并发编程、内存管理、GC、调度器等核心知识。",{"path":341,"title":342,"description":343},"\u002Finterview\u002Fk8s","Kubernetes 面试题","Kubernetes（K8s）面试高频考点，覆盖架构原理、核心资源、网络存储、调度策略、运维监控等核心知识。",{"path":345,"title":346,"description":347},"\u002Finterview\u002Fmysql","MySQL 面试题","MySQL 数据库面试高频考点，覆盖索引、事务、锁、优化、主从复制等核心知识。",{"path":349,"title":350,"description":351},"\u002Finterview\u002Fredis","Redis 面试题","Redis 面试高频考点，覆盖数据结构、持久化、集群、缓存一致性、性能优化等核心知识。",{"path":353,"title":354,"description":355},"\u002Finterview\u002Frocketmq","RocketMQ 面试题","RocketMQ 面试高频考点，覆盖消息模型、可靠性、顺序消息、事务消息、存储与高可用等核心知识。",{"path":357,"title":358,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Flist-arraylist","List - ArrayList 源码解析",{"path":360,"title":361,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Flist-linkedlist","List - LinkedList 源码解析",{"path":363,"title":364,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Flist-stack","List - Satck源码解析",{"path":366,"title":367,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Flist-vectore","List - Vector 源码解析",{"path":369,"title":370,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fmap-hashmap","Map - HashMap 源码解析",{"path":372,"title":373,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fmap-linkedhashmap","Map - LinkedHashMap 源码解析",{"path":375,"title":376,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fmap-treemap","Map - TreeMap 源码解析",{"path":378,"title":379,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fqueue-deque","Queue - Deque 接口解析",{"path":381,"title":382,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fqueue-queue","Queue - Queue 接口解析",{"path":384,"title":385,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fset-hashset","Set - HashSet源码解析",{"path":387,"title":388,"description":11},"\u002Fother\u002Fjava\u002Fcollection\u002Fset-linkedhashset","Set - LinkedHashSet 源码解析",{"path":390,"title":391,"description":392},"\u002Fother\u002Fjava\u002Fcollection\u002Fset-treeset","Set - TreeSet源码解析","TreeSet 是一个 Set 集合接口的实现类，与 HashSet 类似，其底层也是通过维护了一个 TreeMap 对象来封装了一些实现方法，故本篇不再对 TreeSet 的底层原理进行详细说明，仅对常用 API 做简单介绍，如需了解 TreeMap 的底层实现原理，请移步 Map - HashMap 源码解析",{"path":394,"title":395,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fannotation","Java核心 - 注解",{"path":397,"title":398,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fbasic-grammar","Java核心 - 基础语法",{"path":400,"title":401,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fclass-and-object","Java核心 - 面向对象",{"path":403,"title":404,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fcommon-classes","Java核心 - 常用类",{"path":406,"title":407,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fexception","Java核心 - 异常处理",{"path":409,"title":410,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fgenerics","Java核心 - 泛型",{"path":412,"title":413,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fjdk-env-path","Java核心 - 环境搭建",{"path":415,"title":416,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Freflection","Java核心 - 反射",{"path":418,"title":419,"description":11},"\u002Fother\u002Fjava\u002Fcore\u002Fstring","Java核心 - String 字符串",{"path":421,"title":422,"description":423},"\u002Fother\u002Fjava\u002Fio\u002Fbuffer-stream","Java IO - 缓冲流","缓冲流是对基本流的包装，通过内置缓冲区减少系统调用次数，大幅提升读写效率。",{"path":425,"title":426,"description":427},"\u002Fother\u002Fjava\u002Fio\u002Fbyte-stream","Java IO - 字节流","字节流是 Java IO 中最基本的流类型，以字节（byte）为单位进行数据读写，可以处理任意类型的文件。",{"path":429,"title":430,"description":431},"\u002Fother\u002Fjava\u002Fio\u002Fchar-stream","Java IO - 字符流","字符流以字符为单位进行读写，专门用于处理文本文件。相比字节流，字符流能够正确处理字符编码，避免中文乱码问题。",{"path":433,"title":434,"description":11},"\u002Fother\u002Fjava\u002Fio\u002Ffile","Java IO - File 类",{"path":436,"title":437,"description":11},"\u002Fother\u002Fjava\u002Fio\u002Fio-stream-system","Java IO - IO流概述",{"path":439,"title":440,"description":441},"\u002Fother\u002Fjava\u002Fio\u002Fnio","Java IO - NIO","NIO（New IO）是 JDK 1.4 引入的新 IO 模型，提供了更高效的 IO 操作方式，支持非阻塞 IO 和多路复用。",{"path":443,"title":444,"description":445},"\u002Fother\u002Fjava\u002Fjvm\u002Fclass-loading","类加载机制","类加载机制是 JVM 将 .class 文件加载到内存，并对数据进行校验、转换解析和初始化，最终形成可被 JVM 直接使用的 Java 类型的过程。",{"path":447,"title":448,"description":449},"\u002Fother\u002Fjava\u002Fjvm\u002Fgarbage-collection","垃圾回收","垃圾回收（Garbage Collection，GC）是 JVM 自动管理内存的机制，负责回收不再使用的对象所占用的内存。",{"path":451,"title":452,"description":453},"\u002Fother\u002Fjava\u002Fjvm\u002Fjvm-memory","JVM 内存结构","JVM 在执行 Java 程序时，会把它管理的内存划分为若干个不同的数据区域。这些区域有各自的用途、创建和销毁时间。",{"path":455,"title":456,"description":457},"\u002Fother\u002Fjava\u002Fjvm\u002Fjvm-tuning","JVM 调优","JVM 调优是优化 Java 应用性能的重要手段，主要包括参数配置、性能监控和问题排查。",{"path":459,"title":460,"description":461},"\u002Fother\u002Fjava\u002Fthread\u002Fatomic","原子类","Java 原子类（Atomic Classes）提供了一种无锁的线程安全方式，基于 CAS（Compare-And-Swap）操作实现。",{"path":463,"title":464,"description":465},"\u002Fother\u002Fjava\u002Fthread\u002Fcompletable-future","CompletableFuture","CompletableFuture 是 JDK 8 引入的异步编程工具，实现了 Future 和 CompletionStage 接口，支持函数式编程和链式调用。",{"path":467,"title":468,"description":469},"\u002Fother\u002Fjava\u002Fthread\u002Fconcurrent-collections","并发集合","Java 并发包提供了多种线程安全的集合类，用于替代传统的同步集合（如 Collections.synchronizedList）。",{"path":471,"title":472,"description":473},"\u002Fother\u002Fjava\u002Fthread\u002Fconcurrent-utils","并发工具类","Java 并发包提供了多种实用的并发工具类，用于控制线程之间的协调与同步。",{"path":475,"title":476,"description":11},"\u002Fother\u002Fjava\u002Fthread\u002Fsynchronized-lock","同步机制",{"path":478,"title":479,"description":11},"\u002Fother\u002Fjava\u002Fthread\u002Fthread-basic","线程基础",{"path":481,"title":482,"description":11},"\u002Fother\u002Fjava\u002Fthread\u002Fthread-pool","线程池",{"path":484,"title":485,"description":486},"\u002Fother\u002Fspring-series\u002Fspring\u002Fannotations-beans","Spring - 基于注解管理Bean","从 Java 5 开始，Java 增加了对注解（Annotation）的支持，它是代码中的一种特殊标记，可以在编译、类加载和运行时被读取，执行相应的处理。开发人员可以通过注解在不改变原有代码和逻辑的情况下，在源代码中嵌入补充信息。",{"path":488,"title":489,"description":490},"\u002Fother\u002Fspring-series\u002Fspring\u002Fimplement-ioc","Spring - 原理手写IoC","Spring 框架的 IOC 是基于 Java 反射机制实现的，在学习手写 IoC 之前，你需要具备一定的 Java 反射相关的知识，参考本站内的 Java 教程。",{"path":492,"title":493,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fintroduction-case","Spring - 入门案例",{"path":495,"title":496,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-aop","Spring - 面向切面AOP",{"path":498,"title":499,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-aot","Spring - AOT提前编译",{"path":501,"title":502,"description":503},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-data-validation","Spring - 数据校验","在开发中，我们经常遇到参数校验的需求，比如用户注册的时候，要校验用户名不能为空、用户名长度不超过20个字符、手机号是合法的手机号格式等等。如果使用普通方式，我们会把校验的代码和真正的业务处理逻辑耦合在一起，而且如果未来要新增一种校验逻辑也需要在修改多个地方。而spring validation允许通过注解的方式来定义对象校验规则，把校验和业务逻辑分离开，让代码编写更加方便。Spring Validation其实就是对Hibernate Validator进一步的封装，方便在Spring中使用。",{"path":505,"title":506,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-i18n","Spring - 国际化i18n",{"path":508,"title":509,"description":510},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-ioc","Spring - IOC容器","IoC 是 Inversion of Control 的简写，译为“控制反转”，它不是一门技术，而是一种设计思想，是一个重要的面向对象编程法则，能够指导我们如何设计出松耦合、更优良的程序。",{"path":512,"title":513,"description":514},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-junit","Spring - 单元测试JUnit","在之前的测试方法中，几乎都能看到以下的两行代码：",{"path":516,"title":517,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-resources","Spring - 资源操作",{"path":519,"title":520,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-summarize","Spring - Spring概述",{"path":522,"title":523,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fspring-transaction","Spring - 事务",{"path":525,"title":526,"description":11},"\u002Fother\u002Fspring-series\u002Fspring\u002Fxml-beans","Spring - 基于XML管理Bean",{"path":528,"title":529,"description":11},"\u002Fother\u002Fspring-series\u002Fspringboot\u002Fspringboot-config","SpringBoot - 配置详解",{"path":531,"title":532,"description":11},"\u002Fother\u002Fspring-series\u002Fspringboot\u002Fspringboot-data","SpringBoot - 数据访问",{"path":534,"title":535,"description":11},"\u002Fother\u002Fspring-series\u002Fspringboot\u002Fspringboot-quickstart","SpringBoot - 快速入门",{"path":537,"title":538,"description":11},"\u002Fother\u002Fspring-series\u002Fspringboot\u002Fspringboot-web","SpringBoot - Web 开发",{"path":540,"title":541,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-config","SpringCloud - 配置中心",{"path":543,"title":544,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-discovery","SpringCloud - 服务注册与发现",{"path":546,"title":547,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-feign","SpringCloud - 服务调用",{"path":549,"title":550,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-gateway","SpringCloud - 服务网关",{"path":552,"title":553,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-introduction","SpringCloud - 微服务概述",{"path":555,"title":556,"description":11},"\u002Fother\u002Fspring-series\u002Fspringcloud\u002Fspringcloud-sentinel","SpringCloud - 服务保护",{"path":558,"title":559,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-databind","SpringMVC - 数据绑定与转换",{"path":561,"title":562,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-exception","SpringMVC - 异常处理",{"path":564,"title":565,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-interceptor","SpringMVC - 拦截器",{"path":567,"title":568,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-introduction","SpringMVC - 简介与环境搭建",{"path":570,"title":571,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-request","SpringMVC - 请求处理",{"path":573,"title":574,"description":11},"\u002Fother\u002Fspring-series\u002Fspringmvc\u002Fspringmvc-response","SpringMVC - 响应处理",{"path":576,"title":577,"description":578},"\u002Fproject\u002Frocket-leaf\u002Farchitecture","项目架构","Rocket-Leaf 的目录结构、模块划分、数据流向，以及各层之间的依赖关系。",{"path":580,"title":581,"description":582},"\u002Fproject\u002Frocket-leaf\u002Fbackend-layers","后端分层设计","Rocket-Leaf 的 model \u002F rocketmq \u002F service 三层结构，以及服务之间的依赖关系与设计取舍。",{"path":584,"title":585,"description":586},"\u002Fproject\u002Frocket-leaf\u002Fclient-manager","RocketMQ 客户端管理器","AdminClientManager 的多客户端池、默认连接懒加载、自动重连重试的设计与实现。",{"path":588,"title":589,"description":590},"\u002Fproject\u002Frocket-leaf\u002Fencryption","连接信息加密存储","AES-256-GCM + SHA-256 字段级派生密钥的实现，以及如何在不破坏兼容性的前提下为历史明文数据做透明迁移。",{"path":592,"title":593,"description":594},"\u002Fproject\u002Frocket-leaf\u002Ffrontend","前端结构与类型绑定","React + Vite 目录组织、自动生成的 Wails 绑定、api 薄封装与自定义 hooks 的职责划分。",{"path":596,"title":597,"description":598},"\u002Fproject\u002Frocket-leaf","项目简介","Rocket-Leaf 是一款基于 Wails v3 构建的跨平台 RocketMQ 桌面管理客户端，Go 后端 + React 前端。本文档系列拆解它的架构与关键实现。",{"path":600,"title":601,"description":602},"\u002Fproject\u002Frocket-leaf\u002Fwails-v3","Wails v3 入门","Wails v3 的核心概念、Service 绑定机制，以及 Rocket-Leaf 是如何用它把 Go 后端和 React 前端打通的。",{"path":604,"title":605,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-basic","Docker - 入门基础",{"path":607,"title":608,"description":609},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-compose","Docker - Compose","在部署应用时，常常使用到不止一个容器，那么在部署容器的时候就需要一个一个进行部署，这样的部署过程也相对来说比较繁琐复杂，也容易出问题，那么有没有一种更为简单的方法呢？",{"path":611,"title":612,"description":613},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-container-connection","Docker - 容器互联","在上一个章节中我们学习了 Docker 容器的端口映射，可以将 Docker 容器和本地以及网络中的端口进行连接起来。",{"path":615,"title":616,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-dockerfile","Docker - Dockerfile",{"path":618,"title":619,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-helloworld","Docker - HelloWorld",{"path":621,"title":622,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-install","Docker - 安装",{"path":624,"title":625,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-introduce","Docker - 简介",{"path":627,"title":628,"description":11},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-object","Docker - 镜像、容器、仓库",{"path":630,"title":631,"description":632},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-warehouse","Docker - 仓库管理","仓库是集中存放资源的地方，代码仓库是存放代码的，那么Docker 中的仓库就是存放 Docker 镜像的。",{"path":634,"title":635,"description":636},"\u002Ftutorials\u002Fcloud\u002Fdocker\u002Fdocker-web-containers","Docker - WEB应用实例","在之前的章节中，仅对普通容器进行了演示，但在实际中常常使用到 Docker 容器中的 WEB 应用程序。",{"path":638,"title":639,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-config","Kubernetes - ConfigMap 与 Secret",{"path":641,"title":642,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-helm","Kubernetes - Helm 包管理",{"path":644,"title":645,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-install","Kubernetes - 集群安装",{"path":647,"title":648,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-introduction","Kubernetes - 简介与架构",{"path":650,"title":651,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-kubectl","Kubernetes - kubectl 命令行工具",{"path":653,"title":654,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-monitoring","Kubernetes - 监控与日志",{"path":656,"title":657,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-network-security","Kubernetes - 网络与安全",{"path":659,"title":660,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-service","Kubernetes - Service 与 Ingress",{"path":662,"title":663,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-storage","Kubernetes - 持久化存储",{"path":665,"title":666,"description":11},"\u002Ftutorials\u002Fcloud\u002Fkubernetes\u002Fk8s-workload","Kubernetes - 工作负载资源",{"path":668,"title":669,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-bash","Linux - Bash 基础语法",{"path":671,"title":672,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-file-directory","Linux - 文件与目录操作",{"path":674,"title":675,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-network","Linux - 网络配置",{"path":677,"title":678,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-package","Linux - 软件包管理",{"path":680,"title":681,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-process","Linux - 进程管理",{"path":683,"title":684,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-scripts","Linux - 常用脚本示例",{"path":686,"title":687,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-service","Linux - 服务管理",{"path":689,"title":690,"description":11},"\u002Ftutorials\u002Fcloud\u002Flinux\u002Flinux-user-permission","Linux - 用户与权限管理",{"path":692,"title":693,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-https","Nginx - HTTPS 配置",{"path":695,"title":696,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-install","Nginx - 安装与配置",{"path":698,"title":699,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-loadbalance","Nginx - 负载均衡",{"path":701,"title":702,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-optimization","Nginx - 性能优化",{"path":704,"title":705,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-proxy","Nginx - 反向代理",{"path":707,"title":708,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-static","Nginx - 静态资源服务",{"path":710,"title":711,"description":11},"\u002Ftutorials\u002Fcloud\u002Fnginx\u002Fnginx-vhost","Nginx - 虚拟主机配置",{"path":713,"title":714,"description":715},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fmysql-architecture","MySQL 高可用架构","主从复制、读写分离、分库分表。",{"path":717,"title":718,"description":719},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fmysql-index","MySQL 索引","索引是帮助 MySQL 高效获取数据的有序数据结构。",{"path":721,"title":722,"description":723},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fmysql-lock","MySQL 锁","锁用于解决并发访问时的数据一致性问题。",{"path":725,"title":726,"description":727},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fmysql-optimize","MySQL 性能优化","SQL 优化是后端开发必备技能。",{"path":729,"title":730,"description":731},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fmysql-transaction","MySQL 事务","事务是一组不可分割的操作，要么全部成功，要么全部失败。",{"path":733,"title":734,"description":735},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fsql-advanced","SQL 进阶","多表查询、子查询、函数、视图、存储过程。",{"path":737,"title":738,"description":739},"\u002Ftutorials\u002Fdatabase\u002Fmysql\u002Fsql-basic","SQL 基础","SQL（Structured Query Language）是操作关系型数据库的标准语言。",{"path":741,"title":742,"description":743},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-advanced","Redis 进阶功能","事务、发布订阅、Lua 脚本、Pipeline。",{"path":745,"title":746,"description":747},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-basic","Redis 基础","Redis 安装配置与基本命令。",{"path":749,"title":750,"description":751},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-cluster","Redis 高可用","主从复制、哨兵、Cluster 集群。",{"path":753,"title":754,"description":755},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-datatype","Redis 数据类型","Redis 5 种基本数据类型 + 4 种特殊类型。",{"path":757,"title":758,"description":759},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-optimize","Redis 性能优化","内存优化、缓存问题、最佳实践。",{"path":761,"title":762,"description":763},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-persistence","Redis 持久化","Redis 提供 RDB 和 AOF 两种持久化方式。",{"path":765,"title":766,"description":767},"\u002Ftutorials\u002Fdatabase\u002Fredis\u002Fredis-principle","Redis 底层原理","数据结构、线程模型、网络模型。",{"path":769,"title":770,"description":771},"\u002Ftutorials\u002Fdev-idea\u002Fdesign-patterns\u002Fbehaiver-patterns\u002Fobserver-pattern","观察者模式","观察者模式属于行为型模式，定义了对象之间的一对多的依赖关系，在这种模式中，当一个对象的状态发生变化时，所有依赖于它的对象都会得到通知，并且执行相关操作。观察者模式又被成为“发布—订阅模式”，即发布者发生改变后，会通知所有订阅者。",{"path":773,"title":774,"description":11},"\u002Ftutorials\u002Fdev-idea\u002Fdesign-patterns\u002Fcreate-patterns\u002Ffactory-pattern","工厂模式",{"path":776,"title":777,"description":778},"\u002Ftutorials\u002Fdev-idea\u002Fdesign-patterns\u002Fcreate-patterns\u002Fsingleton-pattern","单例模式","单例模式是最常用的设计模式之一，他可以保证在整个应用中，某个类只存在一个实例化对象，即全局使用到该类的只有一个对象，这种模式在需要限制某些类的实例数量时非常有用，通常全局只需要一个该对象即可，如一些配置文件映射对象、数据库连接对象等。",{"path":780,"title":781,"description":782},"\u002Ftutorials\u002Fdev-idea\u002Fdesign-patterns\u002Fstructural-patterns\u002Fadapter-pattern","适配器模式","适配器模式是一种结构型模式，可以将一个类的接口转换成客户端所期望的另一种接口，适配器模式可以帮助开发人员在不修改现有代码的情况下，将不兼容的类组合在一起。",{"path":784,"title":785,"description":786},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-basic-operations","Git 创建版本库","在 Git 上创建版本库有两种方式，一种是直接拷贝远程 Git 仓库到本地，另外一种是我们自己创建本地的版本库。",{"path":788,"title":789,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-branch-manage","Git 分支管理",{"path":791,"title":792,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-content-operations","Git 仓库内容操作",{"path":794,"title":795,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-introduce-install","Git 介绍和安装",{"path":797,"title":798,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-remote-manage","Git 远程管理",{"path":800,"title":801,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fgit\u002Fgit-workspace-index-repo","Git 工作原理",{"path":803,"title":804,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fhomebrew","HomeBrew 教程",{"path":806,"title":807,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fidea\u002Fshortcuts","快捷键",{"path":809,"title":810,"description":11},"\u002Ftutorials\u002Fdev-tools\u002Fmaven\u002Fintroduce-install-config","Maven - 介绍、安装、配置",{"path":812,"title":813,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fbasic-knowledge","2. 基础知识",{"path":815,"title":816,"description":817},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fcomponent-communication","9. 组件通信","在前面的章节内，介绍了 Vue 中最核心的内容——组件的介绍和使用，和 Java 等编程语言相反，组件并不近似于这些变成语言中的类，类可以通过类或者其实例化的对象来相互交互，但 Vue 组件之间的作用域是相互独立的，这就意味着不同组件之间的数据无法相互引用。",{"path":819,"title":820,"description":821},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fcomputed","4. 计算属性","虽然直接在模板中使用表达式方便，但是如果在模板中添加很多逻辑，会让模板变的臃肿且难维护，耦合度较高。有没有一种简单的方式来实现呢？答案是有的。",{"path":823,"title":824,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fcreate-vue-project","1. 环境搭建及安装",{"path":826,"title":827,"description":828},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Flife-cycle","6. 生命周期","生命周期是指组件从创建、挂载、更新到销毁的整个过程中所经历的一系列阶段。在 Vue 中，每个组件都有自己的生命周期，可以通过生命周期钩子函数来监听和处理组件在不同阶段的行为和状态。",{"path":830,"title":831,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fother-api","10. 其他 API",{"path":833,"title":834,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fpinia","8. Pinia",{"path":836,"title":837,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Frouter","7. 路由",{"path":839,"title":840,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Ftemplate-grammar","3. 指令及模板语法",{"path":842,"title":843,"description":11},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fvue3-new-component","11. Vue3 新组件",{"path":845,"title":846,"description":847},"\u002Ftutorials\u002Ffront-end\u002Fvue3\u002Fwatch","5. 监视","Watch 是 Vue 提供的一个用于监视响应式数据变化并执行相应操作的 API，能够对响应式数据的变化做出一些操作的功能。Vue3 中的 Watch 支持多种用法，包括监视响应式对象、ref 对象、数组、函数等。",{"path":849,"title":850,"description":11},"\u002Ftutorials\u002Fmq\u002Fkafka\u002Fkafka-introduction","Kafka 简介与安装",{"path":852,"title":853,"description":11},"\u002Ftutorials\u002Fmq\u002Fkafka\u002Fkafka-producer-consumer","Kafka 生产者与消费者",{"path":855,"title":856,"description":857},"\u002Ftutorials\u002Fmq\u002Fkafka\u002Fkafka-springboot","Spring Boot 整合 Kafka","Spring Kafka 提供了对 Apache Kafka 的便捷集成。",{"path":859,"title":860,"description":861},"\u002Ftutorials\u002Fmq\u002Frabbitmq\u002Frabbitmq-exchange","RabbitMQ Exchange 详解","Exchange（交换机）是 RabbitMQ 的核心组件，负责接收生产者发送的消息，并根据规则将消息路由到一个或多个队列。",{"path":863,"title":864,"description":11},"\u002Ftutorials\u002Fmq\u002Frabbitmq\u002Frabbitmq-introduction","RabbitMQ 简介与安装",{"path":866,"title":867,"description":868},"\u002Ftutorials\u002Fmq\u002Frabbitmq\u002Frabbitmq-reliability","RabbitMQ 消息可靠性","消息可靠性是消息队列的核心要求，RabbitMQ 提供了多种机制来保证消息不丢失。",{"path":870,"title":871,"description":872},"\u002Ftutorials\u002Fmq\u002Frabbitmq\u002Frabbitmq-springboot","Spring Boot 整合 RabbitMQ","Spring AMQP 提供了对 RabbitMQ 的便捷集成，大大简化了开发工作。",{"path":874,"title":875,"description":11},"\u002Ftutorials\u002Fmq\u002Frocketmq\u002Frocketmq-client","RocketMQ 客户端使用",{"path":877,"title":878,"description":11},"\u002Ftutorials\u002Fmq\u002Frocketmq\u002Frocketmq-concepts","RocketMQ 核心概念",{"path":880,"title":881,"description":11},"\u002Ftutorials\u002Fmq\u002Frocketmq\u002Frocketmq-installation","RocketMQ 安装部署",{"path":883,"title":884,"description":885},"\u002Ftutorials\u002Fmq\u002Frocketmq\u002Frocketmq-message-type","RocketMQ 消息类型","RocketMQ 支持多种消息类型，满足不同业务场景需求。",{"id":887,"title":66,"body":888,"description":67,"extension":3801,"meta":3802,"navigation":1486,"path":65,"seo":3803,"stem":3804,"__hash__":3805},"docs\u002Fai\u002Fllm\u002Flocal-deploy.md",{"type":889,"value":890,"toc":3760},"minimark",[891,895,899,934,938,941,945,948,1045,1049,1052,1142,1146,1149,1207,1210,1213,1216,1314,1318,1321,1324,1428,1431,1435,1438,1441,1523,1526,1620,1623,1738,1742,1745,1907,1910,1948,1952,1955,2015,2059,2063,2066,2069,2095,2098,2188,2191,2285,2288,2467,2471,2475,2478,2481,2495,2498,2627,2630,2742,2745,2748,2818,2821,2825,2828,2898,2901,2905,2908,2968,2995,2998,3057,3061,3064,3335,3339,3342,3572,3619,3622,3747,3750,3753,3756],[892,893,894],"h2",{"id":894},"为什么要本地部署",[896,897,898],"p",{},"使用云端 API 虽然方便，但本地部署大模型有其独特的优势：",[900,901,902,910,916,922,928],"ul",{},[903,904,905,909],"li",{},[906,907,908],"strong",{},"数据隐私","：所有数据在本地处理，不会传输到第三方服务器，适合医疗、金融、政务等对数据安全要求高的场景",[903,911,912,915],{},[906,913,914],{},"成本可控","：一次性投入硬件成本后，推理不再产生按量付费的 API 费用，适合高并发场景",[903,917,918,921],{},[906,919,920],{},"低延迟","：无网络传输延迟，响应速度更快",[903,923,924,927],{},[906,925,926],{},"离线可用","：不依赖网络，在断网环境中也能使用",[903,929,930,933],{},[906,931,932],{},"完全可控","：可以自由选择模型、调整参数、定制推理流程",[935,936,937],"note",{},"\n本地部署并非适合所有场景。如果你只是偶尔使用、对延迟不敏感且没有数据隐私顾虑，使用云端 API 通常是更经济的选择。\n",[892,939,940],{"id":940},"硬件需求",[942,943,944],"h3",{"id":944},"显存估算",[896,946,947],{},"模型推理时的显存需求主要取决于模型参数量和精度：",[949,950,951,973],"table",{},[952,953,954],"thead",{},[955,956,957,961,964,967,970],"tr",{},[958,959,960],"th",{},"精度",[958,962,963],{},"每个参数占用",[958,965,966],{},"7B 模型",[958,968,969],{},"13B 模型",[958,971,972],{},"70B 模型",[974,975,976,994,1011,1028],"tbody",{},[955,977,978,982,985,988,991],{},[979,980,981],"td",{},"FP32",[979,983,984],{},"4 字节",[979,986,987],{},"28 GB",[979,989,990],{},"52 GB",[979,992,993],{},"280 GB",[955,995,996,999,1002,1005,1008],{},[979,997,998],{},"FP16\u002FBF16",[979,1000,1001],{},"2 字节",[979,1003,1004],{},"14 GB",[979,1006,1007],{},"26 GB",[979,1009,1010],{},"140 GB",[955,1012,1013,1016,1019,1022,1025],{},[979,1014,1015],{},"INT8",[979,1017,1018],{},"1 字节",[979,1020,1021],{},"7 GB",[979,1023,1024],{},"13 GB",[979,1026,1027],{},"70 GB",[955,1029,1030,1033,1036,1039,1042],{},[979,1031,1032],{},"INT4",[979,1034,1035],{},"0.5 字节",[979,1037,1038],{},"3.5 GB",[979,1040,1041],{},"6.5 GB",[979,1043,1044],{},"35 GB",[1046,1047,1048],"tip",{},"\n以上仅为模型权重的显存占用，实际运行还需要额外的 KV Cache、计算中间结果等开销。建议预留模型权重 1.2-1.5 倍的显存。\n",[942,1050,1051],{"id":1051},"推荐硬件配置",[949,1053,1054,1070],{},[952,1055,1056],{},[955,1057,1058,1061,1064,1067],{},[958,1059,1060],{},"场景",[958,1062,1063],{},"GPU",[958,1065,1066],{},"内存",[958,1068,1069],{},"适合模型",[974,1071,1072,1086,1100,1114,1128],{},[955,1073,1074,1077,1080,1083],{},[979,1075,1076],{},"入门体验",[979,1078,1079],{},"无 GPU（纯 CPU）",[979,1081,1082],{},"16 GB",[979,1084,1085],{},"1-3B 量化模型",[955,1087,1088,1091,1094,1097],{},[979,1089,1090],{},"个人开发",[979,1092,1093],{},"RTX 4060 (8GB)",[979,1095,1096],{},"32 GB",[979,1098,1099],{},"7B INT4 量化",[955,1101,1102,1105,1108,1111],{},[979,1103,1104],{},"进阶开发",[979,1106,1107],{},"RTX 4090 (24GB)",[979,1109,1110],{},"64 GB",[979,1112,1113],{},"7-8B FP16 \u002F 70B INT4",[955,1115,1116,1119,1122,1125],{},[979,1117,1118],{},"专业部署",[979,1120,1121],{},"A100 (80GB)",[979,1123,1124],{},"128 GB",[979,1126,1127],{},"70B FP16",[955,1129,1130,1133,1136,1139],{},[979,1131,1132],{},"企业级",[979,1134,1135],{},"多卡 A100\u002FH100",[979,1137,1138],{},"256 GB+",[979,1140,1141],{},"70B+ FP16 \u002F MoE 模型",[942,1143,1145],{"id":1144},"apple-silicon","Apple Silicon",[896,1147,1148],{},"Apple M 系列芯片的统一内存架构使其成为本地部署的热门选择：",[949,1150,1151,1163],{},[952,1152,1153],{},[955,1154,1155,1158,1161],{},[958,1156,1157],{},"芯片",[958,1159,1160],{},"统一内存",[958,1162,1069],{},[974,1164,1165,1176,1186,1197],{},[955,1166,1167,1170,1173],{},[979,1168,1169],{},"M1\u002FM2 (8GB)",[979,1171,1172],{},"8 GB",[979,1174,1175],{},"1-3B 量化",[955,1177,1178,1181,1183],{},[979,1179,1180],{},"M1\u002FM2 Pro (16GB)",[979,1182,1082],{},[979,1184,1185],{},"7B INT4",[955,1187,1188,1191,1194],{},[979,1189,1190],{},"M1\u002FM2 Max (32-64GB)",[979,1192,1193],{},"32-64 GB",[979,1195,1196],{},"7B FP16 \u002F 70B INT4",[955,1198,1199,1202,1205],{},[979,1200,1201],{},"M3\u002FM4 Ultra (128-192GB)",[979,1203,1204],{},"128-192 GB",[979,1206,1127],{},[892,1208,1209],{"id":1209},"量化技术",[896,1211,1212],{},"量化是降低模型精度以减少显存占用和加速推理的关键技术。",[942,1214,1215],{"id":1215},"常见量化格式",[949,1217,1218,1234],{},[952,1219,1220],{},[955,1221,1222,1225,1228,1231],{},[958,1223,1224],{},"量化方式",[958,1226,1227],{},"精度损失",[958,1229,1230],{},"速度提升",[958,1232,1233],{},"适用场景",[974,1235,1236,1249,1263,1277,1289,1301],{},[955,1237,1238,1240,1243,1246],{},[979,1239,998],{},[979,1241,1242],{},"几乎无",[979,1244,1245],{},"基线",[979,1247,1248],{},"显存充足时",[955,1250,1251,1254,1257,1260],{},[979,1252,1253],{},"INT8 (W8A8)",[979,1255,1256],{},"很小",[979,1258,1259],{},"1.5-2x",[979,1261,1262],{},"平衡精度和效率",[955,1264,1265,1268,1271,1274],{},[979,1266,1267],{},"INT4 (W4A16)",[979,1269,1270],{},"轻微",[979,1272,1273],{},"2-3x",[979,1275,1276],{},"消费级显卡",[955,1278,1279,1282,1284,1286],{},[979,1280,1281],{},"GPTQ (4-bit)",[979,1283,1270],{},[979,1285,1273],{},[979,1287,1288],{},"GPU 推理",[955,1290,1291,1294,1296,1298],{},[979,1292,1293],{},"AWQ (4-bit)",[979,1295,1270],{},[979,1297,1273],{},[979,1299,1300],{},"GPU 推理（更快）",[955,1302,1303,1306,1309,1311],{},[979,1304,1305],{},"GGUF (2-8bit)",[979,1307,1308],{},"可变",[979,1310,1308],{},[979,1312,1313],{},"CPU\u002F混合推理",[942,1315,1317],{"id":1316},"gguf-格式","GGUF 格式",[896,1319,1320],{},"GGUF（GPT-Generated Unified Format）是 llama.cpp 项目定义的模型格式，专为 CPU 和 CPU+GPU 混合推理优化。GGUF 文件是自包含的，包含了模型权重、分词器和元数据。",[896,1322,1323],{},"常见的 GGUF 量化级别：",[949,1325,1326,1342],{},[952,1327,1328],{},[955,1329,1330,1333,1336,1339],{},[958,1331,1332],{},"量化类型",[958,1334,1335],{},"每参数比特",[958,1337,1338],{},"7B 模型大小",[958,1340,1341],{},"质量",[974,1343,1344,1358,1372,1386,1400,1414],{},[955,1345,1346,1349,1352,1355],{},[979,1347,1348],{},"Q2_K",[979,1350,1351],{},"~2.6 bit",[979,1353,1354],{},"~2.8 GB",[979,1356,1357],{},"较差",[955,1359,1360,1363,1366,1369],{},[979,1361,1362],{},"Q3_K_M",[979,1364,1365],{},"~3.4 bit",[979,1367,1368],{},"~3.5 GB",[979,1370,1371],{},"可用",[955,1373,1374,1377,1380,1383],{},[979,1375,1376],{},"Q4_K_M",[979,1378,1379],{},"~4.6 bit",[979,1381,1382],{},"~4.4 GB",[979,1384,1385],{},"推荐",[955,1387,1388,1391,1394,1397],{},[979,1389,1390],{},"Q5_K_M",[979,1392,1393],{},"~5.7 bit",[979,1395,1396],{},"~5.3 GB",[979,1398,1399],{},"很好",[955,1401,1402,1405,1408,1411],{},[979,1403,1404],{},"Q6_K",[979,1406,1407],{},"~6.6 bit",[979,1409,1410],{},"~5.9 GB",[979,1412,1413],{},"接近原始",[955,1415,1416,1419,1422,1425],{},[979,1417,1418],{},"Q8_0",[979,1420,1421],{},"8 bit",[979,1423,1424],{},"~7.2 GB",[979,1426,1427],{},"几乎无损",[935,1429,1430],{},"\n对于大多数应用场景，Q4_K_M 是一个很好的平衡点——显存占用约为 FP16 的 1\u002F4，而质量损失通常在可接受范围内。\n",[892,1432,1434],{"id":1433},"ollama-使用指南","Ollama 使用指南",[896,1436,1437],{},"Ollama 是目前最简单的本地大模型部署工具，一条命令即可运行模型。",[942,1439,1440],{"id":1440},"安装",[1442,1443,1447],"pre",{"className":1444,"code":1445,"language":1446,"meta":11,"style":11},"language-bash shiki shiki-themes github-light github-light github-dark","# macOS \u002F Linux\ncurl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh\n\n# macOS 也可以通过 Homebrew\nbrew install ollama\n\n# Windows\n# 从 https:\u002F\u002Follama.com\u002Fdownload 下载安装包\n","bash",[1448,1449,1450,1459,1481,1488,1494,1506,1511,1517],"code",{"__ignoreMap":11},[1451,1452,1455],"span",{"class":1453,"line":1454},"line",1,[1451,1456,1458],{"class":1457},"sCsY4","# macOS \u002F Linux\n",[1451,1460,1462,1466,1470,1474,1478],{"class":1453,"line":1461},2,[1451,1463,1465],{"class":1464},"snPdu","curl",[1451,1467,1469],{"class":1468},"sBjJW"," -fsSL",[1451,1471,1473],{"class":1472},"sIIMD"," https:\u002F\u002Follama.com\u002Finstall.sh",[1451,1475,1477],{"class":1476},"s8jYJ"," |",[1451,1479,1480],{"class":1464}," sh\n",[1451,1482,1484],{"class":1453,"line":1483},3,[1451,1485,1487],{"emptyLinePlaceholder":1486},true,"\n",[1451,1489,1491],{"class":1453,"line":1490},4,[1451,1492,1493],{"class":1457},"# macOS 也可以通过 Homebrew\n",[1451,1495,1497,1500,1503],{"class":1453,"line":1496},5,[1451,1498,1499],{"class":1464},"brew",[1451,1501,1502],{"class":1472}," install",[1451,1504,1505],{"class":1472}," ollama\n",[1451,1507,1509],{"class":1453,"line":1508},6,[1451,1510,1487],{"emptyLinePlaceholder":1486},[1451,1512,1514],{"class":1453,"line":1513},7,[1451,1515,1516],{"class":1457},"# Windows\n",[1451,1518,1520],{"class":1453,"line":1519},8,[1451,1521,1522],{"class":1457},"# 从 https:\u002F\u002Follama.com\u002Fdownload 下载安装包\n",[942,1524,1525],{"id":1525},"基本使用",[1442,1527,1529],{"className":1444,"code":1528,"language":1446,"meta":11,"style":11},"# 启动 Ollama 服务（安装后通常自动启动）\nollama serve\n\n# 运行模型（首次会自动下载）\nollama run llama3.1\nollama run qwen2.5:7b\nollama run deepseek-r1:8b\n\n# 查看已下载的模型\nollama list\n\n# 删除模型\nollama rm llama3.1\n",[1448,1530,1531,1536,1544,1548,1553,1563,1572,1581,1585,1591,1599,1604,1610],{"__ignoreMap":11},[1451,1532,1533],{"class":1453,"line":1454},[1451,1534,1535],{"class":1457},"# 启动 Ollama 服务（安装后通常自动启动）\n",[1451,1537,1538,1541],{"class":1453,"line":1461},[1451,1539,1540],{"class":1464},"ollama",[1451,1542,1543],{"class":1472}," serve\n",[1451,1545,1546],{"class":1453,"line":1483},[1451,1547,1487],{"emptyLinePlaceholder":1486},[1451,1549,1550],{"class":1453,"line":1490},[1451,1551,1552],{"class":1457},"# 运行模型（首次会自动下载）\n",[1451,1554,1555,1557,1560],{"class":1453,"line":1496},[1451,1556,1540],{"class":1464},[1451,1558,1559],{"class":1472}," run",[1451,1561,1562],{"class":1472}," llama3.1\n",[1451,1564,1565,1567,1569],{"class":1453,"line":1508},[1451,1566,1540],{"class":1464},[1451,1568,1559],{"class":1472},[1451,1570,1571],{"class":1472}," qwen2.5:7b\n",[1451,1573,1574,1576,1578],{"class":1453,"line":1513},[1451,1575,1540],{"class":1464},[1451,1577,1559],{"class":1472},[1451,1579,1580],{"class":1472}," deepseek-r1:8b\n",[1451,1582,1583],{"class":1453,"line":1519},[1451,1584,1487],{"emptyLinePlaceholder":1486},[1451,1586,1588],{"class":1453,"line":1587},9,[1451,1589,1590],{"class":1457},"# 查看已下载的模型\n",[1451,1592,1594,1596],{"class":1453,"line":1593},10,[1451,1595,1540],{"class":1464},[1451,1597,1598],{"class":1472}," list\n",[1451,1600,1602],{"class":1453,"line":1601},11,[1451,1603,1487],{"emptyLinePlaceholder":1486},[1451,1605,1607],{"class":1453,"line":1606},12,[1451,1608,1609],{"class":1457},"# 删除模型\n",[1451,1611,1613,1615,1618],{"class":1453,"line":1612},13,[1451,1614,1540],{"class":1464},[1451,1616,1617],{"class":1472}," rm",[1451,1619,1562],{"class":1472},[942,1621,1622],{"id":1622},"常用模型",[949,1624,1625,1641],{},[952,1626,1627],{},[955,1628,1629,1632,1635,1638],{},[958,1630,1631],{},"模型",[958,1633,1634],{},"命令",[958,1636,1637],{},"大小",[958,1639,1640],{},"说明",[974,1642,1643,1659,1674,1690,1706,1722],{},[955,1644,1645,1648,1653,1656],{},[979,1646,1647],{},"Llama 3.1 8B",[979,1649,1650],{},[1448,1651,1652],{},"ollama run llama3.1",[979,1654,1655],{},"~4.7 GB",[979,1657,1658],{},"Meta 开源，英文为主",[955,1660,1661,1664,1669,1671],{},[979,1662,1663],{},"Qwen 2.5 7B",[979,1665,1666],{},[1448,1667,1668],{},"ollama run qwen2.5:7b",[979,1670,1382],{},[979,1672,1673],{},"中文能力强",[955,1675,1676,1679,1684,1687],{},[979,1677,1678],{},"DeepSeek-R1 8B",[979,1680,1681],{},[1448,1682,1683],{},"ollama run deepseek-r1:8b",[979,1685,1686],{},"~4.9 GB",[979,1688,1689],{},"推理能力强",[955,1691,1692,1695,1700,1703],{},[979,1693,1694],{},"Phi-3 Mini",[979,1696,1697],{},[1448,1698,1699],{},"ollama run phi3",[979,1701,1702],{},"~2.2 GB",[979,1704,1705],{},"微软小模型",[955,1707,1708,1711,1716,1719],{},[979,1709,1710],{},"Gemma 2 9B",[979,1712,1713],{},[1448,1714,1715],{},"ollama run gemma2:9b",[979,1717,1718],{},"~5.4 GB",[979,1720,1721],{},"Google 开源",[955,1723,1724,1727,1732,1735],{},[979,1725,1726],{},"CodeLlama 7B",[979,1728,1729],{},[1448,1730,1731],{},"ollama run codellama",[979,1733,1734],{},"~3.8 GB",[979,1736,1737],{},"代码专用",[942,1739,1741],{"id":1740},"api-调用","API 调用",[896,1743,1744],{},"Ollama 提供了兼容 OpenAI 格式的 API：",[1442,1746,1750],{"className":1747,"code":1748,"language":1749,"meta":11,"style":11},"language-python shiki shiki-themes github-light github-light github-dark","from openai import OpenAI\n\nclient = OpenAI(\n    base_url=\"http:\u002F\u002Flocalhost:11434\u002Fv1\",\n    api_key=\"ollama\"  # Ollama 不需要真正的 API key\n)\n\nresponse = client.chat.completions.create(\n    model=\"qwen2.5:7b\",\n    messages=[\n        {\"role\": \"user\", \"content\": \"用 Python 写一个快速排序算法\"}\n    ]\n)\n\nprint(response.choices[0].message.content)\n","python",[1448,1751,1752,1767,1771,1782,1796,1809,1814,1818,1828,1840,1850,1878,1883,1887,1892],{"__ignoreMap":11},[1451,1753,1754,1757,1761,1764],{"class":1453,"line":1454},[1451,1755,1756],{"class":1476},"from",[1451,1758,1760],{"class":1759},"sxrX7"," openai ",[1451,1762,1763],{"class":1476},"import",[1451,1765,1766],{"class":1759}," OpenAI\n",[1451,1768,1769],{"class":1453,"line":1461},[1451,1770,1487],{"emptyLinePlaceholder":1486},[1451,1772,1773,1776,1779],{"class":1453,"line":1483},[1451,1774,1775],{"class":1759},"client ",[1451,1777,1778],{"class":1476},"=",[1451,1780,1781],{"class":1759}," OpenAI(\n",[1451,1783,1784,1788,1790,1793],{"class":1453,"line":1490},[1451,1785,1787],{"class":1786},"sP4rz","    base_url",[1451,1789,1778],{"class":1476},[1451,1791,1792],{"class":1472},"\"http:\u002F\u002Flocalhost:11434\u002Fv1\"",[1451,1794,1795],{"class":1759},",\n",[1451,1797,1798,1801,1803,1806],{"class":1453,"line":1496},[1451,1799,1800],{"class":1786},"    api_key",[1451,1802,1778],{"class":1476},[1451,1804,1805],{"class":1472},"\"ollama\"",[1451,1807,1808],{"class":1457},"  # Ollama 不需要真正的 API key\n",[1451,1810,1811],{"class":1453,"line":1508},[1451,1812,1813],{"class":1759},")\n",[1451,1815,1816],{"class":1453,"line":1513},[1451,1817,1487],{"emptyLinePlaceholder":1486},[1451,1819,1820,1823,1825],{"class":1453,"line":1519},[1451,1821,1822],{"class":1759},"response ",[1451,1824,1778],{"class":1476},[1451,1826,1827],{"class":1759}," client.chat.completions.create(\n",[1451,1829,1830,1833,1835,1838],{"class":1453,"line":1587},[1451,1831,1832],{"class":1786},"    model",[1451,1834,1778],{"class":1476},[1451,1836,1837],{"class":1472},"\"qwen2.5:7b\"",[1451,1839,1795],{"class":1759},[1451,1841,1842,1845,1847],{"class":1453,"line":1593},[1451,1843,1844],{"class":1786},"    messages",[1451,1846,1778],{"class":1476},[1451,1848,1849],{"class":1759},"[\n",[1451,1851,1852,1855,1858,1861,1864,1867,1870,1872,1875],{"class":1453,"line":1601},[1451,1853,1854],{"class":1759},"        {",[1451,1856,1857],{"class":1472},"\"role\"",[1451,1859,1860],{"class":1759},": ",[1451,1862,1863],{"class":1472},"\"user\"",[1451,1865,1866],{"class":1759},", ",[1451,1868,1869],{"class":1472},"\"content\"",[1451,1871,1860],{"class":1759},[1451,1873,1874],{"class":1472},"\"用 Python 写一个快速排序算法\"",[1451,1876,1877],{"class":1759},"}\n",[1451,1879,1880],{"class":1453,"line":1606},[1451,1881,1882],{"class":1759},"    ]\n",[1451,1884,1885],{"class":1453,"line":1612},[1451,1886,1813],{"class":1759},[1451,1888,1890],{"class":1453,"line":1889},14,[1451,1891,1487],{"emptyLinePlaceholder":1486},[1451,1893,1895,1898,1901,1904],{"class":1453,"line":1894},15,[1451,1896,1897],{"class":1468},"print",[1451,1899,1900],{"class":1759},"(response.choices[",[1451,1902,1903],{"class":1468},"0",[1451,1905,1906],{"class":1759},"].message.content)\n",[896,1908,1909],{},"也可以使用 Ollama 原生 API：",[1442,1911,1913],{"className":1444,"code":1912,"language":1446,"meta":11,"style":11},"curl http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate -d '{\n  \"model\": \"qwen2.5:7b\",\n  \"prompt\": \"什么是 Transformer？\",\n  \"stream\": false\n}'\n",[1448,1914,1915,1928,1933,1938,1943],{"__ignoreMap":11},[1451,1916,1917,1919,1922,1925],{"class":1453,"line":1454},[1451,1918,1465],{"class":1464},[1451,1920,1921],{"class":1472}," http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fgenerate",[1451,1923,1924],{"class":1468}," -d",[1451,1926,1927],{"class":1472}," '{\n",[1451,1929,1930],{"class":1453,"line":1461},[1451,1931,1932],{"class":1472},"  \"model\": \"qwen2.5:7b\",\n",[1451,1934,1935],{"class":1453,"line":1483},[1451,1936,1937],{"class":1472},"  \"prompt\": \"什么是 Transformer？\",\n",[1451,1939,1940],{"class":1453,"line":1490},[1451,1941,1942],{"class":1472},"  \"stream\": false\n",[1451,1944,1945],{"class":1453,"line":1496},[1451,1946,1947],{"class":1472},"}'\n",[942,1949,1951],{"id":1950},"自定义-modelfile","自定义 Modelfile",[896,1953,1954],{},"Ollama 支持通过 Modelfile 自定义模型配置：",[1442,1956,1960],{"className":1957,"code":1958,"language":1959,"meta":11,"style":11},"language-dockerfile shiki shiki-themes github-light github-light github-dark","# Modelfile\nFROM qwen2.5:7b\n\n# 设置系统提示词\nSYSTEM \"你是一个专业的技术文档助手，使用中文回答问题。\"\n\n# 调整参数\nPARAMETER temperature 0.7\nPARAMETER top_p 0.9\nPARAMETER num_ctx 8192\n","dockerfile",[1448,1961,1962,1967,1974,1978,1983,1991,1995,2000,2005,2010],{"__ignoreMap":11},[1451,1963,1964],{"class":1453,"line":1454},[1451,1965,1966],{"class":1457},"# Modelfile\n",[1451,1968,1969,1972],{"class":1453,"line":1461},[1451,1970,1971],{"class":1476},"FROM",[1451,1973,1571],{"class":1759},[1451,1975,1976],{"class":1453,"line":1483},[1451,1977,1487],{"emptyLinePlaceholder":1486},[1451,1979,1980],{"class":1453,"line":1490},[1451,1981,1982],{"class":1457},"# 设置系统提示词\n",[1451,1984,1985,1988],{"class":1453,"line":1496},[1451,1986,1987],{"class":1759},"SYSTEM ",[1451,1989,1990],{"class":1472},"\"你是一个专业的技术文档助手，使用中文回答问题。\"\n",[1451,1992,1993],{"class":1453,"line":1508},[1451,1994,1487],{"emptyLinePlaceholder":1486},[1451,1996,1997],{"class":1453,"line":1513},[1451,1998,1999],{"class":1457},"# 调整参数\n",[1451,2001,2002],{"class":1453,"line":1519},[1451,2003,2004],{"class":1759},"PARAMETER temperature 0.7\n",[1451,2006,2007],{"class":1453,"line":1587},[1451,2008,2009],{"class":1759},"PARAMETER top_p 0.9\n",[1451,2011,2012],{"class":1453,"line":1593},[1451,2013,2014],{"class":1759},"PARAMETER num_ctx 8192\n",[1442,2016,2018],{"className":1444,"code":2017,"language":1446,"meta":11,"style":11},"# 创建自定义模型\nollama create my-assistant -f Modelfile\n\n# 运行自定义模型\nollama run my-assistant\n",[1448,2019,2020,2025,2041,2045,2050],{"__ignoreMap":11},[1451,2021,2022],{"class":1453,"line":1454},[1451,2023,2024],{"class":1457},"# 创建自定义模型\n",[1451,2026,2027,2029,2032,2035,2038],{"class":1453,"line":1461},[1451,2028,1540],{"class":1464},[1451,2030,2031],{"class":1472}," create",[1451,2033,2034],{"class":1472}," my-assistant",[1451,2036,2037],{"class":1468}," -f",[1451,2039,2040],{"class":1472}," Modelfile\n",[1451,2042,2043],{"class":1453,"line":1483},[1451,2044,1487],{"emptyLinePlaceholder":1486},[1451,2046,2047],{"class":1453,"line":1490},[1451,2048,2049],{"class":1457},"# 运行自定义模型\n",[1451,2051,2052,2054,2056],{"class":1453,"line":1496},[1451,2053,1540],{"class":1464},[1451,2055,1559],{"class":1472},[1451,2057,2058],{"class":1472}," my-assistant\n",[892,2060,2062],{"id":2061},"vllm-部署","vLLM 部署",[896,2064,2065],{},"vLLM 是一个高性能的 LLM 推理和服务框架，专为生产环境设计。",[942,2067,2068],{"id":2068},"核心优势",[900,2070,2071,2077,2083,2089],{},[903,2072,2073,2076],{},[906,2074,2075],{},"PagedAttention","：类似操作系统的虚拟内存管理，高效管理 KV Cache，显存利用率接近最优",[903,2078,2079,2082],{},[906,2080,2081],{},"连续批处理（Continuous Batching）","：动态合并请求，吞吐量比朴素方案提升 2-4 倍",[903,2084,2085,2088],{},[906,2086,2087],{},"高并发","：支持大量并发请求",[903,2090,2091,2094],{},[906,2092,2093],{},"OpenAI 兼容 API","：可直接替换 OpenAI API",[942,2096,2097],{"id":2097},"安装与使用",[1442,2099,2101],{"className":1444,"code":2100,"language":1446,"meta":11,"style":11},"# 安装（需要 NVIDIA GPU）\npip install vllm\n\n# 启动 API 服务\npython -m vllm.entrypoints.openai.api_server \\\n    --model Qwen\u002FQwen2.5-7B-Instruct \\\n    --host 0.0.0.0 \\\n    --port 8000 \\\n    --max-model-len 8192 \\\n    --gpu-memory-utilization 0.9\n",[1448,2102,2103,2108,2118,2122,2127,2140,2150,2160,2170,2180],{"__ignoreMap":11},[1451,2104,2105],{"class":1453,"line":1454},[1451,2106,2107],{"class":1457},"# 安装（需要 NVIDIA GPU）\n",[1451,2109,2110,2113,2115],{"class":1453,"line":1461},[1451,2111,2112],{"class":1464},"pip",[1451,2114,1502],{"class":1472},[1451,2116,2117],{"class":1472}," vllm\n",[1451,2119,2120],{"class":1453,"line":1483},[1451,2121,1487],{"emptyLinePlaceholder":1486},[1451,2123,2124],{"class":1453,"line":1490},[1451,2125,2126],{"class":1457},"# 启动 API 服务\n",[1451,2128,2129,2131,2134,2137],{"class":1453,"line":1496},[1451,2130,1749],{"class":1464},[1451,2132,2133],{"class":1468}," -m",[1451,2135,2136],{"class":1472}," vllm.entrypoints.openai.api_server",[1451,2138,2139],{"class":1468}," \\\n",[1451,2141,2142,2145,2148],{"class":1453,"line":1508},[1451,2143,2144],{"class":1468},"    --model",[1451,2146,2147],{"class":1472}," Qwen\u002FQwen2.5-7B-Instruct",[1451,2149,2139],{"class":1468},[1451,2151,2152,2155,2158],{"class":1453,"line":1513},[1451,2153,2154],{"class":1468},"    --host",[1451,2156,2157],{"class":1468}," 0.0.0.0",[1451,2159,2139],{"class":1468},[1451,2161,2162,2165,2168],{"class":1453,"line":1519},[1451,2163,2164],{"class":1468},"    --port",[1451,2166,2167],{"class":1468}," 8000",[1451,2169,2139],{"class":1468},[1451,2171,2172,2175,2178],{"class":1453,"line":1587},[1451,2173,2174],{"class":1468},"    --max-model-len",[1451,2176,2177],{"class":1468}," 8192",[1451,2179,2139],{"class":1468},[1451,2181,2182,2185],{"class":1453,"line":1593},[1451,2183,2184],{"class":1468},"    --gpu-memory-utilization",[1451,2186,2187],{"class":1468}," 0.9\n",[942,2189,2190],{"id":2190},"常用参数",[949,2192,2193,2205],{},[952,2194,2195],{},[955,2196,2197,2200,2202],{},[958,2198,2199],{},"参数",[958,2201,1640],{},[958,2203,2204],{},"默认值",[974,2206,2207,2220,2233,2246,2259,2272],{},[955,2208,2209,2214,2217],{},[979,2210,2211],{},[1448,2212,2213],{},"--model",[979,2215,2216],{},"模型路径或 Hugging Face ID",[979,2218,2219],{},"必填",[955,2221,2222,2227,2230],{},[979,2223,2224],{},[1448,2225,2226],{},"--tensor-parallel-size",[979,2228,2229],{},"GPU 并行数",[979,2231,2232],{},"1",[955,2234,2235,2240,2243],{},[979,2236,2237],{},[1448,2238,2239],{},"--max-model-len",[979,2241,2242],{},"最大上下文长度",[979,2244,2245],{},"模型默认值",[955,2247,2248,2253,2256],{},[979,2249,2250],{},[1448,2251,2252],{},"--gpu-memory-utilization",[979,2254,2255],{},"GPU 显存使用比例",[979,2257,2258],{},"0.9",[955,2260,2261,2266,2269],{},[979,2262,2263],{},[1448,2264,2265],{},"--quantization",[979,2267,2268],{},"量化方式（awq\u002Fgptq）",[979,2270,2271],{},"无",[955,2273,2274,2279,2282],{},[979,2275,2276],{},[1448,2277,2278],{},"--dtype",[979,2280,2281],{},"数据类型（float16\u002Fbfloat16）",[979,2283,2284],{},"auto",[942,2286,2287],{"id":2287},"调用示例",[1442,2289,2291],{"className":1747,"code":2290,"language":1749,"meta":11,"style":11},"from openai import OpenAI\n\nclient = OpenAI(\n    base_url=\"http:\u002F\u002Flocalhost:8000\u002Fv1\",\n    api_key=\"token-abc123\"  # vLLM 默认不校验\n)\n\nresponse = client.chat.completions.create(\n    model=\"Qwen\u002FQwen2.5-7B-Instruct\",\n    messages=[\n        {\"role\": \"system\", \"content\": \"你是一个有用的助手。\"},\n        {\"role\": \"user\", \"content\": \"解释 PagedAttention 的原理\"}\n    ],\n    temperature=0.7,\n    max_tokens=1024,\n)\n\nprint(response.choices[0].message.content)\n",[1448,2292,2293,2303,2307,2315,2326,2338,2342,2346,2354,2365,2373,2396,2417,2422,2434,2446,2451,2456],{"__ignoreMap":11},[1451,2294,2295,2297,2299,2301],{"class":1453,"line":1454},[1451,2296,1756],{"class":1476},[1451,2298,1760],{"class":1759},[1451,2300,1763],{"class":1476},[1451,2302,1766],{"class":1759},[1451,2304,2305],{"class":1453,"line":1461},[1451,2306,1487],{"emptyLinePlaceholder":1486},[1451,2308,2309,2311,2313],{"class":1453,"line":1483},[1451,2310,1775],{"class":1759},[1451,2312,1778],{"class":1476},[1451,2314,1781],{"class":1759},[1451,2316,2317,2319,2321,2324],{"class":1453,"line":1490},[1451,2318,1787],{"class":1786},[1451,2320,1778],{"class":1476},[1451,2322,2323],{"class":1472},"\"http:\u002F\u002Flocalhost:8000\u002Fv1\"",[1451,2325,1795],{"class":1759},[1451,2327,2328,2330,2332,2335],{"class":1453,"line":1496},[1451,2329,1800],{"class":1786},[1451,2331,1778],{"class":1476},[1451,2333,2334],{"class":1472},"\"token-abc123\"",[1451,2336,2337],{"class":1457},"  # vLLM 默认不校验\n",[1451,2339,2340],{"class":1453,"line":1508},[1451,2341,1813],{"class":1759},[1451,2343,2344],{"class":1453,"line":1513},[1451,2345,1487],{"emptyLinePlaceholder":1486},[1451,2347,2348,2350,2352],{"class":1453,"line":1519},[1451,2349,1822],{"class":1759},[1451,2351,1778],{"class":1476},[1451,2353,1827],{"class":1759},[1451,2355,2356,2358,2360,2363],{"class":1453,"line":1587},[1451,2357,1832],{"class":1786},[1451,2359,1778],{"class":1476},[1451,2361,2362],{"class":1472},"\"Qwen\u002FQwen2.5-7B-Instruct\"",[1451,2364,1795],{"class":1759},[1451,2366,2367,2369,2371],{"class":1453,"line":1593},[1451,2368,1844],{"class":1786},[1451,2370,1778],{"class":1476},[1451,2372,1849],{"class":1759},[1451,2374,2375,2377,2379,2381,2384,2386,2388,2390,2393],{"class":1453,"line":1601},[1451,2376,1854],{"class":1759},[1451,2378,1857],{"class":1472},[1451,2380,1860],{"class":1759},[1451,2382,2383],{"class":1472},"\"system\"",[1451,2385,1866],{"class":1759},[1451,2387,1869],{"class":1472},[1451,2389,1860],{"class":1759},[1451,2391,2392],{"class":1472},"\"你是一个有用的助手。\"",[1451,2394,2395],{"class":1759},"},\n",[1451,2397,2398,2400,2402,2404,2406,2408,2410,2412,2415],{"class":1453,"line":1606},[1451,2399,1854],{"class":1759},[1451,2401,1857],{"class":1472},[1451,2403,1860],{"class":1759},[1451,2405,1863],{"class":1472},[1451,2407,1866],{"class":1759},[1451,2409,1869],{"class":1472},[1451,2411,1860],{"class":1759},[1451,2413,2414],{"class":1472},"\"解释 PagedAttention 的原理\"",[1451,2416,1877],{"class":1759},[1451,2418,2419],{"class":1453,"line":1612},[1451,2420,2421],{"class":1759},"    ],\n",[1451,2423,2424,2427,2429,2432],{"class":1453,"line":1889},[1451,2425,2426],{"class":1786},"    temperature",[1451,2428,1778],{"class":1476},[1451,2430,2431],{"class":1468},"0.7",[1451,2433,1795],{"class":1759},[1451,2435,2436,2439,2441,2444],{"class":1453,"line":1894},[1451,2437,2438],{"class":1786},"    max_tokens",[1451,2440,1778],{"class":1476},[1451,2442,2443],{"class":1468},"1024",[1451,2445,1795],{"class":1759},[1451,2447,2449],{"class":1453,"line":2448},16,[1451,2450,1813],{"class":1759},[1451,2452,2454],{"class":1453,"line":2453},17,[1451,2455,1487],{"emptyLinePlaceholder":1486},[1451,2457,2459,2461,2463,2465],{"class":1453,"line":2458},18,[1451,2460,1897],{"class":1468},[1451,2462,1900],{"class":1759},[1451,2464,1903],{"class":1468},[1451,2466,1906],{"class":1759},[2468,2469,2470],"warning",{},"\nvLLM 目前仅支持 NVIDIA GPU（需要 CUDA）。如果使用 AMD GPU 或 Apple Silicon，请考虑 llama.cpp 或 Ollama。\n",[892,2472,2474],{"id":2473},"llamacpp","llama.cpp",[896,2476,2477],{},"llama.cpp 是一个纯 C\u002FC++ 实现的 LLM 推理框架，以其极高的兼容性和效率著称。",[942,2479,2480],{"id":2480},"特点",[900,2482,2483,2486,2489,2492],{},[903,2484,2485],{},"支持 CPU、CUDA、Metal（Apple GPU）、Vulkan 等多种后端",[903,2487,2488],{},"内存占用极低",[903,2490,2491],{},"支持多种量化格式（GGUF）",[903,2493,2494],{},"跨平台（Linux、macOS、Windows）",[942,2496,2097],{"id":2497},"安装与使用-1",[1442,2499,2501],{"className":1444,"code":2500,"language":1446,"meta":11,"style":11},"# 克隆并编译\ngit clone https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp\ncd llama.cpp\n\n# macOS (Metal 加速)\ncmake -B build -DGGML_METAL=ON\ncmake --build build --config Release\n\n# Linux (CUDA 加速)\ncmake -B build -DGGML_CUDA=ON\ncmake --build build --config Release\n\n# 纯 CPU\ncmake -B build\ncmake --build build --config Release\n",[1448,2502,2503,2508,2519,2527,2531,2536,2550,2565,2569,2574,2585,2597,2601,2606,2615],{"__ignoreMap":11},[1451,2504,2505],{"class":1453,"line":1454},[1451,2506,2507],{"class":1457},"# 克隆并编译\n",[1451,2509,2510,2513,2516],{"class":1453,"line":1461},[1451,2511,2512],{"class":1464},"git",[1451,2514,2515],{"class":1472}," clone",[1451,2517,2518],{"class":1472}," https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp\n",[1451,2520,2521,2524],{"class":1453,"line":1483},[1451,2522,2523],{"class":1468},"cd",[1451,2525,2526],{"class":1472}," llama.cpp\n",[1451,2528,2529],{"class":1453,"line":1490},[1451,2530,1487],{"emptyLinePlaceholder":1486},[1451,2532,2533],{"class":1453,"line":1496},[1451,2534,2535],{"class":1457},"# macOS (Metal 加速)\n",[1451,2537,2538,2541,2544,2547],{"class":1453,"line":1508},[1451,2539,2540],{"class":1464},"cmake",[1451,2542,2543],{"class":1468}," -B",[1451,2545,2546],{"class":1472}," build",[1451,2548,2549],{"class":1468}," -DGGML_METAL=ON\n",[1451,2551,2552,2554,2557,2559,2562],{"class":1453,"line":1513},[1451,2553,2540],{"class":1464},[1451,2555,2556],{"class":1468}," --build",[1451,2558,2546],{"class":1472},[1451,2560,2561],{"class":1468}," --config",[1451,2563,2564],{"class":1472}," Release\n",[1451,2566,2567],{"class":1453,"line":1519},[1451,2568,1487],{"emptyLinePlaceholder":1486},[1451,2570,2571],{"class":1453,"line":1587},[1451,2572,2573],{"class":1457},"# Linux (CUDA 加速)\n",[1451,2575,2576,2578,2580,2582],{"class":1453,"line":1593},[1451,2577,2540],{"class":1464},[1451,2579,2543],{"class":1468},[1451,2581,2546],{"class":1472},[1451,2583,2584],{"class":1468}," -DGGML_CUDA=ON\n",[1451,2586,2587,2589,2591,2593,2595],{"class":1453,"line":1601},[1451,2588,2540],{"class":1464},[1451,2590,2556],{"class":1468},[1451,2592,2546],{"class":1472},[1451,2594,2561],{"class":1468},[1451,2596,2564],{"class":1472},[1451,2598,2599],{"class":1453,"line":1606},[1451,2600,1487],{"emptyLinePlaceholder":1486},[1451,2602,2603],{"class":1453,"line":1612},[1451,2604,2605],{"class":1457},"# 纯 CPU\n",[1451,2607,2608,2610,2612],{"class":1453,"line":1889},[1451,2609,2540],{"class":1464},[1451,2611,2543],{"class":1468},[1451,2613,2614],{"class":1472}," build\n",[1451,2616,2617,2619,2621,2623,2625],{"class":1453,"line":1894},[1451,2618,2540],{"class":1464},[1451,2620,2556],{"class":1468},[1451,2622,2546],{"class":1472},[1451,2624,2561],{"class":1468},[1451,2626,2564],{"class":1472},[942,2628,2629],{"id":2629},"运行模型",[1442,2631,2633],{"className":1444,"code":2632,"language":1446,"meta":11,"style":11},"# 交互式对话\n.\u002Fbuild\u002Fbin\u002Fllama-cli \\\n    -m models\u002Fqwen2.5-7b-instruct-q4_k_m.gguf \\\n    -n 512 \\\n    -t 8 \\\n    --interactive-first\n\n# 启动 API 服务（兼容 OpenAI 格式）\n.\u002Fbuild\u002Fbin\u002Fllama-server \\\n    -m models\u002Fqwen2.5-7b-instruct-q4_k_m.gguf \\\n    --host 0.0.0.0 \\\n    --port 8080 \\\n    -t 8 \\\n    -ngl 99  # GPU 层数，99 表示尽可能多地放到 GPU\n",[1448,2634,2635,2640,2647,2657,2667,2677,2682,2686,2691,2698,2706,2714,2723,2731],{"__ignoreMap":11},[1451,2636,2637],{"class":1453,"line":1454},[1451,2638,2639],{"class":1457},"# 交互式对话\n",[1451,2641,2642,2645],{"class":1453,"line":1461},[1451,2643,2644],{"class":1464},".\u002Fbuild\u002Fbin\u002Fllama-cli",[1451,2646,2139],{"class":1468},[1451,2648,2649,2652,2655],{"class":1453,"line":1483},[1451,2650,2651],{"class":1468},"    -m",[1451,2653,2654],{"class":1472}," models\u002Fqwen2.5-7b-instruct-q4_k_m.gguf",[1451,2656,2139],{"class":1468},[1451,2658,2659,2662,2665],{"class":1453,"line":1490},[1451,2660,2661],{"class":1468},"    -n",[1451,2663,2664],{"class":1468}," 512",[1451,2666,2139],{"class":1468},[1451,2668,2669,2672,2675],{"class":1453,"line":1496},[1451,2670,2671],{"class":1468},"    -t",[1451,2673,2674],{"class":1468}," 8",[1451,2676,2139],{"class":1468},[1451,2678,2679],{"class":1453,"line":1508},[1451,2680,2681],{"class":1468},"    --interactive-first\n",[1451,2683,2684],{"class":1453,"line":1513},[1451,2685,1487],{"emptyLinePlaceholder":1486},[1451,2687,2688],{"class":1453,"line":1519},[1451,2689,2690],{"class":1457},"# 启动 API 服务（兼容 OpenAI 格式）\n",[1451,2692,2693,2696],{"class":1453,"line":1587},[1451,2694,2695],{"class":1464},".\u002Fbuild\u002Fbin\u002Fllama-server",[1451,2697,2139],{"class":1468},[1451,2699,2700,2702,2704],{"class":1453,"line":1593},[1451,2701,2651],{"class":1468},[1451,2703,2654],{"class":1472},[1451,2705,2139],{"class":1468},[1451,2707,2708,2710,2712],{"class":1453,"line":1601},[1451,2709,2154],{"class":1468},[1451,2711,2157],{"class":1468},[1451,2713,2139],{"class":1468},[1451,2715,2716,2718,2721],{"class":1453,"line":1606},[1451,2717,2164],{"class":1468},[1451,2719,2720],{"class":1468}," 8080",[1451,2722,2139],{"class":1468},[1451,2724,2725,2727,2729],{"class":1453,"line":1612},[1451,2726,2671],{"class":1468},[1451,2728,2674],{"class":1468},[1451,2730,2139],{"class":1468},[1451,2732,2733,2736,2739],{"class":1453,"line":1889},[1451,2734,2735],{"class":1468},"    -ngl",[1451,2737,2738],{"class":1468}," 99",[1451,2740,2741],{"class":1457},"  # GPU 层数，99 表示尽可能多地放到 GPU\n",[942,2743,2744],{"id":2744},"模型转换",[896,2746,2747],{},"如果你有 Hugging Face 格式的模型，可以转换为 GGUF：",[1442,2749,2751],{"className":1444,"code":2750,"language":1446,"meta":11,"style":11},"# 安装依赖\npip install -r requirements.txt\n\n# 转换为 GGUF（FP16）\npython convert_hf_to_gguf.py \u002Fpath\u002Fto\u002Fmodel --outfile model-f16.gguf\n\n# 量化\n.\u002Fbuild\u002Fbin\u002Fllama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M\n",[1448,2752,2753,2758,2770,2774,2779,2795,2799,2804],{"__ignoreMap":11},[1451,2754,2755],{"class":1453,"line":1454},[1451,2756,2757],{"class":1457},"# 安装依赖\n",[1451,2759,2760,2762,2764,2767],{"class":1453,"line":1461},[1451,2761,2112],{"class":1464},[1451,2763,1502],{"class":1472},[1451,2765,2766],{"class":1468}," -r",[1451,2768,2769],{"class":1472}," requirements.txt\n",[1451,2771,2772],{"class":1453,"line":1483},[1451,2773,1487],{"emptyLinePlaceholder":1486},[1451,2775,2776],{"class":1453,"line":1490},[1451,2777,2778],{"class":1457},"# 转换为 GGUF（FP16）\n",[1451,2780,2781,2783,2786,2789,2792],{"class":1453,"line":1496},[1451,2782,1749],{"class":1464},[1451,2784,2785],{"class":1472}," convert_hf_to_gguf.py",[1451,2787,2788],{"class":1472}," \u002Fpath\u002Fto\u002Fmodel",[1451,2790,2791],{"class":1468}," --outfile",[1451,2793,2794],{"class":1472}," model-f16.gguf\n",[1451,2796,2797],{"class":1453,"line":1508},[1451,2798,1487],{"emptyLinePlaceholder":1486},[1451,2800,2801],{"class":1453,"line":1513},[1451,2802,2803],{"class":1457},"# 量化\n",[1451,2805,2806,2809,2812,2815],{"class":1453,"line":1519},[1451,2807,2808],{"class":1464},".\u002Fbuild\u002Fbin\u002Fllama-quantize",[1451,2810,2811],{"class":1472}," model-f16.gguf",[1451,2813,2814],{"class":1472}," model-q4_k_m.gguf",[1451,2816,2817],{"class":1472}," Q4_K_M\n",[892,2819,2820],{"id":2820},"模型下载源",[942,2822,2824],{"id":2823},"hugging-face","Hugging Face",[896,2826,2827],{},"全球最大的模型托管平台：",[1442,2829,2831],{"className":1444,"code":2830,"language":1446,"meta":11,"style":11},"# 使用 huggingface-cli\npip install huggingface_hub\nhuggingface-cli download Qwen\u002FQwen2.5-7B-Instruct --local-dir .\u002Fqwen2.5-7b\n\n# 下载 GGUF 文件\nhuggingface-cli download bartowski\u002FQwen2.5-7B-Instruct-GGUF \\\n    Qwen2.5-7B-Instruct-Q4_K_M.gguf \\\n    --local-dir .\u002Fmodels\n",[1448,2832,2833,2838,2847,2863,2867,2872,2883,2890],{"__ignoreMap":11},[1451,2834,2835],{"class":1453,"line":1454},[1451,2836,2837],{"class":1457},"# 使用 huggingface-cli\n",[1451,2839,2840,2842,2844],{"class":1453,"line":1461},[1451,2841,2112],{"class":1464},[1451,2843,1502],{"class":1472},[1451,2845,2846],{"class":1472}," huggingface_hub\n",[1451,2848,2849,2852,2855,2857,2860],{"class":1453,"line":1483},[1451,2850,2851],{"class":1464},"huggingface-cli",[1451,2853,2854],{"class":1472}," download",[1451,2856,2147],{"class":1472},[1451,2858,2859],{"class":1468}," --local-dir",[1451,2861,2862],{"class":1472}," .\u002Fqwen2.5-7b\n",[1451,2864,2865],{"class":1453,"line":1490},[1451,2866,1487],{"emptyLinePlaceholder":1486},[1451,2868,2869],{"class":1453,"line":1496},[1451,2870,2871],{"class":1457},"# 下载 GGUF 文件\n",[1451,2873,2874,2876,2878,2881],{"class":1453,"line":1508},[1451,2875,2851],{"class":1464},[1451,2877,2854],{"class":1472},[1451,2879,2880],{"class":1472}," bartowski\u002FQwen2.5-7B-Instruct-GGUF",[1451,2882,2139],{"class":1468},[1451,2884,2885,2888],{"class":1453,"line":1513},[1451,2886,2887],{"class":1472},"    Qwen2.5-7B-Instruct-Q4_K_M.gguf",[1451,2889,2139],{"class":1468},[1451,2891,2892,2895],{"class":1453,"line":1519},[1451,2893,2894],{"class":1468},"    --local-dir",[1451,2896,2897],{"class":1472}," .\u002Fmodels\n",[1046,2899,2900],{},"\nHugging Face 在国内访问速度可能较慢，可以使用镜像站 `https:\u002F\u002Fhf-mirror.com` 加速下载。设置环境变量 `HF_ENDPOINT=https:\u002F\u002Fhf-mirror.com` 即可。\n",[942,2902,2904],{"id":2903},"modelscope魔搭社区","ModelScope（魔搭社区）",[896,2906,2907],{},"阿里巴巴推出的国内模型托管平台，国内下载速度快：",[1442,2909,2911],{"className":1444,"code":2910,"language":1446,"meta":11,"style":11},"# 安装\npip install modelscope\n\n# 下载模型\nfrom modelscope import snapshot_download\nmodel_dir = snapshot_download('Qwen\u002FQwen2.5-7B-Instruct')\n",[1448,2912,2913,2918,2927,2931,2936,2949],{"__ignoreMap":11},[1451,2914,2915],{"class":1453,"line":1454},[1451,2916,2917],{"class":1457},"# 安装\n",[1451,2919,2920,2922,2924],{"class":1453,"line":1461},[1451,2921,2112],{"class":1464},[1451,2923,1502],{"class":1472},[1451,2925,2926],{"class":1472}," modelscope\n",[1451,2928,2929],{"class":1453,"line":1483},[1451,2930,1487],{"emptyLinePlaceholder":1486},[1451,2932,2933],{"class":1453,"line":1490},[1451,2934,2935],{"class":1457},"# 下载模型\n",[1451,2937,2938,2940,2943,2946],{"class":1453,"line":1496},[1451,2939,1756],{"class":1464},[1451,2941,2942],{"class":1472}," modelscope",[1451,2944,2945],{"class":1472}," import",[1451,2947,2948],{"class":1472}," snapshot_download\n",[1451,2950,2951,2954,2957,2960,2963,2966],{"class":1453,"line":1508},[1451,2952,2953],{"class":1464},"model_dir",[1451,2955,2956],{"class":1472}," =",[1451,2958,2959],{"class":1472}," snapshot_download",[1451,2961,2962],{"class":1759},"(",[1451,2964,2965],{"class":1464},"'Qwen\u002FQwen2.5-7B-Instruct'",[1451,2967,1813],{"class":1759},[1442,2969,2971],{"className":1444,"code":2970,"language":1446,"meta":11,"style":11},"# 命令行下载\nmodelscope download --model Qwen\u002FQwen2.5-7B-Instruct --local_dir .\u002Fqwen2.5-7b\n",[1448,2972,2973,2978],{"__ignoreMap":11},[1451,2974,2975],{"class":1453,"line":1454},[1451,2976,2977],{"class":1457},"# 命令行下载\n",[1451,2979,2980,2983,2985,2988,2990,2993],{"class":1453,"line":1461},[1451,2981,2982],{"class":1464},"modelscope",[1451,2984,2854],{"class":1472},[1451,2986,2987],{"class":1468}," --model",[1451,2989,2147],{"class":1472},[1451,2991,2992],{"class":1468}," --local_dir",[1451,2994,2862],{"class":1472},[942,2996,2997],{"id":2997},"其他下载源",[949,2999,3000,3012],{},[952,3001,3002],{},[955,3003,3004,3007,3010],{},[958,3005,3006],{},"平台",[958,3008,3009],{},"地址",[958,3011,2480],{},[974,3013,3014,3024,3035,3046],{},[955,3015,3016,3018,3021],{},[979,3017,2824],{},[979,3019,3020],{},"huggingface.co",[979,3022,3023],{},"全球最全，模型最多",[955,3025,3026,3029,3032],{},[979,3027,3028],{},"ModelScope",[979,3030,3031],{},"modelscope.cn",[979,3033,3034],{},"国内速度快",[955,3036,3037,3040,3043],{},[979,3038,3039],{},"HF Mirror",[979,3041,3042],{},"hf-mirror.com",[979,3044,3045],{},"Hugging Face 镜像",[955,3047,3048,3051,3054],{},[979,3049,3050],{},"Ollama Library",[979,3052,3053],{},"ollama.com\u002Flibrary",[979,3055,3056],{},"Ollama 专用，一键下载",[892,3058,3060],{"id":3059},"api-服务部署","API 服务部署",[896,3062,3063],{},"无论使用哪种推理引擎，最终都需要将模型封装为 API 服务。以下是一个使用 FastAPI 封装 Ollama 的示例：",[1442,3065,3067],{"className":1747,"code":3066,"language":1749,"meta":11,"style":11},"from fastapi import FastAPI\nfrom pydantic import BaseModel\nimport httpx\n\napp = FastAPI()\n\nclass ChatRequest(BaseModel):\n    message: str\n    model: str = \"qwen2.5:7b\"\n    temperature: float = 0.7\n\n@app.post(\"\u002Fchat\")\nasync def chat(request: ChatRequest):\n    async with httpx.AsyncClient() as client:\n        response = await client.post(\n            \"http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fchat\",\n            json={\n                \"model\": request.model,\n                \"messages\": [{\"role\": \"user\", \"content\": request.message}],\n                \"stream\": False,\n                \"options\": {\"temperature\": request.temperature}\n            },\n            timeout=120.0\n        )\n    return response.json()\n",[1448,3068,3069,3081,3093,3100,3104,3114,3118,3134,3142,3155,3168,3172,3184,3198,3215,3228,3235,3245,3253,3275,3288,3303,3309,3320,3326],{"__ignoreMap":11},[1451,3070,3071,3073,3076,3078],{"class":1453,"line":1454},[1451,3072,1756],{"class":1476},[1451,3074,3075],{"class":1759}," fastapi ",[1451,3077,1763],{"class":1476},[1451,3079,3080],{"class":1759}," FastAPI\n",[1451,3082,3083,3085,3088,3090],{"class":1453,"line":1461},[1451,3084,1756],{"class":1476},[1451,3086,3087],{"class":1759}," pydantic ",[1451,3089,1763],{"class":1476},[1451,3091,3092],{"class":1759}," BaseModel\n",[1451,3094,3095,3097],{"class":1453,"line":1483},[1451,3096,1763],{"class":1476},[1451,3098,3099],{"class":1759}," httpx\n",[1451,3101,3102],{"class":1453,"line":1490},[1451,3103,1487],{"emptyLinePlaceholder":1486},[1451,3105,3106,3109,3111],{"class":1453,"line":1496},[1451,3107,3108],{"class":1759},"app ",[1451,3110,1778],{"class":1476},[1451,3112,3113],{"class":1759}," FastAPI()\n",[1451,3115,3116],{"class":1453,"line":1508},[1451,3117,1487],{"emptyLinePlaceholder":1486},[1451,3119,3120,3123,3126,3128,3131],{"class":1453,"line":1513},[1451,3121,3122],{"class":1476},"class",[1451,3124,3125],{"class":1464}," ChatRequest",[1451,3127,2962],{"class":1759},[1451,3129,3130],{"class":1464},"BaseModel",[1451,3132,3133],{"class":1759},"):\n",[1451,3135,3136,3139],{"class":1453,"line":1519},[1451,3137,3138],{"class":1759},"    message: ",[1451,3140,3141],{"class":1468},"str\n",[1451,3143,3144,3147,3150,3152],{"class":1453,"line":1587},[1451,3145,3146],{"class":1759},"    model: ",[1451,3148,3149],{"class":1468},"str",[1451,3151,2956],{"class":1476},[1451,3153,3154],{"class":1472}," \"qwen2.5:7b\"\n",[1451,3156,3157,3160,3163,3165],{"class":1453,"line":1593},[1451,3158,3159],{"class":1759},"    temperature: ",[1451,3161,3162],{"class":1468},"float",[1451,3164,2956],{"class":1476},[1451,3166,3167],{"class":1468}," 0.7\n",[1451,3169,3170],{"class":1453,"line":1601},[1451,3171,1487],{"emptyLinePlaceholder":1486},[1451,3173,3174,3177,3179,3182],{"class":1453,"line":1606},[1451,3175,3176],{"class":1464},"@app.post",[1451,3178,2962],{"class":1759},[1451,3180,3181],{"class":1472},"\"\u002Fchat\"",[1451,3183,1813],{"class":1759},[1451,3185,3186,3189,3192,3195],{"class":1453,"line":1612},[1451,3187,3188],{"class":1476},"async",[1451,3190,3191],{"class":1476}," def",[1451,3193,3194],{"class":1464}," chat",[1451,3196,3197],{"class":1759},"(request: ChatRequest):\n",[1451,3199,3200,3203,3206,3209,3212],{"class":1453,"line":1889},[1451,3201,3202],{"class":1476},"    async",[1451,3204,3205],{"class":1476}," with",[1451,3207,3208],{"class":1759}," httpx.AsyncClient() ",[1451,3210,3211],{"class":1476},"as",[1451,3213,3214],{"class":1759}," client:\n",[1451,3216,3217,3220,3222,3225],{"class":1453,"line":1894},[1451,3218,3219],{"class":1759},"        response ",[1451,3221,1778],{"class":1476},[1451,3223,3224],{"class":1476}," await",[1451,3226,3227],{"class":1759}," client.post(\n",[1451,3229,3230,3233],{"class":1453,"line":2448},[1451,3231,3232],{"class":1472},"            \"http:\u002F\u002Flocalhost:11434\u002Fapi\u002Fchat\"",[1451,3234,1795],{"class":1759},[1451,3236,3237,3240,3242],{"class":1453,"line":2453},[1451,3238,3239],{"class":1786},"            json",[1451,3241,1778],{"class":1476},[1451,3243,3244],{"class":1759},"{\n",[1451,3246,3247,3250],{"class":1453,"line":2458},[1451,3248,3249],{"class":1472},"                \"model\"",[1451,3251,3252],{"class":1759},": request.model,\n",[1451,3254,3256,3259,3262,3264,3266,3268,3270,3272],{"class":1453,"line":3255},19,[1451,3257,3258],{"class":1472},"                \"messages\"",[1451,3260,3261],{"class":1759},": [{",[1451,3263,1857],{"class":1472},[1451,3265,1860],{"class":1759},[1451,3267,1863],{"class":1472},[1451,3269,1866],{"class":1759},[1451,3271,1869],{"class":1472},[1451,3273,3274],{"class":1759},": request.message}],\n",[1451,3276,3278,3281,3283,3286],{"class":1453,"line":3277},20,[1451,3279,3280],{"class":1472},"                \"stream\"",[1451,3282,1860],{"class":1759},[1451,3284,3285],{"class":1468},"False",[1451,3287,1795],{"class":1759},[1451,3289,3291,3294,3297,3300],{"class":1453,"line":3290},21,[1451,3292,3293],{"class":1472},"                \"options\"",[1451,3295,3296],{"class":1759},": {",[1451,3298,3299],{"class":1472},"\"temperature\"",[1451,3301,3302],{"class":1759},": request.temperature}\n",[1451,3304,3306],{"class":1453,"line":3305},22,[1451,3307,3308],{"class":1759},"            },\n",[1451,3310,3312,3315,3317],{"class":1453,"line":3311},23,[1451,3313,3314],{"class":1786},"            timeout",[1451,3316,1778],{"class":1476},[1451,3318,3319],{"class":1468},"120.0\n",[1451,3321,3323],{"class":1453,"line":3322},24,[1451,3324,3325],{"class":1759},"        )\n",[1451,3327,3329,3332],{"class":1453,"line":3328},25,[1451,3330,3331],{"class":1476},"    return",[1451,3333,3334],{"class":1759}," response.json()\n",[942,3336,3338],{"id":3337},"docker-部署","Docker 部署",[896,3340,3341],{},"使用 Docker 可以更方便地管理部署环境：",[1442,3343,3347],{"className":3344,"code":3345,"language":3346,"meta":11,"style":11},"language-yaml shiki shiki-themes github-light github-light github-dark","# docker-compose.yml\nversion: '3.8'\nservices:\n  ollama:\n    image: ollama\u002Follama\n    ports:\n      - \"11434:11434\"\n    volumes:\n      - ollama_data:\u002Froot\u002F.ollama\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              count: all\n              capabilities: [gpu]\n\n  # 可选：Open WebUI（提供聊天界面）\n  open-webui:\n    image: ghcr.io\u002Fopen-webui\u002Fopen-webui:main\n    ports:\n      - \"3000:8080\"\n    environment:\n      - OLLAMA_BASE_URL=http:\u002F\u002Follama:11434\n    depends_on:\n      - ollama\n\nvolumes:\n  ollama_data:\n","yaml",[1448,3348,3349,3354,3365,3373,3380,3390,3397,3405,3412,3419,3426,3433,3440,3447,3460,3470,3484,3488,3493,3500,3509,3515,3522,3529,3536,3543,3551,3556,3564],{"__ignoreMap":11},[1451,3350,3351],{"class":1453,"line":1454},[1451,3352,3353],{"class":1457},"# docker-compose.yml\n",[1451,3355,3356,3360,3362],{"class":1453,"line":1461},[1451,3357,3359],{"class":3358},"sovSZ","version",[1451,3361,1860],{"class":1759},[1451,3363,3364],{"class":1472},"'3.8'\n",[1451,3366,3367,3370],{"class":1453,"line":1483},[1451,3368,3369],{"class":3358},"services",[1451,3371,3372],{"class":1759},":\n",[1451,3374,3375,3378],{"class":1453,"line":1490},[1451,3376,3377],{"class":3358},"  ollama",[1451,3379,3372],{"class":1759},[1451,3381,3382,3385,3387],{"class":1453,"line":1496},[1451,3383,3384],{"class":3358},"    image",[1451,3386,1860],{"class":1759},[1451,3388,3389],{"class":1472},"ollama\u002Follama\n",[1451,3391,3392,3395],{"class":1453,"line":1508},[1451,3393,3394],{"class":3358},"    ports",[1451,3396,3372],{"class":1759},[1451,3398,3399,3402],{"class":1453,"line":1513},[1451,3400,3401],{"class":1759},"      - ",[1451,3403,3404],{"class":1472},"\"11434:11434\"\n",[1451,3406,3407,3410],{"class":1453,"line":1519},[1451,3408,3409],{"class":3358},"    volumes",[1451,3411,3372],{"class":1759},[1451,3413,3414,3416],{"class":1453,"line":1587},[1451,3415,3401],{"class":1759},[1451,3417,3418],{"class":1472},"ollama_data:\u002Froot\u002F.ollama\n",[1451,3420,3421,3424],{"class":1453,"line":1593},[1451,3422,3423],{"class":3358},"    deploy",[1451,3425,3372],{"class":1759},[1451,3427,3428,3431],{"class":1453,"line":1601},[1451,3429,3430],{"class":3358},"      resources",[1451,3432,3372],{"class":1759},[1451,3434,3435,3438],{"class":1453,"line":1606},[1451,3436,3437],{"class":3358},"        reservations",[1451,3439,3372],{"class":1759},[1451,3441,3442,3445],{"class":1453,"line":1612},[1451,3443,3444],{"class":3358},"          devices",[1451,3446,3372],{"class":1759},[1451,3448,3449,3452,3455,3457],{"class":1453,"line":1889},[1451,3450,3451],{"class":1759},"            - ",[1451,3453,3454],{"class":3358},"driver",[1451,3456,1860],{"class":1759},[1451,3458,3459],{"class":1472},"nvidia\n",[1451,3461,3462,3465,3467],{"class":1453,"line":1894},[1451,3463,3464],{"class":3358},"              count",[1451,3466,1860],{"class":1759},[1451,3468,3469],{"class":1472},"all\n",[1451,3471,3472,3475,3478,3481],{"class":1453,"line":2448},[1451,3473,3474],{"class":3358},"              capabilities",[1451,3476,3477],{"class":1759},": [",[1451,3479,3480],{"class":1472},"gpu",[1451,3482,3483],{"class":1759},"]\n",[1451,3485,3486],{"class":1453,"line":2453},[1451,3487,1487],{"emptyLinePlaceholder":1486},[1451,3489,3490],{"class":1453,"line":2458},[1451,3491,3492],{"class":1457},"  # 可选：Open WebUI（提供聊天界面）\n",[1451,3494,3495,3498],{"class":1453,"line":3255},[1451,3496,3497],{"class":3358},"  open-webui",[1451,3499,3372],{"class":1759},[1451,3501,3502,3504,3506],{"class":1453,"line":3277},[1451,3503,3384],{"class":3358},[1451,3505,1860],{"class":1759},[1451,3507,3508],{"class":1472},"ghcr.io\u002Fopen-webui\u002Fopen-webui:main\n",[1451,3510,3511,3513],{"class":1453,"line":3290},[1451,3512,3394],{"class":3358},[1451,3514,3372],{"class":1759},[1451,3516,3517,3519],{"class":1453,"line":3305},[1451,3518,3401],{"class":1759},[1451,3520,3521],{"class":1472},"\"3000:8080\"\n",[1451,3523,3524,3527],{"class":1453,"line":3311},[1451,3525,3526],{"class":3358},"    environment",[1451,3528,3372],{"class":1759},[1451,3530,3531,3533],{"class":1453,"line":3322},[1451,3532,3401],{"class":1759},[1451,3534,3535],{"class":1472},"OLLAMA_BASE_URL=http:\u002F\u002Follama:11434\n",[1451,3537,3538,3541],{"class":1453,"line":3328},[1451,3539,3540],{"class":3358},"    depends_on",[1451,3542,3372],{"class":1759},[1451,3544,3546,3548],{"class":1453,"line":3545},26,[1451,3547,3401],{"class":1759},[1451,3549,3550],{"class":1472},"ollama\n",[1451,3552,3554],{"class":1453,"line":3553},27,[1451,3555,1487],{"emptyLinePlaceholder":1486},[1451,3557,3559,3562],{"class":1453,"line":3558},28,[1451,3560,3561],{"class":3358},"volumes",[1451,3563,3372],{"class":1759},[1451,3565,3567,3570],{"class":1453,"line":3566},29,[1451,3568,3569],{"class":3358},"  ollama_data",[1451,3571,3372],{"class":1759},[1442,3573,3575],{"className":1444,"code":3574,"language":1446,"meta":11,"style":11},"docker compose up -d\n\n# 下载模型\ndocker exec -it ollama ollama pull qwen2.5:7b\n",[1448,3576,3577,3591,3595,3599],{"__ignoreMap":11},[1451,3578,3579,3582,3585,3588],{"class":1453,"line":1454},[1451,3580,3581],{"class":1464},"docker",[1451,3583,3584],{"class":1472}," compose",[1451,3586,3587],{"class":1472}," up",[1451,3589,3590],{"class":1468}," -d\n",[1451,3592,3593],{"class":1453,"line":1461},[1451,3594,1487],{"emptyLinePlaceholder":1486},[1451,3596,3597],{"class":1453,"line":1483},[1451,3598,2935],{"class":1457},[1451,3600,3601,3603,3606,3609,3612,3614,3617],{"class":1453,"line":1490},[1451,3602,3581],{"class":1464},[1451,3604,3605],{"class":1472}," exec",[1451,3607,3608],{"class":1468}," -it",[1451,3610,3611],{"class":1472}," ollama",[1451,3613,3611],{"class":1472},[1451,3615,3616],{"class":1472}," pull",[1451,3618,1571],{"class":1472},[892,3620,3621],{"id":3621},"部署方案对比",[949,3623,3624,3639],{},[952,3625,3626],{},[955,3627,3628,3631,3634,3637],{},[958,3629,3630],{},"特性",[958,3632,3633],{},"Ollama",[958,3635,3636],{},"vLLM",[958,3638,2474],{},[974,3640,3641,3654,3667,3681,3695,3709,3722,3735],{},[955,3642,3643,3646,3649,3652],{},[979,3644,3645],{},"上手难度",[979,3647,3648],{},"极低",[979,3650,3651],{},"中等",[979,3653,3651],{},[955,3655,3656,3659,3661,3664],{},[979,3657,3658],{},"推理性能",[979,3660,3651],{},[979,3662,3663],{},"最高",[979,3665,3666],{},"较高",[955,3668,3669,3672,3675,3678],{},[979,3670,3671],{},"GPU 支持",[979,3673,3674],{},"CUDA\u002FMetal",[979,3676,3677],{},"CUDA",[979,3679,3680],{},"CUDA\u002FMetal\u002FVulkan",[955,3682,3683,3686,3689,3692],{},[979,3684,3685],{},"CPU 推理",[979,3687,3688],{},"支持",[979,3690,3691],{},"不支持",[979,3693,3694],{},"最佳",[955,3696,3697,3700,3703,3706],{},[979,3698,3699],{},"量化支持",[979,3701,3702],{},"GGUF",[979,3704,3705],{},"AWQ\u002FGPTQ",[979,3707,3708],{},"GGUF（最全）",[955,3710,3711,3714,3717,3720],{},[979,3712,3713],{},"并发能力",[979,3715,3716],{},"一般",[979,3718,3719],{},"最强",[979,3721,3716],{},[955,3723,3724,3726,3729,3732],{},[979,3725,1233],{},[979,3727,3728],{},"个人\u002F开发",[979,3730,3731],{},"生产环境",[979,3733,3734],{},"资源受限\u002F跨平台",[955,3736,3737,3740,3743,3745],{},[979,3738,3739],{},"API 兼容",[979,3741,3742],{},"OpenAI",[979,3744,3742],{},[979,3746,3742],{},[892,3748,3749],{"id":3749},"小结",[896,3751,3752],{},"本地部署大模型已经变得越来越简单。对于个人开发者和学习用途，Ollama 是最推荐的入门方案；对于生产环境的高并发需求，vLLM 是最佳选择；对于需要在 CPU 或多种硬件上运行的场景，llama.cpp 是最灵活的方案。",[896,3754,3755],{},"选择合适的量化级别（推荐 Q4_K_M）和硬件配置，即使在消费级设备上也能获得不错的使用体验。",[3757,3758,3759],"style",{},"html pre.shiki code .sCsY4, html code.shiki .sCsY4{--shiki-light:#6A737D;--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .snPdu, html code.shiki .snPdu{--shiki-light:#6F42C1;--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sBjJW, html code.shiki .sBjJW{--shiki-light:#005CC5;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sIIMD, html code.shiki .sIIMD{--shiki-light:#032F62;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .s8jYJ, html code.shiki .s8jYJ{--shiki-light:#D73A49;--shiki-default:#D73A49;--shiki-dark:#F97583}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sxrX7, html code.shiki .sxrX7{--shiki-light:#24292E;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sP4rz, html code.shiki .sP4rz{--shiki-light:#E36209;--shiki-default:#E36209;--shiki-dark:#FFAB70}html pre.shiki code .sovSZ, html code.shiki .sovSZ{--shiki-light:#22863A;--shiki-default:#22863A;--shiki-dark:#85E89D}",{"title":11,"searchDepth":1461,"depth":1461,"links":3761},[3762,3763,3768,3772,3779,3785,3791,3796,3799,3800],{"id":894,"depth":1461,"text":894},{"id":940,"depth":1461,"text":940,"children":3764},[3765,3766,3767],{"id":944,"depth":1483,"text":944},{"id":1051,"depth":1483,"text":1051},{"id":1144,"depth":1483,"text":1145},{"id":1209,"depth":1461,"text":1209,"children":3769},[3770,3771],{"id":1215,"depth":1483,"text":1215},{"id":1316,"depth":1483,"text":1317},{"id":1433,"depth":1461,"text":1434,"children":3773},[3774,3775,3776,3777,3778],{"id":1440,"depth":1483,"text":1440},{"id":1525,"depth":1483,"text":1525},{"id":1622,"depth":1483,"text":1622},{"id":1740,"depth":1483,"text":1741},{"id":1950,"depth":1483,"text":1951},{"id":2061,"depth":1461,"text":2062,"children":3780},[3781,3782,3783,3784],{"id":2068,"depth":1483,"text":2068},{"id":2097,"depth":1483,"text":2097},{"id":2190,"depth":1483,"text":2190},{"id":2287,"depth":1483,"text":2287},{"id":2473,"depth":1461,"text":2474,"children":3786},[3787,3788,3789,3790],{"id":2480,"depth":1483,"text":2480},{"id":2497,"depth":1483,"text":2097},{"id":2629,"depth":1483,"text":2629},{"id":2744,"depth":1483,"text":2744},{"id":2820,"depth":1461,"text":2820,"children":3792},[3793,3794,3795],{"id":2823,"depth":1483,"text":2824},{"id":2903,"depth":1483,"text":2904},{"id":2997,"depth":1483,"text":2997},{"id":3059,"depth":1461,"text":3060,"children":3797},[3798],{"id":3337,"depth":1483,"text":3338},{"id":3621,"depth":1461,"text":3621},{"id":3749,"depth":1461,"text":3749},"md",{},{"title":66,"description":67},"ai\u002Fllm\u002Flocal-deploy","TiNXtPVcil6akmIwEfAD1Rqq8gf-dWNdPlg99ZR86vo",1775474634857]