更新于 2025年9月23日
10 分钟
LlamaIndex 是一个框架,可帮助您通过索引、检索和编排工具将数据连接到大型语言模型——非常适合 RAG、代理和结构化输出。
# 1) Install
# pip install llama-index llama-index-embeddings-openai llama-index-llms-openai
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# 2) Configure your model + embeddings
os.environ["OPENAI_API_KEY"] = "YOUR_KEY"  # or use any supported LLM/embedding provider
llm = OpenAI(model="gpt-4o-mini")
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# 3) Load documents (e.g., ./data/*.pdf, .md, .txt)
# load_data has to be called; the bare attribute is a bound method, not a
# list of documents, and from_documents() below cannot consume it.
docs = SimpleDirectoryReader("./data").load_data()

# 4) Build the index
index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)

# 5) Create a query engine and ask a question
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("What are the key security practices mentioned in the docs?")
print(response)


# --- Loading from several sources -------------------------------------------
# File readers: SimpleDirectoryReader, PDF/HTML/Markdown readers.
# Web readers: BeautifulSoupWebReader, sitemap reader.
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.web import SimpleWebPageReader

file_docs = SimpleDirectoryReader("./policies").load_data()

# NOTE(review): the page URLs are absent from the original text; list the
# pages to ingest here. An empty list keeps the script runnable.
web_urls: list[str] = []
web_docs = SimpleWebPageReader(html_to_text=True).load_data(web_urls)

all_docs = file_docs + web_docs


# --- Chunking documents into nodes ------------------------------------------
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Document

parser = SentenceSplitter(chunk_size=800, chunk_overlap=100)
nodes = []
for d in all_docs:
    # Each source document is re-wrapped in a fresh Document carrying only
    # its text and metadata before being split.
    nodes.extend(
        parser.get_nodes_from_documents([Document(text=d.text, metadata=d.metadata)])
    )


# --- Hybrid retrieval (vector + BM25) ---------------------------------------
# NOTE(review): confirm the BM25Retriever and MergerRetriever import paths
# against the installed llama-index version; they are kept as published.
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.core.retrievers import BM25Retriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Vector index from pre-parsed nodes
v_index = VectorStoreIndex(nodes)

# BM25 keyword retriever
bm25_retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=6)

# Hybrid: merge candidates, then rerank
from llama_index.core.retrievers import RouterRetriever
from llama_index.retrievers.merge import MergerRetriever

v_retriever = v_index.as_retriever(similarity_top_k=6)
hybrid = MergerRetriever(retrievers=[v_retriever, bm25_retriever], top_k=8)
query_engine = RetrieverQueryEngine.from_args(retriever=hybrid)

# Orphaned text fragment from the original (presumably metadata-filter
# examples -- TODO confirm): source == 'handbook', created_at > 2024-01-01


# --- Reranking ---------------------------------------------------------------
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_index.core.query_engine import RetrieverQueryEngine

reranker = FlagEmbeddingReranker(top_n=5, model="BAAI/bge-reranker-base")
# Retrieve 12 candidates and let the reranker keep its top_n=5.
query_engine = v_index.as_query_engine(
    similarity_top_k=12,
    node_postprocessors=[reranker],
)


# --- Response synthesis ------------------------------------------------------
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.core import ServiceContext

synth = get_response_synthesizer(response_mode="tree_summarize")
query_engine = v_index.as_query_engine(response_synthesizer=synth)
ans = query_engine.query("Summarize the onboarding steps and cite sources.")
print(ans)


# --- Custom prompt template --------------------------------------------------
from llama_index.core.prompts import PromptTemplate

# Line breaks between the instruction sentences and the Question/Context
# fields are restored; the original text had them fused together.
qa_tmpl = PromptTemplate(
    """You are a terse, evidence-first assistant. Use only the provided context.
If unsure, say you don't know. Return JSON with keys: answer, sources.
Question: {query_str}
Context: {context_str}"""
)
query_engine = v_index.as_query_engine(text_qa_template=qa_tmpl)


# --- Agent with a SQL tool ---------------------------------------------------
# NOTE(review): confirm the SQLQueryEngineTool import path and its
# from_engine constructor against the installed llama-index version.
from llama_index.core.agent import ReActAgent
from llama_index.tools.sql import SQLQueryEngineTool
from sqlalchemy import create_engine

engine = create_engine("sqlite:///analytics.db")
sql_tool = SQLQueryEngineTool.from_engine(engine)
agent = ReActAgent.from_tools([sql_tool], llm=llm, verbose=True)
agent.chat("What was monthly churn in Q2 2025? If needed, query the DB.")


# --- Evaluation --------------------------------------------------------------
from llama_index.core.evaluation import FaithfulnessEvaluator, RelevancyEvaluator

faith = FaithfulnessEvaluator(llm=llm)
rel = RelevancyEvaluator(llm=llm)
pred = query_engine.query("List SOC 2 control families in our policy.")
print("faithful?", faith.evaluate_response(pred))
print("relevant?", rel.evaluate_response(pred))


# --- Persistent vector store -------------------------------------------------
# Example: Chroma
# pip install chromadb llama-index-vector-stores-chroma
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
import chromadb

chroma_client = chromadb.PersistentClient(path="./chroma_store")
collection = chroma_client.get_or_create_collection("company_knowledge")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(all_docs, storage_context=storage_context)

# Example: metadata-based filtering at query time
retriever = index.as_retriever(similarity_top_k=8)
# NOTE(review): verify that the retriever honours a `metadata_filters`
# attribute set this way; check the LlamaIndex metadata-filter docs.
retriever.metadata_filters = {"department": ["legal", "security"], "published": [True]}


# --- Serving -----------------------------------------------------------------
# /query endpoint; keep the index warm in memory.
# Minimal FastAPI wrapper
# pip install fastapi uvicorn
from fastapi import FastAPI

# FastAPI has to be instantiated: the route decorator below needs an
# application instance, not the class itself.
app = FastAPI()
qe = index.as_query_engine(llm=llm)


@app.post("/query")
async def query(payload: dict):
    """Answer the question in ``payload["q"]`` using the module-level query engine.

    Args:
        payload: Request body; the question is read from key ``"q"`` and
            defaults to an empty string when the key is missing.

    Returns:
        A dict with ``"answer"`` (the response converted to ``str``) and
        ``"sources"`` (the metadata of every node in ``resp.source_nodes``).
    """
    q = payload.get("q", "")
    resp = qe.query(q)
    return {
        "answer": str(resp),
        "sources": [s.node.metadata for s in resp.source_nodes],
    }