Continue 源码分析 - RerankerRetrievalPipeline
类别: Continue AICodingAssistant 标签: Continue FTS SQLite BM25 LanceDB GitHubCopilot
目录
RerankerRetrievalPipeline
源代码:core/context/retrieval/pipelines/RerankerRetrievalPipeline.ts
/**
 * Retrieval pipeline built on top of BaseRetrievalPipeline.
 * Only the initial candidate-gathering step is shown in this excerpt.
 */
export default class RerankerRetrievalPipeline extends BaseRetrievalPipeline {
/**
 * Collects candidate chunks from three sources (full-text search, embeddings
 * search, and recently edited files), merges them into one list, and returns
 * that list after passing it through deduplicateChunks.
 *
 * @returns The merged, deduplicated candidate chunks.
 */
private async _retrieveInitial(): Promise<Chunk[]> {
// `input` is the query forwarded to both searches; `nRetrieve` is forwarded to
// all three sources (presumably a per-source result limit; confirm in the base class).
const { input, nRetrieve } = this.options;
const retrievalResults: Chunk[] = [];
// The three lookups are awaited one after another, not concurrently.
const ftsChunks = await this.retrieveFts(input, nRetrieve);
const embeddingsChunks = await this.retrieveEmbeddings(input, nRetrieve);
const recentlyEditedFilesChunks =
await this.retrieveAndChunkRecentlyEditedFiles(nRetrieve);
// Merge order: recently edited files first, then FTS hits, then embeddings hits.
retrievalResults.push(
...recentlyEditedFilesChunks,
...ftsChunks,
...embeddingsChunks,
);
// NOTE(review): which duplicate survives depends on deduplicateChunks, which is
// not shown here; the merge order above may matter. Confirm.
const deduplicatedRetrievalResults: Chunk[] =
deduplicateChunks(retrievalResults);
return deduplicatedRetrievalResults;
}
}
- recentlyEditedFilesChunks: 最近编辑的文件
- ftsChunks: 全文检索
- embeddingsChunks: 向量检索
全文检索(FTS,Full-Text Search)数据库表设计
- 源代码:core/indexing/FullTextSearch.ts
- SQLite 存储位置:~/.continue/index/index.sqlite
创建
-- FTS5 virtual table holding the searchable text: one row per indexed entry,
-- with its file path and content, tokenized with the 'trigram' tokenizer.
CREATE VIRTUAL TABLE IF NOT EXISTS fts USING fts5(
path,
content,
tokenize = 'trigram'
)
-- Side table linking each fts row to a chunk: `id` references fts(rowid) and
-- `chunkId` references chunks(id); path and cacheKey are stored alongside.
CREATE TABLE IF NOT EXISTS fts_metadata (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL,
cacheKey TEXT NOT NULL,
chunkId INTEGER NOT NULL,
FOREIGN KEY (chunkId) REFERENCES chunks (id),
FOREIGN KEY (id) REFERENCES fts (rowid)
)
更新
-- Step 1: load the chunks stored for a given (path, cacheKey) pair.
SELECT * FROM chunks WHERE path = ? AND cacheKey = ?
-- Step 2: add the text to the full-text index.
INSERT INTO fts (path, content) VALUES (?, ?)
-- Step 3: upsert the metadata row. If a row with the same id already exists,
-- its path, cacheKey and chunkId are overwritten with the new values.
-- NOTE(review): id is presumably the rowid of the fts row inserted in step 2; confirm.
INSERT INTO fts_metadata (id, path, cacheKey, chunkId)
VALUES (?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
path = excluded.path,
cacheKey = excluded.cacheKey,
chunkId = excluded.chunkId
- ON CONFLICT(id) DO UPDATE SET
- 指定冲突处理策略。当插入时遇到 id 列的冲突(即 id 已经存在),执行更新操作而不是插入操作。
- path = excluded.path, cacheKey = excluded.cacheKey, chunkId = excluded.chunkId
- excluded 是一个特殊的表限定符,指向本次 INSERT 试图插入、但因冲突而未能插入的那一行的值(即"新值"),而不是表中已存在的冲突行。
- 将现有记录的 path, cacheKey, chunkId 列更新为新插入值(即 excluded.path, excluded.cacheKey, excluded.chunkId)。
检索
-- Full-text retrieval: match the quoted user input against the fts table, join
-- each hit to its metadata row (via rowid) and to chunk_tags (via chunkId),
-- and keep only chunks carrying one of the requested tags.
-- `rank` is FTS5's built-in relevance column (bm25 by default), so ORDER BY rank
-- returns the best matches first; LIMIT caps the number of rows returned.
SELECT fts_metadata.chunkId, fts_metadata.path, fts.content, rank
FROM fts
JOIN fts_metadata ON fts.rowid = fts_metadata.id
JOIN chunk_tags ON fts_metadata.chunkId = chunk_tags.chunkId
WHERE fts MATCH '"user input"' AND chunk_tags.tag IN (?)
ORDER BY rank
LIMIT ?
向量检索
- LanceDB 存储位置:~/.continue/index/lancedb
创建
-- Cache table for embedded chunks: one row per uuid, storing the chunk's
-- cacheKey, file path, artifact_id, line range (startLine..endLine), contents,
-- and its vector serialized into a TEXT column.
-- NOTE(review): the CREATE TABLE / TEXT-column syntax suggests this table lives
-- in the SQLite index database rather than in LanceDB itself; confirm.
CREATE TABLE IF NOT EXISTS lance_db_cache (
uuid TEXT PRIMARY KEY,
cacheKey TEXT NOT NULL,
path TEXT NOT NULL,
artifact_id TEXT NOT NULL,
vector TEXT NOT NULL,
startLine INTEGER NOT NULL,
endLine INTEGER NOT NULL,
contents TEXT NOT NULL
)
增加
-- Store one embedded chunk (all eight columns) in the cache table.
INSERT INTO lance_db_cache (uuid, cacheKey, path, artifact_id, vector, startLine, endLine, contents) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
检索
-- Fetch the cached rows for a set of uuids.
-- NOTE(review): the uuids presumably come from the vector search results; confirm.
SELECT * FROM lance_db_cache WHERE uuid in (?)
全文检索源代码
在聊天窗口中输入:
@Codebase
用户提问
全文检索
文件 | 类 | 函数 | 事件 |
---|---|---|---|
extensions/vscode/src/webviewProtocol.ts | VsCodeWebviewProtocol | set webview | |
extensions/vscode/src/extension/VsCodeMessenger.ts | VsCodeMessenger | constructor | WEBVIEW_TO_CORE_PASS_THROUGH |
core/core.ts | Core | constructor | context/getContextItems |
core/context/retrieval/retrieval.ts | | retrieveContextItemsFromEmbeddings | |
core/context/retrieval/pipelines/RerankerRetrievalPipeline.ts | RerankerRetrievalPipeline | _retrieveInitial | |
core/context/retrieval/fullTextSearch.ts | | retrieveFts | |
core/indexing/FullTextSearch.ts | FullTextSearchCodebaseIndex | retrieve | |
向量检索
文件 | 类 | 函数 |
---|---|---|
core/context/retrieval/pipelines/RerankerRetrievalPipeline.ts | RerankerRetrievalPipeline | _retrieveInitial |
core/context/retrieval/pipelines/BaseRetrievalPipeline.ts | BaseRetrievalPipeline | retrieveEmbeddings |
core/indexing/LanceDbIndex.ts | LanceDbIndex | retrieve |