Files

109 lines
3.3 KiB
Python

from datetime import datetime
from typing import List
from langchain_core.documents import Document
from config.milvus import get_kn_vectorstore, get_mem_vectorstore
def get_knowledge_by_key_words(key_words: str, kn_ids: List[str]) -> str:
    """Search the knowledge store for *key_words*, restricted to *kn_ids*.

    Args:
        key_words: Query text handed to the vector store's similarity search.
        kn_ids: Knowledge-base ids to search within. When empty there is
            nothing to search and the not-found message is returned.

    Returns:
        The non-blank hits formatted as ``[文档N]: text`` entries separated by
        blank lines (N is the 1-based rank of the hit), or the not-found
        message when ``kn_ids`` is empty or no hit carries any text.
    """
    not_found = "未找到相关的知识。"
    if not kn_ids:
        return not_found

    # Filter expression: only look inside the requested knowledge bases.
    # NOTE(review): ids are interpolated verbatim into the expression;
    # confirm callers never pass ids that contain double quotes.
    ids_expr = " or ".join(f'kn_id == "{kid}"' for kid in kn_ids)
    expr = f"({ids_expr})"

    result = get_kn_vectorstore().similarity_search(
        query=key_words,
        k=3,  # number of hits to return; adjust as needed
        expr=expr,
    )

    # Number each snippet so the LLM can tell the documents apart. Blank
    # snippets are skipped but still consume their rank number.
    doc_texts = []
    for idx, doc in enumerate(result, start=1):
        text = doc.page_content.strip()
        if text:
            doc_texts.append(f"[文档{idx}]: {text}")

    # Return the same sentinel as the empty-ids case when nothing usable was
    # found, instead of an empty string.
    if not doc_texts:
        return not_found
    return "\n\n".join(doc_texts)
def get_memory_by_key_words(key_words: str, ai_ids: List[str]) -> str:
    """Search the memory store for *key_words*, restricted to *ai_ids*.

    Args:
        key_words: Query text handed to the vector store's similarity search.
        ai_ids: Ids of the AIs whose memories may be searched. When empty,
            no search is performed and an empty string is returned, so
            memories of other AIs are never returned by an unfiltered query.

    Returns:
        The non-blank hits formatted as ``[记忆N]: text`` entries separated by
        blank lines (N is the 1-based rank of the hit), or ``""`` when
        ``ai_ids`` is empty or no hit carries any text.
    """
    if not ai_ids:
        # Without ids the filter would be empty and the search would run over
        # every stored memory; return nothing instead.
        return ""

    # Filter expression: only look at memories owned by the requested AIs.
    # NOTE(review): ids are interpolated verbatim into the expression;
    # confirm callers never pass ids that contain double quotes.
    ids_expr = " or ".join(f'ai_id == "{aid}"' for aid in ai_ids)
    expr = f"({ids_expr})"

    result = get_mem_vectorstore().similarity_search(
        query=key_words,
        k=5,  # number of hits to return; adjust as needed
        expr=expr,
    )

    # Number each snippet so the LLM can tell the memories apart. Blank
    # snippets are skipped but still consume their rank number.
    doc_texts = []
    for idx, doc in enumerate(result, start=1):
        text = doc.page_content.strip()
        if text:
            doc_texts.append(f"[记忆{idx}]: {text}")

    # Join the snippets into one string separated by blank lines.
    return "\n\n".join(doc_texts)
def get_knowledge_by_base_id(base_id: str):
    """List the entries stored under the knowledge base *base_id*.

    The lookup is a similarity search with an empty query string and a
    ``kn_id`` filter, capped at 100 hits.

    Returns:
        A list of dicts with the keys ``id`` (stringified metadata id),
        ``text`` (the page content) and ``is_active`` (taken from metadata).
    """
    # Only the filter matters here; the query text is left empty.
    kn_filter = f'kn_id == "{base_id}"'
    matches = get_kn_vectorstore().similarity_search(
        query="",
        k=100,
        expr=kn_filter,
    )

    entries = []
    for match in matches:
        entry = {
            "id": str(match.metadata["id"]),
            "text": match.page_content,
            "is_active": match.metadata["is_active"],
        }
        entries.append(entry)
    return entries
def add_knowledge(text: str, is_active: bool, base_id: str, user_id: str):
    """Store *text* as a single document in the knowledge vector store.

    The document's metadata records the owning knowledge base (``kn_id``),
    the creator (``created_by``), the local creation time in ISO format
    (``created_at``) and the ``is_active`` flag.

    Returns:
        Whatever the vector store's ``add_documents`` call returns.
    """
    metadata = {
        "kn_id": str(base_id),
        "created_by": str(user_id),
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    entry = Document(page_content=text, metadata=metadata)
    store = get_kn_vectorstore()
    return store.add_documents([entry])
def add_memory(ai_id: str, mem: str, user_id: str, is_active: bool):
    """Store *mem* as a single document in the memory vector store.

    The document's metadata records the owning AI (``ai_id``), the creator
    (``created_by``), the local creation time in ISO format
    (``created_at``) and the ``is_active`` flag.

    Returns:
        Whatever the vector store's ``add_documents`` call returns.
    """
    metadata = {
        "ai_id": str(ai_id),
        "created_by": str(user_id),
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    entry = Document(page_content=mem, metadata=metadata)
    store = get_mem_vectorstore()
    return store.add_documents([entry])