110 lines
3.4 KiB
Python
110 lines
3.4 KiB
Python
from config.milvus import knVectorstore,memVectorstore
|
|
from langchain.schema import Document
|
|
from datetime import datetime
|
|
from typing import List
|
|
|
|
from typing import List, Dict, Any
|
|
|
|
def get_knowledge_by_key_words(key_words: str, kn_ids: List[str]) -> str:
    """Search the knowledge vector store and format the hits as one string.

    Runs a similarity search for ``key_words`` (top 3 hits) restricted to
    entries whose ``kn_id`` equals one of ``kn_ids``.

    Args:
        key_words: Query text passed to the similarity search.
        kn_ids: Knowledge-base ids the search is limited to.

    Returns:
        The non-blank hits, each rendered as ``[文档N]: <text>`` and
        separated by blank lines, where N is the hit's 1-based position in
        the search result. When ``kn_ids`` is empty (or otherwise falsy) a
        fixed "nothing found" message is returned without querying the
        store. When the search yields no non-blank text the result is an
        empty string.
    """
    # Without any ids there is nothing to search in, so skip the query.
    if not kn_ids:
        return "未找到相关的知识。"

    # Filter expression: match any of the requested knowledge-base ids.
    # NOTE(review): ids are interpolated verbatim; presumably they never
    # contain a double quote -- confirm against callers.
    id_clauses = [f'kn_id == "{kid}"' for kid in kn_ids]
    expr = "(" + " or ".join(id_clauses) + ")"

    hits = knVectorstore.similarity_search(
        query=key_words,
        k=3,  # number of hits to return; tune as needed
        expr=expr,
    )

    # Label every hit with its rank so the LLM can tell the documents apart.
    # Blank hits are dropped but do not shift the numbering of later ones.
    ranked_texts = (
        (rank, hit.page_content.strip())
        for rank, hit in enumerate(hits, start=1)
    )
    return "\n\n".join(
        f"[文档{rank}]: {text}" for rank, text in ranked_texts if text
    )
|
|
|
|
|
|
def get_memory_by_key_words(key_words: str, ai_ids: List[str]) -> str:
    """Search the memory vector store and format the hits as one string.

    Runs a similarity search for ``key_words`` (top 5 hits) restricted to
    entries whose ``ai_id`` equals one of ``ai_ids``.

    Args:
        key_words: Query text passed to the similarity search.
        ai_ids: AI ids whose memories may be returned.

    Returns:
        The non-blank hits, each rendered as ``[记忆N]: <text>`` and
        separated by blank lines, where N is the hit's 1-based position in
        the search result. An empty string is returned when ``ai_ids`` is
        empty (or otherwise falsy) or when the search yields no non-blank
        text.
    """
    # Debug trace of the requested ids, kept from the original code.
    print("ai_id是:", ai_ids)

    # An empty id list must not fall through to an unfiltered search: that
    # would return memories belonging to any AI. Return the same value a
    # search without hits produces instead.
    if not ai_ids:
        return ""

    # Filter expression: match any of the requested AI ids.
    # NOTE(review): ids are interpolated verbatim; presumably they never
    # contain a double quote -- confirm against callers.
    ids_expr = " or ".join(f'ai_id == "{aid}"' for aid in ai_ids)
    expr = f"({ids_expr})"

    result = memVectorstore.similarity_search(
        query=key_words,
        k=5,  # number of hits to return; tune as needed
        expr=expr,
    )

    # Label every hit with its rank so the LLM can tell the memories apart.
    # Blank hits are dropped but do not shift the numbering of later ones.
    doc_texts = []
    for idx, doc in enumerate(result, start=1):
        text = doc.page_content.strip()
        if text:
            doc_texts.append(f"[记忆{idx}]: {text}")

    # Join everything into one string, entries separated by a blank line.
    return "\n\n".join(doc_texts)
|
|
def get_knowledge_by_base_id(base_id: str):
    """List the entries stored under one knowledge base.

    Queries the knowledge vector store with an empty query string and a
    filter on ``kn_id``, asking for at most 100 hits.

    Args:
        base_id: Knowledge-base id; interpolated into the ``kn_id`` filter.

    Returns:
        A list with one dict per hit, holding ``"id"`` (the metadata id
        converted to a string), ``"text"`` (the page content) and
        ``"is_active"`` (taken from the metadata unchanged). Each hit's
        metadata is indexed directly, so both keys must be present.
    """
    kn_filter = f'kn_id == "{base_id}"'

    # The query text is left empty; the filter is what selects the entries.
    hits = knVectorstore.similarity_search(
        query="",
        k=100,
        expr=kn_filter,
    )

    entries = []
    for hit in hits:
        entry = {
            "id": str(hit.metadata["id"]),
            "text": hit.page_content,
            "is_active": hit.metadata["is_active"],
        }
        entries.append(entry)
    return entries
|
|
|
|
def add_knowledge(text: str, is_active: bool, base_id: str, user_id: str):
    """Store one piece of text in the knowledge vector store.

    Args:
        text: Content of the new entry.
        is_active: Flag stored unchanged in the entry's metadata.
        base_id: Knowledge-base id; stored as a string under ``kn_id``.
        user_id: Creator id; stored as a string under ``created_by``.

    Returns:
        Whatever ``knVectorstore.add_documents`` returns for the single
        document that is added.
    """
    metadata = {
        "kn_id": str(base_id),
        "created_by": str(user_id),
        # ISO 8601 timestamp from datetime.now(), i.e. without a timezone.
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    entry = Document(page_content=text, metadata=metadata)
    return knVectorstore.add_documents([entry])
|
|
|
|
def add_memory(ai_id: str, mem: str, user_id: str, is_active: bool):
    """Store one memory in the memory vector store.

    Args:
        ai_id: Id of the AI the memory belongs to; stored as a string.
        mem: Content of the memory.
        user_id: Creator id; stored as a string under ``created_by``.
        is_active: Flag stored unchanged in the entry's metadata.

    Returns:
        Whatever ``memVectorstore.add_documents`` returns for the single
        document that is added.
    """
    metadata = {
        "ai_id": str(ai_id),
        "created_by": str(user_id),
        # ISO 8601 timestamp from datetime.now(), i.e. without a timezone.
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    entry = Document(page_content=mem, metadata=metadata)
    return memVectorstore.add_documents([entry])
|