"""Helpers for querying and populating the knowledge and memory vector stores."""

from datetime import datetime
from typing import List

from langchain_core.documents import Document

from config.milvus import get_kn_vectorstore, get_mem_vectorstore


def _quote_literal(value) -> str:
    """Render *value* as a double-quoted string literal for a filter expression.

    Backslashes and double quotes are backslash-escaped so that a value
    containing them cannot terminate the literal early and alter the filter.
    Values without those characters are rendered exactly as ``"value"``.
    """
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _build_id_filter(field: str, ids: List[str]) -> str:
    """Return ``(field == "a" or field == "b" ...)`` for every id in *ids*."""
    clauses = " or ".join(f"{field} == {_quote_literal(item)}" for item in ids)
    return f"({clauses})"


def _format_docs(docs, label: str) -> str:
    """Join the non-empty page contents of *docs* into one numbered string.

    Each entry is rendered as ``[<label><n>]: <text>`` where ``n`` is the
    1-based position of the document in *docs*; documents whose stripped
    content is empty are skipped but still consume their position number.
    Entries are separated by a blank line.
    """
    numbered = (
        (position, doc.page_content.strip())
        for position, doc in enumerate(docs, start=1)
    )
    return "\n\n".join(
        f"[{label}{position}]: {text}" for position, text in numbered if text
    )


def get_knowledge_by_key_words(key_words: str, kn_ids: List[str]) -> str:
    """Search the knowledge store for *key_words*, limited to *kn_ids*.

    Args:
        key_words: Query text passed to the store's similarity search.
        kn_ids: Values of ``kn_id`` the search is restricted to.

    Returns:
        The matching documents formatted as numbered ``[文档n]: ...`` entries
        separated by blank lines (an empty string when nothing matches), or a
        fixed "not found" message when *kn_ids* is empty.
    """
    if not kn_ids:
        # Without any knowledge-base id there is nothing to search in.
        return "未找到相关的知识。"

    matches = get_kn_vectorstore().similarity_search(
        query=key_words,
        k=3,  # number of results to return; adjustable
        expr=_build_id_filter("kn_id", kn_ids),
    )
    # Numbering the entries makes them easier for an LLM to tell apart.
    return _format_docs(matches, "文档")


def get_memory_by_key_words(key_words: str, ai_ids: List[str]) -> str:
    """Search the memory store for *key_words*, limited to *ai_ids*.

    Args:
        key_words: Query text passed to the store's similarity search.
        ai_ids: Values of ``ai_id`` the search is restricted to.

    Returns:
        The matching memories formatted as numbered ``[记忆n]: ...`` entries
        separated by blank lines. An empty string is returned when nothing
        matches or when *ai_ids* is empty.
    """
    # Debug output kept from the original implementation.
    print("ai_id是:", ai_ids)

    if not ai_ids:
        # Previously this fell through to an unfiltered search across every
        # stored memory; the original TODO stated that nothing should be
        # returned in this case, so skip the query entirely.
        return ""

    matches = get_mem_vectorstore().similarity_search(
        query=key_words,
        k=5,  # number of results to return; adjustable
        expr=_build_id_filter("ai_id", ai_ids),
    )
    # Numbering the entries makes them easier for an LLM to tell apart.
    return _format_docs(matches, "记忆")


def get_knowledge_by_base_id(base_id: str) -> List[dict]:
    """List the knowledge entries whose ``kn_id`` equals *base_id*.

    Args:
        base_id: The ``kn_id`` value to filter on.

    Returns:
        One dict per returned document with the keys ``id`` (stringified
        ``metadata["id"]``), ``text`` (page content) and ``is_active``
        (``metadata["is_active"]``).

    Raises:
        KeyError: If a returned document lacks ``id`` or ``is_active`` in its
            metadata.
    """
    matches = get_kn_vectorstore().similarity_search(
        # An empty query is passed because only the filter is meant to select
        # results here.
        query="",
        k=100,  # at most 100 entries are requested
        expr=f"kn_id == {_quote_literal(base_id)}",
    )
    return [
        {
            "id": str(doc.metadata["id"]),
            "text": doc.page_content,
            "is_active": doc.metadata["is_active"],
        }
        for doc in matches
    ]


def add_knowledge(text: str, is_active: bool, base_id: str, user_id: str):
    """Store *text* as a single document in the knowledge store.

    Args:
        text: Content of the new document.
        is_active: Stored unchanged in the ``is_active`` metadata field.
        base_id: Stored (as a string) in the ``kn_id`` metadata field.
        user_id: Stored (as a string) in the ``created_by`` metadata field.

    Returns:
        Whatever the store's ``add_documents`` call returns.
    """
    metadata = {
        "kn_id": str(base_id),
        "created_by": str(user_id),
        # Local time without timezone information, in ISO 8601 format.
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    document = Document(page_content=text, metadata=metadata)
    return get_kn_vectorstore().add_documents([document])


def add_memory(ai_id: str, mem: str, user_id: str, is_active: bool):
    """Store *mem* as a single document in the memory store.

    Args:
        ai_id: Stored (as a string) in the ``ai_id`` metadata field.
        mem: Content of the new memory document.
        user_id: Stored (as a string) in the ``created_by`` metadata field.
        is_active: Stored unchanged in the ``is_active`` metadata field.

    Returns:
        Whatever the store's ``add_documents`` call returns.
    """
    metadata = {
        "ai_id": str(ai_id),
        "created_by": str(user_id),
        # Local time without timezone information, in ISO 8601 format.
        "created_at": datetime.now().isoformat(),
        "is_active": is_active,
    }
    document = Document(page_content=mem, metadata=metadata)
    return get_mem_vectorstore().add_documents([document])