更新python后端

This commit is contained in:
BBIT-Kai
2025-09-18 17:18:18 +08:00
parent 2fc209e6e6
commit de6a350da8
45 changed files with 2524 additions and 89 deletions
+126
View File
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d029ad67",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from typing import List\n",
"\n",
"from langchain_community.embeddings import DashScopeEmbeddings\n",
"from langchain_milvus import BM25BuiltInFunction, Milvus\n",
"\n",
"# Milvus endpoint used by this notebook.\n",
"URI = \"http://10.10.10.9:19530\"\n",
"# Read the DashScope (Tongyi) API key from the environment instead of committing it.\n",
"# NOTE(review): any key that has ever been committed should be rotated.\n",
"tongyiKey = os.environ[\"DASHSCOPE_API_KEY\"]\n",
"\n",
"embeddings = DashScopeEmbeddings(\n",
"    model=\"text-embedding-v3\",\n",
"    dashscope_api_key=tongyiKey,\n",
")\n",
"\n",
"# Vector store over the \"memory\" collection; ai_id is the partition key.\n",
"memVectorstore = Milvus(\n",
"    embedding_function=embeddings,\n",
"    connection_args={\"uri\": URI, \"token\": \"root:Milvus\", \"db_name\": \"bbit_ai_lab\"},\n",
"    collection_name=\"memory\",\n",
"    index_params={\"index_type\": \"FLAT\", \"metric_type\": \"L2\"},\n",
"    consistency_level=\"Strong\",\n",
"    auto_id=True,\n",
"    primary_field=\"id\",\n",
"    text_field=\"text\",\n",
"    vector_field=\"vector\",\n",
"    partition_key_field=\"ai_id\",\n",
"    enable_dynamic_field=True,\n",
"    drop_old=False,  # set to True to drop an existing collection with that name\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a480053b",
"metadata": {},
"outputs": [],
"source": [
"def get_memory_by_key_words(key_words: str, ai_ids: List[str]) -> str:\n",
"    \"\"\"Search the memory collection and return the hits as one formatted string.\n",
"\n",
"    Args:\n",
"        key_words: Query text handed to the vector similarity search.\n",
"        ai_ids: ``ai_id`` values the search is restricted to. When the list\n",
"            is empty, an empty filter expression is passed.\n",
"\n",
"    Returns:\n",
"        The non-empty hits (the search asks for at most five), each prefixed\n",
"        with ``[记忆N]: `` and separated by a blank line. The string is empty\n",
"        when there is no usable hit.\n",
"    \"\"\"\n",
"    print(\"ai_id是:\", ai_ids)\n",
"\n",
"    # Build the filter expression: match any of the requested ai_id values.\n",
"    if ai_ids:\n",
"        ids_expr = \" or \".join(f'ai_id == \"{ai_id}\"' for ai_id in ai_ids)\n",
"        expr = f\"({ids_expr})\"\n",
"    else:\n",
"        # TODO: with no ai_ids the search is not restricted at all; it should\n",
"        # arguably return nothing instead.\n",
"        expr = \"\"\n",
"\n",
"    # Query the memory store defined in the setup cell (memVectorstore).\n",
"    result = memVectorstore.similarity_search(\n",
"        query=key_words,\n",
"        k=5,  # number of hits to request; tune as needed\n",
"        expr=expr,\n",
"    )\n",
"\n",
"    # Number every hit so the LLM can tell the memories apart; blank hits are\n",
"    # skipped but still consume their number.\n",
"    doc_texts = []\n",
"    for idx, doc in enumerate(result, start=1):\n",
"        text = doc.page_content.strip()\n",
"        if text:\n",
"            doc_texts.append(f\"[记忆{idx}]: {text}\")\n",
"\n",
"    # Join everything into one string, separated by blank lines.\n",
"    return \"\\n\\n\".join(doc_texts)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "36759de5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ai_id是: ['3730f279-8b56-46ec-bde9-8a9e6c27f021']\n"
]
},
{
"ename": "NameError",
"evalue": "name 'knVectorstore' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mget_memory_by_key_words\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m共育室 部署 地方\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m3730f279-8b56-46ec-bde9-8a9e6c27f021\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[2], line 13\u001b[0m, in \u001b[0;36mget_memory_by_key_words\u001b[0;34m(key_words, ai_ids)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 11\u001b[0m expr \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# 不限制 kn_id todo 实际上应该不反悔任何内容\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mknVectorstore\u001b[49m\u001b[38;5;241m.\u001b[39msimilarity_search(\n\u001b[1;32m 14\u001b[0m query\u001b[38;5;241m=\u001b[39mkey_words,\n\u001b[1;32m 15\u001b[0m k\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, \u001b[38;5;66;03m# 可调节返回条数\u001b[39;00m\n\u001b[1;32m 16\u001b[0m expr\u001b[38;5;241m=\u001b[39mexpr\n\u001b[1;32m 17\u001b[0m )\n\u001b[1;32m 19\u001b[0m \u001b[38;5;66;03m# 整理成字符串\u001b[39;00m\n\u001b[1;32m 20\u001b[0m doc_texts \u001b[38;5;241m=\u001b[39m []\n",
"\u001b[0;31mNameError\u001b[0m: name 'knVectorstore' is not defined"
]
}
],
"source": [
"# Try the memory lookup for a single ai_id.\n",
"get_memory_by_key_words(\n",
"    key_words=\"共育室 部署 地方\",\n",
"    ai_ids=[\"3730f279-8b56-46ec-bde9-8a9e6c27f021\"],\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lang",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+114
View File
@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"id": "d029ad67",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[460823023525530114, 460823023525530115]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"from langchain.schema import Document\n",
"from langchain_community.embeddings import DashScopeEmbeddings\n",
"from langchain_milvus import BM25BuiltInFunction, Milvus\n",
"\n",
"# Milvus endpoint used by this notebook.\n",
"URI = \"http://10.10.10.9:19530\"\n",
"# Read the DashScope (Tongyi) API key from the environment instead of committing it.\n",
"# NOTE(review): any key that has ever been committed should be rotated.\n",
"tongyiKey = os.environ[\"DASHSCOPE_API_KEY\"]\n",
"\n",
"embeddings = DashScopeEmbeddings(\n",
"    model=\"text-embedding-v3\",\n",
"    dashscope_api_key=tongyiKey,\n",
")\n",
"\n",
"# Vector store over the \"knowledge\" collection; kn_id is the partition key.\n",
"vectorstore = Milvus(\n",
"    embedding_function=embeddings,\n",
"    connection_args={\"uri\": URI, \"token\": \"root:Milvus\", \"db_name\": \"bbit_ai_lab\"},\n",
"    collection_name=\"knowledge\",\n",
"    index_params={\"index_type\": \"FLAT\", \"metric_type\": \"L2\"},\n",
"    consistency_level=\"Strong\",\n",
"    auto_id=True,\n",
"    primary_field=\"id\",\n",
"    text_field=\"text\",\n",
"    vector_field=\"vector\",\n",
"    partition_key_field=\"kn_id\",\n",
"    enable_dynamic_field=True,\n",
"    drop_old=False,  # set to True to drop an existing collection with that name\n",
")\n",
"\n",
"# Two sample documents that belong to the same knowledge base (kn_id).\n",
"docs = [\n",
"    Document(\n",
"        page_content=\"这是第一条文本\",\n",
"        metadata={\n",
"            \"kn_id\": \"8ecd1179-4194-4b80-bc39-5addc678df4b\",\n",
"            \"is_active\": True,\n",
"        },\n",
"    ),\n",
"    Document(\n",
"        page_content=\"这是第二条文本\",\n",
"        metadata={\n",
"            \"kn_id\": \"8ecd1179-4194-4b80-bc39-5addc678df4b\",\n",
"            \"is_active\": True,\n",
"        },\n",
"    ),\n",
"]\n",
"\n",
"# The value of this last expression is what the notebook displays for the cell.\n",
"vectorstore.add_documents(docs)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a480053b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*这是第一条文本 [{'kn_id': '8ecd1179-4194-4b80-bc39-5addc678df4b', 'id': 460823023525530108, 'is_active': True}]\n",
"*这是第一条文本 [{'kn_id': '8ecd1179-4194-4b80-bc39-5addc678df4b', 'id': 460823023525530110, 'is_active': True}]\n"
]
}
],
"source": [
"# Fetch two entries of one knowledge base: the query text is empty and\n",
"# expr restricts the hits to a single kn_id.\n",
"results = vectorstore.similarity_search(\"\", k=2, expr='kn_id == \"8ecd1179-4194-4b80-bc39-5addc678df4b\"')\n",
"\n",
"# Print each hit as \"*<text> [<metadata>]\".\n",
"for res in results:\n",
"    print(f\"*{res.page_content} [{res.metadata}]\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lang",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+260
View File
@@ -0,0 +1,260 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"id": "dfb008fd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from openai import OpenAI\n",
"from glob import glob\n",
"from pymilvus import MilvusClient\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "eaa97ad1",
"metadata": {},
"outputs": [],
"source": [
"# OpenAI client pointed at DashScope's OpenAI-compatible endpoint.\n",
"client = OpenAI(\n",
"    # Read the key from the environment instead of committing it.\n",
"    # NOTE(review): any key that has ever been committed should be rotated.\n",
"    api_key=os.environ[\"DASHSCOPE_API_KEY\"],\n",
"    base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",  # base_url of the Bailian service\n",
")\n",
"\n",
"\n",
"def emb_text(text):\n",
"    \"\"\"Return the embedding vector of ``text``.\n",
"\n",
"    Args:\n",
"        text: Input passed to the ``text-embedding-v4`` model.\n",
"\n",
"    Returns:\n",
"        The ``embedding`` of the first item in the response; 1024 dimensions\n",
"        are requested.\n",
"    \"\"\"\n",
"    response = client.embeddings.create(\n",
"        model=\"text-embedding-v4\",\n",
"        input=text,\n",
"        dimensions=1024,  # vector size (only text-embedding-v3 and text-embedding-v4 accept this parameter)\n",
"        encoding_format=\"float\",\n",
"    )\n",
"    return response.data[0].embedding"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9df315ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1024\n",
"[-0.017507297918200493, 0.02571254037320614, 0.02589302882552147, -0.02639283984899521, -0.013571279123425484, -0.0032158030662685633, -0.006428135093301535, 0.02458796463906765, -0.059366535395383835, 0.13083963096141815]\n"
]
}
],
"source": [
"# Sanity check: embed a sample sentence, then print the vector length\n",
"# followed by its first ten components.\n",
"test_embedding = emb_text(\"This is a test\")\n",
"embedding_dim = len(test_embedding)\n",
"print(embedding_dim, test_embedding[:10], sep=\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95d0a121",
"metadata": {},
"outputs": [],
"source": [
"# Milvus database configuration.\n",
"collection_name = \"my_rag_collection\"\n",
"embedding_dim = 1024\n",
"milvus_client = MilvusClient(uri=\"http://10.10.10.9:19530\")\n",
"\n",
"# Remove any existing collection with this name before recreating it.\n",
"if milvus_client.has_collection(collection_name):\n",
"    milvus_client.drop_collection(collection_name)\n",
"\n",
"milvus_client.create_collection(\n",
"    collection_name=collection_name,\n",
"    dimension=embedding_dim,\n",
"    metric_type=\"IP\",  # inner product distance\n",
"    # Supported values are \"Strong\", \"Session\", \"Bounded\" and \"Eventually\";\n",
"    # see https://milvus.io/docs/consistency.md#Consistency-Level for details.\n",
"    consistency_level=\"Bounded\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e09edfec",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Creating embeddings: 100%|██████████| 72/72 [00:11<00:00, 6.46it/s]\n"
]
},
{
"data": {
"text/plain": [
"{'insert_count': 72, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'cost': 0}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the FAQ markdown files and insert their chunks into Milvus.\n",
"text_lines = []\n",
"for file_path in glob(\"milvus_docs/en/faq/*.md\", recursive=True):\n",
"    # Decode explicitly so the result does not depend on the platform locale\n",
"    # (the markdown files are assumed to be UTF-8).\n",
"    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
"        file_text = file.read()\n",
"\n",
"    # Split on the \"# \" heading marker; every piece becomes one chunk.\n",
"    text_lines += file_text.split(\"# \")\n",
"\n",
"# One row per chunk: sequential id, embedding vector and the raw text.\n",
"data = [\n",
"    {\"id\": i, \"vector\": emb_text(line), \"text\": line}\n",
"    for i, line in enumerate(tqdm(text_lines, desc=\"Creating embeddings\"))\n",
"]\n",
"\n",
"milvus_client.insert(collection_name=collection_name, data=data)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f3007553",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Milvus 是一个开源的向量数据库,主要用于高效地存储、管理和检索大规模的向量数据。它广泛应用于机器学习、推荐系统、图像识别等需要处理高维数据的场景。\n"
]
}
],
"source": [
"import json\n",
"\n",
"question = \"milvus是什么,用中文回答\"\n",
"\n",
"# Embed the question with emb_text and fetch the three best matches by\n",
"# inner product, returning the text field of every hit.\n",
"search_res = milvus_client.search(\n",
"    collection_name=collection_name,\n",
"    data=[emb_text(question)],\n",
"    limit=3,\n",
"    search_params={\"metric_type\": \"IP\", \"params\": {}},\n",
"    output_fields=[\"text\"],\n",
")\n",
"\n",
"# Pair each hit's text with its distance, then join the texts into the context.\n",
"retrieved_lines_with_distances = [\n",
"    (hit[\"entity\"][\"text\"], hit[\"distance\"]) for hit in search_res[0]\n",
"]\n",
"context = \"\\n\".join(text for text, _ in retrieved_lines_with_distances)\n",
"\n",
"SYSTEM_PROMPT = \"\"\"\n",
"Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.\n",
"\"\"\"\n",
"USER_PROMPT = f\"\"\"\n",
"Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.\n",
"<context>\n",
"{context}\n",
"</context>\n",
"<question>\n",
"{question}\n",
"</question>\n",
"\"\"\"\n",
"\n",
"# Ask the chat model to answer the question from the retrieved context.\n",
"chat_messages = [\n",
"    {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
"    {\"role\": \"user\", \"content\": USER_PROMPT},\n",
"]\n",
"response = client.chat.completions.create(model='qwen-turbo', messages=chat_messages)\n",
"print(response.choices[0].message.content)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "077922d1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-09-15 15:12:53,649 [ERROR][handler]: RPC error: [drop_database], <MilvusException: (code=65535, message=can not drop default database)>, <Time:{'RPC start': '2025-09-15 15:12:53.638539', 'RPC error': '2025-09-15 15:12:53.649605'}> (decorators.py:140)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Database 'default' already exists.\n",
"Collection 'my_rag_collection' has been dropped.\n",
"Collection 'bbit_ai_lab_knowledge' has been dropped.\n",
"An error occurred: <MilvusException: (code=65535, message=can not drop default database)>\n"
]
}
],
"source": [
"from pymilvus import Collection, MilvusException, connections, db, utility\n",
"\n",
"# WARNING: destructive maintenance cell. It drops every collection of db_name\n",
"# and then, unless db_name is the built-in default database, the database too.\n",
"connections.connect(host=\"10.10.10.9\", port=19530)\n",
"\n",
"db_name = \"default\"\n",
"\n",
"try:\n",
"    # Check if the database exists.\n",
"    if db_name in db.list_database():\n",
"        print(f\"Database '{db_name}' already exists.\")\n",
"\n",
"        # Use the database context.\n",
"        db.using_database(db_name)\n",
"\n",
"        # Drop all collections in the database. The loop variable is\n",
"        # deliberately not named collection_name, so the notebook-level\n",
"        # collection_name used by the other cells is not overwritten.\n",
"        for existing_name in utility.list_collections():\n",
"            Collection(name=existing_name).drop()\n",
"            print(f\"Collection '{existing_name}' has been dropped.\")\n",
"\n",
"        if db_name == \"default\":\n",
"            # Milvus rejects dropping the default database, so do not attempt it.\n",
"            print(f\"Database '{db_name}' is the default database and cannot be dropped.\")\n",
"        else:\n",
"            db.drop_database(db_name)\n",
"            print(f\"Database '{db_name}' has been deleted.\")\n",
"    else:\n",
"        print(f\"Database '{db_name}' does not exist.\")\n",
"        db.create_database(db_name)\n",
"        print(f\"Database '{db_name}' created successfully.\")\n",
"except MilvusException as e:\n",
"    print(f\"An error occurred: {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lang",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}