更新python后端
This commit is contained in:
@@ -0,0 +1,260 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "dfb008fd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from glob import glob\n",
|
||||
"from pymilvus import MilvusClient\n",
|
||||
"from tqdm import tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "eaa97ad1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = OpenAI(\n",
|
||||
" api_key= \"sk-9464b2498c184982a9fe9d2c2e725ab5\", # 如果您没有配置环境变量,请在此处用您的API Key进行替换\n",
|
||||
" base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\" # 百炼服务的base_url\n",
|
||||
")\n",
|
||||
"def emb_text(text):\n",
|
||||
" return client.embeddings.create(\n",
|
||||
" model=\"text-embedding-v4\",\n",
|
||||
" input=text,\n",
|
||||
" dimensions=1024, # 指定向量维度(仅 text-embedding-v3及 text-embedding-v4支持该参数)\n",
|
||||
" encoding_format=\"float\"\n",
|
||||
" ).data[0].embedding"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9df315ea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1024\n",
|
||||
"[-0.017507297918200493, 0.02571254037320614, 0.02589302882552147, -0.02639283984899521, -0.013571279123425484, -0.0032158030662685633, -0.006428135093301535, 0.02458796463906765, -0.059366535395383835, 0.13083963096141815]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 测试\n",
|
||||
"test_embedding = emb_text(\"This is a test\")\n",
|
||||
"embedding_dim = len(test_embedding)\n",
|
||||
"print(embedding_dim)\n",
|
||||
"print(test_embedding[:10])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "95d0a121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Milvus数据库配置\n",
|
||||
"milvus_client = MilvusClient(uri=\"http://10.10.10.9:19530\")\n",
|
||||
"collection_name = \"my_rag_collection\"\n",
|
||||
"embedding_dim = 1024\n",
|
||||
"\n",
|
||||
"if milvus_client.has_collection(collection_name):\n",
|
||||
" milvus_client.drop_collection(collection_name)\n",
|
||||
"milvus_client.create_collection(\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" dimension=embedding_dim,\n",
|
||||
" metric_type=\"IP\", # Inner product distance\n",
|
||||
" consistency_level=\"Bounded\", # Supported values are (`\"Strong\"`, `\"Session\"`, `\"Bounded\"`, `\"Eventually\"`). See https://milvus.io/docs/consistency.md#Consistency-Level for more details.\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e09edfec",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Creating embeddings: 100%|██████████| 72/72 [00:11<00:00, 6.46it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'insert_count': 72, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'cost': 0}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 从文件中插入数据\n",
|
||||
"text_lines = []\n",
|
||||
"for file_path in glob(\"milvus_docs/en/faq/*.md\", recursive=True):\n",
|
||||
" with open(file_path, \"r\") as file:\n",
|
||||
" file_text = file.read()\n",
|
||||
"\n",
|
||||
" text_lines += file_text.split(\"# \")\n",
|
||||
"\n",
|
||||
"data = []\n",
|
||||
"\n",
|
||||
"for i, line in enumerate(tqdm(text_lines, desc=\"Creating embeddings\")):\n",
|
||||
" data.append({\"id\": i, \"vector\": emb_text(line), \"text\": line})\n",
|
||||
"\n",
|
||||
"milvus_client.insert(collection_name=collection_name, data=data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "f3007553",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Milvus 是一个开源的向量数据库,主要用于高效地存储、管理和检索大规模的向量数据。它广泛应用于机器学习、推荐系统、图像识别等需要处理高维数据的场景。\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"milvus是什么,用中文回答\"\n",
|
||||
"search_res = milvus_client.search(\n",
|
||||
" collection_name=collection_name,\n",
|
||||
" data=[\n",
|
||||
" emb_text(question)\n",
|
||||
" ], # Use the `emb_text` function to convert the question to an embedding vector\n",
|
||||
" limit=3, # Return top 3 results\n",
|
||||
" search_params={\"metric_type\": \"IP\", \"params\": {}}, # Inner product distance\n",
|
||||
" output_fields=[\"text\"], # Return the text field\n",
|
||||
")\n",
|
||||
"import json\n",
|
||||
"# 获取答案\n",
|
||||
"retrieved_lines_with_distances = [\n",
|
||||
" (res[\"entity\"][\"text\"], res[\"distance\"]) for res in search_res[0]\n",
|
||||
"]\n",
|
||||
"context = \"\\n\".join(\n",
|
||||
" [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]\n",
|
||||
")\n",
|
||||
"SYSTEM_PROMPT = \"\"\"\n",
|
||||
"Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.\n",
|
||||
"\"\"\"\n",
|
||||
"USER_PROMPT = f\"\"\"\n",
|
||||
"Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.\n",
|
||||
"<context>\n",
|
||||
"{context}\n",
|
||||
"</context>\n",
|
||||
"<question>\n",
|
||||
"{question}\n",
|
||||
"</question>\n",
|
||||
"\"\"\"\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model='qwen-turbo',\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
|
||||
" {\"role\": \"user\", \"content\": USER_PROMPT},\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"print(response.choices[0].message.content)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "077922d1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-09-15 15:12:53,649 [ERROR][handler]: RPC error: [drop_database], <MilvusException: (code=65535, message=can not drop default database)>, <Time:{'RPC start': '2025-09-15 15:12:53.638539', 'RPC error': '2025-09-15 15:12:53.649605'}> (decorators.py:140)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Database 'default' already exists.\n",
|
||||
"Collection 'my_rag_collection' has been dropped.\n",
|
||||
"Collection 'bbit_ai_lab_knowledge' has been dropped.\n",
|
||||
"An error occurred: <MilvusException: (code=65535, message=can not drop default database)>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pymilvus import Collection, MilvusException, connections, db, utility\n",
|
||||
"\n",
|
||||
"conn = connections.connect(host=\"10.10.10.9\", port=19530)\n",
|
||||
"\n",
|
||||
"# Check if the database exists\n",
|
||||
"db_name = \"default\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" existing_databases = db.list_database()\n",
|
||||
" if db_name in existing_databases:\n",
|
||||
" print(f\"Database '{db_name}' already exists.\")\n",
|
||||
"\n",
|
||||
" # Use the database context\n",
|
||||
" db.using_database(db_name)\n",
|
||||
"\n",
|
||||
" # Drop all collections in the database\n",
|
||||
" collections = utility.list_collections()\n",
|
||||
" for collection_name in collections:\n",
|
||||
" collection = Collection(name=collection_name)\n",
|
||||
" collection.drop()\n",
|
||||
" print(f\"Collection '{collection_name}' has been dropped.\")\n",
|
||||
"\n",
|
||||
" db.drop_database(db_name)\n",
|
||||
" print(f\"Database '{db_name}' has been deleted.\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Database '{db_name}' does not exist.\")\n",
|
||||
" database = db.create_database(db_name)\n",
|
||||
" print(f\"Database '{db_name}' created successfully.\")\n",
|
||||
"except MilvusException as e:\n",
|
||||
" print(f\"An error occurred: {e}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "lang",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user