{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# RAG with Milvus and DashScope embeddings\n",
    "\n",
    "This notebook embeds the Milvus FAQ markdown files with `text-embedding-v4`, stores the vectors in a Milvus collection, retrieves the chunks closest to a question, and asks `qwen-turbo` to answer from them.\n",
    "\n",
    "**Prerequisites**\n",
    "\n",
    "- The `DASHSCOPE_API_KEY` environment variable holds a valid API key.\n",
    "- A Milvus server is reachable at `MILVUS_HOST:MILVUS_PORT` (see the configuration cell).\n",
    "- The FAQ markdown files are available under `milvus_docs/en/faq/`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfb008fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from glob import glob\n",
    "\n",
    "from openai import OpenAI\n",
    "from pymilvus import Collection, MilvusClient, MilvusException, connections, db, utility\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings shared by the cells below.\n",
    "MILVUS_HOST = \"10.10.10.9\"\n",
    "MILVUS_PORT = 19530\n",
    "EMBEDDING_DIM = 1024  # vector size requested from the embedding model and used for the collection"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "embedding-md",
   "metadata": {},
   "source": [
    "## Embedding client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eaa97ad1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The API key is read from the environment so that it is never stored in the notebook file.\n",
    "client = OpenAI(\n",
    "    api_key=os.environ[\"DASHSCOPE_API_KEY\"],\n",
    "    base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",  # base_url of the DashScope (Bailian) service\n",
    ")\n",
    "\n",
    "\n",
    "def emb_text(text):\n",
    "    \"\"\"Return the embedding vector of `text` produced by the `text-embedding-v4` model.\"\"\"\n",
    "    return client.embeddings.create(\n",
    "        model=\"text-embedding-v4\",\n",
    "        input=text,\n",
    "        dimensions=EMBEDDING_DIM,  # vector size (only text-embedding-v3 and text-embedding-v4 support this parameter)\n",
    "        encoding_format=\"float\",\n",
    "    ).data[0].embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9df315ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke test: embed one sentence and check the vector size.\n",
    "test_embedding = emb_text(\"This is a test\")\n",
    "embedding_dim = len(test_embedding)\n",
    "assert embedding_dim == EMBEDDING_DIM, f\"expected {EMBEDDING_DIM} dimensions, got {embedding_dim}\"\n",
    "print(embedding_dim)\n",
    "print(test_embedding[:10])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "milvus-md",
   "metadata": {},
   "source": [
    "## Create the Milvus collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95d0a121",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Milvus database configuration\n",
    "milvus_client = MilvusClient(uri=f\"http://{MILVUS_HOST}:{MILVUS_PORT}\")\n",
    "collection_name = \"my_rag_collection\"\n",
    "\n",
    "# Recreate the collection so that re-running the notebook starts from an empty one.\n",
    "if milvus_client.has_collection(collection_name):\n",
    "    milvus_client.drop_collection(collection_name)\n",
    "milvus_client.create_collection(\n",
    "    collection_name=collection_name,\n",
    "    dimension=EMBEDDING_DIM,\n",
    "    metric_type=\"IP\",  # Inner product distance\n",
    "    consistency_level=\"Bounded\",  # Supported values are (`\"Strong\"`, `\"Session\"`, `\"Bounded\"`, `\"Eventually\"`). See https://milvus.io/docs/consistency.md#Consistency-Level for more details.\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ingest-md",
   "metadata": {},
   "source": [
    "## Load, embed and insert the documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e09edfec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert data from the markdown files\n",
    "text_lines = []\n",
    "# Sorted so that chunk ids are assigned in the same order on every run.\n",
    "for file_path in sorted(glob(\"milvus_docs/en/faq/*.md\")):\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        file_text = file.read()\n",
    "\n",
    "    # Split at every \"# \" occurrence and skip whitespace-only chunks.\n",
    "    text_lines += [chunk for chunk in file_text.split(\"# \") if chunk.strip()]\n",
    "\n",
    "data = []\n",
    "\n",
    "for i, line in enumerate(tqdm(text_lines, desc=\"Creating embeddings\")):\n",
    "    data.append({\"id\": i, \"vector\": emb_text(line), \"text\": line})\n",
    "\n",
    "insert_result = milvus_client.insert(collection_name=collection_name, data=data)\n",
    "insert_result[\"insert_count\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "retrieve-md",
   "metadata": {},
   "source": [
    "## Retrieve context for a question"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3007553",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"milvus是什么,用中文回答\"\n",
    "search_res = milvus_client.search(\n",
    "    collection_name=collection_name,\n",
    "    data=[\n",
    "        emb_text(question)\n",
    "    ],  # Use the `emb_text` function to convert the question to an embedding vector\n",
    "    limit=3,  # Return top 3 results\n",
    "    search_params={\"metric_type\": \"IP\", \"params\": {}},  # Inner product distance\n",
    "    output_fields=[\"text\"],  # Return the text field\n",
    ")\n",
    "\n",
    "# Pair each retrieved chunk with its similarity score.\n",
    "retrieved_lines_with_distances = [\n",
    "    (res[\"entity\"][\"text\"], res[\"distance\"]) for res in search_res[0]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "generate-md",
   "metadata": {},
   "source": [
    "## Generate the answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "generate-answer",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the prompt from the retrieved chunks and get the answer\n",
    "context = \"\\n\".join(\n",
    "    [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]\n",
    ")\n",
    "SYSTEM_PROMPT = \"\"\"\n",
    "Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.\n",
    "\"\"\"\n",
    "# The tags named in the instruction sentence must actually delimit the context and the question.\n",
    "USER_PROMPT = f\"\"\"\n",
    "Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.\n",
    "<context>\n",
    "{context}\n",
    "</context>\n",
    "<question>\n",
    "{question}\n",
    "</question>\n",
    "\"\"\"\n",
    "response = client.chat.completions.create(\n",
    "    model=\"qwen-turbo\",\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
    "        {\"role\": \"user\", \"content\": USER_PROMPT},\n",
    "    ],\n",
    ")\n",
    "print(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cleanup-md",
   "metadata": {},
   "source": [
    "## Cleanup (destructive)\n",
    "\n",
    "**Warning:** the next cell drops *every* collection in the database named by `db_name`, not only the collection created by this notebook. Run it only against a Milvus instance you own."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "077922d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)\n",
    "\n",
    "# Check if the database exists\n",
    "db_name = \"default\"\n",
    "\n",
    "try:\n",
    "    existing_databases = db.list_database()\n",
    "    if db_name in existing_databases:\n",
    "        print(f\"Database '{db_name}' already exists.\")\n",
    "\n",
    "        # Use the database context\n",
    "        db.using_database(db_name)\n",
    "\n",
    "        # Drop all collections in the database. The loop variable is deliberately not\n",
    "        # called `collection_name`, so the notebook-level variable used by the cells\n",
    "        # above keeps its value.\n",
    "        for existing_collection_name in utility.list_collections():\n",
    "            Collection(name=existing_collection_name).drop()\n",
    "            print(f\"Collection '{existing_collection_name}' has been dropped.\")\n",
    "\n",
    "        # The built-in default database cannot be dropped (the call fails with an RPC\n",
    "        # error), so only user-created databases are removed.\n",
    "        if db_name == \"default\":\n",
    "            print(f\"Database '{db_name}' is the built-in default database and is kept.\")\n",
    "        else:\n",
    "            db.drop_database(db_name)\n",
    "            print(f\"Database '{db_name}' has been deleted.\")\n",
    "    else:\n",
    "        print(f\"Database '{db_name}' does not exist.\")\n",
    "        db.create_database(db_name)\n",
    "        print(f\"Database '{db_name}' created successfully.\")\n",
    "except MilvusException as e:\n",
    "    print(f\"An error occurred: {e}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "lang",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}