# File: AILab/bbit_ai/test/vision/Vision.py
# Last modified: 2025-09-24 13:59:00 +08:00
# 57 lines, 1.8 KiB, Python

import os
import base64
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.output_parsers import JsonOutputParser
from langchain.schema import HumanMessage
from pydantic import BaseModel, Field
# Structured output schema handed to the JSON output parser.
# (Kept as comments rather than a class docstring: pydantic would copy a
# docstring into the generated JSON schema and alter the format instructions.)
class ImageDescription(BaseModel):
    # Main objects that appear in the picture.
    objects: list[str] = Field(..., description="图片里出现的主要物体")
    # Description of the scene.
    scene: str = Field(..., description="场景描述")
    # Overall mood / atmosphere.
    mood: str = Field(..., description="整体氛围")
# Turns the model's textual reply into a Python object by parsing it as JSON.
parser = JsonOutputParser(pydantic_object=ImageDescription)

# Instruction template sent along with the image.
#
# The template uses f-string style formatting, so the braces of the example
# JSON object must be doubled ("{{" / "}}"); otherwise the whole JSON body is
# interpreted as a placeholder and formatting with only `query` fails.
#
# NOTE(review): `format_instructions` is supplied as a partial variable but the
# template never references it, and the example keys below do not match the
# fields of ImageDescription -- confirm which schema is the intended one.
prompt = PromptTemplate(
    template="""你是一个图像分析助手。请根据输入的图片内容和文字说明,
输出符合下列 JSON schema 的结果:
{{
"moisture_content": 12.5,
"cocoon_weight": 15.2,
"defective_pupa_count": 3,
"fresh_shell_weight": 8.7,
"sample_count": 50,
"net_weight_total": 760,
"evaluator": "张三",
"reviewer": "李四",
"barcode": "123456789012"
}}
输入内容:
{query}
""",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
def get_response(
    base64_image: str,
    system_text: str = "请分析这张图片并输出JSON结果",
    llm=None,
):
    """Ask a vision-capable chat model to analyse an image and print the JSON reply.

    Args:
        base64_image: Base64-encoded image bytes (treated as JPEG). A value that
            already is a ``data:`` / ``http(s)://`` URL is passed through as-is.
        system_text: Free-text instruction; it is substituted into the
            module-level ``prompt`` template as ``query``.
        llm: Optional chat model to use. When omitted, a ``ChatOpenAI`` instance
            is created; its model name is read from the ``OPENAI_MODEL``
            environment variable and falls back to ``"gpt-4o-mini"``.

    Returns:
        The object produced by the module-level JSON ``parser``. The same value
        is also printed, as before.
    """
    # The original body referenced an `llm` that was never defined anywhere.
    if llm is None:
        llm = ChatOpenAI(model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"))

    # The image part needs a URL *string*. The previous `{base64_image}` built
    # a Python set, which is not a valid value for "url".
    if base64_image.startswith(("data:", "http://", "https://")):
        image_url = base64_image
    else:
        image_url = f"data:image/jpeg;base64,{base64_image}"

    # Render the textual instructions first, then attach the image as a real
    # multimodal content part. Feeding the message list through the text
    # template would only embed its repr() in the prompt.
    instructions = prompt.format(query=system_text)
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": instructions},
                {"type": "image_url", "image_url": {"url": image_url}},
            ]
        )
    ]

    # Pipeline: chat model -> JSON parser.
    chain = llm | parser
    response = chain.invoke(messages)
    print(response)
    return response
if __name__ == "__main__":
    # Manual smoke test: load a local JPEG, base64-encode it and run the
    # analysis on it.
    with open("test.jpg", "rb") as image_file:
        raw_bytes = image_file.read()
    base64_image = base64.b64encode(raw_bytes).decode("utf-8")
    get_response(base64_image)