"""Send a local image to an OpenAI chat model and parse its JSON reply."""

import base64
import os
from typing import Any

from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

try:
    from langchain.output_parsers import JsonOutputParser
except ImportError:
    # JsonOutputParser is documented under langchain_core.output_parsers;
    # fall back to it when the `langchain` package does not re-export it.
    from langchain_core.output_parsers import JsonOutputParser


class ImageDescription(BaseModel):
    """Structured image description handed to the JSON output parser."""

    # The `description` strings are runtime text that ends up in the parser's
    # format instructions, so they are kept verbatim.
    objects: list[str] = Field(description="图片里出现的主要物体")
    scene: str = Field(description="场景描述")
    mood: str = Field(description="整体氛围")


parser = JsonOutputParser(pydantic_object=ImageDescription)

# Literal braces of the JSON example are doubled so that the template engine
# treats them as text; `{query}` is the only real placeholder.
# NOTE(review): `format_instructions` is supplied as a partial variable but is
# never referenced by the template, and the JSON example below does not match
# the ImageDescription schema -- confirm which output shape is intended.
prompt = PromptTemplate(
    template="""你是一个图像分析助手。请根据输入的图片内容和文字说明,
输出符合下列 JSON schema 的结果:
{{
  "moisture_content": 12.5,
  "cocoon_weight": 15.2,
  "defective_pupa_count": 3,
  "fresh_shell_weight": 8.7,
  "sample_count": 50,
  "net_weight_total": 760,
  "evaluator": "张三",
  "reviewer": "李四",
  "barcode": "123456789012"
}}

输入内容:
{query}
""",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# The script previously used `llm` without defining it (NameError at call
# time). NOTE(review): the default model name is an assumption -- any
# image-capable chat model should work; override it through OPENAI_MODEL.
llm = ChatOpenAI(model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"))


def get_response(
    base64_image: str,
    system_text: str = "请分析这张图片并输出JSON结果",
    *,
    mime_type: str = "image/jpeg",
) -> Any:
    """Ask the chat model to analyse an image and return the parsed JSON.

    Args:
        base64_image: Base64-encoded image bytes, without a ``data:`` prefix.
        system_text: Instruction text substituted into the ``{query}`` slot
            of the module-level ``prompt``.
        mime_type: MIME type written into the data URL for the image.

    Returns:
        Whatever ``parser`` produces from the model reply. The value is also
        printed, as the original script did.
    """
    # Wrap the raw base64 payload in a data URL string. Passing a set (or the
    # bare payload) under "url" is not a valid image reference.
    image_url = f"data:{mime_type};base64,{base64_image}"

    # Render the text prompt first and place it next to the image inside one
    # multimodal human message. Feeding the message list through the string
    # PromptTemplate would stringify it, so the model would only see text.
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": prompt.format(query=system_text)},
                {"type": "image_url", "image_url": {"url": image_url}},
            ]
        )
    ]

    # Chain the model with the parser: llm -> parser.
    chain = llm | parser
    response = chain.invoke(messages)
    print(response)
    return response


if __name__ == "__main__":
    # Encode the local image as base64 before sending it to the model.
    with open("test.jpg", "rb") as f:
        base64_image = base64.b64encode(f.read()).decode("utf-8")
    get_response(base64_image)