# Reference: https://docs.siliconflow.cn/cn/userguide/capabilities/vision
import base64
import io
import json
import os
from typing import Optional

from openai import OpenAI
from PIL import Image
# Initialise the OpenAI-compatible client pointed at the SiliconFlow endpoint.
# The key is taken from the SILICONFLOW_API_KEY environment variable so that a
# real secret never has to be committed; when the variable is unset the
# original placeholder is used, which must then be replaced by hand.
client = OpenAI(
    api_key=os.environ.get("SILICONFLOW_API_KEY", "sk-**********"),
    base_url="https://api.siliconflow.cn/v1",
)
def convert_image_to_webp_base64(input_image_path: str) -> Optional[str]:
    """Re-encode a local image as WebP and return it as a Base64 string.

    The image is opened with Pillow, saved into an in-memory buffer as WebP
    at quality 85, and the resulting bytes are Base64-encoded.

    Args:
        input_image_path: Path of the image file to convert.

    Returns:
        The Base64 text (UTF-8 ``str``) of the WebP-encoded image, or
        ``None`` if the image could not be opened or encoded. The error is
        printed before ``None`` is returned.
    """
    try:
        with Image.open(input_image_path) as img:
            webp_buffer = io.BytesIO()
            # quality=85 trades a smaller payload against image sharpness.
            img.save(webp_buffer, format='WEBP', quality=85)
    except Exception as e:
        # Deliberately broad: any failure while opening or encoding is
        # reported and signalled to the caller through the None return.
        print(f"图片转换错误: {e}")
        return None
    return base64.b64encode(webp_buffer.getvalue()).decode('utf-8')
# 1. Convert the local image to a Base64-encoded WebP payload.
input_image_path = "7125e2e3.jpeg"  # replace with the real image path
base64_image = convert_image_to_webp_base64(input_image_path)
if not base64_image:
    print("图片转换失败,请检查路径和格式")
    # Stop with a non-zero status so callers/shells can detect the failure;
    # SystemExit is used directly because the exit() helper is only
    # installed by the site module.
    raise SystemExit(1)

# 2. Create the streaming chat-completion request (one image + one prompt).
response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        # Data URL declaring the WebP MIME type.
                        "url": f"data:image/webp;base64,{base64_image}",
                        # Image detail level passed through to the provider.
                        "detail": "high",
                    },
                },
                {
                    "type": "text",
                    "text": "使用ocr识别图片内容并输出",  # replace with your prompt
                },
            ],
        }
    ],
    stream=True,
    max_tokens=1000,  # upper bound on the response length
)

# 3. Consume the stream: echo each text fragment as it arrives and collect
#    the fragments so the complete response is available afterwards.
print("模型响应:")
response_parts = []
for chunk in response:
    # A chunk may arrive with an empty choices list; indexing it would
    # raise IndexError, so skip such chunks.
    if not chunk.choices:
        continue
    text_chunk = chunk.choices[0].delta.content
    if text_chunk:
        print(text_chunk, end='', flush=True)
        response_parts.append(text_chunk)
full_response = "".join(response_parts)
print("\n\n完整响应已接收")