现在的工作单位经常搞一些意义不明的绩效工作:每个月都会拿来一万多张图片,要求我们逐张打开,按照图片上的名字给文件改名,以方便公司领导检查和搜索调阅。图片上的内容有数字和文字:数字没有特殊意义,不做识别;文字有手写的和手机后期添加的水印字体两种。主要的需求就是批量识别图片上的文字,并同时完成批量改名。在数字化办公、档案管理、摄影工作流等场景中,也经常需要对大量图片文件进行命名。例如:
- 教育领域:批量识别试卷、作业照片中的标题或学号
- 医疗领域:识别 X 光片、病理报告图片中的患者信息
- 电商行业:自动提取商品图片中的 SKU、型号等信息
- 档案管理:将纸质文档扫描件按内容命名归档
通过 OCR 技术自动识别图片文字并批量改名,可以极大提高工作效率,减少人工错误。
## 界面设计
基于 Qt 的界面设计可以包含以下核心组件:
- 文件选择区:包含 "添加图片" 和 "清空列表" 按钮,以及文件拖放功能
- 图片预览区:网格布局展示待处理图片缩略图,支持选中操作
- OCR 识别设置区:
  - 腾讯云 API 密钥输入框(SecretId/SecretKey)
  - 识别语言选择下拉框
  - 识别模式选择(通用文字 / 表格 / 手写体等)
- 预览和操作区:
  - 原文件名与识别结果对比列表
  - 自定义命名规则设置(前缀、后缀、编号格式等)
  - "预览重命名" 和 "执行重命名" 按钮
- 状态栏:显示处理进度、成功 / 失败数量和日志信息
## 实现方案
下面是基于 PyQt5 和腾讯云 OCR 的实现代码:
import base64
import json
import os
import sys
import uuid

from PIL import Image, ImageQt
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QSize, QUrl
from PyQt5.QtGui import QPixmap, QIcon, QFont
from PyQt5.QtWidgets import (QApplication, QMainWindow, QPushButton, QVBoxLayout,
                             QHBoxLayout, QFileDialog, QLabel, QWidget, QListWidget,
                             QProgressBar, QTextEdit, QComboBox, QLineEdit, QGridLayout,
                             QGroupBox, QCheckBox, QMessageBox, QSplitter, QFormLayout,
                             QScrollArea)
from tencentcloud.common import credential
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.ocr.v20181119 import ocr_client, models
class OCRThread(QThread):
    """Background worker that sends each image to Tencent Cloud OCR.

    Running the requests in a separate thread keeps the UI responsive.

    Signals:
        progress_updated(int): percentage (0-100) of files handled so far.
        ocr_completed(int, str, str): index, file path and recognized text.
        ocr_failed(int, str, str): index, file path and error message.
        all_finished(): emitted once after every file has been handled.
    """

    progress_updated = pyqtSignal(int)
    ocr_completed = pyqtSignal(int, str, str)
    ocr_failed = pyqtSignal(int, str, str)
    all_finished = pyqtSignal()

    _DEFAULT_ACTION = "GeneralBasicOCR"
    # Names callers historically passed in, mapped to what the Tencent OCR API
    # is believed to call these actions. NOTE(review): confirm against the
    # installed SDK version; the original name is still tried as a fallback.
    _ACTION_ALIASES = {
        "AccurateBasicOCR": "GeneralAccurateOCR",
        "HandwritingOCR": "GeneralHandwritingOCR",
    }

    def __init__(self, file_list, secret_id, secret_key, ocr_type="GeneralBasicOCR"):
        """Store the work list and credentials.

        Args:
            file_list: iterable of ``(index, file_path)`` pairs.
            secret_id: Tencent Cloud SecretId.
            secret_key: Tencent Cloud SecretKey.
            ocr_type: OCR action name; unknown names fall back to
                ``GeneralBasicOCR``.
        """
        super().__init__()
        # Snapshot the list so later changes made by the UI thread cannot
        # alter what this worker iterates over.
        self.file_list = list(file_list)
        self.secret_id = secret_id
        self.secret_key = secret_key
        self.ocr_type = ocr_type

    def run(self):
        """Recognize every file, reporting each result through signals."""
        try:
            client = self._build_client()
            action = self._resolve_action()
        except Exception as exc:  # thread boundary: report instead of dying silently
            for index, file_path in self.file_list:
                self.ocr_failed.emit(index, file_path, str(exc))
            self.all_finished.emit()
            return

        total = len(self.file_list)
        for done, (index, file_path) in enumerate(self.file_list, start=1):
            try:
                text = self._recognize(client, action, file_path)
            except Exception as exc:  # best effort: one bad file must not stop the batch
                self.ocr_failed.emit(index, file_path, str(exc))
            else:
                self.ocr_completed.emit(index, file_path, text)
            self.progress_updated.emit(int(done / total * 100))
        self.all_finished.emit()

    def _build_client(self):
        """Create the OCR client for the configured credentials."""
        cred = credential.Credential(self.secret_id, self.secret_key)
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        return ocr_client.OcrClient(cred, "ap-guangzhou", client_profile)

    def _resolve_action(self):
        """Return the first candidate action the installed SDK supports."""
        requested = self.ocr_type
        candidates = (
            self._ACTION_ALIASES.get(requested, requested),
            requested,
            self._DEFAULT_ACTION,
        )
        for action in candidates:
            if not isinstance(action, str) or not action:
                continue
            has_request = hasattr(models, f"{action}Request")
            has_method = callable(getattr(ocr_client.OcrClient, action, None))
            if has_request and has_method:
                return action
        return self._DEFAULT_ACTION

    @staticmethod
    def _recognize(client, action, file_path):
        """Run one OCR request and return the detected lines joined by newlines.

        Responses without a ``TextDetections`` list yield an empty string.
        """
        with open(file_path, "rb") as image_file:
            # The API field is named ImageBase64, so send Base64 rather than hex.
            image_base64 = base64.b64encode(image_file.read()).decode("ascii")

        req = getattr(models, f"{action}Request")()
        req.from_json_string(json.dumps({"ImageBase64": image_base64}))
        # Call the per-action client method, which returns a response model
        # that provides to_json_string().
        resp = getattr(client, action)(req)
        result = json.loads(resp.to_json_string())

        detections = result.get("TextDetections") or []
        lines = [item.get("DetectedText") or "" for item in detections]
        return "\n".join(lines).strip()
class RenameThread(QThread):
    """Background worker that renames files one by one.

    Signals:
        progress_updated(int): percentage (0-100) of entries handled so far.
        rename_completed(str, str, bool, str): source path, target path,
            success flag and error message (empty on success).
        all_finished(): emitted once after every entry has been handled.
    """

    progress_updated = pyqtSignal(int)
    rename_completed = pyqtSignal(str, str, bool, str)
    all_finished = pyqtSignal()

    def __init__(self, rename_list, dry_run=True):
        """Store the work list.

        Args:
            rename_list: iterable of ``(src, dst)`` path pairs.
            dry_run: when true, nothing is renamed and every entry is
                reported as successful.
        """
        super().__init__()
        # Snapshot so the UI thread can rebuild its own list meanwhile.
        self.rename_list = list(rename_list)
        self.dry_run = dry_run

    def run(self):
        """Process every pair, reporting each outcome through signals."""
        total = len(self.rename_list)
        for done, (src, dst) in enumerate(self.rename_list, start=1):
            success = False
            error_msg = ""
            try:
                if not self.dry_run:
                    self._rename(src, dst)
                success = True
            except Exception as exc:  # thread boundary: report, keep going
                error_msg = str(exc)
            self.rename_completed.emit(src, dst, success, error_msg)
            self.progress_updated.emit(int(done / total * 100))
        self.all_finished.emit()

    @staticmethod
    def _rename(src, dst):
        """Rename ``src`` to ``dst`` without clobbering a different file.

        ``os.rename`` silently replaces an existing target on POSIX, which
        would destroy a file when two images map to the same new name.

        Raises:
            FileExistsError: ``dst`` exists and is not the same file as ``src``.
            OSError: any error raised by the underlying rename.
        """
        # samefile() lets case-only renames through on case-insensitive
        # file systems, where dst "exists" but is src itself.
        if os.path.exists(dst) and not os.path.samefile(src, dst):
            raise FileExistsError(f"目标文件已存在: {dst}")
        os.rename(src, dst)
class ImageWidget(QWidget):
    """Thumbnail tile showing one image and its file name.

    Signals:
        selected(int, bool): the tile's index and its new selection state,
            emitted on every mouse press.
    """

    selected = pyqtSignal(int, bool)

    THUMBNAIL_SIZE = 120

    def __init__(self, index, file_path, parent=None):
        """Build the tile for ``file_path`` and load its thumbnail.

        Args:
            index: identifier reported back through ``selected``.
            file_path: path of the image to display.
            parent: optional parent widget.
        """
        super().__init__(parent)
        self.index = index
        self.file_path = file_path
        self.is_selected = False

        layout = QVBoxLayout(self)
        layout.setContentsMargins(2, 2, 2, 2)

        size = self.THUMBNAIL_SIZE
        self.image_label = QLabel()
        self.image_label.setAlignment(Qt.AlignCenter)
        self.image_label.setMinimumSize(size, size)
        self.image_label.setMaximumSize(size, size)
        self.image_label.setStyleSheet("border: 1px solid #ccc;")

        self.name_label = QLabel(os.path.basename(file_path))
        self.name_label.setAlignment(Qt.AlignCenter)
        self.name_label.setWordWrap(True)
        self.name_label.setMaximumWidth(size)

        layout.addWidget(self.image_label)
        layout.addWidget(self.name_label)
        self.load_image()

    def load_image(self):
        """Show a scaled thumbnail, or a failure text if the file is unreadable."""
        pixmap = QPixmap(self.file_path)
        # QPixmap reports an unreadable file through isNull() rather than by
        # raising, so the failure text has to be set here.
        if pixmap.isNull():
            self.image_label.setText("加载失败")
            return
        size = self.THUMBNAIL_SIZE
        self.image_label.setPixmap(
            pixmap.scaled(size, size, Qt.KeepAspectRatio, Qt.SmoothTransformation))

    def mousePressEvent(self, event):
        """Toggle the selection state and notify listeners."""
        self.is_selected = not self.is_selected
        self.update_style()
        self.selected.emit(self.index, self.is_selected)

    def update_style(self):
        """Apply the highlighted or the transparent background."""
        if self.is_selected:
            self.setStyleSheet("background-color: #add8e6; border-radius: 4px;")
        else:
            self.setStyleSheet("background-color: transparent;")
class MainWindow(QMainWindow):
    """Main window: pick images, run OCR on them, preview and apply renames."""

    GRID_COLUMNS = 5
    MAX_OCR_NAME_LENGTH = 20
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif")
    # Characters Windows rejects in file names; this also covers "/" on POSIX.
    _INVALID_NAME_CHARS = '<>:"/\\|?*'

    def __init__(self):
        """Initialise state and build the UI."""
        super().__init__()
        self.setWindowTitle("图片OCR批量改名工具")
        self.setMinimumSize(1024, 768)
        self.image_files = []    # list of (index, file_path)
        self.ocr_results = {}    # {index: recognized text}
        self.rename_list = []    # list of (src_path, dst_path) shown in the preview
        self.renamed_paths = {}  # {src_path: dst_path} for renames that succeeded
        self.next_index = 0      # next unused image index
        self.rename_rules = {
            "prefix": "",
            "suffix": "",
            "use_ocr": True,
            "add_number": False,
            "number_start": 1,
            "number_digits": 3
        }
        self.setAcceptDrops(True)
        self.init_ui()

    def init_ui(self):
        """Create all widgets and assemble them in a vertical splitter."""
        main_splitter = QSplitter(Qt.Vertical)

        # Top: file operations above a scrollable thumbnail grid.
        top_widget = QWidget()
        top_layout = QVBoxLayout(top_widget)

        file_op_area = QWidget()
        file_op_layout = QHBoxLayout(file_op_area)
        self.add_files_btn = QPushButton("添加图片")
        self.add_files_btn.clicked.connect(self.add_files)
        self.clear_files_btn = QPushButton("清空列表")
        self.clear_files_btn.clicked.connect(self.clear_files)
        self.file_count_label = QLabel("已选择: 0 张图片")
        file_op_layout.addWidget(self.add_files_btn)
        file_op_layout.addWidget(self.clear_files_btn)
        file_op_layout.addStretch()
        file_op_layout.addWidget(self.file_count_label)

        self.preview_area = QWidget()
        self.preview_layout = QGridLayout(self.preview_area)
        self.preview_layout.setSpacing(5)
        self.preview_layout.setAlignment(Qt.AlignLeft | Qt.AlignTop)

        # The scroll area wraps the grid so large batches stay navigable.
        scroll_area = QScrollArea()
        scroll_area.setWidgetResizable(True)
        scroll_area.setWidget(self.preview_area)

        top_layout.addWidget(file_op_area)
        top_layout.addWidget(scroll_area)

        # Middle: OCR settings.
        ocr_settings = QGroupBox("OCR识别设置")
        ocr_layout = QFormLayout()
        self.secret_id_edit = QLineEdit()
        self.secret_id_edit.setPlaceholderText("输入腾讯云SecretId")
        self.secret_key_edit = QLineEdit()
        self.secret_key_edit.setPlaceholderText("输入腾讯云SecretKey")
        self.secret_key_edit.setEchoMode(QLineEdit.Password)
        self.ocr_type_combo = QComboBox()
        self.ocr_type_combo.addItems([
            "通用文字识别",
            "高精度文字识别",
            "手写文字识别",
            "数字识别",
            "身份证识别"
        ])
        self.ocr_type_mapping = {
            "通用文字识别": "GeneralBasicOCR",
            "高精度文字识别": "AccurateBasicOCR",
            "手写文字识别": "HandwritingOCR",
            "数字识别": "NumbersOCR",
            "身份证识别": "IDCardOCR"
        }
        self.ocr_btn = QPushButton("开始识别")
        self.ocr_btn.clicked.connect(self.start_ocr)
        ocr_layout.addRow("SecretId:", self.secret_id_edit)
        ocr_layout.addRow("SecretKey:", self.secret_key_edit)
        ocr_layout.addRow("识别类型:", self.ocr_type_combo)
        ocr_layout.addRow(self.ocr_btn)
        ocr_settings.setLayout(ocr_layout)

        # Bottom: rename rules and preview.
        rename_area = QWidget()
        rename_layout = QVBoxLayout(rename_area)

        rules_group = QGroupBox("重命名规则")
        rules_layout = QFormLayout()
        self.prefix_edit = QLineEdit()
        self.prefix_edit.textChanged.connect(self.update_rename_preview)
        self.suffix_edit = QLineEdit()
        self.suffix_edit.textChanged.connect(self.update_rename_preview)
        self.use_ocr_check = QCheckBox("使用OCR识别结果")
        self.use_ocr_check.setChecked(True)
        self.use_ocr_check.stateChanged.connect(self.update_rename_preview)
        self.add_number_check = QCheckBox("添加序号")
        self.add_number_check.stateChanged.connect(self.update_number_options)
        self.add_number_check.stateChanged.connect(self.update_rename_preview)
        self.number_start_edit = QLineEdit("1")
        self.number_start_edit.setMaximumWidth(60)
        self.number_start_edit.textChanged.connect(self.update_rename_preview)
        self.number_digits_edit = QLineEdit("3")
        self.number_digits_edit.setMaximumWidth(60)
        self.number_digits_edit.textChanged.connect(self.update_rename_preview)
        number_layout = QHBoxLayout()
        number_layout.addWidget(QLabel("起始序号:"))
        number_layout.addWidget(self.number_start_edit)
        number_layout.addWidget(QLabel("位数:"))
        number_layout.addWidget(self.number_digits_edit)
        number_layout.addStretch()
        rules_layout.addRow("前缀:", self.prefix_edit)
        rules_layout.addRow("后缀:", self.suffix_edit)
        rules_layout.addRow(self.use_ocr_check)
        rules_layout.addRow(self.add_number_check)
        rules_layout.addRow(number_layout)
        rules_group.setLayout(rules_layout)

        preview_group = QGroupBox("重命名预览")
        preview_layout = QVBoxLayout()
        self.preview_list = QListWidget()
        self.preview_list.setAlternatingRowColors(True)
        self.dry_run_btn = QPushButton("预览重命名")
        self.dry_run_btn.clicked.connect(self.preview_rename)
        self.rename_btn = QPushButton("执行重命名")
        self.rename_btn.clicked.connect(self.execute_rename)
        self.rename_btn.setEnabled(False)
        btn_layout = QHBoxLayout()
        btn_layout.addWidget(self.dry_run_btn)
        btn_layout.addWidget(self.rename_btn)
        preview_layout.addWidget(self.preview_list)
        preview_layout.addLayout(btn_layout)
        preview_group.setLayout(preview_layout)
        rename_layout.addWidget(rules_group)
        rename_layout.addWidget(preview_group)

        # Status bar with a progress indicator.
        self.statusBar().showMessage("就绪")
        self.progress_bar = QProgressBar()
        self.progress_bar.setMaximumWidth(200)
        self.statusBar().addPermanentWidget(self.progress_bar)

        main_splitter.addWidget(top_widget)
        main_splitter.addWidget(ocr_settings)
        main_splitter.addWidget(rename_area)
        self.setCentralWidget(main_splitter)
        main_splitter.setSizes([250, 150, 350])

        # Make the number fields match the initially unchecked box.
        self.update_number_options(self.add_number_check.checkState())

    def dragEnterEvent(self, event):
        """Accept drags that carry file URLs."""
        if event.mimeData().hasUrls():
            event.acceptProposedAction()
        else:
            event.ignore()

    def dropEvent(self, event):
        """Add dropped local image files to the list."""
        paths = [
            url.toLocalFile()
            for url in event.mimeData().urls()
            if url.isLocalFile()
        ]
        images = [
            path for path in paths
            if os.path.isfile(path) and path.lower().endswith(self.IMAGE_EXTENSIONS)
        ]
        if images:
            self._add_paths(images)
            event.acceptProposedAction()
        else:
            event.ignore()

    def add_files(self):
        """Ask the user for image files and add them to the list."""
        files, _ = QFileDialog.getOpenFileNames(
            self, "选择图片文件", "", "图片文件 (*.png *.jpg *.jpeg *.bmp *.gif);;所有文件 (*)"
        )
        if files:
            self._add_paths(files)

    def _add_paths(self, paths):
        """Register new paths, skipping ones that are already listed."""
        known = {path for _, path in self.image_files}
        added = 0
        for file_path in paths:
            if file_path in known:
                continue
            known.add(file_path)
            # A dedicated counter keeps indices unique and contiguous even
            # when duplicates in the selection are skipped.
            index = self.next_index
            self.next_index += 1
            self.image_files.append((index, file_path))
            image_widget = ImageWidget(index, file_path)
            image_widget.selected.connect(self.image_selected)
            row, col = divmod(len(self.image_files) - 1, self.GRID_COLUMNS)
            self.preview_layout.addWidget(image_widget, row, col)
            added += 1
        self.file_count_label.setText(f"已选择: {len(self.image_files)} 张图片")
        self.statusBar().showMessage(f"已添加 {added} 张图片")

    def clear_files(self):
        """Forget all images, OCR results and pending renames."""
        self.image_files = []
        self.ocr_results = {}
        self.rename_list = []
        self.renamed_paths = {}
        self.next_index = 0
        while self.preview_layout.count():
            item = self.preview_layout.takeAt(0)
            widget = item.widget()
            if widget:
                widget.deleteLater()
        self.file_count_label.setText(f"已选择: {len(self.image_files)} 张图片")
        self.statusBar().showMessage("已清空图片列表")
        self.preview_list.clear()
        self.rename_btn.setEnabled(False)

    def image_selected(self, index, selected):
        """Hook for multi-selection handling; currently does nothing."""
        pass

    def update_number_options(self, state):
        """Enable the numbering fields only while numbering is switched on."""
        enabled = state == Qt.Checked
        self.number_start_edit.setEnabled(enabled)
        self.number_digits_edit.setEnabled(enabled)

    def start_ocr(self):
        """Validate the inputs and start the OCR worker thread."""
        if not self.image_files:
            QMessageBox.warning(self, "警告", "请先添加图片文件")
            return
        secret_id = self.secret_id_edit.text().strip()
        secret_key = self.secret_key_edit.text().strip()
        if not secret_id or not secret_key:
            QMessageBox.warning(self, "警告", "请输入腾讯云API密钥")
            return
        ocr_type_text = self.ocr_type_combo.currentText()
        ocr_type = self.ocr_type_mapping.get(ocr_type_text, "GeneralBasicOCR")
        self.statusBar().showMessage("正在进行OCR识别...")
        self.progress_bar.setValue(0)
        self.ocr_btn.setEnabled(False)
        self.rename_btn.setEnabled(False)
        # Clearing mid-run would reset the indices that results arriving
        # later still refer to, so block it until the worker finishes.
        self.clear_files_btn.setEnabled(False)
        self.ocr_thread = OCRThread(list(self.image_files), secret_id, secret_key, ocr_type)
        self.ocr_thread.progress_updated.connect(self.update_progress)
        self.ocr_thread.ocr_completed.connect(self.ocr_result_ready)
        self.ocr_thread.ocr_failed.connect(self.ocr_result_failed)
        self.ocr_thread.all_finished.connect(self.ocr_all_finished)
        self.ocr_thread.start()

    def update_progress(self, value):
        """Show the worker's progress percentage."""
        self.progress_bar.setValue(value)

    def ocr_result_ready(self, index, file_path, text):
        """Store one OCR result and report it in the status bar."""
        self.ocr_results[index] = text
        file_name = os.path.basename(file_path)
        self.statusBar().showMessage(f"已识别: {file_name}")

    def ocr_result_failed(self, index, file_path, error):
        """Report one OCR failure in the status bar."""
        file_name = os.path.basename(file_path)
        self.statusBar().showMessage(f"识别失败: {file_name} - {error}")

    def ocr_all_finished(self):
        """Re-enable the controls and refresh the preview after OCR."""
        self.ocr_btn.setEnabled(True)
        self.clear_files_btn.setEnabled(True)
        self.statusBar().showMessage(f"OCR识别完成,共识别 {len(self.ocr_results)} 张图片")
        self.update_rename_preview()

    @classmethod
    def _sanitize_for_filename(cls, text):
        """Strip whitespace and characters that are unsafe in file names.

        Non-ASCII text (for example Chinese names) is kept; only whitespace,
        non-printable characters and reserved characters are removed.
        """
        compact = "".join(text.split())
        cleaned = "".join(
            ch for ch in compact
            if ch.isprintable() and ch not in cls._INVALID_NAME_CHARS
        )
        # Windows does not allow names that end with a dot.
        return cleaned.strip(".")

    def generate_new_name(self, index, original_name):
        """Build the new file name for one image from the current rules.

        Args:
            index: image index, used to look up the OCR text and to number.
            original_name: current base name including the extension.

        Returns:
            The new name with the original extension; the original name is
            returned unchanged when no rule contributes any text.
        """
        prefix = self.prefix_edit.text()
        suffix = self.suffix_edit.text()
        use_ocr = self.use_ocr_check.isChecked()
        add_number = self.add_number_check.isChecked()
        base_name, ext = os.path.splitext(original_name)

        parts = []
        if prefix:
            parts.append(prefix)
        if use_ocr and index in self.ocr_results:
            safe_text = self._sanitize_for_filename(self.ocr_results[index])
            if safe_text:
                parts.append(safe_text[:self.MAX_OCR_NAME_LENGTH])
        if add_number:
            try:
                start = int(self.number_start_edit.text())
                digits = int(self.number_digits_edit.text())
                parts.append(f"{start + index:0{digits}d}")
            except ValueError:
                # Non-numeric or unusable settings: fall back to the raw index.
                parts.append(f"_{index}")
        if suffix:
            parts.append(suffix)

        new_name = "".join(parts) or base_name
        return new_name + ext

    @staticmethod
    def _unique_target(directory, name, taken):
        """Return a target path in ``directory`` not yet present in ``taken``.

        A ``_2``, ``_3``... counter is inserted before the extension when the
        name is already planned for another file. Comparison ignores case so
        the plan is also safe on case-insensitive file systems.
        """
        stem, ext = os.path.splitext(name)
        candidate = os.path.join(directory, name)
        counter = 2
        while candidate.lower() in taken:
            candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
            counter += 1
        taken.add(candidate.lower())
        return candidate

    def update_rename_preview(self):
        """Rebuild the rename plan and the preview list from the current rules."""
        self.preview_list.clear()
        # Always reset, so a stale plan is never executed after the inputs
        # that produced it are gone.
        self.rename_list = []
        if not self.image_files or not self.ocr_results:
            return
        taken = set()
        for index, file_path in self.image_files:
            if index not in self.ocr_results:
                continue
            original_name = os.path.basename(file_path)
            target = self._unique_target(
                os.path.dirname(file_path),
                self.generate_new_name(index, original_name),
                taken,
            )
            self.rename_list.append((file_path, target))
            self.preview_list.addItem(f"{original_name} → {os.path.basename(target)}")

    def preview_rename(self):
        """Confirm the preview and unlock the rename button."""
        if not self.rename_list:
            QMessageBox.warning(self, "警告", "没有可预览的重命名文件")
            return
        self.rename_btn.setEnabled(True)
        QMessageBox.information(self, "预览完成",
                                f"已预览 {len(self.rename_list)} 个文件的重命名操作")

    def execute_rename(self):
        """Ask for confirmation, then start the rename worker thread."""
        if not self.rename_list:
            QMessageBox.warning(self, "警告", "没有可执行的重命名文件")
            return
        reply = QMessageBox.question(
            self, "确认重命名",
            f"确定要对 {len(self.rename_list)} 个文件执行重命名操作吗?\n此操作不可撤销!",
            QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply != QMessageBox.Yes:
            return
        self.statusBar().showMessage("正在执行重命名...")
        self.progress_bar.setValue(0)
        self.rename_btn.setEnabled(False)
        self.renamed_paths = {}
        self.rename_thread = RenameThread(list(self.rename_list), dry_run=False)
        self.rename_thread.progress_updated.connect(self.update_progress)
        self.rename_thread.rename_completed.connect(self.rename_result_ready)
        self.rename_thread.all_finished.connect(self.rename_all_finished)
        self.rename_thread.start()

    def rename_result_ready(self, src, dst, success, error_msg):
        """Record one rename outcome and report it in the status bar."""
        if success:
            self.renamed_paths[src] = dst
            self.statusBar().showMessage(f"已重命名: {os.path.basename(src)}")
        else:
            self.statusBar().showMessage(f"重命名失败: {os.path.basename(src)} - {error_msg}")

    def rename_all_finished(self):
        """Point the file list and thumbnails at the renamed files."""
        self.rename_btn.setEnabled(False)
        self.statusBar().showMessage(f"重命名完成,共处理 {len(self.rename_list)} 个文件")
        # Only paths whose rename succeeded change; failed or skipped files
        # keep their old path.
        self.image_files = [
            (index, self.renamed_paths.get(path, path))
            for index, path in self.image_files
        ]
        for position in range(self.preview_layout.count()):
            widget = self.preview_layout.itemAt(position).widget()
            if isinstance(widget, ImageWidget) and widget.file_path in self.renamed_paths:
                widget.file_path = self.renamed_paths[widget.file_path]
                widget.name_label.setText(os.path.basename(widget.file_path))
        self.update_rename_preview()
if __name__ == "__main__":
    # Create the application object first: Qt expects it to exist before
    # other GUI objects, such as fonts, are constructed.
    app = QApplication(sys.argv)
    # Use a CJK-capable font so the Chinese UI text is displayed correctly.
    app.setFont(QFont("SimHei"))
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
## 代码说明
上述代码实现了一个完整的图片 OCR 批量改名工具,主要包含以下模块:
- OCR 处理模块:
  - 使用腾讯云 OCR SDK 实现文字识别
  - 支持多种 OCR 类型(通用、高精度、手写体等)
  - 单独线程处理 OCR 请求,避免 UI 卡顿
- 文件管理模块:
  - 支持文件选择和拖放
  - 图片预览网格布局
  - 文件批量重命名功能
- UI 界面模块:
  - 采用 PyQt5 构建现代界面
  - 多区域布局设计,功能分区明确
  - 重命名规则自定义设置
- 多线程处理:
  - OCR 识别线程
  - 文件重命名线程
  - 进度显示和状态更新
## 优化建议
- 性能优化:
  - 实现 OCR 结果缓存机制,避免重复识别
  - 支持多线程并行 OCR 处理(需注意 API 调用频率限制)
  - 大图片预处理(缩放、压缩)减少 OCR 处理时间
- 功能增强:
  - 添加文件过滤和排序功能
  - 支持正则表达式替换
  - 添加预览图片放大查看功能
  - 增加更多 OCR 识别选项(如表格识别、车牌识别等)
- 用户体验:
  - 实现撤销 / 重做功能
  - 添加配置保存和加载功能
  - 支持拖放调整文件处理顺序
  - 增强错误处理和提示信息
- 安全性:
  - 实现 API 密钥加密存储
  - 添加操作确认和恢复机制
  - 实现文件备份功能,防止误操作
这个工具可以显著提高需要处理大量图片命名工作的效率,特别是在需要基于图片内容进行命名的场景中。通过合理的优化,可以进一步提升其性能和用户体验。