|
|
|
|
@ -1,17 +1,45 @@
|
|
|
|
|
# Dependencies: pip install python-pptx Pillow imagehash
|
|
|
|
|
from pptx import Presentation
|
|
|
|
|
from pptx.util import Inches
|
|
|
|
|
from pptx.dml.color import RGBColor
|
|
|
|
|
from PIL import Image
|
|
|
|
|
import imagehash
|
|
|
|
|
import os
|
|
|
|
|
from config import Config
|
|
|
|
|
|
|
|
|
|
def is_similar_image(image_path, reference_image_path, similarity_threshold=0.7):
|
|
|
|
|
def get_copyright_image_paths(copyright_dir=None):
    """Return the paths of the reference copyright images.

    Args:
        copyright_dir: Directory to scan. When omitted, the directory
            ``../assets/copyright-img`` relative to this module's own
            directory is used.

    Returns:
        A list of paths (``copyright_dir`` joined with each file name) for
        every regular file whose name ends, case-insensitively, with
        ``.png``, ``.jpg``, ``.jpeg``, ``.gif`` or ``.bmp``. The list is
        sorted by file name so the result is deterministic.

    Raises:
        OSError: If the directory cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    if copyright_dir is None:
        # Build the path to assets/copyright-img relative to this module.
        copyright_dir = os.path.join(
            os.path.dirname(__file__), "..", "assets", "copyright-img"
        )

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # os.listdir() yields entries in arbitrary order; sort them so callers
    # always compare against the reference images in the same sequence.
    candidates = (
        os.path.join(copyright_dir, filename)
        for filename in sorted(os.listdir(copyright_dir))
        if filename.lower().endswith(image_extensions)
    )

    # A sub-directory may carry an image-like name; only real files can be
    # opened as images later on.
    return [path for path in candidates if os.path.isfile(path)]
|
|
|
|
|
|
|
|
|
|
def convert_image_to_rgba(image):
    """Return *image* in RGBA mode when it is a palette image.

    Only images whose ``mode`` is ``'P'`` are converted (via
    ``image.convert('RGBA')``); an image in any other mode is returned
    unchanged, as the very same object.
    """
    is_palette = image.mode == 'P'
    return image.convert('RGBA') if is_palette else image
|
|
|
|
|
|
|
|
|
|
def is_similar_image(image_path, reference_image_path, similarity_threshold=0.95):
|
|
|
|
|
"""
|
|
|
|
|
检查两张图片之间的相似度
|
|
|
|
|
"""
|
|
|
|
|
img1 = Image.open(image_path)
|
|
|
|
|
img2 = Image.open(reference_image_path)
|
|
|
|
|
|
|
|
|
|
# 转换图像为 RGBA 格式
|
|
|
|
|
img1 = convert_image_to_rgba(img1)
|
|
|
|
|
img2 = convert_image_to_rgba(img2)
|
|
|
|
|
|
|
|
|
|
# 计算哈希值
|
|
|
|
|
hash1 = imagehash.average_hash(img1)
|
|
|
|
|
hash2 = imagehash.average_hash(img2)
|
|
|
|
|
@ -26,6 +54,11 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
|
|
|
|
|
|
|
|
|
|
# 使用一个列表来记录要删除的幻灯片索引
|
|
|
|
|
slides_to_delete = []
|
|
|
|
|
change_text = False
|
|
|
|
|
# 定义要替换的关键字数组
|
|
|
|
|
replace_text_keywords = ["第一PPT"]
|
|
|
|
|
# 定义要删除的关键字符数组
|
|
|
|
|
del_text_keywords = ["Speaker name and title", "OfficePLUS", "Presenter name", "www.officeplus.cn"]
|
|
|
|
|
|
|
|
|
|
# 遍历每一页幻灯片
|
|
|
|
|
for index, slide in enumerate(prs.slides):
|
|
|
|
|
@ -41,7 +74,7 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
|
|
|
|
|
# 检查相似度
|
|
|
|
|
for copyright_image in copyright_image_paths:
|
|
|
|
|
if is_similar_image(temp_image_path, copyright_image):
|
|
|
|
|
print(f"有相似的图片,在第{index + 1}页")
|
|
|
|
|
print(f"'{os.path.basename(pptx_path)}'有相似的图片'{os.path.basename(copyright_image)}',在第{index + 1}页")
|
|
|
|
|
# 删除该形状
|
|
|
|
|
slide.shapes._spTree.remove(shape._element)
|
|
|
|
|
|
|
|
|
|
@ -51,20 +84,69 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
|
|
|
|
|
# 删除临时图片
|
|
|
|
|
os.remove(temp_image_path)
|
|
|
|
|
|
|
|
|
|
# 检查形状是否为文本框
|
|
|
|
|
if shape.has_text_frame:
|
|
|
|
|
text_content = shape.text
|
|
|
|
|
|
|
|
|
|
# 检查文本内容是否包含要删除的关键字符数组中的任意字符
|
|
|
|
|
if any(char in text_content for char in del_text_keywords):
|
|
|
|
|
if not change_text:
|
|
|
|
|
change_text = True
|
|
|
|
|
print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页删除文本框,内容为: '{text_content}'")
|
|
|
|
|
# 删除文本框
|
|
|
|
|
slide.shapes._spTree.remove(shape._element)
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
# 判断并替换文本
|
|
|
|
|
for keyword in replace_text_keywords:
|
|
|
|
|
if keyword in text_content:
|
|
|
|
|
if not change_text:
|
|
|
|
|
change_text = True
|
|
|
|
|
print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页替换文本 '{text_content}' 为 '创意素材'")
|
|
|
|
|
|
|
|
|
|
# 记录原字体样式
|
|
|
|
|
original_font = shape.text_frame.paragraphs[0].runs[0].font
|
|
|
|
|
|
|
|
|
|
new_text = text_content.replace(keyword, "创意素材")
|
|
|
|
|
shape.text = new_text
|
|
|
|
|
|
|
|
|
|
# 应用原字体样式
|
|
|
|
|
for paragraph in shape.text_frame.paragraphs:
|
|
|
|
|
for run in paragraph.runs:
|
|
|
|
|
run.font.bold = original_font.bold
|
|
|
|
|
run.font.italic = original_font.italic
|
|
|
|
|
run.font.underline = original_font.underline
|
|
|
|
|
run.font.size = original_font.size
|
|
|
|
|
|
|
|
|
|
# 处理颜色
|
|
|
|
|
if isinstance(original_font.color, RGBColor):
|
|
|
|
|
run.font.color.rgb = original_font.color.rgb
|
|
|
|
|
elif hasattr(original_font.color, 'theme_color'):
|
|
|
|
|
run.font.color.rgb = RGBColor(255, 255, 255)
|
|
|
|
|
else:
|
|
|
|
|
# 处理未识别的颜色类型
|
|
|
|
|
run.font.color.rgb = RGBColor(255, 255, 255)
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
# 删除记录中的幻灯片
|
|
|
|
|
for index in slides_to_delete:
|
|
|
|
|
slides = list(prs.slides._sldIdLst)
|
|
|
|
|
current_slide_to_delete = slides[index]
|
|
|
|
|
prs.slides._sldIdLst.remove(current_slide_to_delete)
|
|
|
|
|
|
|
|
|
|
if(len(slides_to_delete) > 0):
|
|
|
|
|
if(len(slides_to_delete) > 0 or change_text):
|
|
|
|
|
# 保存修改后的 PowerPoint 文件
|
|
|
|
|
prs.save(pptx_path)
|
|
|
|
|
else:
|
|
|
|
|
print(f"{pptx_path}没有版权信息,不需要删除")
|
|
|
|
|
print(f"{pptx_path}没有版权信息或者版权文案,不需要删除替换")
|
|
|
|
|
|
|
|
|
|
# 使用示例
|
|
|
|
|
pptx_file_path = '/Users/minya/Desktop/test/1.pptx' # 输入 PowerPoint 文件路径
|
|
|
|
|
copyright_image_paths = ['/Users/minya/Desktop/test/1.png'] # 版权图片路径列表
|
|
|
|
|
def process_ppt_files(directory):
    """Run copyright removal on every PowerPoint file directly inside *directory*.

    The reference copyright images are looked up once through
    ``get_copyright_image_paths()``. Each entry of ``directory`` whose name
    ends, case-insensitively, with ``.pptx`` or ``.ppt`` is then passed to
    ``remove_copyright_images`` together with that list. Sub-directories are
    not descended into.
    """
    ppt_suffixes = ('.pptx', '.ppt')
    reference_images = get_copyright_image_paths()

    # NOTE(review): python-pptx documents support for .pptx only; confirm
    # that remove_copyright_images can actually open legacy .ppt files.
    matching_names = [
        entry for entry in os.listdir(directory)
        if entry.lower().endswith(ppt_suffixes)
    ]
    for entry in matching_names:
        remove_copyright_images(os.path.join(directory, entry), reference_images)
|
|
|
|
|
|
|
|
|
|
# Script entry: resolve the folder via Config.get_latest_folder(Config.WORK_PATH)
# and process every PowerPoint file in it. No `__main__` guard is visible
# here, so this call also runs whenever the module is imported.
process_ppt_files(Config.get_latest_folder(Config.WORK_PATH))
|