|
|
|
@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
# Dependencies: pip install python-pptx Pillow imagehash
|
|
|
|
|
|
|
|
from pptx import Presentation
|
|
|
|
|
|
|
|
from pptx.util import Inches
|
|
|
|
|
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
import imagehash
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_similar_image(image_path, reference_image_path, similarity_threshold=0.7):
    """Report whether two images are perceptually similar.

    Both images are reduced to an average hash and the two hashes are
    compared. The similarity score is
    ``1 - hamming_distance / number_of_hash_bits``, so identical hashes
    score 1.0.

    Args:
        image_path: Path of the image to check.
        reference_image_path: Path of the reference image to compare against.
        similarity_threshold: Minimum similarity score that counts as a match.

    Returns:
        True when the similarity score is at least ``similarity_threshold``.
    """
    # Open both images as context managers so their file handles are
    # released as soon as the hashes have been computed.
    with Image.open(image_path) as img1, \
            Image.open(reference_image_path) as img2:
        hash1 = imagehash.average_hash(img1)
        hash2 = imagehash.average_hash(img2)

    # Similarity = 1 - Hamming distance / number of hash bits.
    # ``hash1.hash.size`` counts every bit of the hash array directly
    # instead of assuming the array is square (``len(...) ** 2``).
    similarity = 1 - (hash1 - hash2) / hash1.hash.size
    return similarity >= similarity_threshold
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_copyright_images(pptx_path, copyright_image_paths):
    """Delete every slide that contains a picture similar to a copyright image.

    Each picture shape in the presentation is written to a temporary file
    and compared against every path in ``copyright_image_paths`` with
    ``is_similar_image``. A matching picture is removed from its slide and
    the slide itself is recorded for deletion. When at least one slide was
    recorded, the recorded slides are deleted and the presentation is saved
    back to ``pptx_path`` (overwriting the input file); otherwise a message
    is printed and the file is left untouched.

    Args:
        pptx_path: Path of the PowerPoint file to inspect and overwrite.
        copyright_image_paths: Iterable of paths to the reference images.

    Returns:
        None.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    import tempfile

    picture_shape_type = 13  # value the original code used to mean "picture"

    prs = Presentation(pptx_path)

    # A set records each slide at most once, even when several pictures on
    # the same slide match.
    slide_indexes_to_delete = set()

    # The temporary directory (and the image file inside it) is removed even
    # if reading a picture or comparing images raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_image_path = os.path.join(temp_dir, 'temp_image.png')

        for index, slide in enumerate(prs.slides):
            # Iterate over a snapshot: shapes are removed from the slide
            # inside this loop, and mutating the live collection while
            # iterating it can skip shapes.
            for shape in list(slide.shapes):
                if shape.shape_type != picture_shape_type:
                    continue

                with open(temp_image_path, 'wb') as f:
                    f.write(shape.image.blob)

                # any() stops at the first matching reference image.
                if any(
                    is_similar_image(temp_image_path, copyright_image)
                    for copyright_image in copyright_image_paths
                ):
                    print(f"有相似的图片,在第{index + 1}页")
                    slide.shapes._spTree.remove(shape._element)
                    slide_indexes_to_delete.add(index)

    if not slide_indexes_to_delete:
        print(f"{pptx_path}没有版权信息,不需要删除")
        return

    # Snapshot the slide-id entries once, before anything is removed, so the
    # recorded indexes keep referring to the original slide positions.
    slide_id_list = prs.slides._sldIdLst
    slide_ids = list(slide_id_list)
    for index in sorted(slide_indexes_to_delete):
        slide_id = slide_ids[index]
        # Drop the presentation-to-slide relationship as well as the id
        # entry, so the deleted slide is not kept in the saved package.
        # NOTE(review): relies on python-pptx internals (drop_rel, _sldIdLst);
        # confirm against the installed python-pptx version.
        prs.part.drop_rel(slide_id.rId)
        slide_id_list.remove(slide_id)

    # Save the modified presentation over the original file.
    prs.save(pptx_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Usage example
# NOTE(review): these statements run whenever this module is executed or
# imported; consider moving them behind an ``if __name__ == "__main__":`` guard.
pptx_file_path = '/Users/minya/Desktop/test/1.pptx'  # input PowerPoint file path
copyright_image_paths = ['/Users/minya/Desktop/test/1.png']  # list of copyright image paths

remove_copyright_images(
    pptx_path=pptx_file_path,
    copyright_image_paths=copyright_image_paths,
)
|