|
|
# Dependencies: pip install python-pptx Pillow imagehash
|
|
|
import os
import tempfile

import imagehash
from PIL import Image
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.opc.constants import RELATIONSHIP_TYPE as RT
from pptx.oxml import parse_xml
from pptx.util import Inches

from config import Config
|
|
|
|
|
|
def get_copyright_image_paths(copyright_dir=None):
    """Return the paths of the reference copyright images.

    Args:
        copyright_dir: Directory to scan. When omitted, the directory
            ``../assets/copyright-img`` relative to this file is used.

    Returns:
        A list of paths (``copyright_dir`` joined with the file name) for
        every directory entry whose name ends with a supported image
        extension, compared case-insensitively. The list is sorted by file
        name so the comparison order is deterministic across platforms.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot
            be listed (for example, when it does not exist).
    """
    if copyright_dir is None:
        # Default location: assets/copyright-img next to this file's parent.
        copyright_dir = os.path.join(
            os.path.dirname(__file__), "..", "assets", "copyright-img"
        )

    # Supported image formats.
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # os.listdir returns entries in arbitrary order; sort for stable output.
    return [
        os.path.join(copyright_dir, filename)
        for filename in sorted(os.listdir(copyright_dir))
        if filename.lower().endswith(image_extensions)
    ]
|
|
|
|
|
|
def convert_image_to_rgba(image):
    """Return *image* converted to RGBA when it is palette-based.

    Only images whose ``mode`` is ``'P'`` are converted (so that palette
    transparency is handled); any other image is returned unchanged.
    """
    is_palette_image = image.mode == 'P'
    return image.convert('RGBA') if is_palette_image else image
|
|
|
|
|
|
def is_similar_image(image_path, reference_image_path, similarity_threshold=0.95):
    """Check whether two images are similar according to their average hash.

    Args:
        image_path: Path of the image to test.
        reference_image_path: Path of the reference image to compare with.
        similarity_threshold: Minimum similarity (``1 - Hamming distance /
            number of hash bits``) for the images to count as similar.

    Returns:
        True when the computed similarity is at least
        ``similarity_threshold``, otherwise False.
    """
    # Image.open is lazy and keeps the file open; the context managers make
    # sure both files are closed as soon as the hashes have been computed,
    # so callers can delete or overwrite the files afterwards.
    with Image.open(image_path) as img1, Image.open(reference_image_path) as img2:
        # Palette images are converted to RGBA before hashing.
        hash1 = imagehash.average_hash(convert_image_to_rgba(img1))
        hash2 = imagehash.average_hash(convert_image_to_rgba(img2))

    # Subtracting two hashes yields their Hamming distance; divide by the
    # total number of bits in the hash array to normalise it.
    similarity = 1 - (hash1 - hash2) / hash1.hash.size
    return similarity >= similarity_threshold
|
|
|
|
|
|
# Text containing any of these keywords causes only that shape to be removed.
_DEL_TEXT_KEYWORDS = (
    "Speaker name and title",
    "OfficePLUS", "Presenter name",
    "www.officeplus.cn",
    "第一PPT模板网-WWW.1PPT.COM",
    "标题字体来源于字魂网,未经授权不可商用",
    "标题字体来源字魂网,未经授权不可商用",
    "以上字体来源字魂网,未经授权不得商用",
    "标题字来源于字魂网,未经授权不可商用",
    "以上书法字体来源字魂网,未经授权不得商用",
)

# Text containing any of these keywords causes the whole slide to be removed.
_DEL_SLIDE_KEYWORDS = (
    "模板中使用的字体为开源字体",
    "一站式办公内容服务平台",
    "欢迎关注OfficePLUS官方渠道",
)

# Value of shape.shape_type that denotes a picture shape.
_PICTURE_SHAPE_TYPE = 13


def _find_matching_copyright_image(shape, copyright_image_paths):
    """Return the first reference image path similar to the picture in *shape*, or None."""
    try:
        blob = shape.image.blob
    except ValueError:
        # python-pptx raises ValueError for a picture without an embedded
        # image (e.g. a linked picture); there is nothing to compare.
        return None

    # A unique temporary file avoids clobbering files in the working
    # directory and is always cleaned up, even when a comparison fails.
    fd, temp_image_path = tempfile.mkstemp(suffix='.png')
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(blob)

        for copyright_image in copyright_image_paths:
            try:
                if is_similar_image(temp_image_path, copyright_image):
                    return copyright_image
            except OSError as err:
                # Pillow reports undecodable image data as an OSError
                # subclass; skip this comparison instead of aborting.
                print(f"无法比较图片'{os.path.basename(copyright_image)}': {err}")
    finally:
        os.remove(temp_image_path)
    return None


def _delete_slides(prs, slide_indices):
    """Remove the slides at *slide_indices* (original positions) from the slide id list."""
    sld_id_lst = prs.slides._sldIdLst
    # Resolve every index against a snapshot taken before any removal, so
    # earlier removals cannot shift the position of later targets.
    snapshot = list(sld_id_lst)
    for index in sorted(set(slide_indices)):
        sld_id_lst.remove(snapshot[index])


def remove_copyright_images(pptx_path, copyright_image_paths):
    """Strip copyright pictures, text boxes and slides from a presentation.

    For every slide:
      * a picture similar to one of the reference images is removed and the
        slide is marked for deletion;
      * a text shape containing a slide-level keyword marks the slide for
        deletion (the remaining shapes of that slide are not inspected);
      * a text shape containing a text-level keyword is removed.

    The file is saved in place only when something changed; otherwise a
    message is printed.

    Args:
        pptx_path: Path of the presentation to process; overwritten on change.
        copyright_image_paths: Paths of the reference copyright images.

    NOTE: a text-replacement pass existed here only as disabled code and has
    been dropped; recover it from version control if it is needed again.
    """
    prs = Presentation(pptx_path)

    # Indices of slides to delete; a set, because one slide can match many times.
    slides_to_delete = set()
    change_text = False

    for index, slide in enumerate(prs.slides):
        # Iterate over a snapshot: shapes are removed from the tree below.
        for shape in list(slide.shapes):
            if shape.shape_type == _PICTURE_SHAPE_TYPE:
                copyright_image = _find_matching_copyright_image(
                    shape, copyright_image_paths
                )
                if copyright_image is not None:
                    print(f"'{os.path.basename(pptx_path)}'有相似的图片'{os.path.basename(copyright_image)}',在第{index + 1}页")
                    # Remove the picture and mark its slide for deletion.
                    slide.shapes._spTree.remove(shape._element)
                    slides_to_delete.add(index)

            if shape.has_text_frame:
                text_content = shape.text

                # Slide-level keyword: the whole slide goes away.
                if any(keyword in text_content for keyword in _DEL_SLIDE_KEYWORDS):
                    print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页需要删除,内容为: '{text_content}'")
                    slides_to_delete.add(index)
                    break

                # Text-level keyword: only this shape is removed.
                if any(keyword in text_content for keyword in _DEL_TEXT_KEYWORDS):
                    change_text = True
                    print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页删除文本框,内容为: '{text_content}'")
                    slide.shapes._spTree.remove(shape._element)

    _delete_slides(prs, slides_to_delete)

    if slides_to_delete or change_text:
        # Save the modified presentation over the original file.
        prs.save(pptx_path)
    else:
        print(f"{pptx_path}没有版权信息或者版权文案,不需要删除替换")
|
|
|
|
|
|
def process_ppt_files(directory):
    """Run copyright removal on every ``.pptx`` file directly inside *directory*.

    Names starting with ``~$`` are ignored. Legacy ``.ppt`` files are
    reported and skipped: python-pptx only reads the ``.pptx`` format, so
    passing one on would abort the whole batch.

    Args:
        directory: Directory whose entries are scanned (not recursive).
    """
    # Reference images are loaded once and shared by all presentations.
    copyright_image_paths = get_copyright_image_paths()

    for filename in os.listdir(directory):
        # Skip names starting with "~$" (presumably Office lock files).
        if filename.startswith('~$'):
            continue

        lowered = filename.lower()
        if lowered.endswith('.pptx'):
            pptx_file_path = os.path.join(directory, filename)
            remove_copyright_images(pptx_file_path, copyright_image_paths)
        elif lowered.endswith('.ppt'):
            print(f"'{filename}'是旧版 .ppt 格式,无法处理,已跳过")
|
|
|
|
|
|
# Script entry: process the presentations in the folder that
# Config.get_latest_folder returns for Config.WORK_PATH.
process_ppt_files(Config.get_latest_folder(Config.WORK_PATH))