# Dependencies: pip install python-pptx Pillow imagehash
import functools
import io
import os

import imagehash
from PIL import Image
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.util import Inches

from config import Config

# File extensions accepted as reference copyright images.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

# A shape whose text contains any of these substrings is deleted.
_DEL_TEXT_KEYWORDS = (
    "Speaker name and title",
    "OfficePLUS",
    "Presenter name",
    "www.officeplus.cn",
    "第一PPT模板网-WWW.1PPT.COM",
)


def get_copyright_image_paths():
    """Return the paths of all reference copyright images.

    The images are read from ``../assets/copyright-img`` relative to the
    directory containing this file. Only files whose (case-insensitive)
    extension is one of ``_IMAGE_EXTENSIONS`` are returned.

    Raises:
        OSError: if the directory cannot be listed.
    """
    copyright_dir = os.path.join(
        os.path.dirname(__file__), "..", "assets", "copyright-img"
    )
    return [
        os.path.join(copyright_dir, filename)
        for filename in os.listdir(copyright_dir)
        if filename.lower().endswith(_IMAGE_EXTENSIONS)
    ]


def convert_image_to_rgba(image):
    """Return *image* converted to RGBA if it is palette-based (mode ``P``).

    Images in any other mode are returned unchanged.
    """
    if image.mode == 'P':
        image = image.convert('RGBA')
    return image


def _average_hash(image_source):
    """Return the average hash of an image given as a path or binary file object."""
    # The context manager closes the underlying file handle as soon as the
    # hash has been computed, instead of leaving it to garbage collection.
    with Image.open(image_source) as image:
        return imagehash.average_hash(convert_image_to_rgba(image))


@functools.lru_cache(maxsize=None)
def _reference_hash(reference_image_path):
    """Return the (cached) average hash of a reference copyright image.

    The set of reference images is small and fixed for a run, so each one is
    hashed at most once instead of once per picture per presentation.
    """
    return _average_hash(reference_image_path)


def _hash_similarity(hash1, hash2):
    """Return the similarity of two hashes as 1 - Hamming distance / bit count."""
    return 1 - (hash1 - hash2) / hash1.hash.size


def is_similar_image(image_path, reference_image_path, similarity_threshold=0.95):
    """Check whether two images are perceptually similar.

    Args:
        image_path: path (or binary file object) of the image to test.
        reference_image_path: path (or binary file object) of the reference.
        similarity_threshold: minimum similarity, in the range 0..1, for the
            two images to count as similar.

    Returns:
        True if the average-hash similarity is at least *similarity_threshold*.
    """
    similarity = _hash_similarity(
        _average_hash(image_path), _average_hash(reference_image_path)
    )
    return similarity >= similarity_threshold


def _picture_hash(shape):
    """Return the average hash of a picture shape, or None if it cannot be read."""
    try:
        # Hash straight from memory; no temporary file is needed.
        return _average_hash(io.BytesIO(shape.image.blob))
    except (ValueError, OSError):
        # NOTE(review): ValueError is expected from python-pptx for pictures
        # without an embedded image, OSError from Pillow for formats it cannot
        # decode -- confirm against the installed library versions.
        return None


def _find_copyright_match(picture_hash, copyright_image_paths,
                          similarity_threshold=0.95):
    """Return the first reference image path similar to *picture_hash*, else None."""
    for reference_path in copyright_image_paths:
        similarity = _hash_similarity(picture_hash, _reference_hash(reference_path))
        if similarity >= similarity_threshold:
            return reference_path
    return None


def remove_copyright_images(pptx_path, copyright_image_paths):
    """Strip copyright pictures and copyright text from one presentation.

    For every slide:
      * a picture shape similar to any reference image is removed and the
        slide itself is scheduled for deletion;
      * a shape whose text contains one of ``_DEL_TEXT_KEYWORDS`` is removed.

    The file at *pptx_path* is overwritten only if something was changed;
    otherwise a message is printed and the file is left untouched.

    Args:
        pptx_path: path of the .pptx file to process (modified in place).
        copyright_image_paths: paths of the reference copyright images.
    """
    prs = Presentation(pptx_path)

    # A set, so a slide holding several copyright pictures is recorded once.
    slides_to_delete = set()
    change_text = False

    for index, slide in enumerate(prs.slides):
        # Iterate over a snapshot: shapes are removed inside the loop, and
        # removing from the live collection while iterating can skip shapes.
        for shape in list(slide.shapes):
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                picture_hash = _picture_hash(shape)
                if picture_hash is None:
                    print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页的图片无法读取,已跳过相似度检查")
                else:
                    copyright_image = _find_copyright_match(
                        picture_hash, copyright_image_paths
                    )
                    if copyright_image is not None:
                        print(f"'{os.path.basename(pptx_path)}'有相似的图片'{os.path.basename(copyright_image)}',在第{index + 1}页")
                        slide.shapes._spTree.remove(shape._element)
                        slides_to_delete.add(index)
                        # The shape is gone; nothing more to inspect on it.
                        continue

            if shape.has_text_frame:
                text_content = shape.text
                if any(keyword in text_content for keyword in _DEL_TEXT_KEYWORDS):
                    change_text = True
                    print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页删除文本框,内容为: '{text_content}'")
                    slide.shapes._spTree.remove(shape._element)

    # Resolve every recorded index against ONE snapshot of the slide-id list,
    # so removing a slide cannot shift the position of the ones still pending.
    # NOTE(review): only the slide-id entry is removed; the relationship to
    # the slide part is left in place -- confirm whether it should be dropped.
    slide_id_list = prs.slides._sldIdLst
    slide_ids = list(slide_id_list)
    for index in sorted(slides_to_delete):
        slide_id_list.remove(slide_ids[index])

    if slides_to_delete or change_text:
        prs.save(pptx_path)
    else:
        print(f"{pptx_path}没有版权信息或者版权文案,不需要删除替换")


def process_ppt_files(directory):
    """Run :func:`remove_copyright_images` on every presentation in *directory*.

    Office lock files (names starting with ``~$``) are ignored. Legacy
    ``.ppt`` files are reported and skipped, because python-pptx reads only
    the Open XML ``.pptx`` format and would abort the whole batch on them.

    Args:
        directory: folder whose direct children are scanned (not recursive).
    """
    copyright_image_paths = get_copyright_image_paths()

    for filename in os.listdir(directory):
        if filename.startswith('~$'):
            continue

        lowered = filename.lower()
        if lowered.endswith('.ppt'):
            print(f"'{filename}'是旧版 .ppt 格式,无法处理,已跳过")
            continue
        if lowered.endswith('.pptx'):
            pptx_file_path = os.path.join(directory, filename)
            remove_copyright_images(pptx_file_path, copyright_image_paths)


# Runs whenever this module is executed or imported.
process_ppt_files(Config.get_latest_folder(Config.WORK_PATH))