diff --git a/assets/copyright-img/1ppt.png b/assets/copyright-img/1ppt.png
new file mode 100644
index 0000000..c4f8f89
Binary files /dev/null and b/assets/copyright-img/1ppt.png differ
diff --git a/assets/copyright-img/缘梦.png b/assets/copyright-img/缘梦.png
new file mode 100644
index 0000000..25da72b
Binary files /dev/null and b/assets/copyright-img/缘梦.png differ
diff --git a/config/index.js b/config/index.js
index 5ac1ced..6007970 100644
--- a/config/index.js
+++ b/config/index.js
@@ -10,7 +10,7 @@ const config = {
    * 则直接使用inputDir作为工作目录
    */
   inputDir: '',
-  // inputDir: 'E:/商品资料汇总/商品资料(1051-1100)/ppt1056-研究生复试',
+  // inputDir: 'C:/Users/Administrator/Desktop/test',
   // 指定要查找的目录
   directoryPath: 'E:/商品资料汇总/商品资料(1201-1250)',
   // 要转换的文件个数, 0表示全部转换
diff --git a/py-src/del_copyright_img.py b/py-src/del_copyright_img.py
index a83e3b6..d2e99fb 100644
--- a/py-src/del_copyright_img.py
+++ b/py-src/del_copyright_img.py
@@ -1,17 +1,45 @@
 # 依赖目录pip install python-pptx Pillow imagehash
 from pptx import Presentation
 from pptx.util import Inches
+from pptx.dml.color import RGBColor
 from PIL import Image
 import imagehash
 import os
+from config import Config
 
-def is_similar_image(image_path, reference_image_path, similarity_threshold=0.7):
+def get_copyright_image_paths():
+    """Return the paths of all image files under ../assets/copyright-img."""
+    copyright_dir = os.path.join(
+        os.path.dirname(__file__), "..", "assets", "copyright-img"
+    )
+
+    # Keep only files whose extension marks them as an image.
+    copyright_image_paths = []
+    for filename in os.listdir(copyright_dir):
+        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):  # supported image formats
+            copyright_image_paths.append(os.path.join(copyright_dir, filename))
+
+    return copyright_image_paths
+
+def convert_image_to_rgba(image):
+    """
+    Convert palette-mode ('P') images to RGBA; any other mode is returned unchanged.
+    """
+    if image.mode == 'P':
+        image = image.convert('RGBA')
+    return image
+
+def is_similar_image(image_path, reference_image_path, similarity_threshold=0.95):
     """
     检查两张图片之间的相似度
     """
     img1 = Image.open(image_path)
     img2 = Image.open(reference_image_path)
 
+    # Normalise palette images to RGBA before hashing.
+    img1 = convert_image_to_rgba(img1)
+    img2 = convert_image_to_rgba(img2)
+
     # 计算哈希值
     hash1 = imagehash.average_hash(img1)
     hash2 = imagehash.average_hash(img2)
@@ -26,6 +54,11 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
 
     # 使用一个列表来记录要删除的幻灯片索引
     slides_to_delete = []
+    change_text = False
+    # Keywords to replace; only referenced by the disabled replacement code further down.
+    replace_text_keywords = ["第一PPT"]
+    # A text frame containing any of these strings is deleted.
+    del_text_keywords = ["Speaker name and title", "OfficePLUS", "Presenter name", "www.officeplus.cn"]
 
     # 遍历每一页幻灯片
     for index, slide in enumerate(prs.slides):
@@ -41,7 +74,7 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
                 # 检查相似度
                 for copyright_image in copyright_image_paths:
                     if is_similar_image(temp_image_path, copyright_image):
-                        print(f"有相似的图片,在第{index + 1}页")
+                        print(f"'{os.path.basename(pptx_path)}'有相似的图片'{os.path.basename(copyright_image)}',在第{index + 1}页")
 
                         # 删除该形状
                         slide.shapes._spTree.remove(shape._element)
@@ -51,20 +84,73 @@ def remove_copyright_images(pptx_path, copyright_image_paths):
                 # 删除临时图片
                 os.remove(temp_image_path)
 
+            # Shapes that carry text: drop the ones holding vendor boilerplate.
+            if shape.has_text_frame:
+                text_content = shape.text
+
+                # Delete the shape when its text contains any listed keyword.
+                if any(char in text_content for char in del_text_keywords):
+                    change_text = True
+                    print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页删除文本框,内容为: '{text_content}'")
+                    # Detach the text box from the slide's shape tree.
+                    slide.shapes._spTree.remove(shape._element)
+
+                """
+                # 判断并替换文本
+                for keyword in replace_text_keywords:
+                    if keyword in text_content:
+                        if not change_text:
+                            change_text = True
+                        print(f"'{os.path.basename(pptx_path)}'在第 {index + 1} 页替换文本 '{text_content}' 为 '创意素材'")
+
+                        # 记录原字体样式
+                        original_font = shape.text_frame.paragraphs[0].runs[0].font
+
+                        new_text = text_content.replace(keyword, "创意素材")
+                        shape.text = new_text
+
+                        # 应用原字体样式
+                        for paragraph in shape.text_frame.paragraphs:
+                            for run in paragraph.runs:
+                                run.font.bold = original_font.bold
+                                run.font.italic = original_font.italic
+                                run.font.underline = original_font.underline
+                                run.font.size = original_font.size
+
+                                # 处理颜色
+                                if isinstance(original_font.color, RGBColor):
+                                    run.font.color.rgb = original_font.color.rgb
+                                elif hasattr(original_font.color, 'theme_color'):
+                                    run.font.color.rgb = RGBColor(255, 255, 255)
+                                else:
+                                    # 处理未识别的颜色类型
+                                    run.font.color.rgb = RGBColor(255, 255, 255)
+                """
+
     # 删除记录中的幻灯片
-    for index in slides_to_delete:
+    # Delete from the highest index down, each index once: the slide list is
+    # re-read every pass, so an earlier removal would shift later positions.
+    for index in sorted(set(slides_to_delete), reverse=True):
         slides = list(prs.slides._sldIdLst)
         current_slide_to_delete = slides[index]
         prs.slides._sldIdLst.remove(current_slide_to_delete)
 
-    if(len(slides_to_delete) > 0):
+    if(len(slides_to_delete) > 0 or change_text):
         # 保存修改后的 PowerPoint 文件
         prs.save(pptx_path)
     else:
-        print(f"{pptx_path}没有版权信息,不需要删除")
+        print(f"{pptx_path}没有版权信息或者版权文案,不需要删除替换")
+
+def process_ppt_files(directory):
+    """Run remove_copyright_images on every .pptx file directly inside *directory*."""
+    copyright_image_paths = get_copyright_image_paths()
 
-# 使用示例
-pptx_file_path = '/Users/minya/Desktop/test/1.pptx' # 输入 PowerPoint 文件路径
-copyright_image_paths = ['/Users/minya/Desktop/test/1.png'] # 版权图片路径列表
+    for filename in os.listdir(directory):
+        # python-pptx only opens OOXML .pptx packages: a legacy binary .ppt
+        # or an Office "~$" lock file would make Presentation() raise.
+        if filename.startswith("~$") or not filename.lower().endswith(".pptx"):
+            continue
+        pptx_file_path = os.path.join(directory, filename)
+        remove_copyright_images(pptx_file_path, copyright_image_paths)
 
-remove_copyright_images(pptx_file_path, copyright_image_paths)
\ No newline at end of file
+process_ppt_files(Config.get_latest_folder(Config.WORK_PATH))