"""Extract the text of a PowerPoint file into a Word document.

The script prompts for the path of a .pptx file, then writes one level-2
heading per slide ("第N页") followed by the text found in that slide's
shapes, and finally saves the result as ``PPT提取/资料.docx`` relative to
the current working directory.
"""
from pathlib import Path

import docx
from pptx import Presentation

# Build the output path with pathlib instead of a hand-written "\" separator:
# a backslash inside a normal string literal is an (invalid) escape sequence
# and only acts as a directory separator on Windows.
output_path = Path("PPT提取") / "资料.docx"

# Start from an empty Word document that will receive the extracted text.
docxFile = docx.Document()

# Paths pasted or dragged into a terminal often carry surrounding whitespace
# or quotes; remove them so the file can be opened.
path = input("请输入PPT文件路径:").strip().strip("\"'")
pptxFile = Presentation(path)

# enumerate(start=1) supplies the 1-based slide number used in the heading.
for n, slide in enumerate(pptxFile.slides, start=1):
    docxFile.add_heading(f"第{n}页", level=2)
    for shape in slide.shapes:
        # Only shapes with a text frame carry paragraphs; skip the others.
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            # Runs are formatting fragments of a single paragraph. Join them
            # so one source paragraph becomes one Word paragraph instead of
            # being split wherever the formatting changes.
            text = "".join(run.text for run in paragraph.runs)
            if text:
                docxFile.add_paragraph(text)

# Create the output directory first; saving fails if it does not exist.
output_path.parent.mkdir(parents=True, exist_ok=True)
docxFile.save(str(output_path))