|
|
|
@ -0,0 +1,35 @@
|
|
|
|
|
# Standard library: filesystem path handling
from pathlib import Path

# Third-party: docx builds the Word document, pptx reads the presentation
import docx
from pptx import Presentation
|
|
|
|
|
|
|
|
|
|
# Create an empty Word document that will receive the extracted text.
docxFile = docx.Document()

# Ask the user for the path of the .pptx file to read.
path = input("请输入PPT文件路径:")

# Open the presentation found at that path.
pptxFile = Presentation(path)

# Walk the slides in order; n is the 1-based slide number shown in the heading.
for n, slide in enumerate(pptxFile.slides, start=1):
    # Start each slide's section with a level-2 heading.
    docxFile.add_heading(f"第{n}页", level=2)

    for shape in slide.shapes:
        # Only shapes that carry a text frame have paragraphs to read.
        if not shape.has_text_frame:
            continue

        textFrame = shape.text_frame
        for paragraph in textFrame.paragraphs:
            # Each run's text is written as its own paragraph in the
            # Word document.
            for run in paragraph.runs:
                texts = run.text
                docxFile.add_paragraph(texts)

# Save as "资料.docx" inside the "PPT提取" folder. The path is assembled from
# its parts rather than written with a literal backslash, which was an invalid
# escape sequence and only worked as a separator on Windows. The folder is
# created first so the save does not fail when it is missing.
output_path = Path("PPT提取") / "资料.docx"
output_path.parent.mkdir(parents=True, exist_ok=True)
docxFile.save(str(output_path))
|