From f937761debb1f8ce379ec01247affa4295497258 Mon Sep 17 00:00:00 2001 From: pjniyekxf <3358921628@qq.com> Date: Mon, 20 Jan 2025 19:14:58 +0800 Subject: [PATCH] ADD file via upload --- PPTExtractor.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 PPTExtractor.cpp diff --git a/PPTExtractor.cpp b/PPTExtractor.cpp new file mode 100644 index 0000000..a82c218 --- /dev/null +++ b/PPTExtractor.cpp @@ -0,0 +1,73 @@ +#include "PPTExtractor.h" +#include +#include +#include +#include + +PPTExtractor::PPTExtractor(const std::string& filePath) : filePath(filePath) {} + +PPTExtractor::~PPTExtractor() {} + +bool PPTExtractor::isPPTX() { + return filePath.substr(filePath.find_last_of(".") + 1) == "pptx"; +} + +bool PPTExtractor::extractText(std::vector& slidesText) { + if (isPPTX()) { + return extractTextFromPPTX(slidesText); + } + else { + std::cerr << "Only PPTX files are supported." << std::endl; + return false; + } +} + +bool PPTExtractor::extractTextFromPPTX(std::vector& slidesText) { + int err = 0; + zip_t* zip = zip_open(filePath.c_str(), 0, &err); + if (!zip) { + std::cerr << "Failed to open PPTX file." << std::endl; + return false; + } + + for (int i = 1; ; ++i) { + std::ostringstream oss; + oss << "ppt/slides/slide" << i << ".xml"; + std::string slidePath = oss.str(); + + struct zip_stat st; + if (zip_stat(zip, slidePath.c_str(), 0, &st) != 0) { + break; // No more slides + } + + zip_file_t* slideFile = zip_fopen(zip, slidePath.c_str(), 0); + if (!slideFile) { + std::cerr << "Failed to open slide: " << slidePath << std::endl; + continue; + } + + char* buffer = new char[st.size]; + zip_fread(slideFile, buffer, st.size); + zip_fclose(slideFile); + + pugi::xml_document doc; + pugi::xml_parse_result result = doc.load_buffer(buffer, st.size); + delete[] buffer; + + if (!result) { + std::cerr << "Failed to parse slide XML: " << slidePath << std::endl; + continue; + } + + std::string slideText; + for (pugi::xml_node node : doc.select_nodes("//a:t")) { + slideText += node.child_value(); + slideText += "\n"; + } + + slidesText.push_back(slideText); + } + + zip_close(zip); + return true; +} \ No newline at end of file