You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.3 KiB
49 lines
1.3 KiB
import * as pdfjsLib from 'pdfjs-dist';
|
|
|
|
// Point pdf.js at its worker script. The URL is built from the
// 'pdfjs-dist/build/pdf.worker.mjs' specifier with this module's own URL as
// the base, and its string form is what pdf.js is given.
const pdfWorkerUrl = new URL(
  'pdfjs-dist/build/pdf.worker.mjs',
  // @ts-ignore
  import.meta.url
);
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorkerUrl.href;
|
|
|
|
/**
 * Extracts the plain text of every page of a PDF file.
 *
 * The file is read into memory and parsed with pdf.js. Within a page, the
 * text items are joined with a single space; the pages are then joined with a
 * single space, in page order.
 *
 * @param file - The PDF file to read.
 * @returns The concatenated text of all pages.
 * @throws Rejects with the underlying error if the file cannot be read, or if
 *   pdf.js fails to load the document, a page, or a page's text content.
 */
const readPDFContent = async (file: File): Promise<string> => {
  const data = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument({ data });

  try {
    const pdf = await loadingTask.promise;

    // pdf.js page numbers are 1-based.
    const pageNumbers = Array.from(
      { length: pdf.numPages },
      (_, index) => index + 1
    );

    // Pages are fetched concurrently; Promise.all keeps them in page order.
    const pageTexts = await Promise.all(
      pageNumbers.map(async (pageNumber) => {
        const page = await pdf.getPage(pageNumber);
        const textContent = await page.getTextContent();
        return (
          textContent.items
            // Items without a `str` field (marked-content markers) carry no
            // text and contribute an empty string.
            .map((item) => ('str' in item ? item.str : ''))
            .join(' ')
        );
      })
    );

    return pageTexts.join(' ');
  } finally {
    // Release the document and its worker resources whether or not
    // extraction succeeded; otherwise every call leaks them.
    await loadingTask.destroy();
  }
};
|
|
|
|
export default readPDFContent;
|