import * as pdfjsLib from 'pdfjs-dist/webpack.js';

// Separator for extracted text: inserted in place of empty text items within
// a page and used to join the text of consecutive pages.
const LINE_BREAK = '\n';

/**
 * Extracts the text of a PDF supplied as in-memory data.
 *
 * The value is handed to `pdfjsLib.getDocument` as the `data` option and the
 * resulting loading task is passed on to `parsePdf`.
 *
 * @param {*} data - PDF contents; presumably a binary buffer / typed array
 *   accepted by pdf.js (TODO confirm against callers).
 * @returns {Promise<string|undefined>} Whatever `parsePdf` resolves to: the
 *   extracted text, or `undefined` when parsing failed.
 */
async function parsePdfData(data) {
  const source = { data };
  const text = await parsePdf(pdfjsLib.getDocument(source));
  return text;
}

/**
 * Extracts the text of a PDF located at a URL.
 *
 * The URL is passed unchanged to `pdfjsLib.getDocument` and the resulting
 * loading task is passed on to `parsePdf`.
 *
 * @param {*} url - Location of the PDF, forwarded as-is to pdf.js.
 * @returns {Promise<string|undefined>} Whatever `parsePdf` resolves to: the
 *   extracted text, or `undefined` when parsing failed.
 */
async function parsePdfURL(url) {
  const text = await parsePdf(pdfjsLib.getDocument(url));
  return text;
}

/**
 * Resolves a pdf.js loading task and returns the text of every page.
 *
 * Pages are read one after another, from page 1 up to `numPages`. Within a
 * page, each text item's `str` is appended in order; an item whose `str` is
 * empty contributes `LINE_BREAK` instead. The per-page texts are then joined
 * with `LINE_BREAK`.
 *
 * Failures are not propagated: any error raised while loading or reading the
 * document is written to `console.error` and the function resolves to
 * `undefined`.
 *
 * @param {{promise: Promise<object>}} loadingTask - Object whose `promise`
 *   resolves to the document (in this file, the value returned by
 *   `pdfjsLib.getDocument`).
 * @returns {Promise<string|undefined>} The extracted text, or `undefined` if
 *   an error was caught.
 */
async function parsePdf(loadingTask) {
  try {
    const doc = await loadingTask.promise;
    const pageTexts = [];

    let pageIndex = 1;
    while (pageIndex <= doc.numPages) {
      const page = await doc.getPage(pageIndex);
      const { items } = await page.getTextContent();

      // Empty items stand in for line breaks; everything else is kept verbatim.
      const pageText = items.reduce(
        (soFar, item) => soFar + (item.str.length ? item.str : LINE_BREAK),
        ''
      );
      pageTexts.push(pageText);

      pageIndex += 1;
    }

    return pageTexts.join(LINE_BREAK);
  } catch (err) {
    // Best-effort: report the failure and fall through to an undefined result.
    console.error('Error parsing PDF: ', err);
    console.error(`${err.name} - ${err.message}`);
    return undefined;
  }
}

// The module's default export is parsePdfData; parsePdfURL and parsePdf have
// no export statement in the code shown here.
export default parsePdfData;