Search This Blog

Monday, December 9, 2024

PDF to Text Conversion in Oracle APEX

Here are the steps to convert a PDF file to text and display the extracted characters in a region in Oracle APEX.


Step 1. Create a page and copy-paste the code below into the page's "Function and Global Variable Declaration" property.

  var datass = '';
  var DataArr = [];
  PDFJS.workerSrc = '';

  function ExtractText() {
      var input = document.getElementById("file-id");
      var fReader = new FileReader();
      fReader.readAsDataURL(input.files[0]);
      // console.log(input.files[0]);
      fReader.onloadend = function(event) {
          convertDataURIToBinary(event.target.result);
      }
  }

  var BASE64_MARKER = ';base64,';

  function convertDataURIToBinary(dataURI) {

      var base64Index = dataURI.indexOf(BASE64_MARKER) + BASE64_MARKER.length;
      var base64 = dataURI.substring(base64Index);
      var raw = window.atob(base64);
      var rawLength = raw.length;
      var array = new Uint8Array(new ArrayBuffer(rawLength));

      for (var i = 0; i < rawLength; i++) {
          array[i] = raw.charCodeAt(i);
      }
      pdfAsArray(array)

  }

  function getPageText(pageNum, PDFDocumentInstance) {
      // Return a Promise that is solved once the text of the page is retrieven
      return new Promise(function(resolve, reject) {
          PDFDocumentInstance.getPage(pageNum).then(function(pdfPage) {
              // The main trick to obtain the text of the PDF page, use the getTextContent method
              pdfPage.getTextContent().then(function(textContent) {
                  var textItems = textContent.items;
                  var finalString = "";

                  // Concatenate the string of the item to the final string
                  for (var i = 0; i < textItems.length; i++) {
                      var item = textItems[i];

                      finalString += item.str + " ";
                  }

                  // Solve promise with the text retrieven from the page
                  resolve(finalString);
              });
          });
      });
  }

  function pdfAsArray(pdfAsArray) {

      PDFJS.getDocument(pdfAsArray).then(function(pdf) {

          var pdfDocument = pdf;
          // Create an array that will contain our promises
          var pagesPromises = [];

          for (var i = 0; i < pdf.pdfInfo.numPages; i++) {
              // Required to prevent that i is always the total of pages
              (function(pageNumber) {
                  // Store the promise of getPageText that returns the text of a page
                  pagesPromises.push(getPageText(pageNumber, pdfDocument));
              })(i + 1);
          }

          // Execute all the promises
          Promise.all(pagesPromises).then(function(pagesText) {

              // Display text of all the pages in the console
              // e.g ["Text content page 1", "Text content page 2", "Text content page 3" ... ]
              console.log(pagesText); // representing every single page of PDF Document by array indexing
              console.log(pagesText.length);
              var outputStr = "";
              for (var pageNum = 0; pageNum < pagesText.length; pageNum++) {
                  console.log(pagesText[pageNum]);
                  outputStr = "";
                  outputStr = "<br/><br/>Page " + (pageNum + 1) + " contents <br/> <br/>";

                  var div = document.getElementById('output');

                  div.innerHTML += (outputStr + pagesText[pageNum]);

              }


          });

      }, function(reason) {
          // PDF loading error
          console.error(reason);
      });
  }


Step 2. Create a region of type Static Content and copy-paste the code below into it.

<!-- PDF.js library and its worker script. Both must come from the same
     release (1.10.100); the worker entry script from release 2.6.347 was
     dropped because it does not belong to this library version. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/1.10.100/pdf.min.js" ></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/1.10.100/pdf.worker.min.js" ></script>

    <!-- choosing a file starts the extraction -->
    <input type="file" id="file-id" name="file_name" onchange="ExtractText();">

    <!-- a container for the output -->

    <div id="output"></div>
   
    <a href="https://qawithexperts.com/article/javascript/read-pdf-file-using-javascript/318">Reference</a>



No comments:

Post a Comment

PDF to Text Conversion in Oracle APEX

Here are the steps to convert a PDF file to text and display the extracted characters in a region in Oracle APEX. Step 1. Create a page and copy-paste the code below into the Pa...