import PizZip from 'pizzip'
import pdfToText from 'react-pdftotext'
import { Observable } from 'rxjs'

import { DocumentParserErrors } from '@submission-portal/models'
import { DOMParser } from '@xmldom/xmldom'

// File extensions (without the leading dot) that are routed to the PDF text extractor.
const PdfExtensions: Array<string> = ['pdf']
// File extensions (without the leading dot) that are routed to the Word document extractor.
const DocExtensions: Array<string> = ['doc', 'docx']

// Conversion references
// - https://gist.github.com/Matheswaaran/c12ab47634698fdb316049d5706f86c6 - DOCX text extraction
// - https://socket.dev/npm/package/react-pdftotext/files/1.3.0/README.md - PDF text extraction

/**
 * Parses an XML string into a DOM document.
 *
 * A leading byte-order mark (U+FEFF), when present, is removed before the
 * text is handed to the parser.
 *
 * @param str - XML source text.
 * @returns The document produced by `DOMParser` using the `text/xml` MIME type.
 */
const convertStringToXML = (str: string): Document => {
  const byteOrderMark = 0xfeff
  const startsWithBom = str.charCodeAt(0) === byteOrderMark
  const xmlSource = startsWithBom ? str.slice(1) : str
  const parser = new DOMParser()
  return parser.parseFromString(xmlSource, 'text/xml')
}

// Snippet adapted from the DOCX text extraction reference listed above.
/**
 * Extracts the text of every non-empty paragraph from a Word (OOXML) archive.
 *
 * The archive is opened with PizZip, `word/document.xml` is parsed as XML, and
 * for each `w:p` element the text of all nested `w:t` elements is concatenated.
 * Paragraphs whose concatenated text is empty are skipped.
 *
 * @param content - Raw archive data. The caller in this file passes the result
 *   of `FileReader.readAsArrayBuffer`, so both binary strings and `ArrayBuffer`s
 *   are accepted.
 * @returns One string per non-empty paragraph, in document order.
 * @throws Error when the archive has no `word/document.xml` entry. Errors raised
 *   by PizZip or the XML parser propagate unchanged.
 */
const getParagraphsFromDocFile = (content: string | ArrayBuffer): string[] => {
  const zip = new PizZip(content)
  const documentEntry = zip.files['word/document.xml']
  if (!documentEntry) {
    // Fail with an explicit message instead of a TypeError on `undefined.asText`.
    throw new Error('word/document.xml is missing from the archive')
  }

  const xml = convertStringToXML(documentEntry.asText())
  const paragraphNodes = xml.getElementsByTagName('w:p')
  const paragraphs: string[] = []

  for (let i = 0, total = paragraphNodes.length; i < total; i += 1) {
    const textNodes = paragraphNodes[i].getElementsByTagName('w:t')
    let paragraphText = ''

    for (let j = 0, runCount = textNodes.length; j < runCount; j += 1) {
      // A `w:t` element may be empty (no children) or hold several child nodes;
      // read all of them and treat a null nodeValue as empty text rather than
      // appending the literal string "null".
      const children = textNodes[j].childNodes
      for (let k = 0, childCount = children.length; k < childCount; k += 1) {
        paragraphText += children[k].nodeValue ?? ''
      }
    }

    if (paragraphText) {
      paragraphs.push(paragraphText)
    }
  }

  return paragraphs
}

/**
 * Extracts the plain text of an uploaded PDF or Word document.
 *
 * The returned observable is cold: work starts on subscription. It emits the
 * extracted text once and then completes, or signals an `Error` whose message
 * is one of the `DocumentParserErrors` values:
 * - `InvalidFile` when `file` is falsy or `extension` is not a supported one
 *   (previously an unsupported extension left the observable pending forever);
 * - `PdfParseError` when PDF extraction rejects or yields no text;
 * - `DocParseError` when the file cannot be read, cannot be parsed, or yields
 *   no text.
 *
 * NOTE(review): `doc` is listed as a supported extension, but the parser only
 * understands ZIP-based archives containing `word/document.xml`; legacy binary
 * `.doc` files are therefore expected to end in `DocParseError` - confirm.
 *
 * @param file - The file to read.
 * @param extension - File extension without the leading dot; matched
 *   case-insensitively against the supported PDF and Word extensions.
 * @returns An observable of the extracted text.
 */
const parseTextFromFile = (
  file: File,
  extension: string
): Observable<string> => {
  return new Observable<string>((observer) => {
    const fail = (message: string): void => {
      observer.error(new Error(message))
    }

    if (!file) {
      fail(DocumentParserErrors.InvalidFile)
      return undefined
    }

    // Accept "PDF" / "Docx" as well as the lower-case spellings.
    const normalizedExtension = extension.toLowerCase()

    if (PdfExtensions.includes(normalizedExtension)) {
      pdfToText(file)
        .then((text) => {
          if (text) {
            observer.next(text)
            observer.complete()
          } else {
            fail(DocumentParserErrors.PdfParseError)
          }
        })
        .catch(() => {
          fail(DocumentParserErrors.PdfParseError)
        })
      return undefined
    }

    if (DocExtensions.includes(normalizedExtension)) {
      const fileReader = new FileReader()

      fileReader.addEventListener('load', () => {
        try {
          // readAsArrayBuffer produces an ArrayBuffer at runtime, which PizZip
          // accepts; the cast only satisfies the helper's declared parameter.
          const text = getParagraphsFromDocFile(
            fileReader.result as string
          ).join('\n')
          if (text) {
            observer.next(text)
            observer.complete()
          } else {
            fail(DocumentParserErrors.DocParseError)
          }
        } catch {
          fail(DocumentParserErrors.DocParseError)
        }
      })

      // Without this listener a failed read would never settle the observable.
      fileReader.addEventListener('error', () => {
        fail(DocumentParserErrors.DocParseError)
      })

      fileReader.readAsArrayBuffer(file)

      // Teardown: stop an in-flight read when the subscriber goes away.
      return () => {
        if (fileReader.readyState === FileReader.LOADING) {
          fileReader.abort()
        }
      }
    }

    // Neither a PDF nor a Word extension: report it instead of hanging.
    fail(DocumentParserErrors.InvalidFile)
    return undefined
  })
}

export { parseTextFromFile }
