import { anselToUtf8Map } from '@/logic/gedcom/anselToUtf8Map'

/**
 * Converts the raw bytes of a GEDCOM file into a JavaScript string.
 *
 * The bytes are first decoded as UTF-8 so that the `1 CHAR <name>` header
 * line can be located. The declared character set then selects the decoder
 * that produces the returned text.
 *
 * @param {ArrayBuffer|ArrayLike<number>} source - The raw file content (byte array).
 * @returns {string} The decoded text.
 */
export function convertToUtf8(source) {
  const binary = new Uint8Array(source)
  const testConversion = new TextDecoder('utf-8').decode(binary)
  const match = testConversion.match(/1 CHAR (?<characterSet>\b[\w-]*\b)/)
  // `match` is null when the file has no `1 CHAR` line; fall back to
  // 'UNSPECIFIED' instead of throwing on `null.groups`.
  const characterSet = match && match.groups ? match.groups.characterSet : 'UNSPECIFIED'
  // UTF-8 encoded files may start with a byte order mark (BOM): bytes 239,187,191 (EF,BB,BF).
  const hasUTF8BOM = binary[0] === 239 && binary[1] === 187 && binary[2] === 191
  console.info(`Character set ${characterSet} found.`)

  if (characterSet !== 'UTF-8' && hasUTF8BOM) {
    console.warn(`UTF-8 BOM present; however, the character set, ${characterSet} is not UTF-8.`)
  }

  switch (characterSet) {
    case 'ANSEL':
      return processAnselSource(binary, hasUTF8BOM)
    case 'ANSI':
    case 'ASCII':
      return new TextDecoder('windows-1252').decode(binary)
    case 'UNICODE':
      // NOTE(review): 'UNICODE' is handled exactly like UTF-8 here. If files
      // declaring UNICODE can actually be UTF-16, this needs its own decoder — confirm.
    case 'UTF-8':
      // The test conversion above already decoded the bytes as UTF-8.
      return testConversion
    default:
      console.warn(`Unrecognized character set ${characterSet}. Will assume UTF-8.`)
      return testConversion
  }
}

/**
 * Decodes a byte sequence into a string using `anselToUtf8Map`.
 *
 * Per-byte handling:
 * - `<= 0x7F`: emitted directly via `String.fromCharCode`.
 * - `0x88`–`0xCF`: looked up by its upper-case hex key; unmapped bytes
 *   yield the map's `ERR` entry.
 * - `>= 0xE0`: the longest matching key of 3, 2 or 1 bytes (hex values joined
 *   with `+`) is used and that many bytes are consumed; if nothing matches,
 *   the `ERR` entry is emitted and a single byte is consumed.
 * - Any other byte (`0x80`–`0x87`, `0xD0`–`0xDF`) contributes nothing.
 *
 * @param {Uint8Array} binary - The bytes to decode.
 * @param {boolean} hasUTF8BOM - When true, the first three bytes are skipped.
 * @returns {string} The decoded text.
 */
export function processAnselSource(binary, hasUTF8BOM) {
  const errorChar = anselToUtf8Map['ERR']
  const pieces = []

  // If there is a UTF-8 BOM at the beginning of the file (there shouldn't be), skip it.
  let n = hasUTF8BOM ? 3 : 0

  while (n < binary.length) {
    const byte = binary[n]
    let char = ''
    let jump = 1

    if (byte <= 0x7F) {
      char = String.fromCharCode(byte)
    } else if (byte >= 0x88 && byte <= 0xCF) {
      char = anselToUtf8Map[buildKey(byte)] || errorChar
    } else if (byte >= 0xE0) {
      // Default to the error marker; it is only replaced when a key matches,
      // so a failed lookup can never leak `undefined` into the output.
      char = errorChar

      // Try the longest key first so multi-byte sequences win over their prefix.
      for (const span of [3, 2, 1]) {
        const longkey = Array.from(binary.slice(n, n + span), x => buildKey(x)).join('+')
        const mapped = anselToUtf8Map[longkey]

        if (mapped) {
          char = mapped
          jump = span
          break
        }
      }
    }

    pieces.push(char)
    n += jump
  }

  return pieces.join('')
}

/**
 * Builds the lookup-key fragment for a single byte: its value written in
 * upper-case hexadecimal, without zero padding.
 *
 * @param {number} byte - The byte value to format.
 * @returns {string} Upper-case hex representation of `byte`.
 */
export function buildKey(byte) {
  const hex = byte.toString(16)
  return hex.toUpperCase()
}
