import DOMPurify from 'dompurify'

/**
 * Allow-list of node names that may remain as markup.
 *
 * Used in two places in this module: the regex pre-pass compares extracted
 * tag names against it, and `sanitize` hands it to DOMPurify as
 * `ALLOWED_TAGS`. The list covers basic text formatting, headings, lists and
 * tables.
 */
const allowedTags = [
  'b',
  'body',
  'caption',
  'col',
  'colgroup',
  'del',
  'em',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'html',
  'i',
  'ins',
  'li',
  'mark',
  'ol',
  'p',
  'small',
  'span',
  'strong',
  'sub',
  'sup',
  'table',
  'tbody',
  'td',
  'tfoot',
  'th',
  'thead',
  'tr',
  'ul',
  // DOMPurify checks text nodes against this list under the node name
  // '#text'. It has to be listed explicitly here so that the text inside the
  // allowed tags is kept (sanitize() runs DOMPurify with KEEP_CONTENT: false).
  '#text',
]

/**
 * Attribute names handed to DOMPurify as `ALLOWED_ATTR` by `sanitize`
 * (the table cell spanning attributes).
 */
const allowedAttributes = ['colspan', 'rowspan']

/**
 * Pre-processes raw text so that only allow-listed tags remain as markup.
 *
 * Every `<…>` span (or an unterminated `<…` that runs to the end of the
 * input) is inspected. If it opens or closes a tag listed in `allowedTags`
 * it is returned untouched; otherwise its `<` and `>` characters are replaced
 * with `&lt;` / `&gt;`, so the span shows up as visible text instead of being
 * parsed as HTML.
 *
 * The tag name is read the way HTML reads it: case-insensitively, and only
 * when the name is terminated by whitespace, `/`, `>` or the end of the span.
 * So `<B>` counts as the allowed `b`, while `<b-x>` or `<b:x>` are different
 * (unknown) tags and get escaped.
 *
 * @param text - Raw input that may mix markup and plain text.
 * @returns The input with every non-allow-listed `<…>` span escaped.
 */
const escapeInvalidTagsButKeepContent = (text: string): string => {
  // Captures the name after `<` or `</`. The lookahead rejects names that
  // merely start with an allowed name (e.g. `b-x`), which a plain `\b` would
  // have accepted as `b`.
  const tagNamePattern = /^<\/?([a-zA-Z][a-zA-Z0-9]*)(?=[\t\n\f\r />]|$)/

  // `[^>]*` also swallows any further `<`, so a stray `<` is grouped with
  // everything up to the next `>`. The optional `>` makes an unterminated
  // `<…` at the end of the input match as well.
  return text.replace(/<[^>]*>?/g, (match) => {
    const tagNameMatch = tagNamePattern.exec(match)
    const tagName = tagNameMatch ? tagNameMatch[1] : null

    // No recognisable name, or a name outside the allow-list: neutralise the
    // angle brackets but keep every other character of the span.
    if (!tagName || !allowedTags.includes(tagName.toLowerCase())) {
      return match.replace(/</g, '&lt;').replace(/>/g, '&gt;')
    }

    // Allowed tag: hand it on unchanged (attributes included).
    return match
  })
}

/**
 * Cleans a piece of markup in two passes.
 *
 * First the regex pre-pass escapes every tag that is not on the allow-list,
 * so it ends up as visible text. The result is then run through DOMPurify
 * with the module's tag and attribute allow-lists.
 *
 * @param text - Raw input that may mix markup and plain text.
 * @returns DOMPurify's output for the pre-escaped input.
 */
const sanitize = (text: string) =>
  DOMPurify.sanitize(escapeInvalidTagsButKeepContent(text), {
    ALLOWED_TAGS: allowedTags,
    ALLOWED_ATTR: allowedAttributes,
    // Not strictly required after the pre-pass; kept as a safety net in case
    // a tag slips through the custom escaping.
    KEEP_CONTENT: false,
  })

export { allowedAttributes, allowedTags, sanitize }
