import DOMPurify from 'dompurify';

/**
 * Removes periods that do not end a sentence, so that a later split on "."
 * does not break inside numbers, abbreviations or titles.
 *
 * The rules run in this order, each over the whole string:
 *  1. a period directly after a digit is dropped ("3." becomes "3");
 *  2. a single word character enclosed by periods loses both periods
 *     (".g." becomes "g", so "e.g." becomes "eg");
 *  3. the period after the letters "Dr" is dropped;
 *  4. the period after the letters "Prof" is dropped.
 *
 * Rules 3 and 4 are case-sensitive and not anchored to word boundaries.
 *
 * @param text Raw input text.
 * @returns The text with the periods described above removed.
 */
function filter(text: string) {
  // Ordered [pattern, replacement] pairs; later rules see the output of earlier ones.
  // TODO: the digit rule is crude and could be improved.
  const rules: Array<[RegExp, string]> = [
    [/(\d)\./g, '$1'],
    [/\.(\w)\./g, '$1'],
    [/(Dr)\./g, '$1'],
    [/(Prof)\./g, '$1'],
  ];

  // Force a real copy of the string rather than a shared reference
  // (Chrome workaround: https://stackoverflow.com/a/31733628).
  const copy = (' ' + text).slice(1);

  return rules.reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), copy);
}

/**
 * Splits text into sentence-like segments.
 *
 * The text is first passed through `filter` to drop periods that do not end a
 * sentence. It is then cut after every "?", "!", ":" or "."; the terminator
 * stays attached to the segment it ends. Any trailing text without a
 * terminator becomes the last segment. Segments that are empty or contain only
 * whitespace are discarded; surviving segments are not trimmed.
 *
 * Splitting is done with a single regular expression rather than by inserting
 * a "#" marker and splitting on it, so a literal "#" in the input (for example
 * "Issue #42") is kept and does not start a new segment.
 *
 * @param text Raw input text.
 * @returns The non-blank segments in their original order.
 */
export function splitSentences(text: string) {
  // `filter` already works on its own copy of the string, so none is made here.
  const filtered = filter(text);

  // First alternative: a (possibly empty) run of non-terminators plus one terminator.
  // Second alternative: whatever is left at the very end without a terminator.
  // `match` yields null when nothing matches (empty input).
  const segments: string[] = filtered.match(/[^?!:.]*[?!:.]|[^?!:.]+$/g) || [];

  // remove empty / whitespace-only segments
  return segments.filter((sentence) => sentence.trim() !== '');
}

/**
 * Counts the sentence-like segments that `splitSentences` finds in the text.
 *
 * @param text Raw input text.
 * @returns The number of segments returned by `splitSentences`.
 */
export function countNumberOfSentences(text: string) {
  const sentences = splitSentences(text);
  return sentences.length;
}

/**
 * Finds which sentence a character offset falls into.
 *
 * The text is split with `splitSentences` and the segment lengths are summed
 * in order; the result is the index of the first segment whose cumulative
 * length exceeds `characterIdx`.
 *
 * Note: the offset is measured against the segments returned by
 * `splitSentences`, not against the raw input. That function removes some
 * periods and drops whitespace-only segments, so offsets can drift from
 * positions in the original string.
 *
 * @param text Raw input text.
 * @param characterIdx Zero-based character offset to look up.
 * @returns The zero-based sentence index, or `undefined` when the offset lies
 *   at or beyond the combined length of all segments (including empty input).
 */
export function getSentenceForCharacter(text: string, characterIdx: number): number | undefined {
  const sentences = splitSentences(text);

  // Running total of characters covered by the segments seen so far.
  let coveredLength = 0;

  for (const [i, sentence] of sentences.entries()) {
    coveredLength += sentence.length;
    if (coveredLength > characterIdx) {
      return i;
    }
  }

  // Offset is past the end of the last segment.
  return undefined;
}

/**
 * Reduces a possibly HTML-bearing string to single-spaced plain text.
 *
 * Steps, in order:
 *  1. remove `<script>…</script>` elements together with their content;
 *  2. replace control characters and zero-width characters with a space;
 *  3. remove every remaining `<…>` tag (the text between tags is kept);
 *  4. collapse each run of whitespace into one space;
 *  5. trim leading and trailing whitespace.
 *
 * Whitespace is collapsed after tags are removed, so a tag that sat between
 * two spaces ("a <br> b") does not leave a double space behind.
 *
 * @param content Input text; a falsy value yields an empty string.
 * @returns The cleaned text.
 */
export const sanitizeContent = (content: string): string => {
  if (!content) return '';

  return (
    content
      // Remove script tags and their content
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      // Replace control characters and zero-width spaces with a plain space
      .replace(/[\u0000-\u001F\u007F-\u009F\u200B-\u200D\uFEFF]/g, ' ')
      // Remove remaining HTML tags
      .replace(/<[^>]*>/g, '')
      // Collapse whitespace runs last, so gaps left by removed tags are merged too
      .replace(/\s+/g, ' ')
      // Trim whitespace
      .trim()
  );
};

/**
 * Returns the text content of an HTML string.
 *
 * The markup is passed through `DOMPurify.sanitize`, assigned to the
 * `innerHTML` of a detached `<div>`, and the element's text is read back.
 * Requires a DOM (`document`) to be available.
 *
 * @param html HTML markup to convert.
 * @returns The element's `textContent`, falling back to `innerText`, or an
 *   empty string when both are empty.
 */
export const stripHtml = (html: string) => {
  const cleanMarkup = DOMPurify.sanitize(html);

  // Detached element: it is never inserted into the document.
  const holder = document.createElement('div');
  holder.innerHTML = cleanMarkup;

  const text = holder.textContent;
  if (text) {
    return text;
  }
  return holder.innerText || '';
};

/**
 * Shortens text to at most `maxLength` characters, appending "..." when
 * something was cut off.
 *
 * @param text Text to shorten; a falsy value yields an empty string.
 * @param maxLength Number of characters to keep before the ellipsis (default 50).
 * @param stripHtmlContent When true (default), the text is passed through
 *   `stripHtml` before it is measured and cut.
 * @returns The shortened text; the ellipsis is not counted in `maxLength`.
 */
export const truncate = (text: string | undefined | null, maxLength: number = 50, stripHtmlContent: boolean = true) => {
  if (!text) {
    return '';
  }

  const plain = stripHtmlContent ? stripHtml(text) : text;
  const head = plain.slice(0, maxLength);

  if (plain.length > maxLength) {
    return head + '...';
  }
  return head;
};
