/**
 * Regex-based HTML scrubber.
 *
 * - Tags that are not in the tag whitelist are removed; their text content is kept.
 * - Attributes that are not whitelisted for their tag (or in the `global` list) are dropped.
 * - `<iframe>` is only kept when its `src` starts with an entry of `defaultIframeSourceWhitelist`;
 *   otherwise it is replaced with a short notice.
 *
 * NOTE: this works by pattern matching, not by parsing HTML. It does not decode character
 * references, so an entity-encoded scheme such as `&#106;avascript:` is not recognised. Use a
 * DOM-based sanitiser when the input is actively hostile.
 */
export class HtmlScrubber {

    _tagWhitelist = [];
    _attributeWhitelist = {};
    /** Attribute values starting with a call expression (`word(`) or with `on` are dropped. */
    _attributeValuesBlacklist = /^"?(\w+\(|on)/i;

    /**
     * @param {string[]} [tagWhitelist] tag names to keep; defaults to `defaultTagWhitelist`
     * @param {Object<string, string[]>} [attributeWhitelist] attribute names to keep, keyed by tag
     *   name; the optional `global` key applies to tags without their own entry. Defaults to
     *   `defaultAttributeWhitelist`.
     */
    constructor(tagWhitelist, attributeWhitelist) {
        this.tagWhitelist = tagWhitelist ?? defaultTagWhitelist;
        this.attributeWhitelist = attributeWhitelist ?? defaultAttributeWhitelist;
    }

    get tagWhitelist() { return this._tagWhitelist; }
    /** Ignores anything that is not an array and keeps the current whitelist. */
    set tagWhitelist(newArray) {
        if (Array.isArray(newArray)) this._tagWhitelist = newArray;
    }

    get attributeWhitelist() { return this._attributeWhitelist; }
    /** Ignores anything that is not an object whose enumerable properties are all arrays. */
    set attributeWhitelist(newObject) {
        // `typeof null` is 'object' too; storing null would break every later lookup.
        if (newObject === null || typeof newObject !== 'object') return;
        for (const prop in newObject) {
            if (!Array.isArray(newObject[prop])) return;
        }
        this._attributeWhitelist = newObject;
    }

    /**
     * Escape the characters that are significant in HTML text and attribute values.
     * @param {string} text
     * @returns {string}
     */
    _escapeHtml(text) {
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return text.replace(/[&<>"']/g, (char) => entities[char]);
    }

    /**
     * Split the attributes out of one tag. Shared by the iframe check and the attribute filter so
     * that both always look at exactly the same attributes.
     * @param {string} fullTag the complete tag text, e.g. `<a href="x">`
     * @returns {{ name: string, value: string, rawValue: string }[]} `value` is the text exactly
     *   as written (quotes included); `rawValue` has surrounding quotes and backslashes removed.
     */
    _parseAttributes(fullTag) {
        // Names may contain "-", ":" etc., so `data-src` is never mistaken for `src`.
        // Values are double-quoted, single-quoted, or an unquoted run without whitespace or ">".
        const rxAttribute = /([^\s"'<>\/=]+)\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/g;
        // Drop the closing ">" or "/>" first so it can never become part of an unquoted value.
        const tagInterior = fullTag.replace(/\/?>\s*$/, '');
        return Array.from(tagInterior.matchAll(rxAttribute), ([, name, value]) => ({
            name,
            value,
            rawValue: value.replace(/^\\*["']?|\\*["']?$/g, ''),
        }));
    }

    /**
     * @param {string} rawValue attribute value without its surrounding quotes
     * @returns {boolean} true when the value matches the blacklist or is a script URL
     */
    _isUnsafeValue(rawValue) {
        if (this._attributeValuesBlacklist.test(rawValue)) return true;
        // Whitespace and control characters are removed before the scheme test so that
        // "java\tscript:" is caught as well.
        const compact = rawValue.replace(/[\s\u0000-\u001f]+/g, '');
        return /^(?:java|vb)script:/i.test(compact);
    }

    /**
     * Keep an iframe only when its first `src` attribute starts with a whitelisted prefix.
     * @param {string} fullTag the complete iframe tag text (opening or closing)
     * @returns {string} the sanitised and explicitly closed iframe, a notice naming the rejected
     *   source, or '' when there is no `src` (which includes `</iframe>` closing tags)
     */
    _sanitiseIframe(fullTag) {
        const src = this._parseAttributes(fullTag)
            .find(({ name }) => name === 'src')
            ?.rawValue.trim();
        if (!src) return '';
        const isAllowed = defaultIframeSourceWhitelist.some((prefix) => src.startsWith(prefix));
        if (isAllowed) {
            // Closing tags are dropped above, so close the element here; an unclosed iframe would
            // swallow everything that follows it in the document.
            return `${this._sanitiseAttributes(fullTag, 'iframe')}</iframe>`;
        }
        // Shorten to scheme + host for the notice. The source is untrusted, so escape it.
        const shortSrc = src.replace(/(\..*?\/).*/, '$1');
        return `<div style="font-style: italic; color: grey; padding: 0.5rem 0;">&lt; Videos embedded from "${this._escapeHtml(shortSrc || src)}" not currently allowed &gt;</div>`;
    }

    /**
     * Rebuild a tag with only its whitelisted, safe attributes.
     * @param {string} fullTag the complete tag text
     * @param {string} tagName
     * @returns {string}
     */
    _sanitiseAttributes (fullTag, tagName) {
        const closeTag = /\/>\s*$/.test(fullTag)
            ? '/>'
            : '>';
        // Tag-specific list first, then `global`, then nothing (a custom whitelist may omit both).
        const allowedNames = [this._attributeWhitelist[tagName], this._attributeWhitelist.global]
            .find((list) => Array.isArray(list)) ?? [];
        // A value that opens with a quote but does not end with the same quote would leave an
        // unbalanced quote in the output, letting following text be read as attributes.
        const isWellQuoted = (value) =>
            !/^["']/.test(value) || (value.length > 1 && value.endsWith(value[0]));
        const keptAttributes = this._parseAttributes(fullTag)
            .filter(({ name, value, rawValue }) =>
                allowedNames.includes(name) && isWellQuoted(value) && !this._isUnsafeValue(rawValue))
            .map(({ name, value }) => ` ${name}=${value}`);
        return `<${tagName}${keptAttributes.join('')}${closeTag}`;
    }

    /**
     * Remove comments and scripts, then filter every remaining tag through the whitelists.
     * @param {string} inputString
     * @returns {string}
     */
    _sanitiseTags (inputString) {
        // Alternative 1: a complete tag up to its first ">" (the name stops at "/" so `<br/>` is
        // recognised as `br`). Alternative 2: a "<" that never gets a closing ">".
        const rxFullTag = /\\*?<\/?([^\s>\/]+).*?>|</gs;
        const replacer = (match, tagName) => {
            // A dangling "<" could complete into a tag once the output is embedded in a page.
            if (tagName === undefined) return '&lt;';
            if (this._tagWhitelist.includes(tagName)) {
                return /^\\*<\//.test(match)
                    ? `</${tagName}>`
                    : this._sanitiseAttributes(match, tagName);
            }
            return tagName === 'iframe'
                ? this._sanitiseIframe(match)
                : '';
        };
        return inputString
            .replace(/<!--.*?-->/gs, '') // remove comments
            .replace(/<script.*?<\/script>/gis, '') // remove script tags and inner contents, any case
            .replace(/<(\s)/g, '&lt;$1') // escape less-than signs that do not start a tag
            .replace(rxFullTag, replacer) // replace all other tags and closing tags if not whitelisted
            .replace(/(\s*\n\s*)+/g, '\n'); // remove excessive line breaks
    }

    /**
     * Scrub an html string of non-whitelisted tags and attributes
     * @param {string} inputHtml `null` and `undefined` are treated as an empty string
     * @returns {string}
     */
    clean(inputHtml) {
        if (inputHtml == null) return '';
        return this._sanitiseTags(String(inputHtml));
    }

}

/** Tag names `HtmlScrubber` keeps when its constructor receives no tag whitelist. */
const defaultTagWhitelist = [
    // text containers and inline emphasis
    'p', 'div', 'b', 'i', 'span', 'strong',
    // headings
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    // breaks and rules
    'br', 'hr',
    // images and links
    'image', 'img', 'a',
    // lists
    'ol', 'ul', 'li',
    // tables
    'table', 'th', 'tr', 'td',
    // media
    'audio', 'video', 'source',
    // code samples
    'code', 'pre',
];

/**
 * URL prefixes an iframe `src` must start with for `HtmlScrubber` to keep the iframe.
 * The YouTube prefix ('https://www.youtube.com/') exists only as a disabled entry, so YouTube
 * embeds are currently rejected.
 */
const defaultIframeSourceWhitelist = ['https://www.loom.com/'];

/**
 * Attribute names `HtmlScrubber` keeps when its constructor receives no attribute whitelist,
 * keyed by tag name. `global` is used for every tag that has no entry of its own.
 */
const defaultAttributeWhitelist = {
    global: [ 'style' ],
    // `alt` is allowed here as well as on `image`, so alternative text is not stripped from <img>.
    img: [ 'src', 'width', 'height', 'style', 'alt' ],
    image: [ 'src', 'width', 'height', 'style', 'alt' ],
    a: [ 'href', 'title', 'target', 'rel' ],
    audio: ['src', 'controls', 'height', 'width'],
    video: ['src', 'controls', 'height', 'width'],
    source: ['src', 'type'],
    iframe: ['src', 'allowfullscreen', 'width', 'height']
};
