html2str.mjs

import htmlparser from 'htmlparser' //要能於nodejs下運行, 故不使用瀏覽器動態加載
import each from 'lodash-es/each.js'
import join from 'lodash-es/join.js'
import size from 'lodash-es/size.js'
import isestr from './isestr.mjs'
import getGlobal from './getGlobal.mjs'


/**
 * Pick the htmlparser library object to use.
 *
 * Candidates are tried in this order and the first truthy one is returned:
 *   1. the statically imported `htmlparser` module,
 *   2. `htmlparser` on the object returned by getGlobal(),
 *   3. `Tautologistics.NodeHtmlParser` on that same object.
 *
 * The third candidate is read without a guard, so a TypeError is thrown when
 * neither of the first two is truthy and `Tautologistics` is missing.
 *
 * @returns {*} the selected htmlparser library object
 */
function getHtmlparser() {
    let globalObj = getGlobal()

    //prefer the module that was imported statically
    if (htmlparser) {
        return htmlparser
    }

    //next, a copy exposed on the global object under the same name
    let fromGlobal = globalObj.htmlparser
    if (fromGlobal) {
        return fromGlobal
    }

    //last resort: the Tautologistics namespace on the global object
    //note: no `.default` unwrapping is applied to the result
    return globalObj.Tautologistics.NodeHtmlParser
}


/**
 * Convert an HTML string to plain text.
 *
 * The input is parsed with htmlparser and the resulting DOM is walked depth-first.
 * Only nodes whose `type` is `'text'` contribute to the output; their `data` values
 * are concatenated in traversal order with no separator, so whitespace between tags
 * is kept exactly as the parser reports it.
 *
 * Unit Test: {@link https://github.com/yuda-lyu/wsemi/blob/master/test/html2str.test.mjs Github}
 * @memberOf wsemi
 * @param {String} html input HTML string
 * @returns {String} plain text string; an empty string when `isestr(html)` is false
 * (presumably: input is not a non-empty string). If the parser handler reports an
 * error, that error value is returned as-is instead of a string.
 * @example
 *
 * let h = `
 * <!DOCTYPE html>
 * <html>
 *
 * <body>
 *     <h1>My First Heading</h1>
 *     <p>My first paragraph.</p>
 * </body>
 *
 * </html>
 * `
 *
 * let c = html2str(h)
 * console.log(c)
 * // =>
 * //
 * //
 * //
 * //
 * //     My First Heading
 * //     My first paragraph.
 * //
 * //
 * //
 * //
 *
 */
function html2str(html) {

    //nothing to extract unless the input passes the string check
    if (!isestr(html)) {
        return ''
    }

    //resolve the parser library
    let lib = getHtmlparser()

    //the handler builds the dom and reports any error through this callback
    let parseError = null
    let domHandler = new lib.DefaultHandler((error) => {
        if (error) {
            parseError = error
        }
    })

    //parse the whole input in one go
    let htmlParser = new lib.Parser(domHandler)
    htmlParser.parseComplete(html)

    //a reported error is handed back to the caller unchanged
    if (parseError !== null) {
        return parseError
    }

    //collect the data of every text node, descending into children first
    let pieces = []
    let walk = (nodes) => {
        each(nodes, (node) => {
            if (size(node.children) > 0) {
                walk(node.children)
            }
            if (node.type === 'text') {
                pieces.push(node.data)
            }
        })
    }
    walk(domHandler.dom)

    return join(pieces, '')
}


export default html2str