arrFilterByKeywords.mjs

import map from 'lodash-es/map.js'
import size from 'lodash-es/size.js'
import trim from 'lodash-es/trim.js'
import isearr from './isearr.mjs'
import isestr from './isestr.mjs'
import sep from './sep.mjs'
import strleft from './strleft.mjs'
import strdelleft from './strdelleft.mjs'


/**
 * 對arr陣列內各字串進行關鍵字計算,計算是否含有關鍵字與程度,多關鍵字用空白區隔,必要關鍵字可於字首添加「+」,不要關鍵字可於字首添加「-」
 *
 * Unit Test: {@link https://github.com/yuda-lyu/wsemi/blob/master/test/arrFilterByKeywords.test.mjs Github}
 * @memberOf wsemi
 * @param {Array} arr 輸入要被尋找的字串陣列
 * @param {String|Array} keywords 輸入要尋找的關鍵字字串或陣列
 * @returns {Array} 回傳結果物件陣列,陣列內各物件包含hasKeyword與weight欄位,其中hasKeyword代表有無關鍵字為布林值,weight代表權重為浮點數,其值可大於1
 * @example
 *
 * let arr = [
 *     'abc def xyz',
 *     '測試abc中文mnop',
 *     'Instead of creating yet another opinionated application',
 *     'Node.js module which can be integrated into a larger application',
 * ]
 * let kws = null
 * let r = null
 *
 * kws = 'abc'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def 中文'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 0.25 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def 中文 mnop'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 0.5555555555555557 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def +yet'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def of module -yet'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: true, weight: 1 }
 * // ]
 *
 * kws = '+'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = '-'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = 'def +'
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = ['def', 'of', 'module', '-yet']
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: true, weight: 1 }
 * // ]
 *
 * kws = ['can be', 'def']
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 0.25 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: true, weight: 1 }
 * // ]
 *
 * kws = ['+abc']
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 }
 * // ]
 *
 * kws = ['-abc']
 * r = arrFilterByKeywords(arr, kws)
 * console.log(r)
 * // => [
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: false, weight: 0 },
 * //     { hasKeyword: true, weight: 1 },
 * //     { hasKeyword: true, weight: 1 }
 * // ]
 *
 */
function arrFilterByKeywords(arr, keywords) {

    //check
    if (!isearr(arr)) {
        return []
    }

    //check
    if (!isestr(keywords) && !isearr(keywords)) {
        return []
    }

    //kws
    let kws = null
    if (isestr(keywords)) {
        kws = sep(keywords, ' ') //若為字串則用空白切分出各關鍵字
    }
    else {
        kws = keywords //可支援關鍵字內含空白
    }

    //n
    let n = size(kws)

    function core(c) {

        //for
        let b = false
        let weight = 0
        for (let k = 0; k < n; k++) {
            let kw = kws[k]

            //ekw
            let ekw = kw
            let bInclude = strleft(kw, 1) === '+'
            let bExclude = strleft(kw, 1) === '-'
            if (bInclude || bExclude) {
                ekw = strdelleft(kw, 1)
            }

            //check
            if (trim(ekw) === '') {
                continue
            }

            //bHas
            let bHas = c.indexOf(ekw) >= 0
            // console.log('c', c, ':ekw', ekw, ':bHas', bHas)

            //必有關鍵字
            if (bInclude) {
                if (!bHas) {
                    b = false //若無必有關鍵字, 強制視為找不到
                    weight = 0
                    break
                }
                else {
                    b = true //找到必有關鍵字, 視為找到
                    weight = 1
                    break
                }
            }

            //不能有關鍵字
            if (bExclude) {
                if (bHas) {
                    b = false //找到不能有的關鍵字, 強制視為找不到
                    weight = 0
                    break
                }
                else {
                    b = true //找不到不能有關鍵字, 視為找到
                    weight = 1
                    break
                }
            }

            //含有關建字則依照順序給予權重
            if (bHas) {
                b = true
                let w = 1 - k / n
                w = w ** 2 //非線性遞減, 有第1關鍵字權重1, 只有2+3關鍵字權重也不超過第1關鍵字權重
                weight += w //權重累加可超過1
            }

        }

        return {
            hasKeyword: b,
            weight,
        }
    }

    //rs
    let rs = map(arr, (c) => {
        return core(c)
    })

    return rs
}


export default arrFilterByKeywords