histGen.mjs

import get from 'lodash-es/get.js'
import map from 'lodash-es/map.js'
import min from 'lodash-es/min.js'
import max from 'lodash-es/max.js'
import sum from 'lodash-es/sum.js'
import size from 'lodash-es/size.js'
import floor from 'lodash-es/floor.js'
import round from 'lodash-es/round.js'
import ceil from 'lodash-es/ceil.js'
import ispint from 'wsemi/src/ispint.mjs'
import isfun from 'wsemi/src/isfun.mjs'
import ispm from 'wsemi/src/ispm.mjs'
import cdbl from 'wsemi/src/cdbl.mjs'
import cint from 'wsemi/src/cint.mjs'
import arrFilterByNum from 'wsemi/src/arrFilterByNum.mjs'
import rang from 'wsemi/src/rang.mjs'
import bin from './bin.mjs'
import isnum from 'wsemi/src/isnum.mjs'


/**
 * 數據陣列進行直方圖分組,並基於外部提供函數產生擬合數據
 *
 * Unit Test: {@link https://github.com/yuda-lyu/w-statistic/blob/master/test/arrGammaInv.test.js Github}
 * @memberOf w-statistic
 * @param {Array} arr 輸入陣列,只提取有效數字(或為字串的數字)進行計算
 * @param {Function} fun 輸入產生擬合函數
 * @param {Object} [opt={}] 輸入設定物件,預設{}
 * @param {Number} [opt.dx=null] 輸入直方圖分組(x軸)寬度數字,預設null
 * @param {Integer} [opt.n=30] 輸入直方圖分組(x軸)數量整數,預設30
 * @param {Integer} [opt.nCurve=100] 輸入擬合用陣列數據長度整數,預設100
 * @param {Number} [opt.min=null] 輸入直方圖分組(x軸)最小值數字,若不給則使用數據最小值,預設null
 * @param {Number} [opt.max=null] 輸入直方圖分組(x軸)最大值數字,若不給則使用數據最大值,預設null
 * @returns {Promise} 回傳Promise,若成功則resolve數據分組與統計數據物件,若失敗則reject錯誤訊息
 * @example
 *
 * async function test() {
 *
 *     let arr
 *     let opt
 *     let r
 *
 *     arr = [13.8, 19.5, 16.7, 14.3, 17.5, 14.5, 12.6, 15.6, 8.9, 13.3, 17.1, 15, 16.3, 18.3, 18.7, 16.7, 20.9, 12.3, 22.3, 14.6, 20.1, 16.7, 16.8, 24.2, 21.2, 20.7, 18.6, 18.3, 15.9, 17.2, 18.8, 16.3, 20.5, 14.7, 17.5, 24.1, 14.6, 21.1, 17, 19.6, 21.2, 16, 16.5, 9.3, 16.2, 14.9, 16.3, 13.9, 18.4, 19.2, 24.6, 20.2, 12.8, 12.9, 14.1, 18.5, 13.4, 17.1, 9.9, 21.9, 19.6, 18.3, 19.7, 16.6, 21.7, 11.8, 15.9, 11.4, 14.3, 17.4, 16.4, 20.7, 18.6, 22, 17.9, 16.3, 15.4, 17.2, 17.3, 18.3, 16.8, 20.9, 15, 15.1, 18.6, 17.5, 14.7, 17.4, 13.8, 18.3, 17.2, 13.4, 12.5, 22.8, 18.6, 16.3, 16.1, 15.7, 20.8, 16.4]
 *     opt = {
 *         n: 10,
 *         min: 12,
 *         max: 23,
 *     }
 *     r = await histGen(arr, (params) => {
 *         // console.log('params', params)
 *         let avg = arrAverage(params.arr)
 *         let std = arrStd(params.arr)
 *         let ry = (std * Math.sqrt(2 * Math.PI))
 *         let curveY = params.curveX.map((x) => {
 *             let y = 0
 *             if (ry !== 0) {
 *                 y = 1 / ry * Math.exp(-((x - avg) ** 2) / (2 * std ** 2))
 *             }
 *             return y
 *         })
 *         return curveY
 *     }, opt)
 *     // let c = ''
 *     // r.curveX.forEach((v, k) => {
 *     //     let x = r.curveX[k]
 *     //     let y = r.curveY[k]
 *     //     c += `${x}, ${y}\n`
 *     // })
 *     // console.log(c)
 *     console.log(r)
 *     // => {
 *     //   arr: [
 *     //     13.8, 19.5, 16.7, 14.3, 17.5, 14.5, 12.6, 15.6,  8.9, 13.3,
 *     //     17.1,   15, 16.3, 18.3, 18.7, 16.7, 20.9, 12.3, 22.3, 14.6,
 *     //     20.1, 16.7, 16.8, 24.2, 21.2, 20.7, 18.6, 18.3, 15.9, 17.2,
 *     //     18.8, 16.3, 20.5, 14.7, 17.5, 24.1, 14.6, 21.1,   17, 19.6,
 *     //     21.2,   16, 16.5,  9.3, 16.2, 14.9, 16.3, 13.9, 18.4, 19.2,
 *     //     24.6, 20.2, 12.8, 12.9, 14.1, 18.5, 13.4, 17.1,  9.9, 21.9,
 *     //     19.6, 18.3, 19.7, 16.6, 21.7, 11.8, 15.9, 11.4, 14.3, 17.4,
 *     //     16.4, 20.7, 18.6,   22, 17.9, 16.3, 15.4, 17.2, 17.3, 18.3,
 *     //     16.8, 20.9,   15, 15.1, 18.6, 17.5, 14.7, 17.4, 13.8, 18.3,
 *     //     17.2, 13.4, 12.5, 22.8, 18.6, 16.3, 16.1, 15.7, 20.8, 16.4
 *     //   ],
 *     //   min: 12,
 *     //   max: 23.000000000000007,
 *     //   barWidth: 1.0999999999999996,
 *     //   bins: [
 *     //     { min: 12, max: 13.1, avg: 12.55, arr: [Array], counts: 5 },
 *     //     {
 *     //       min: 13.1,
 *     //       max: 14.2,
 *     //       avg: 13.649999999999999,
 *     //       arr: [Array],
 *     //       counts: 7
 *     //     },
 *     //     {
 *     //       min: 14.2,
 *     //       max: 15.299999999999999,
 *     //       avg: 14.75,
 *     //       arr: [Array],
 *     //       counts: 11
 *     //     },
 *     //     {
 *     //       min: 15.299999999999999,
 *     //       max: 16.4,
 *     //       avg: 15.849999999999998,
 *     //       arr: [Array],
 *     //       counts: 15
 *     //     },
 *     //     { min: 16.4, max: 17.5, avg: 16.95, arr: [Array], counts: 19 },
 *     //     { min: 17.5, max: 18.6, avg: 18.05, arr: [Array], counts: 12 },
 *     //     {
 *     //       min: 18.6,
 *     //       max: 19.700000000000003,
 *     //       avg: 19.150000000000002,
 *     //       arr: [Array],
 *     //       counts: 7
 *     //     },
 *     //     {
 *     //       min: 19.700000000000003,
 *     //       max: 20.800000000000004,
 *     //       avg: 20.250000000000004,
 *     //       arr: [Array],
 *     //       counts: 6
 *     //     },
 *     //     {
 *     //       min: 20.800000000000004,
 *     //       max: 21.900000000000006,
 *     //       avg: 21.350000000000005,
 *     //       arr: [Array],
 *     //       counts: 7
 *     //     },
 *     //     {
 *     //       min: 21.900000000000006,
 *     //       max: 23.000000000000007,
 *     //       avg: 22.450000000000006,
 *     //       arr: [Array],
 *     //       counts: 3
 *     //     }
 *     //   ],
 *     //   counts: [
 *     //      5, 7, 11, 15, 19,
 *     //     12, 7,  6,  7,  3
 *     //   ],
 *     //   pdfs: [
 *     //     0.04940711462450595,
 *     //     0.06916996047430833,
 *     //     0.10869565217391308,
 *     //     0.14822134387351785,
 *     //     0.1877470355731226,
 *     //     0.11857707509881428,
 *     //     0.06916996047430833,
 *     //     0.05928853754940714,
 *     //     0.06916996047430833,
 *     //     0.02964426877470357
 *     //   ],
 *     //   ratioForCountToPdf: 0.00988142292490119,
 *     //   curveX: [
 *     //                     12,              12.11, 12.219999999999999,
 *     //     12.329999999999998, 12.439999999999998, 12.549999999999997,
 *     //     12.659999999999997, 12.769999999999996, 12.879999999999995,
 *     //     12.989999999999995, 13.099999999999994, 13.209999999999994,
 *     //     13.319999999999993, 13.429999999999993, 13.539999999999992,
 *     //     13.649999999999991, 13.759999999999991,  13.86999999999999,
 *     //      13.97999999999999,  14.08999999999999, 14.199999999999989,
 *     //     14.309999999999988, 14.419999999999987, 14.529999999999987,
 *     //     14.639999999999986, 14.749999999999986, 14.859999999999985,
 *     //     14.969999999999985, 15.079999999999984, 15.189999999999984,
 *     //     15.299999999999983, 15.409999999999982, 15.519999999999982,
 *     //     15.629999999999981,  15.73999999999998,  15.84999999999998,
 *     //      15.95999999999998,  16.06999999999998,  16.17999999999998,
 *     //     16.289999999999978, 16.399999999999977, 16.509999999999977,
 *     //     16.619999999999976, 16.729999999999976, 16.839999999999975,
 *     //     16.949999999999974, 17.059999999999974, 17.169999999999973,
 *     //     17.279999999999973, 17.389999999999972,  17.49999999999997,
 *     //      17.60999999999997,  17.71999999999997,  17.82999999999997,
 *     //      17.93999999999997,  18.04999999999997, 18.159999999999968,
 *     //     18.269999999999968, 18.379999999999967, 18.489999999999966,
 *     //     18.599999999999966, 18.709999999999965, 18.819999999999965,
 *     //     18.929999999999964, 19.039999999999964, 19.149999999999963,
 *     //     19.259999999999962, 19.369999999999962,  19.47999999999996,
 *     //      19.58999999999996,  19.69999999999996,  19.80999999999996,
 *     //      19.91999999999996,  20.02999999999996, 20.139999999999958,
 *     //     20.249999999999957, 20.359999999999957, 20.469999999999956,
 *     //     20.579999999999956, 20.689999999999955, 20.799999999999955,
 *     //     20.909999999999954, 21.019999999999953, 21.129999999999953,
 *     //     21.239999999999952,  21.34999999999995,  21.45999999999995,
 *     //      21.56999999999995,  21.67999999999995,  21.78999999999995,
 *     //      21.89999999999995,  22.00999999999995, 22.119999999999948,
 *     //     22.229999999999947, 22.339999999999947, 22.449999999999946,
 *     //     22.559999999999945, 22.669999999999945, 22.779999999999944,
 *     //     22.889999999999944,
 *     //     ... 1 more item
 *     //   ],
 *     //   curveY: [
 *     //      0.03539347632001737,    0.037383482855803, 0.039437822053457874,
 *     //      0.04155494476232438, 0.043732984782065484,  0.04596975107654172,
 *     //      0.04826272158141871,  0.05060903873486474, 0.053005506852505095,
 *     //      0.05544859145799284, 0.057934420669161386,  0.06045878872679094,
 *     //      0.06301716173861893,  0.06560468569544393,  0.06821619679912554,
 *     //      0.07084623412410286,  0.07348905461489254,  0.07613865040205554,
 *     //      0.07878876839852786,  0.08143293211719617,  0.08406446562938243,
 *     //      0.08667651956270586,   0.0892620990158537,  0.09181409324734967,
 *     //      0.09432530697570551,  0.09678849310961433,  0.09919638670933023,
 *     //      0.10154173996430707,  0.10381735795775265,  0.10601613497619995,
 *     //      0.10813109111168648,  0.11015540889583084,  0.11208246969914742,
 *     //      0.11390588962545672,  0.11561955463032393,  0.11721765459414842,
 *     //      0.11869471608486172,    0.120045633552169,  0.12126569870485275,
 *     //      0.12235062783478269,  0.12329658686584205,  0.12410021392285919,
 *     //        0.124758639234666,  0.12526950220640412,   0.1256309655189545,
 *     //      0.12584172613764152,   0.1259010231379035,  0.12580864228215807,
 *     //      0.12556491730933672,  0.12517072792622605,  0.12462749451753433,
 *     //      0.12393716961919929,   0.1231022262265693,  0.12212564303543284,
 *     //       0.1210108867391587,  0.11976189152917438,  0.11838303596840025,
 *     //      0.11687911742784585,  0.11525532429515117,  0.11351720618024314,
 *     //      0.11167064235732105,  0.10972180869396257,  0.10767714332716188,
 *     //      0.10554331135251384,  0.10332716879651553,   0.1010357261430725,
 *     //      0.09867611168380674,  0.09625553495773567,  0.09378125053941684,
 *     //       0.0912605224258574,  0.08870058926151618,   0.0861086306277478,
 *     //      0.08349173460824426,  0.08085686682562278,  0.07821084112651096,
 *     //       0.0755602920735203,  0.07291164938261302,  0.07027111442379702,
 *     //      0.06764463888206981,  0.06503790565431186,  0.06245631203663879,
 *     //      0.05990495523578081, 0.057388620217582655, 0.054911769885907534,
 *     //      0.05247853756626619, 0.050092721750548005,  0.04775778304244922,
 *     //     0.045476843227705024,  0.04325268637914335,  0.04108776189397607,
 *     //      0.03898418934969129,  0.03694376505545445,  0.03496797016808652,
 *     //      0.03305798023546953, 0.031214676025614625, 0.029438655496583702,
 *     //     0.027730246760929483, 0.026089521898245954,  0.02451631147072182,
 *     //     0.023010219599172477,
 *     //     ... 1 more item
 *     //   ]
 *     // }
 *
 * }
 * test()
 *     .catch((err) => {
 *         console.log(err)
 *     })
 *
 */
async function histGen(arr, fun, opt = {}) {

    //check fun
    if (!isfun(fun)) {
        throw new Error(`invalid fun`)
    }

    //arrFilterByNum
    let rs = arrFilterByNum(arr)

    //check rs
    if (size(rs) === 0) {
        return null
    }

    //nCurve
    let nCurve = get(opt, 'nCurve')
    if (!ispint(nCurve)) {
        nCurve = 100
    }
    nCurve = cint(nCurve)
    // console.log('nCurve', nCurve)

    //dx, n, rmin, rmax
    let dx
    let n
    let rmin
    let rmax

    //dx
    dx = get(opt, 'dx', null)
    // console.log('dx', dx)

    //check
    if (isnum(dx)) {

        //cdbl
        dx = cdbl(dx)

        //dataMin, dataMax
        let dataMin = min(rs)
        let dataMax = max(rs)

        //rmin
        rmin = floor(dataMin / dx) * dx
        // console.log('rmin', rmin)

        //rmax
        rmax = ceil(dataMax / dx) * dx
        // console.log('rmax', rmax)

        //n
        n = round(rmax - rmin) / dx
        // console.log('n', n)

    }
    else {
        //若不提供dx, 則需讀取n, min, max, 並由直方圖數據取得當前dx

        //n
        n = get(opt, 'n')
        if (!ispint(n)) {
            n = 30
        }
        n = cint(n)

        //rmin
        rmin = get(opt, 'min', null)
        // console.log('rmin', rmin)

        //rmax
        rmax = get(opt, 'max', null)
        // console.log('rmax', rmax)

    }

    //bs
    let bs = bin(rs, n, { min: rmin, max: rmax })
    // console.log('bs', bs)

    //由直方圖數據取得當前dx
    if (!isnum(dx)) {

        //dx
        dx = get(bs, '0.max') - get(bs, '0.min')
        // console.log('dx', dx)

    }

    //add counts
    bs = map(bs, (v) => {
        v.counts = size(v.arr)
        return v
    })
    // console.log('bs', bs)

    //barMin, barMax
    let rsTemp = [...map(bs, 'min'), ...map(bs, 'max')]
    let barMin = min(rsTemp)
    let barMax = max(rsTemp)
    // console.log('barMin', barMin, 'barMax', barMax)

    //counts
    let counts = map(bs, 'counts')
    // console.log('counts', counts)

    //r
    let r = sum(counts) * dx //dx大於0, 一定為有效陣列, r必定>0
    // console.log('r', r, 'dx', dx)

    //pdfs
    let pdfs = map(counts, (v) => {
        return v / r //r必定>0不用檢核
    })

    //curveX
    let curveX = rang(barMin, barMax, nCurve)
    // console.log('curveX', curveX)

    //curveY
    let curveY = fun({
        arr: rs,
        min: barMin,
        max: barMax,
        barWidth: dx,
        bins: bs,
        counts,
        pdfs,
        curveX,
    })
    if (ispm(curveY)) {
        curveY = await curveY
    }
    // console.log('curveY', curveY)

    return {
        arr: rs,
        min: barMin,
        max: barMax,
        barWidth: dx,
        bins: bs,
        counts,
        pdfs,
        ratioForCountToPdf: 1 / r,
        curveX,
        curveY,
    }
}


export default histGen