// strDiff.mjs

import * as Diff from 'diff'
import get from 'lodash-es/get.js'
import split from 'lodash-es/split.js'
import take from 'lodash-es/take.js'
import size from 'lodash-es/size.js'
import isstr from './isstr.mjs'
import isbol from './isbol.mjs'
// console.log('Diff', Diff)


/**
 * Compare an old text with a new text line by line and report the differences.
 *
 * The texts are diffed with `Diff.diffLines`. Each resulting chunk is expanded into
 * one entry per line. A removed chunk that is immediately followed by an added chunk
 * is treated as a replacement: its lines are paired in order and reported as `modify`;
 * surplus lines on either side are reported as plain `remove` or `add`. A removed chunk
 * is never paired with an added chunk that is separated from it by unchanged lines.
 *
 * Every entry of `dfs` has the shape `{ p, vo, vn }`:
 * - `p: ''`       unchanged line, text in `vo`, `vn` is `''`
 * - `p: 'remove'` removed line, old text in `vo`, `vn` is `''`
 * - `p: 'add'`    added line, new text in `vo` (not in `vn`), `vn` is `''`
 * - `p: 'modify'` replaced line, old text in `vo`, new text in `vn`
 *
 * Unit Test: {@link https://github.com/yuda-lyu/wsemi/blob/master/test/strDiff.test.mjs Github}
 * @memberOf wsemi
 * @param {String} strOld Original text
 * @param {String} strNew Updated text
 * @param {Object} [opt={}] Settings object, default {}
 * @param {Boolean} [opt.eliminateEndLine=false] Accepted for backward compatibility only; this implementation does not read it. The line terminator at the end of a chunk does not create an extra entry because each chunk is trimmed to its reported line count.
 * @returns {Object} Object with keys `diff` (the raw array of change objects returned by `Diff.diffLines`) and `dfs` (array of per-line results). An empty object `{}` is returned when either input is not a string or when both inputs are empty strings.
 * @example
 *
 * let r
 *
 * r = strDiff('test中文', '')
 * console.log(r)
 * // => {
 * //   diff: [ { count: 1, added: undefined, removed: true, value: 'test中文' } ],
 * //   dfs: [ { p: 'remove', vo: 'test中文', vn: '' } ]
 * // }
 *
 * r = strDiff('test中文1\ntest中文2', '')
 * console.log(r)
 * // => {
 * //   diff: [
 * //     {
 * //       count: 2,
 * //       added: undefined,
 * //       removed: true,
 * //       value: 'test中文1\ntest中文2'
 * //     }
 * //   ],
 * //   dfs: [
 * //     { p: 'remove', vo: 'test中文1', vn: '' },
 * //     { p: 'remove', vo: 'test中文2', vn: '' }
 * //   ]
 * // }
 *
 * r = strDiff('', 'test中文')
 * console.log(r)
 * // => {
 * //   diff: [ { count: 1, added: true, removed: undefined, value: 'test中文' } ],
 * //   dfs: [ { p: 'add', vo: 'test中文', vn: '' } ]
 * // }
 *
 * r = strDiff('', 'test中文1\ntest中文2')
 * console.log(r)
 * // => {
 * //   diff: [
 * //     {
 * //       count: 2,
 * //       added: true,
 * //       removed: undefined,
 * //       value: 'test中文1\ntest中文2'
 * //     }
 * //   ],
 * //   dfs: [
 * //     { p: 'add', vo: 'test中文1', vn: '' },
 * //     { p: 'add', vo: 'test中文2', vn: '' }
 * //   ]
 * // }
 *
 * let tab1 = `1|0.974848293|0.791303871|0.716898185|0.506002098|0.137888903|0.626724085
 * 2|-0.529408622|0.839882385|0.663059856|0.49047221|0.395763265|0.866151835
 * 3|-0.10320217|0.475514539|0.969205779|0.711250309|0.153847069|0.410092395
 * 4|-0.121479865|0.486179086|0.481023842|0.467410582|0.42602231|0.849701641
 * 5|0.757346003|0.697242433|0.67532802|0.174644416|0.045652267|0.397104668
 * 6|0.663032731|0.259252779|0.566177431|0.679637706|0.377814487|0.400248119
 * 7|0.72721374|0.263793391|0.167895215|0.794808602|0.107070584|0.011822872
 * 8|0.247416664|0.360426795|0.014346373|0.000469616|0.4082693|0.913806611
 * 9|0.345880037|0.167996664|0.711054429|0.363177921|0.206849994|0.636855344
 * 10|0.576739457|0.324665077|0.973218005|0.883927423|0.176906125|0.20087887
 * 11|a1
 * 12|a2
 * 13|0.504421248|0.984003751|0.32549507|0.987090751|0.192745589|0.735133561
 * 14|0.273214614|0.083431884|0.565146092|0.935388666|0.637675154|0.523815661
 *
 * `
 * let tab2 = `1|0.974848293|0.791303871|0.716898185|0.506002098|0.137888903|0.626724085
 * 2|-0.529408622|a0.839882385|0.663059856|0.49047221|0.395763265|0.866151835
 * 3|-0.10320217|0.475514539|0.969205779|0.711250309|0.153847069|0.410092395
 * 4|-0.121479865|0.486179086|0.481023842|0.467410582|0.42602231|0.849701641
 * 5|0.757346003|0.697242433|0.67532802|0.174644416|0.045652267|0.397104668
 * 7|0.72721374|0.263793391|0.167895215|0.794808602|0.107070584|0.011822872
 * 8|0.247416664|0.360426795|0.014346373|0.000469616|0.4082693|0.913806611
 * 9|0.345880037|0.167996664|0.711054429|0.363173478|0.636855344
 * 10|0.576739457|0.324665077|0.973218005|0.883927423|0.176906125|0.20087887
 * 13|0.504421248|0.984003751|0.32549507|0.987090751|0.192745589|0.735133561
 * 14|0.273214614|0.083431884|0.565146092|0.935388666|0.637675154|0.523815661
 * n1|0.944492151|0.89950443|0.182709318|0.892820757|0.709746901|0.097385354
 * n2|0.769805921|0.061355308|0.314826137|0.855857651|0.653550539|0.772500773
 * n3|0.158739038|0.085078711|0.844664253|0.21630142|0.912931341|0.735138313
 *
 *
 * `
 * r = strDiff(tab1, tab2)
 * console.log(r)
 * // => {
 * //   diff: [
 * //     {
 * //       count: 1,
 * //       value: '1|0.974848293|0.791303871|0.716898185|0.506002098|0.137888903|0.626724085\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       added: undefined,
 * //       removed: true,
 * //       value: '2|-0.529408622|0.839882385|0.663059856|0.49047221|0.395763265|0.866151835\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       added: true,
 * //       removed: undefined,
 * //       value: '2|-0.529408622|a0.839882385|0.663059856|0.49047221|0.395763265|0.866151835\n'
 * //     },
 * //     {
 * //       count: 3,
 * //       value: '3|-0.10320217|0.475514539|0.969205779|0.711250309|0.153847069|0.410092395\n' +
 * //         '4|-0.121479865|0.486179086|0.481023842|0.467410582|0.42602231|0.849701641\n' +
 * //         '5|0.757346003|0.697242433|0.67532802|0.174644416|0.045652267|0.397104668\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       added: undefined,
 * //       removed: true,
 * //       value: '6|0.663032731|0.259252779|0.566177431|0.679637706|0.377814487|0.400248119\n'
 * //     },
 * //     {
 * //       count: 2,
 * //       value: '7|0.72721374|0.263793391|0.167895215|0.794808602|0.107070584|0.011822872\n' +
 * //         '8|0.247416664|0.360426795|0.014346373|0.000469616|0.4082693|0.913806611\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       added: undefined,
 * //       removed: true,
 * //       value: '9|0.345880037|0.167996664|0.711054429|0.363177921|0.206849994|0.636855344\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       added: true,
 * //       removed: undefined,
 * //       value: '9|0.345880037|0.167996664|0.711054429|0.363173478|0.636855344\n'
 * //     },
 * //     {
 * //       count: 1,
 * //       value: '10|0.576739457|0.324665077|0.973218005|0.883927423|0.176906125|0.20087887\n'
 * //     },
 * //     {
 * //       count: 2,
 * //       added: undefined,
 * //       removed: true,
 * //       value: '11|a1\n12|a2\n'
 * //     },
 * //     {
 * //       count: 2,
 * //       value: '13|0.504421248|0.984003751|0.32549507|0.987090751|0.192745589|0.735133561\n' +
 * //         '14|0.273214614|0.083431884|0.565146092|0.935388666|0.637675154|0.523815661\n'
 * //     },
 * //     {
 * //       count: 3,
 * //       added: true,
 * //       removed: undefined,
 * //       value: 'n1|0.944492151|0.89950443|0.182709318|0.892820757|0.709746901|0.097385354\n' +
 * //         'n2|0.769805921|0.061355308|0.314826137|0.855857651|0.653550539|0.772500773\n' +
 * //         'n3|0.158739038|0.085078711|0.844664253|0.21630142|0.912931341|0.735138313\n'
 * //     },
 * //     { count: 1, value: '\n' },
 * //     { count: 1, added: true, removed: undefined, value: '\n' }
 * //   ],
 * //   dfs: [
 * //     {
 * //       p: '',
 * //       vo: '1|0.974848293|0.791303871|0.716898185|0.506002098|0.137888903|0.626724085',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'modify',
 * //       vo: '2|-0.529408622|0.839882385|0.663059856|0.49047221|0.395763265|0.866151835',
 * //       vn: '2|-0.529408622|a0.839882385|0.663059856|0.49047221|0.395763265|0.866151835'
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '3|-0.10320217|0.475514539|0.969205779|0.711250309|0.153847069|0.410092395',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '4|-0.121479865|0.486179086|0.481023842|0.467410582|0.42602231|0.849701641',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '5|0.757346003|0.697242433|0.67532802|0.174644416|0.045652267|0.397104668',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'remove',
 * //       vo: '6|0.663032731|0.259252779|0.566177431|0.679637706|0.377814487|0.400248119',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '7|0.72721374|0.263793391|0.167895215|0.794808602|0.107070584|0.011822872',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '8|0.247416664|0.360426795|0.014346373|0.000469616|0.4082693|0.913806611',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'modify',
 * //       vo: '9|0.345880037|0.167996664|0.711054429|0.363177921|0.206849994|0.636855344',
 * //       vn: '9|0.345880037|0.167996664|0.711054429|0.363173478|0.636855344'
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '10|0.576739457|0.324665077|0.973218005|0.883927423|0.176906125|0.20087887',
 * //       vn: ''
 * //     },
 * //     { p: 'remove', vo: '11|a1', vn: '' },
 * //     { p: 'remove', vo: '12|a2', vn: '' },
 * //     {
 * //       p: '',
 * //       vo: '13|0.504421248|0.984003751|0.32549507|0.987090751|0.192745589|0.735133561',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: '',
 * //       vo: '14|0.273214614|0.083431884|0.565146092|0.935388666|0.637675154|0.523815661',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'add',
 * //       vo: 'n1|0.944492151|0.89950443|0.182709318|0.892820757|0.709746901|0.097385354',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'add',
 * //       vo: 'n2|0.769805921|0.061355308|0.314826137|0.855857651|0.653550539|0.772500773',
 * //       vn: ''
 * //     },
 * //     {
 * //       p: 'add',
 * //       vo: 'n3|0.158739038|0.085078711|0.844664253|0.21630142|0.912931341|0.735138313',
 * //       vn: ''
 * //     },
 * //     { p: '', vo: '', vn: '' },
 * //     { p: 'add', vo: '', vn: '' }
 * //   ]
 * // }
 *
 */
function strDiff(strOld, strNew, opt = {}) {
    // NOTE: `opt` (and opt.eliminateEndLine) is kept in the signature for backward
    // compatibility only; nothing in this function reads it.

    //check
    if (!isstr(strOld) || !isstr(strNew)) {
        return {}
    }
    if (strOld === '' && strNew === '') {
        return {}
    }

    //diffLines
    let diff = Diff.diffLines(strOld, strNew)

    //isRemoved, isAdded: classify a change object; a chunk flagged as both is neither
    let isRemoved = (chunk) => {
        return Boolean(get(chunk, 'removed', false)) && !get(chunk, 'added', false)
    }
    let isAdded = (chunk) => {
        return Boolean(get(chunk, 'added', false)) && !get(chunk, 'removed', false)
    }

    //toLines: expand a chunk into exactly `count` line strings
    let toLines = (chunk) => {
        let count = get(chunk, 'count', 0)

        //splitting on '\n' yields one extra empty item when the value ends with a
        //line terminator, trimming to count drops it
        let lines = take(split(get(chunk, 'value', ''), '\n'), count)

        //pad with empty lines if the value holds fewer lines than count reports
        while (size(lines) < count) {
            lines.push('')
        }

        return lines
    }

    //dfs
    let dfs = []
    let n = size(diff)
    let k = 0
    while (k < n) {
        let chunk = diff[k]

        if (isRemoved(chunk)) {

            //only an added chunk directly after the removed chunk is a replacement,
            //pairing across unchanged lines would misplace the new text
            let next = diff[k + 1]
            let paired = k + 1 < n && isAdded(next)

            let linesOld = toLines(chunk)
            let linesNew = paired ? toLines(next) : []
            let nOld = size(linesOld)
            let nNew = size(linesNew)

            //ic: number of lines that can be paired as modify
            let ic = Math.min(nOld, nNew)

            for (let i = 0; i < ic; i++) {
                dfs.push({
                    p: 'modify',
                    vo: linesOld[i],
                    vn: linesNew[i],
                })
            }

            //surplus old lines stay plain removals
            for (let i = ic; i < nOld; i++) {
                dfs.push({
                    p: 'remove',
                    vo: linesOld[i],
                    vn: '',
                })
            }

            //surplus new lines stay plain additions, text is reported in vo
            for (let i = ic; i < nNew; i++) {
                dfs.push({
                    p: 'add',
                    vo: linesNew[i],
                    vn: '',
                })
            }

            //the paired added chunk has been consumed together with this one
            k += paired ? 2 : 1
            continue
        }

        if (isAdded(chunk)) {
            let lines = toLines(chunk)
            for (let i = 0; i < size(lines); i++) {
                dfs.push({
                    p: 'add',
                    vo: lines[i],
                    vn: '',
                })
            }
        }
        else if (!get(chunk, 'removed', false) && !get(chunk, 'added', false)) {
            //unchanged lines
            let lines = toLines(chunk)
            for (let i = 0; i < size(lines); i++) {
                dfs.push({
                    p: '',
                    vo: lines[i],
                    vn: '',
                })
            }
        }

        k++
    }

    return {
        diff,
        dfs,
    }
}


export default strDiff