import isestr from './isestr.mjs'
/**
 * Score how alike two strings are by comparing their adjacent-character pairs
 * (bigrams), ignoring all whitespace. The score is the Dice coefficient over
 * the two bigram multisets: 2 * shared / (bigrams in s1 + bigrams in s2).
 *
 * Bigrams are taken over UTF-16 code units, so matching is case-sensitive and
 * each repeated bigram in one string can only be matched once by the other.
 *
 * @param {String} s1 first string to compare
 * @param {String} s2 second string to compare
 * @returns {Number} similarity between 0 (nothing shared) and 1 (identical)
 */
function core(s1, s2) {
    const left = s1.replace(/\s+/g, '')
    const right = s2.replace(/\s+/g, '')

    // Two empty strings are considered identical; one empty string matches nothing.
    if (left.length === 0 || right.length === 0) {
        return left.length === 0 && right.length === 0 ? 1 : 0
    }
    if (left === right) {
        return 1
    }
    // A single character has no bigram, so it cannot overlap with anything.
    if (left.length < 2 || right.length < 2) {
        return 0
    }

    // Count how many times each bigram occurs in the first string.
    const available = new Map()
    for (let pos = 0; pos + 1 < left.length; pos += 1) {
        const pair = left.slice(pos, pos + 2)
        available.set(pair, (available.get(pair) || 0) + 1)
    }

    // Consume one occurrence for every bigram of the second string that is still available.
    let shared = 0
    for (let pos = 0; pos + 1 < right.length; pos += 1) {
        const pair = right.slice(pos, pos + 2)
        const remaining = available.get(pair) || 0
        if (remaining > 0) {
            available.set(pair, remaining - 1)
            shared += 1
        }
    }

    // A string of length n has n - 1 bigrams, hence the "- 2" in the denominator.
    return (2.0 * shared) / (left.length + right.length - 2)
}
/**
 * Compute the similarity of two strings.
 *
 * Fork: {@link https://github.com/aceakash/string-similarity string-similarity}
 *
 * Unit Test: {@link https://github.com/yuda-lyu/wsemi/blob/master/test/strCompare.test.mjs Github}
 * @memberOf wsemi
 * @param {String} str1 first string to compare
 * @param {String} str2 second string to compare
 * @returns {Number} similarity score from 0 to 1; 0 when either input fails the `isestr` check
 * @example
 *
 * console.log(strCompare('The Woodman(樵夫) set to work at once, and so...', 'Wodooman(樵夫)'))
 * // => 0.375
 *
 */
function strCompare(str1, str2) {

    // Inputs rejected by isestr are scored as having nothing in common.
    if (!isestr(str1) || !isestr(str2)) {
        return 0
    }

    return core(str1, str2)
}
export default strCompare