arrFilterByKeywords.mjs

  1. import map from 'lodash-es/map.js'
  2. import size from 'lodash-es/size.js'
  3. import trim from 'lodash-es/trim.js'
  4. import isearr from './isearr.mjs'
  5. import isestr from './isestr.mjs'
  6. import sep from './sep.mjs'
  7. import strleft from './strleft.mjs'
  8. import strdelleft from './strdelleft.mjs'
  /**
   * Score every string in `arr` against a set of keywords, reporting whether it matches and how strongly.
   *
   * A keyword string is split on spaces into individual keywords; a keyword array is used as given,
   * so its items may contain spaces. A keyword prefixed with `+` is required and a keyword prefixed
   * with `-` is excluded. Keywords that are blank once the prefix is removed (e.g. a lone `+`) are ignored.
   *
   * - When at least one required/excluded keyword is given, a string matches (weight 1) only if it
   *   contains every required keyword and none of the excluded ones; otherwise it does not match (weight 0).
   *   Plain keywords do not affect the result in that case.
   * - Otherwise each plain keyword found adds `(1 - k / n) ** 2` to the weight, where `k` is the
   *   keyword position and `n` the number of keywords, so earlier keywords count more.
   *
   * Entries of `arr` that are not strings never match.
   *
   * Unit Test: {@link https://github.com/yuda-lyu/wsemi/blob/master/test/arrFilterByKeywords.test.mjs Github}
   * @memberOf wsemi
   * @param {Array} arr Array of strings to be searched
   * @param {String|Array} keywords Keywords to look for, as a space-separated string or as an array
   * @returns {Array} One object per entry of `arr`, each with `hasKeyword` (boolean, whether the entry matches) and `weight` (float, may exceed 1); an empty array when `arr` or `keywords` is not a non-empty value of the expected type
   * @example
   *
   * let arr = [
   *     'abc def xyz',
   *     '測試abc中文mnop',
   *     'Instead of creating yet another opinionated application',
   *     'Node.js module which can be integrated into a larger application',
   * ]
   * let kws = null
   * let r = null
   *
   * kws = 'abc'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def 中文'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 0.25 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def 中文 mnop'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 0.5555555555555557 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def +yet'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def of module -yet'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 }
   * // ]
   *
   * kws = '+abc -xyz'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = '+'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = '-'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = 'def +'
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = ['def', 'of', 'module', '-yet']
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 }
   * // ]
   *
   * kws = ['can be', 'def']
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 0.25 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 }
   * // ]
   *
   * kws = ['+abc']
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 }
   * // ]
   *
   * kws = ['-abc']
   * r = arrFilterByKeywords(arr, kws)
   * console.log(r)
   * // => [
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: false, weight: 0 },
   * //   { hasKeyword: true, weight: 1 },
   * //   { hasKeyword: true, weight: 1 }
   * // ]
   *
   */
  function arrFilterByKeywords(arr, keywords) {

      //check: arr must pass isearr, otherwise there is nothing to score
      if (!isearr(arr)) {
          return []
      }

      //check: keywords must pass isestr or isearr
      if (!isestr(keywords) && !isearr(keywords)) {
          return []
      }

      //kws: a string is split on spaces into keywords; an array is used as given, so its items may contain spaces
      let kws = isestr(keywords) ? sep(keywords, ' ') : keywords

      //n: counts every raw keyword, including blank ones skipped below, so position weights keep their scale
      let n = size(kws)

      //parse the keywords once instead of once per searched string
      let constraints = [] //required ('+') and excluded ('-') keywords
      let ranked = [] //plain keywords with their position weight
      for (let k = 0; k < n; k++) {
          let kw = kws[k]

          //ekw: keyword text without its '+' or '-' prefix
          let prefix = strleft(kw, 1)
          let bInclude = prefix === '+'
          let bExclude = prefix === '-'
          let ekw = (bInclude || bExclude) ? strdelleft(kw, 1) : kw

          //check: ignore keywords that are blank once the prefix is removed, e.g. a lone '+' or '-'
          if (trim(ekw) === '') {
              continue
          }

          if (bInclude || bExclude) {
              constraints.push({ ekw, mustHave: bInclude })
          }
          else {
              //non-linear decay: the first keyword weighs 1, and later keywords together stay below it
              ranked.push({ ekw, w: (1 - k / n) ** 2 })
          }
      }

      //contains: indexOf is kept rather than includes because includes throws when given a RegExp
      function contains(c, ekw) {
          return c.indexOf(ekw) >= 0
      }

      function core(c) {

          //check: a non-string entry cannot be searched, so report no match instead of throwing
          if (typeof c !== 'string') {
              return { hasKeyword: false, weight: 0 }
          }

          //constraints decide the result on their own, and every one of them must hold:
          //stopping at the first satisfied constraint would let a later violated one slip through
          //and would make the result depend on keyword order
          if (constraints.length > 0) {
              let ok = constraints.every((t) => contains(c, t.ekw) === t.mustHave)
              return {
                  hasKeyword: ok,
                  weight: ok ? 1 : 0,
              }
          }

          //only plain keywords: accumulate position weights, the sum may exceed 1
          let b = false
          let weight = 0
          for (let t of ranked) {
              if (contains(c, t.ekw)) {
                  b = true
                  weight += t.w
              }
          }

          return {
              hasKeyword: b,
              weight,
          }
      }

      //rs: one result object per entry of arr, in the same order
      let rs = map(arr, (c) => {
          return core(c)
      })

      return rs
  }
  243. export default arrFilterByKeywords