k-medoids/fastermsc.mjs

// Ported 1:1 from src/fastermsc.rs (FasterMSC algorithm).
import { arrayAdapter } from './arrayadapter.mjs';
import { Reco, DistancePair, U32_MAX, find_min, find_max, choose_medoid_within_partition } from './util.mjs';

// _loss(a, b): 0 if a or b is zero, else a / b.
export function _loss(a, b) {
  if (a === 0 || b === 0) { return 0; } else { return a / b; }
}

/**
 * Run the FasterMSC algorithm.
 * @param {object} mat - pairwise distance matrix (wrapped)
 * @param {number[]} med - the list of medoids (mutated in place)
 * @param {number} maxiter - maximum number of iterations
 * returns { loss, assi, nIter, nSwaps }
 */
export function fastermsc(mat, med, maxiter) {
  mat = arrayAdapter(mat);
  const n = mat.len(), k = med.length;
  if (k === 1) {
    const assi = new Array(n).fill(0);
    const [swapped, loss] = choose_medoid_within_partition(mat, assi, med, 0);
    return { loss, assi, nIter: 1, nSwaps: swapped ? 1 : 0 };
  }
  if (k === 2) { // special hadling, as there is no third
    return fastermsc_k2(mat, med, maxiter);
  }
  let [loss, data] = initial_assignment(mat, med);

  let removal_loss = new Array(k).fill(0);
  update_removal_loss(data, removal_loss);
  let lastswap = n, n_swaps = 0, iter = 0;
  while (iter < maxiter) {
    iter += 1;
    const swaps_before = n_swaps, lastloss = loss;
    for (let j = 0; j < n; j++) {
      if (j === lastswap) {
        break;
      }
      if (j === med[data[j].near.i]) {
        continue; // This already is a medoid
      }
      const [change, b] = find_best_swap(mat, removal_loss, data, j);
      if (change <= 0) {
        continue; // No improvement
      }
      n_swaps += 1;
      lastswap = j;
      // perform the swap
      loss = do_swap(mat, med, data, b, j);
      update_removal_loss(data, removal_loss);
    }
    if (n_swaps === swaps_before || loss >= lastloss) {
      break; // converged
    }
  }
  const assi = data.map((x) => x.near.i);
  loss = 1 - loss / n;
  return { loss, assi, nIter: iter, nSwaps: n_swaps };
}

/** Perform the initial assignment to medoids. Returns [loss, data] (data = Reco[]). */
export function initial_assignment(mat, med) {
  const n = mat.len(), k = med.length;
  if (!mat.isSquare()) throw new Error('Dissimilarity matrix is not square');
  if (!(n <= U32_MAX)) throw new Error('N is too large');
  if (!(k > 0 && k < U32_MAX)) throw new Error('invalid N');
  if (!(k <= n)) throw new Error('k must be at most N');
  const data = new Array(mat.len());
  for (let _i = 0; _i < data.length; _i++) data[_i] = Reco.empty();
  const firstcenter = med[0];
  let loss = 0;
  for (let i = 0; i < data.length; i++) {
    // Rust: *cur = Reco::new(...) overwrites the slot in place.
    const cur = new Reco(0, mat.get(i, firstcenter), U32_MAX, 0, U32_MAX, 0);
    data[i] = cur;
    for (let m = 1; m < med.length; m++) {
      const me = med[m];
      const d = mat.get(i, me);
      if (d < cur.near.d || i === me) {
        cur.third = cur.seco.clone();
        cur.seco = cur.near.clone();
        cur.near = new DistancePair(m, d);
      } else if (cur.seco.i === U32_MAX || d < cur.seco.d) {
        cur.third = cur.seco.clone();
        cur.seco = new DistancePair(m, d);
      } else if (cur.third.i === U32_MAX || d < cur.third.d) {
        cur.third = new DistancePair(m, d);
      }
    }
    loss += _loss(cur.near.d, cur.seco.d);
  }
  return [loss, data];
}

/** Find the best swap for object j - FastMSC version. Returns [change, b]. */
export function find_best_swap(mat, removal_loss, data, j) {
  const ploss = removal_loss.slice();
  // Improvement from the journal version:
  let acc = 0;
  for (let o = 0; o < data.length; o++) {
    const reco = data[o];
    const doj = mat.get(o, j);
    if (doj < reco.near.d) {
      acc += _loss(reco.near.d, reco.seco.d) - _loss(doj, reco.near.d);
      // loss already includes (dt - ds) - (ds - dn), remove
      ploss[reco.near.i] += _loss(doj, reco.near.d) + _loss(reco.seco.d, reco.third.d) - _loss(reco.near.d + doj, reco.seco.d);
      ploss[reco.seco.i] += _loss(reco.near.d, reco.third.d) - _loss(reco.near.d, reco.seco.d);
    } else if (doj < reco.seco.d) {
      acc += _loss(reco.near.d, reco.seco.d) - _loss(reco.near.d, doj);
      ploss[reco.near.i] += _loss(reco.near.d, doj) + _loss(reco.seco.d, reco.third.d) - _loss(reco.near.d + doj, reco.seco.d);
      // loss already includes (dt - ds) - (ds - dn), adjust to 2*d(xo) - ds - dt
      // loss already includes (dt - ds), adjust to 2*d(xo) - ds - dt
      ploss[reco.seco.i] += _loss(reco.near.d, reco.third.d) - _loss(reco.near.d, reco.seco.d);
    } else if (doj < reco.third.d) {
      // loss already includes (dt - ds) - (ds - dn), adjust to d(xo)- dt
      ploss[reco.near.i] += _loss(reco.seco.d, reco.third.d) - _loss(reco.seco.d, doj);
      // loss already includes (dt - ds), adjust to d(xo)- dt
      ploss[reco.seco.i] += _loss(reco.near.d, reco.third.d) - _loss(reco.near.d, doj);
    }
  }
  const [b, bloss] = find_max(ploss);
  return [bloss + acc, b]; // add the shared accumulator
}

/** Update the loss when removing each medoid. Mutates loss in place. */
export function update_removal_loss(data, loss) {
  loss.fill(0); // stable since 1.50
  for (let r = 0; r < data.length; r++) {
    const rec = data[r];
    loss[rec.near.i] += _loss(rec.near.d, rec.seco.d) - _loss(rec.seco.d, rec.third.d);
    loss[rec.seco.i] += _loss(rec.near.d, rec.seco.d) - _loss(rec.near.d, rec.third.d);
    // as N might be unsigned
  }
}

/** Update the third nearest medoid information. Called after each swap. Returns fresh DistancePair. */
export function update_third_nearest(mat, med, n, s, b, o, doj) {
  let dist = new DistancePair(b, doj);
  for (let i = 0; i < med.length; i++) {
    const mi = med[i];
    if (i === n || i === b || i === s) {
      continue;
    }
    const d = mat.get(o, mi);
    if (d < dist.d) {
      dist = new DistancePair(i, d);
    }
  }
  return dist;
}

/** Perform a single swap. Returns RAW loss. */
export function do_swap(mat, med, data, b, j) {
  const n = mat.len();
  if (!(b < med.length)) throw new Error('invalid medoid number');
  if (!(j < n)) throw new Error('invalid object number');
  med[b] = j;
  let acc = 0;
  for (let o = 0; o < data.length; o++) {
    const reco = data[o];
    if (o === j) {
      if (reco.near.i !== b) {
        if (reco.seco.i !== b) {
          reco.third = reco.seco.clone();
        }
        reco.seco = reco.near.clone();
      }
      reco.near = new DistancePair(b, 0);
      acc += 0;
      continue;
    }
    const doj = mat.get(o, j);
    // Nearest medoid is gone:
    if (reco.near.i === b) {
      if (doj < reco.seco.d) {
        reco.near = new DistancePair(b, doj);
      } else if (reco.third.i === U32_MAX || doj < reco.third.d) {
        reco.near = reco.seco.clone();
        reco.seco = new DistancePair(b, doj);
      } else {
        reco.near = reco.seco.clone();
        reco.seco = reco.third.clone();
        reco.third = update_third_nearest(mat, med, reco.near.i, reco.seco.i, b, o, doj);
      }
    } else if (reco.seco.i === b) {
      // second nearest was replaced
      if (doj < reco.near.d) {
        reco.seco = reco.near.clone();
        reco.near = new DistancePair(b, doj);
      } else if (reco.third.i === U32_MAX || doj < reco.third.d) {
        reco.seco = new DistancePair(b, doj);
      } else {
        reco.seco = reco.third.clone();
        reco.third = update_third_nearest(mat, med, reco.near.i, reco.seco.i, b, o, doj);
      }
    } else {
      // nearest not removed
      if (doj < reco.near.d) {
        reco.third = reco.seco.clone();
        reco.seco = reco.near.clone();
        reco.near = new DistancePair(b, doj);
      } else if (doj < reco.seco.d) {
        reco.third = reco.seco.clone();
        reco.seco = new DistancePair(b, doj);
      } else if (reco.third.i === U32_MAX || doj < reco.third.d) {
        reco.third = new DistancePair(b, doj);
      } else if (reco.third.i === b) {
        reco.third = update_third_nearest(mat, med, reco.near.i, reco.seco.i, b, o, doj);
      }
    }
    acc += _loss(reco.near.d, reco.seco.d);
  }
  return acc;
}

/** Special case k=2 of the FasterMSC algorithm. Returns { loss, assi, nIter, nSwaps }. */
export function fastermsc_k2(mat, med, maxiter) {
  const n = mat.len(), k = med.length;
  if (!(k === 2)) throw new Error('Only valid for k=2');
  let [loss, assi, data] = initial_assignment_k2(mat, med);
  let lastswap = n, n_swaps = 0, iter = 0;
  while (iter < maxiter) {
    iter += 1;
    const swaps_before = n_swaps, lastloss = loss;
    for (let j = 0; j < n; j++) {
      if (j === lastswap) {
        break;
      }
      if (j === med[assi[j]]) {
        continue; // This already is a medoid
      }
      const [newloss, b] = find_best_swap_k2(mat, data, j); // assi not used, see below
      if (newloss >= loss) {
        continue; // No improvement
      }
      n_swaps += 1;
      lastswap = j;
      // perform the swap
      loss = do_swap_k2(mat, med, assi, data, b, j);
    }
    if (n_swaps === swaps_before || loss >= lastloss) {
      break; // converged
    }
  }
  loss = 1 - loss / n;
  return { loss, assi, nIter: iter, nSwaps: n_swaps };
}

/** Perform the initial assignment to medoids, for k=2 only. Returns [loss, assi, data] (data = [d0,d1] pairs). */
export function initial_assignment_k2(mat, med) {
  const n = mat.len(), k = med.length;
  if (!mat.isSquare()) throw new Error('Dissimilarity matrix is not square');
  if (!(n <= U32_MAX)) throw new Error('N is too large');
  if (!(k === 2)) throw new Error('k must be 2');
  const assi = new Array(mat.len()).fill(0);
  const data = new Array(mat.len());
  for (let _i = 0; _i < data.length; _i++) data[_i] = [0, 0];
  let loss = 0;
  for (let i = 0; i < data.length; i++) {
    const d = data[i];
    d[0] = mat.get(i, med[0]);
    d[1] = mat.get(i, med[1]);
    if (d[0] < d[1]) {
      assi[i] = 0;
      loss += _loss(d[0], d[1]); // return
    } else {
      assi[i] = 1;
      loss += _loss(d[1], d[0]); // return
    }
  }
  return [loss, assi, data];
}

/** Find the best swap for object j - FastMSC version, k=2. Returns [loss, b]. */
export function find_best_swap_k2(mat, data, j) {
  const ploss = [0, 0];
  for (let o = 0; o < data.length; o++) {
    const d = data[o];
    const doj = mat.get(o, j);
    // We do not use the assignment here, because we stored d0/d1 by medoid position, not closeness
    ploss[0] += (doj < d[1]) ? _loss(doj, d[1]) : _loss(d[1], doj);
    ploss[1] += (doj < d[0]) ? _loss(doj, d[0]) : _loss(d[0], doj);
  }
  const [b, bloss] = find_min(ploss);
  return [bloss, b];
}

/** Perform a single swap, k=2. Returns RAW loss. */
export function do_swap_k2(mat, med, assi, data, b, j) {
  const n = mat.len();
  if (!(b < med.length)) throw new Error('invalid medoid number');
  if (!(j < n)) throw new Error('invalid object number');
  med[b] = j;
  // Its nicer to have the if outside, even though this looks duplicated
  if (b === 0) {
    let acc = 0;
    for (let o = 0; o < data.length; o++) {
      const d = data[o];
      if (o === j) {
        assi[o] = 0;
        d[0] = 0;
        acc += 0;
        continue;
      }
      const doj = mat.get(o, j);
      d[0] = doj;
      if (doj < d[1] || (doj === d[1] && assi[o] === 0)) {
        assi[o] = 0;
        acc += _loss(doj, d[1]); // return
      } else {
        assi[o] = 1;
        acc += _loss(d[1], doj); // return
      }
    }
    return acc;
  } else { // b == 1
    let acc = 0;
    for (let o = 0; o < data.length; o++) {
      const d = data[o];
      if (o === j) {
        assi[o] = 1;
        d[1] = 0;
        acc += 0;
        continue;
      }
      const doj = mat.get(o, j);
      d[1] = doj;
      if (doj < d[0] || (doj === d[0] && assi[o] === 1)) {
        assi[o] = 1;
        acc += _loss(doj, d[0]); // return
      } else {
        assi[o] = 0;
        acc += _loss(d[0], doj); // return
      }
    }
    return acc;
  }
}