123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- "use strict";
/**
 * Scores how alike two keys are by comparing them position by position.
 *
 * Only the overlapping prefix (the length of the shorter key) is compared.
 * Every position contributes `10 - |codeA - codeB|` points, clamped at 0, so
 * identical code units add 10 and code units 10 or more apart add nothing.
 *
 * @param {string} a first key
 * @param {string} b second key
 * @returns {number} similarity score; higher means more alike
 */
const similarity = (a, b) => {
  const overlap = Math.min(a.length, b.length);
  let score = 0;
  for (let index = 0; index < overlap; index++) {
    const distance = Math.abs(a.charCodeAt(index) - b.charCodeAt(index));
    score += Math.max(0, 10 - distance);
  }
  return score;
};
/**
 * Derives a short name for a group from the keys of its first and last node.
 *
 * The candidate names are prefixes of `a`. The shortest candidate is the
 * common prefix of `a` and `b` plus the first differing character; longer
 * prefixes are tried until one is found whose lower-cased form is not yet in
 * `usedNames`. The chosen name is registered (lower-cased) in `usedNames`.
 *
 * When no candidate shorter than the shorter key is free, the full key `a`
 * is returned and `usedNames` is left untouched.
 *
 * @param {string} a key of the first node
 * @param {string} b key of the last node
 * @param {Set<string>} usedNames lower-cased names already handed out (mutated)
 * @returns {string} the name for the group
 */
const getName = (a, b, usedNames) => {
  const limit = Math.min(a.length, b.length);

  // Advance past the common prefix and one step beyond the first difference.
  let end = 0;
  while (end < limit) {
    const differs = a.charCodeAt(end) !== b.charCodeAt(end);
    end++;
    if (differs) break;
  }

  // Grow the prefix until it does not collide (case-insensitively).
  for (; end < limit; end++) {
    const name = a.slice(0, end);
    const lowerName = name.toLowerCase();
    if (!usedNames.has(lowerName)) {
      usedNames.add(lowerName);
      return name;
    }
  }

  // Fallback: the complete key, neither checked against nor added to usedNames.
  return a;
};
/**
 * Adds every entry of `size` onto `total` in place.
 *
 * Entries missing from `total` (or otherwise falsy) are treated as 0.
 *
 * @param {Record<string, number>} total accumulator, mutated
 * @param {Record<string, number>} size sizes to add, keyed by size type
 * @returns {void}
 */
const addSizeTo = (total, size) => {
  for (const type of Object.keys(size)) {
    const current = total[type] || 0;
    total[type] = current + size[type];
  }
};
/**
 * Subtracts every entry of `size` from `total` in place.
 *
 * No default is applied: a type that is absent from `total` ends up as NaN,
 * so callers are expected to subtract only what was previously added.
 *
 * @param {Record<string, number>} total accumulator, mutated
 * @param {Record<string, number>} size sizes to subtract, keyed by size type
 * @returns {void}
 */
const subtractSizeFrom = (total, size) => {
  for (const type of Object.keys(size)) {
    total[type] -= size[type];
  }
};
/**
 * Sums the `size` of all given nodes into a fresh object.
 *
 * The result has a null prototype, so it only ever contains size types.
 *
 * @param {Iterable<{size: Record<string, number>}>} nodes nodes to sum up
 * @returns {Record<string, number>} total size per size type
 */
const sumSize = nodes => {
  const total = Object.create(null);
  for (const { size } of nodes) {
    addSizeTo(total, size);
  }
  return total;
};
/**
 * Tells whether any size type exceeds its configured maximum.
 *
 * Types with a size of 0 are ignored, as are types whose entry in `maxSize`
 * is not a number (i.e. no limit configured for that type).
 *
 * @param {Record<string, number>} size sizes keyed by size type
 * @param {Record<string, number>} maxSize upper limits keyed by size type
 * @returns {boolean} true when at least one type is strictly above its limit
 */
const isTooBig = (size, maxSize) =>
  Object.keys(size).some(type => {
    const value = size[type];
    if (value === 0) return false;
    const limit = maxSize[type];
    return typeof limit === "number" && value > limit;
  });
/**
 * Tells whether any size type falls below its configured minimum.
 *
 * Types with a size of 0 are ignored (an absent type cannot be "too small"),
 * as are types whose entry in `minSize` is not a number.
 *
 * @param {Record<string, number>} size sizes keyed by size type
 * @param {Record<string, number>} minSize lower limits keyed by size type
 * @returns {boolean} true when at least one type is strictly below its limit
 */
const isTooSmall = (size, minSize) =>
  Object.keys(size).some(type => {
    const value = size[type];
    if (value === 0) return false;
    const limit = minSize[type];
    return typeof limit === "number" && value < limit;
  });
/**
 * Collects the size types that fall below their configured minimum.
 *
 * Uses the same rules as `isTooSmall`: types with size 0 and types without a
 * numeric entry in `minSize` are never reported.
 *
 * @param {Record<string, number>} size sizes keyed by size type
 * @param {Record<string, number>} minSize lower limits keyed by size type
 * @returns {Set<string>} the size types that are strictly below their limit
 */
const getTooSmallTypes = (size, minSize) => {
  const tooSmall = new Set();
  for (const type of Object.keys(size)) {
    const value = size[type];
    if (value === 0) continue;
    const limit = minSize[type];
    if (typeof limit === "number" && value < limit) {
      tooSmall.add(type);
    }
  }
  return tooSmall;
};
/**
 * Counts how many of the given size types are actually present in `size`.
 *
 * A type counts as present when its size is not 0.
 *
 * @param {Record<string, number>} size sizes keyed by size type
 * @param {Set<string>} types size types of interest
 * @returns {number} number of non-zero size types that are in `types`
 */
const getNumberOfMatchingSizeTypes = (size, types) => {
  let matches = 0;
  for (const type of Object.keys(size)) {
    const present = size[type] !== 0;
    if (present && types.has(type)) matches++;
  }
  return matches;
};
/**
 * Sums the sizes of the selected size types only.
 *
 * @param {Record<string, number>} size sizes keyed by size type
 * @param {Set<string>} types size types to include in the sum
 * @returns {number} combined size of all types contained in `types`
 */
const selectiveSizeSum = (size, types) =>
  Object.keys(size).reduce((sum, type) => {
    const value = size[type];
    return value !== 0 && types.has(type) ? sum + value : sum;
  }, 0);
/**
 * A single item to be grouped, together with its sort key and its size.
 */
class Node {
  /**
   * @param {*} item the caller's original item
   * @param {string} key key used for ordering and similarity comparison
   * @param {Record<string, number>} size sizes of the item keyed by size type
   */
  constructor(item, key, size) {
    this.item = item;
    this.key = key;
    this.size = size;
  }
}
/**
 * An ordered run of nodes plus the similarity between each adjacent pair.
 *
 * `similarities[i]` belongs to the pair `nodes[i]` / `nodes[i + 1]`.
 */
class Group {
  /**
   * @param {Node[]} nodes nodes of the group, in key order
   * @param {number[] | null} similarities similarity of each adjacent pair
   * @param {Record<string, number>} [size] total size; computed from `nodes`
   *   when omitted
   */
  constructor(nodes, similarities, size) {
    this.nodes = nodes;
    this.similarities = similarities;
    this.size = size || sumSize(nodes);
    // Assigned later, once the final groups are known.
    this.key = undefined;
  }

  /**
   * Removes all nodes matching `filter` from the group and returns them.
   *
   * The remaining nodes keep their order. Similarities are carried over for
   * pairs that were already adjacent and recomputed for pairs that became
   * adjacent because nodes between them were removed. The group size is
   * recomputed from the remaining nodes.
   *
   * @param {(node: Node) => boolean} filter selects the nodes to remove
   * @returns {Node[] | undefined} the removed nodes, or `undefined` (leaving
   *   the group untouched) when every node matches and the group would
   *   become empty
   */
  popNodes(filter) {
    const keptNodes = [];
    const keptSimilarities = [];
    const removedNodes = [];
    let previousKept;

    for (let index = 0; index < this.nodes.length; index++) {
      const node = this.nodes[index];
      if (filter(node)) {
        removedNodes.push(node);
        continue;
      }
      if (keptNodes.length > 0) {
        const wasAdjacent = previousKept === this.nodes[index - 1];
        keptSimilarities.push(
          wasAdjacent
            ? this.similarities[index - 1]
            : similarity(previousKept.key, node.key)
        );
      }
      keptNodes.push(node);
      previousKept = node;
    }

    // Never empty the group completely.
    if (removedNodes.length === this.nodes.length) return undefined;

    this.nodes = keptNodes;
    this.similarities = keptSimilarities;
    this.size = sumSize(keptNodes);
    return removedNodes;
  }
}
/**
 * Computes the similarity between every pair of neighbouring nodes.
 *
 * For `n` nodes the result has `n - 1` entries (none for zero or one node);
 * entry `i` is the similarity of the keys of node `i` and node `i + 1`.
 *
 * @param {Iterable<{key: string}>} nodes nodes in the order to compare
 * @returns {number[]} similarities of adjacent nodes
 */
const getSimilarities = nodes => {
  const similarities = [];
  let previous;
  for (const node of nodes) {
    if (previous !== undefined) {
      similarities.push(similarity(previous.key, node.key));
    }
    previous = node;
  }
  return similarities;
};
- module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
-
- const result = [];
- const nodes = Array.from(
- items,
- item => new Node(item, getKey(item), getSize(item))
- );
-
- const initialNodes = [];
-
- nodes.sort((a, b) => {
- if (a.key < b.key) return -1;
- if (a.key > b.key) return 1;
- return 0;
- });
-
-
- for (const node of nodes) {
- if (isTooBig(node.size, maxSize) && !isTooSmall(node.size, minSize)) {
- result.push(new Group([node], []));
- } else {
- initialNodes.push(node);
- }
- }
- if (initialNodes.length > 0) {
- const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
-
- const removeProblematicNodes = (group, consideredSize = group.size) => {
- const problemTypes = getTooSmallTypes(consideredSize, minSize);
- if (problemTypes.size > 0) {
-
-
- const problemNodes = group.popNodes(
- n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
- );
- if (problemNodes === undefined) return false;
-
- const possibleResultGroups = result.filter(
- n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
- );
- if (possibleResultGroups.length > 0) {
- const bestGroup = possibleResultGroups.reduce((min, group) => {
- const minMatches = getNumberOfMatchingSizeTypes(min, problemTypes);
- const groupMatches = getNumberOfMatchingSizeTypes(
- group,
- problemTypes
- );
- if (minMatches !== groupMatches)
- return minMatches < groupMatches ? group : min;
- if (
- selectiveSizeSum(min.size, problemTypes) >
- selectiveSizeSum(group.size, problemTypes)
- )
- return group;
- return min;
- });
- for (const node of problemNodes) bestGroup.nodes.push(node);
- bestGroup.nodes.sort((a, b) => {
- if (a.key < b.key) return -1;
- if (a.key > b.key) return 1;
- return 0;
- });
- } else {
-
-
- result.push(new Group(problemNodes, null));
- }
- return true;
- } else {
- return false;
- }
- };
- if (initialGroup.nodes.length > 0) {
- const queue = [initialGroup];
- while (queue.length) {
- const group = (queue.pop());
-
- if (!isTooBig(group.size, maxSize)) {
- result.push(group);
- continue;
- }
-
-
- if (removeProblematicNodes(group)) {
-
- queue.push(group);
- continue;
- }
-
-
-
- let left = 1;
- let leftSize = Object.create(null);
- addSizeTo(leftSize, group.nodes[0].size);
- while (left < group.nodes.length && isTooSmall(leftSize, minSize)) {
- addSizeTo(leftSize, group.nodes[left].size);
- left++;
- }
- let right = group.nodes.length - 2;
- let rightSize = Object.create(null);
- addSizeTo(rightSize, group.nodes[group.nodes.length - 1].size);
- while (right >= 0 && isTooSmall(rightSize, minSize)) {
- addSizeTo(rightSize, group.nodes[right].size);
- right--;
- }
-
-
-
-
-
-
-
-
- if (left - 1 > right) {
-
- let prevSize;
- if (right < group.nodes.length - left) {
- subtractSizeFrom(rightSize, group.nodes[right + 1].size);
- prevSize = rightSize;
- } else {
- subtractSizeFrom(leftSize, group.nodes[left - 1].size);
- prevSize = leftSize;
- }
- if (removeProblematicNodes(group, prevSize)) {
-
- queue.push(group);
- continue;
- }
-
-
-
-
- result.push(group);
- continue;
- }
- if (left <= right) {
-
-
-
-
-
- let best = -1;
- let bestSimilarity = Infinity;
- let pos = left;
- let rightSize = sumSize(group.nodes.slice(pos));
-
-
-
-
- while (pos <= right + 1) {
- const similarity = (group.similarities)[
- pos - 1
- ];
- if (
- similarity < bestSimilarity &&
- !isTooSmall(leftSize, minSize) &&
- !isTooSmall(rightSize, minSize)
- ) {
- best = pos;
- bestSimilarity = similarity;
- }
- addSizeTo(leftSize, group.nodes[pos].size);
- subtractSizeFrom(rightSize, group.nodes[pos].size);
- pos++;
- }
- if (best < 0) {
-
-
-
- result.push(group);
- continue;
- }
- left = best;
- right = best - 1;
- }
-
-
- const rightNodes = [group.nodes[right + 1]];
-
- const rightSimilarities = [];
- for (let i = right + 2; i < group.nodes.length; i++) {
- rightSimilarities.push(
- (group.similarities)[i - 1]
- );
- rightNodes.push(group.nodes[i]);
- }
- queue.push(new Group(rightNodes, rightSimilarities));
- const leftNodes = [group.nodes[0]];
-
- const leftSimilarities = [];
- for (let i = 1; i < left; i++) {
- leftSimilarities.push(
- (group.similarities)[i - 1]
- );
- leftNodes.push(group.nodes[i]);
- }
- queue.push(new Group(leftNodes, leftSimilarities));
- }
- }
- }
-
- result.sort((a, b) => {
- if (a.nodes[0].key < b.nodes[0].key) return -1;
- if (a.nodes[0].key > b.nodes[0].key) return 1;
- return 0;
- });
-
- const usedNames = new Set();
- for (let i = 0; i < result.length; i++) {
- const group = result[i];
- if (group.nodes.length === 1) {
- group.key = group.nodes[0].key;
- } else {
- const first = group.nodes[0];
- const last = group.nodes[group.nodes.length - 1];
- const name = getName(first.key, last.key, usedNames);
- group.key = name;
- }
- }
-
- return result.map(group => {
-
- return {
- key: group.key,
- items: group.nodes.map(node => node.item),
- size: group.size
- };
- });
- };
|