You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
541 lines
15 KiB
541 lines
15 KiB
/*
|
|
MIT License http://www.opensource.org/licenses/mit-license.php
|
|
Author Tobias Koppers @sokra
|
|
*/
|
|
|
|
"use strict";
|
|
|
|
// Simulations show these probabilities for a single change
|
|
// 93.1% that one group is invalidated
|
|
// 4.8% that two groups are invalidated
|
|
// 1.1% that 3 groups are invalidated
|
|
// 0.1% that 4 or more groups are invalidated
|
|
//
|
|
// And these for removing/adding 10 lexically adjacent files
|
|
// 64.5% that one group is invalidated
|
|
// 24.8% that two groups are invalidated
|
|
// 7.8% that 3 groups are invalidated
|
|
// 2.7% that 4 or more groups are invalidated
|
|
//
|
|
// And these for removing/adding 3 random files
|
|
// 0% that one group is invalidated
|
|
// 3.7% that two groups are invalidated
|
|
// 80.8% that 3 groups are invalidated
|
|
// 12.3% that 4 groups are invalidated
|
|
// 3.2% that 5 or more groups are invalidated
|
|
|
|
/**
 * Scores how alike two keys are.
 *
 * The keys are compared position by position (UTF-16 code units) over the
 * length of the shorter key. Each position contributes between 0 and 10
 * points: 10 for identical code units, one point less per unit of distance,
 * and 0 once the code units are 10 or more apart.
 * @param {string} a key
 * @param {string} b key
 * @returns {number} the similarity as number (higher means more similar)
 */
const similarity = (a, b) => {
	const length = Math.min(a.length, b.length);
	let score = 0;
	for (let i = 0; i < length; i++) {
		const distance = Math.abs(a.charCodeAt(i) - b.charCodeAt(i));
		score += Math.max(0, 10 - distance);
	}
	return score;
};
|
|
|
|
/**
 * Derives a short name for a group from the keys of its first and last node.
 *
 * The candidate name is the common prefix of both keys plus one additional
 * character of `a` (the first differing one). If that candidate is already
 * taken (compared case-insensitively), it is extended by one more character
 * of `a` at a time until an unused name is found. Every name returned from
 * that search is recorded (lower-cased) in `usedNames`.
 * @param {string} a key
 * @param {string} b key
 * @param {Set<string>} usedNames set of already used names
 * @returns {string} the common part and a single char for the difference
 */
const getName = (a, b, usedNames) => {
	const limit = Math.min(a.length, b.length);

	// Advance past the common prefix; the first differing character is
	// included in the candidate, hence the increment before the break.
	let end = 0;
	while (end < limit) {
		const differs = a.charCodeAt(end) !== b.charCodeAt(end);
		end++;
		if (differs) break;
	}

	// Grow the candidate until it does not collide with a used name.
	for (; end < limit; end++) {
		const name = a.slice(0, end);
		const lowerName = name.toLowerCase();
		if (!usedNames.has(lowerName)) {
			usedNames.add(lowerName);
			return name;
		}
	}

	// names always contain a hash, so this is always unique
	// we don't need to check usedNames nor add it
	return a;
};
|
|
|
|
/**
 * Adds every entry of `size` onto `total`, mutating `total` in place.
 * Types that are missing (or falsy) in `total` start from 0.
 * @param {Record<string, number>} total total size
 * @param {Record<string, number>} size single size
 * @returns {void}
 */
const addSizeTo = (total, size) => {
	for (const type of Object.keys(size)) {
		total[type] = (total[type] || 0) + size[type];
	}
};
|
|
|
|
/**
 * Subtracts every entry of `size` from `total`, mutating `total` in place.
 *
 * No default is applied: a type that is present in `size` but missing in
 * `total` ends up as NaN, so only subtract sizes that were added before.
 * @param {Record<string, number>} total total size
 * @param {Record<string, number>} size single size
 * @returns {void}
 */
const subtractSizeFrom = (total, size) => {
	for (const type of Object.keys(size)) {
		total[type] -= size[type];
	}
};
|
|
|
|
/**
 * Sums up the `size` records of all given nodes, per size type.
 * The result is a fresh prototype-less object, so it can be mutated freely
 * and arbitrary type names cannot collide with inherited properties.
 * @template T
 * @param {Iterable<Node<T>>} nodes some nodes
 * @returns {Record<string, number>} total size
 */
const sumSize = nodes => {
	/** @type {Record<string, number>} */
	const total = Object.create(null);
	for (const { size } of nodes) {
		addSizeTo(total, size);
	}
	return total;
};
|
|
|
|
/**
 * Checks whether any size type exceeds its configured maximum.
 * Types with a size of exactly 0 and types without a numeric entry in
 * `maxSize` are ignored.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} maxSize maximum size
 * @returns {boolean} true, when size is too big
 */
const isTooBig = (size, maxSize) => {
	for (const type of Object.keys(size)) {
		const value = size[type];
		if (value === 0) continue;
		const limit = maxSize[type];
		if (typeof limit === "number" && value > limit) return true;
	}
	return false;
};
|
|
|
|
/**
 * Checks whether any size type falls below its configured minimum.
 * Types with a size of exactly 0 and types without a numeric entry in
 * `minSize` are ignored.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} minSize minimum size
 * @returns {boolean} true, when size is too small
 */
const isTooSmall = (size, minSize) => {
	for (const type of Object.keys(size)) {
		const value = size[type];
		if (value === 0) continue;
		const limit = minSize[type];
		if (typeof limit === "number" && value < limit) return true;
	}
	return false;
};
|
|
|
|
/**
 * Collects the size types that fall below their configured minimum.
 * Uses the same rules as `isTooSmall`: types with a size of exactly 0 and
 * types without a numeric entry in `minSize` are never reported.
 * @param {Record<string, number>} size size
 * @param {Record<string, number>} minSize minimum size
 * @returns {Set<string>} set of types that are too small
 */
const getTooSmallTypes = (size, minSize) => {
	/** @type {Set<string>} */
	const tooSmall = new Set();
	for (const type of Object.keys(size)) {
		const value = size[type];
		if (value === 0) continue;
		const limit = minSize[type];
		if (typeof limit === "number" && value < limit) tooSmall.add(type);
	}
	return tooSmall;
};
|
|
|
|
/**
 * Counts how many of the given types have a non-zero entry in `size`.
 * @param {Record<string, number>} size size
 * @param {Set<string>} types types
 * @returns {number} number of matching size types
 */
const getNumberOfMatchingSizeTypes = (size, types) => {
	let count = 0;
	for (const type of Object.keys(size)) {
		if (size[type] !== 0 && types.has(type)) count++;
	}
	return count;
};
|
|
|
|
/**
 * Adds up the sizes of only those size types that are listed in `types`.
 * @param {Record<string, number>} size size
 * @param {Set<string>} types types
 * @returns {number} selective size sum
 */
const selectiveSizeSum = (size, types) => {
	let sum = 0;
	for (const type of Object.keys(size)) {
		const value = size[type];
		if (value !== 0 && types.has(type)) sum += value;
	}
	return sum;
};
|
|
|
|
/**
 * A single item to be grouped, together with the key used for ordering and
 * naming and its size per size type.
 * @template T
 */
class Node {
	/**
	 * @param {T} item item
	 * @param {string} key key
	 * @param {Record<string, number>} size size
	 */
	constructor(item, key, size) {
		/** @type {T} */
		this.item = item;
		/** @type {string} */
		this.key = key;
		/** @type {Record<string, number>} */
		this.size = size;
	}
}
|
|
|
|
/**
 * An ordered run of nodes together with the similarities between
 * neighbouring nodes and the summed size of all contained nodes.
 * @template T
 */
class Group {
	/**
	 * @param {Node<T>[]} nodes nodes
	 * @param {number[] | null} similarities similarities between the nodes (length = nodes.length - 1)
	 * @param {Record<string, number>=} size size of the group (computed from the nodes when omitted)
	 */
	constructor(nodes, similarities, size) {
		this.nodes = nodes;
		this.similarities = similarities;
		this.size = size || sumSize(nodes);
		/** @type {string | undefined} */
		this.key = undefined;
	}

	/**
	 * Removes all nodes matching the filter from this group and updates
	 * `nodes`, `similarities` and `size` accordingly.
	 *
	 * When every node matches, the group is left untouched and `undefined`
	 * is returned, so a group is never emptied by this method.
	 * @param {function(Node<T>): boolean} filter filter function
	 * @returns {Node<T>[] | undefined} removed nodes
	 */
	popNodes(filter) {
		const keptNodes = [];
		const keptSimilarities = [];
		const removedNodes = [];
		let lastKept;
		for (let i = 0; i < this.nodes.length; i++) {
			const node = this.nodes[i];
			if (filter(node)) {
				removedNodes.push(node);
				continue;
			}
			if (keptNodes.length > 0) {
				// Reuse the stored similarity when the previously kept node is
				// still the direct neighbour; otherwise nodes in between were
				// removed and the similarity has to be computed afresh.
				keptSimilarities.push(
					lastKept === this.nodes[i - 1]
						? /** @type {number[]} */ (this.similarities)[i - 1]
						: similarity(/** @type {Node<T>} */ (lastKept).key, node.key)
				);
			}
			keptNodes.push(node);
			lastKept = node;
		}
		if (removedNodes.length === this.nodes.length) return;
		this.nodes = keptNodes;
		this.similarities = keptSimilarities;
		this.size = sumSize(keptNodes);
		return removedNodes;
	}
}
|
|
|
|
/**
 * Calculates the similarity between each pair of consecutive nodes, in
 * iteration order. For n nodes the result has n - 1 entries (none for an
 * empty or single-element input).
 * @template T
 * @param {Iterable<Node<T>>} nodes nodes
 * @returns {number[]} similarities
 */
const getSimilarities = nodes => {
	/** @type {number[]} */
	const similarities = [];
	let previous;
	for (const node of nodes) {
		if (previous !== undefined) {
			similarities.push(similarity(previous.key, node.key));
		}
		previous = node;
	}
	return similarities;
};
|
|
|
|
/**
|
|
* @template T
|
|
* @typedef {object} GroupedItems<T>
|
|
* @property {string} key
|
|
* @property {T[]} items
|
|
* @property {Record<string, number>} size
|
|
*/
|
|
|
|
/**
|
|
* @template T
|
|
* @typedef {object} Options
|
|
* @property {Record<string, number>} maxSize maximum size of a group
|
|
* @property {Record<string, number>} minSize minimum size of a group (preferred over maximum size)
|
|
* @property {Iterable<T>} items a list of items
|
|
* @property {function(T): Record<string, number>} getSize function to get size of an item
|
|
* @property {function(T): string} getKey function to get the key of an item
|
|
*/
|
|
|
|
/**
|
|
* @template T
|
|
* @param {Options<T>} options options object
|
|
* @returns {GroupedItems<T>[]} grouped items
|
|
*/
|
|
module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
|
|
/** @type {Group<T>[]} */
|
|
const result = [];
|
|
|
|
const nodes = Array.from(
|
|
items,
|
|
item => new Node(item, getKey(item), getSize(item))
|
|
);
|
|
|
|
/** @type {Node<T>[]} */
|
|
const initialNodes = [];
|
|
|
|
// lexically ordering of keys
|
|
nodes.sort((a, b) => {
|
|
if (a.key < b.key) return -1;
|
|
if (a.key > b.key) return 1;
|
|
return 0;
|
|
});
|
|
|
|
// return nodes bigger than maxSize directly as group
|
|
// But make sure that minSize is not violated
|
|
for (const node of nodes) {
|
|
if (isTooBig(node.size, maxSize) && !isTooSmall(node.size, minSize)) {
|
|
result.push(new Group([node], []));
|
|
} else {
|
|
initialNodes.push(node);
|
|
}
|
|
}
|
|
|
|
if (initialNodes.length > 0) {
|
|
const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
|
|
|
|
/**
|
|
* @param {Group<T>} group group
|
|
* @param {Record<string, number>} consideredSize size of the group to consider
|
|
* @returns {boolean} true, if the group was modified
|
|
*/
|
|
const removeProblematicNodes = (group, consideredSize = group.size) => {
|
|
const problemTypes = getTooSmallTypes(consideredSize, minSize);
|
|
if (problemTypes.size > 0) {
|
|
// We hit an edge case where the working set is already smaller than minSize
|
|
// We merge problematic nodes with the smallest result node to keep minSize intact
|
|
const problemNodes = group.popNodes(
|
|
n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
|
|
);
|
|
if (problemNodes === undefined) return false;
|
|
// Only merge it with result nodes that have the problematic size type
|
|
const possibleResultGroups = result.filter(
|
|
n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
|
|
);
|
|
if (possibleResultGroups.length > 0) {
|
|
const bestGroup = possibleResultGroups.reduce((min, group) => {
|
|
const minMatches = getNumberOfMatchingSizeTypes(min, problemTypes);
|
|
const groupMatches = getNumberOfMatchingSizeTypes(
|
|
group,
|
|
problemTypes
|
|
);
|
|
if (minMatches !== groupMatches)
|
|
return minMatches < groupMatches ? group : min;
|
|
if (
|
|
selectiveSizeSum(min.size, problemTypes) >
|
|
selectiveSizeSum(group.size, problemTypes)
|
|
)
|
|
return group;
|
|
return min;
|
|
});
|
|
for (const node of problemNodes) bestGroup.nodes.push(node);
|
|
bestGroup.nodes.sort((a, b) => {
|
|
if (a.key < b.key) return -1;
|
|
if (a.key > b.key) return 1;
|
|
return 0;
|
|
});
|
|
} else {
|
|
// There are no other nodes with the same size types
|
|
// We create a new group and have to accept that it's smaller than minSize
|
|
result.push(new Group(problemNodes, null));
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
if (initialGroup.nodes.length > 0) {
|
|
const queue = [initialGroup];
|
|
|
|
while (queue.length) {
|
|
const group = /** @type {Group<T>} */ (queue.pop());
|
|
// only groups bigger than maxSize need to be splitted
|
|
if (!isTooBig(group.size, maxSize)) {
|
|
result.push(group);
|
|
continue;
|
|
}
|
|
// If the group is already too small
|
|
// we try to work only with the unproblematic nodes
|
|
if (removeProblematicNodes(group)) {
|
|
// This changed something, so we try this group again
|
|
queue.push(group);
|
|
continue;
|
|
}
|
|
|
|
// find unsplittable area from left and right
|
|
// going minSize from left and right
|
|
// at least one node need to be included otherwise we get stuck
|
|
let left = 1;
|
|
const leftSize = Object.create(null);
|
|
addSizeTo(leftSize, group.nodes[0].size);
|
|
while (left < group.nodes.length && isTooSmall(leftSize, minSize)) {
|
|
addSizeTo(leftSize, group.nodes[left].size);
|
|
left++;
|
|
}
|
|
let right = group.nodes.length - 2;
|
|
const rightSize = Object.create(null);
|
|
addSizeTo(rightSize, group.nodes[group.nodes.length - 1].size);
|
|
while (right >= 0 && isTooSmall(rightSize, minSize)) {
|
|
addSizeTo(rightSize, group.nodes[right].size);
|
|
right--;
|
|
}
|
|
|
|
// left v v right
|
|
// [ O O O ] O O O [ O O O ]
|
|
// ^^^^^^^^^ leftSize
|
|
// rightSize ^^^^^^^^^
|
|
// leftSize > minSize
|
|
// rightSize > minSize
|
|
|
|
// Perfect split: [ O O O ] [ O O O ]
|
|
// right === left - 1
|
|
|
|
if (left - 1 > right) {
|
|
// We try to remove some problematic nodes to "fix" that
|
|
let prevSize;
|
|
if (right < group.nodes.length - left) {
|
|
subtractSizeFrom(rightSize, group.nodes[right + 1].size);
|
|
prevSize = rightSize;
|
|
} else {
|
|
subtractSizeFrom(leftSize, group.nodes[left - 1].size);
|
|
prevSize = leftSize;
|
|
}
|
|
if (removeProblematicNodes(group, prevSize)) {
|
|
// This changed something, so we try this group again
|
|
queue.push(group);
|
|
continue;
|
|
}
|
|
// can't split group while holding minSize
|
|
// because minSize is preferred of maxSize we return
|
|
// the problematic nodes as result here even while it's too big
|
|
// To avoid this make sure maxSize > minSize * 3
|
|
result.push(group);
|
|
continue;
|
|
}
|
|
if (left <= right) {
|
|
// when there is a area between left and right
|
|
// we look for best split point
|
|
// we split at the minimum similarity
|
|
// here key space is separated the most
|
|
// But we also need to make sure to not create too small groups
|
|
let best = -1;
|
|
let bestSimilarity = Infinity;
|
|
let pos = left;
|
|
const rightSize = sumSize(group.nodes.slice(pos));
|
|
|
|
// pos v v right
|
|
// [ O O O ] O O O [ O O O ]
|
|
// ^^^^^^^^^ leftSize
|
|
// rightSize ^^^^^^^^^^^^^^^
|
|
|
|
while (pos <= right + 1) {
|
|
const similarity = /** @type {number[]} */ (group.similarities)[
|
|
pos - 1
|
|
];
|
|
if (
|
|
similarity < bestSimilarity &&
|
|
!isTooSmall(leftSize, minSize) &&
|
|
!isTooSmall(rightSize, minSize)
|
|
) {
|
|
best = pos;
|
|
bestSimilarity = similarity;
|
|
}
|
|
addSizeTo(leftSize, group.nodes[pos].size);
|
|
subtractSizeFrom(rightSize, group.nodes[pos].size);
|
|
pos++;
|
|
}
|
|
if (best < 0) {
|
|
// This can't happen
|
|
// but if that assumption is wrong
|
|
// fallback to a big group
|
|
result.push(group);
|
|
continue;
|
|
}
|
|
left = best;
|
|
right = best - 1;
|
|
}
|
|
|
|
// create two new groups for left and right area
|
|
// and queue them up
|
|
const rightNodes = [group.nodes[right + 1]];
|
|
/** @type {number[]} */
|
|
const rightSimilarities = [];
|
|
for (let i = right + 2; i < group.nodes.length; i++) {
|
|
rightSimilarities.push(
|
|
/** @type {number[]} */ (group.similarities)[i - 1]
|
|
);
|
|
rightNodes.push(group.nodes[i]);
|
|
}
|
|
queue.push(new Group(rightNodes, rightSimilarities));
|
|
|
|
const leftNodes = [group.nodes[0]];
|
|
/** @type {number[]} */
|
|
const leftSimilarities = [];
|
|
for (let i = 1; i < left; i++) {
|
|
leftSimilarities.push(
|
|
/** @type {number[]} */ (group.similarities)[i - 1]
|
|
);
|
|
leftNodes.push(group.nodes[i]);
|
|
}
|
|
queue.push(new Group(leftNodes, leftSimilarities));
|
|
}
|
|
}
|
|
}
|
|
|
|
// lexically ordering
|
|
result.sort((a, b) => {
|
|
if (a.nodes[0].key < b.nodes[0].key) return -1;
|
|
if (a.nodes[0].key > b.nodes[0].key) return 1;
|
|
return 0;
|
|
});
|
|
|
|
// give every group a name
|
|
const usedNames = new Set();
|
|
for (let i = 0; i < result.length; i++) {
|
|
const group = result[i];
|
|
if (group.nodes.length === 1) {
|
|
group.key = group.nodes[0].key;
|
|
} else {
|
|
const first = group.nodes[0];
|
|
const last = group.nodes[group.nodes.length - 1];
|
|
const name = getName(first.key, last.key, usedNames);
|
|
group.key = name;
|
|
}
|
|
}
|
|
|
|
// return the results
|
|
return result.map(
|
|
group =>
|
|
/** @type {GroupedItems<T>} */
|
|
({
|
|
key: group.key,
|
|
items: group.nodes.map(node => node.item),
|
|
size: group.size
|
|
})
|
|
);
|
|
};
|