Source: queue/Classifier.js

import { Queue } from "../core/Queue.js";
import { ArrayQueue } from "./ArrayQueue.js";
import { SortedArray } from "./SortedArray.js";
import { ORDER, SORTER } from "../global.js";
import { Each, Path } from "@fizzwiz/fluent";

/**
 * Classifier
 * 
 * A hierarchical, prefix-based multimap where keys are **sequences** and
 * values are grouped by their key sequence (equivalence classes).
 *
 * ## Concept
 * - Keys are iterables (e.g., `['a','b','c']`).
 * - Multiple values may be stored per key sequence.
 * - Keys are stored in a `SortedArray` for efficient prefix queries.
 * - Values at each node form an **equivalence class**, optionally sorted or queued.
 *
 * ## Node Structure
 * Each node contains:
 * - `parent` — parent node (null for root)
 * - `key` — key for this node (undefined for root)
 * - `depth` — depth in the tree (root = 0)
 * - `sortedKeys` — sorted list of child keys
 * - `keyToChild` — map of key → child node
 * - `nin` — number of sequences ending **at** this node
 * - `nout` — number of sequences ending **below** this node
 * - `class` — container of values for this node’s key sequence
 * - `sorter(node)` — function defining key and value order
 *
 * ## Features
 * - Multimap: multiple values per key sequence.
 * - SortedMap: both keys and values are ordered.
 * - Prefix queries with wildcards.
 * - Efficient traversal, insertion, and removal.
 *
 * ## Example
 * ```js
 * const clf = new Classifier();
 * clf.add(['a','b','c'], 'abc');   // store item "abc"
 * for (const item of clf.get([undefined,'b'])) {
 *     console.log(item);            // all items with repr[1] === 'b'
 * }
 * ```
 *
 * @class
 * @extends Queue
 */

export class Classifier extends Queue {

    /**
     * Creates a classifier node.
     *
     * @constructor
     * @param {Function} [sorter]
     *     Function receiving the newly created node and returning a pair [keyComparator, itemComparator]:
     *     - `keyComparator(a, b)` — for sorting child keys.
     *     - `itemComparator(a, b)` — for sorting items inside `.class`.
     *       Optional: if a Boolean flag is provided instead of a itemComparator, `.class` becomes an `ArrayQueue(flag)`
     *       (`true` | undefined = FIFO, `false` = LIFO), allowing duplicates.
     *
     * @param {Classifier|null} parent
     *     The parent node, or `null` for the root.
     *
     * @param {*} key
     *     The key leading from the parent to this node
     *     (`undefined` for the root node).
     */
    constructor(
        sorter = SORTER.UNIFORM(ORDER.ASCENDING, ORDER.INSERTION),
        parent = null,
        key = undefined
    ) {
        super();

        /** @type {Classifier|null} */
        this.parent = parent;

        /** @type {*} */
        this.key = key;

        /**
         * Function returning a couple of comparators: for child keys and class items.
         * @type {Function}
         */
        this.sorter = sorter;

        /** @type {number} */
        this.depth = (parent?.depth ?? -1) + 1;

        /** @type {number} */
        this.nin = 0;

        /** @type {number} */
        this.nout = 0;

        const [keyComparator, itemComparator] = sorter(this);

        /** @type {SortedArray<*>} */
        this.sortedKeys = new SortedArray(keyComparator);

        /** @type {Map<*,Classifier>} */
        this.keyToChild = new Map();

        /**
         * Items whose representation corresponds exactly to this node's
         * full key-path. Acts as an equivalence class container.
         *
         * - If an item-comparator is provided → `SortedArray`
         * - Otherwise → `ArrayQueue` preserving insertion order
         *
         * @type {Queue}
         */
        this.class = typeof(itemComparator) === 'function'
            ? new SortedArray(itemComparator)
            : new ArrayQueue(itemComparator);  // either undefined or a boolean
    }

    // ---------------------------------------------------------------------
    // Helpers
    // ---------------------------------------------------------------------

    /**
     * Returns the child associated with `key`.
     * If `creating` is true and the child does not exist, it is created.
     *
     * @param {*} key - Child key.
     * @param {boolean} [creating=false] - Whether to create the child if missing.
     * @returns {Classifier|undefined} - The child node, or undefined.
     */
    getChild(key, creating = false) {
        let child = this.keyToChild.get(key);
        if (!child && creating) {
            child = new Classifier(this.sorter, this, key);
            this.keyToChild.set(key, child);
            this.sortedKeys.add(key);
        }
        return child;
    }

    /**
     * Descends through a sequence of keys and returns the final node.
     *
     * @param {Iterable<*> | *} keys - Sequence of keys (or single key) to follow.
     * @param {boolean} [creating=false] - Whether to create missing nodes.
     * @returns {Classifier|undefined} - The descendant node, or undefined.
     */
    with(keys, creating = false) {
        let node = this;
        for (const key of Each.as(keys)) {
            node = node.getChild(key, creating);
            if (!node) return undefined;
        }
        return node;
    }

    /**
     * Adjusts occurrence counters along this node's ancestor chain.
     *
     * - `nin` is incremented only on this node.
     * - `nout` is incremented on all ancestor nodes.
     *
     * @param {number} [value=1] - Amount to increment (or decrement).
     * @returns {void}
     */
    increment(value = 1) {
        this.nin += value;
        let node = this.parent;
        while (node) {
            node.nout += value;
            node = node.parent;
        }
    }

    /**
     * Removes empty nodes from the tree, traversing bottom-up.
     *
     * A node is removed if both its `nin` and `nout` are zero.
     * The process continues up the parent chain until reaching a non-empty node or the root.
     *
     * @returns {void}
     */
    pruneIfEmpty() {
        let node = this;
        while (node.parent && node.nin === 0 && node.nout === 0) {
            const parent = node.parent;
            parent.keyToChild.delete(node.key);
            parent.sortedKeys.remove(node.key);

            // Detach node from tree
            node.parent = null;
            node.depth = 0;

            node = parent;
        }
    }


    // ---------------------------------------------------------------------
    // Collection
    // ---------------------------------------------------------------------

    /**
     * Returns the number of item occurrences stored in this node's subtree.
     *
     * @returns {number} Total occurrences `.nin` + `.nout`.
     */
    n() {
        return this.nin + this.nout;
    }

    /**
     * Total number of item occurrences `.nin` + `.nout` stored in the subtree rooted at this node.
     *
     * @type {number}
     * @readonly
     */
    get size() {
        return this.nin + this.nout;
    }

    /**
     * Returns `true` iff the sequence exists and ends at a terminating node.
     *
     * @param {Iterable<*> | *} keys (or key)
     * @returns {boolean}
     */
    has(keys) {
        const node = this.with(keys);
        return node !== undefined && node.nin > 0;
    }

    /**
     * Adds or removes occurrences of an entry >keys, obj> .
     * If `xTimes` > 0, missing nodes are created.
     *
     * @param {Iterable<*> | *} keys (or key)
     * @param {any} item
     * @param {number} [xTimes=1] - Positive to add, negative to remove. A float number is allowed.
     * @returns {boolean} - Whether the operation succeeded.
     */
    add(keys, item, xTimes = 1) {
        const node = this.with(keys, xTimes > 0);
        if (!node) return false;
        const got = node.class.add(item); 

        node.increment(xTimes);
        node.pruneIfEmpty();
        return got;
    }

    /**
     * Removes occurrences of a stored sequence identified by `keys`.
     *
     * - Decrements `.nin` of the terminal node by up to `xTimes`.
     * - Updates `.nout` counters along ancestor nodes.
     * - Prunes nodes that become empty.
     *
     * @param {Iterable<*> | *} keys - Sequence of keys (or key) representing the stored path.
     * @param {number} [xTimes=Infinity] - Maximum number of occurrences to remove.
     * @returns {boolean} `true` if any sequences were removed, `false` if the path did not exist.
     */
    remove(keys, xTimes = Infinity) {
        const node = this.with(keys);
        if (!node) return false;

        node.increment(-Math.min(xTimes, node.nin));
        node.pruneIfEmpty();
        return true;
    }

    /**
     * Removes all entries stored in this node and its entire subtree.
     *
     * This method:
     * - Resets `.nin` (sequences ending at this node) and `.nout` (sequences passing below)
     * - Clears all child nodes (`keyToChild` and `sortedKeys`)
     * - Clears the `.class` collection of items associated with this node
     * - Prunes this node from its parent if it becomes empty
     *
     * @returns {void}
     */
    clear() {
        this.nin = 0;
        this.nout = 0;
        this.sortedKeys.clear();
        this.keyToChild.clear();
        this.class.clear();
        this.pruneIfEmpty();
    }

    /**
     * Retrieves all items stored in nodes whose key-path matches the given prefix.
     *
     * - Each node along the tree whose path matches `query` contributes its `.class` items.
     * - `undefined` in `query` is treated as a wildcard (matches any key at that level).
     * - Returns a flattened `Each<*>` of all matching items.
     *
     * Conceptually, this works like `Map.get(key)` but supports **prefix matching**:
     * all items under nodes that match the query are included.
     *
     * @param {Array<*>} query - Prefix of keys to match.
     * @param {boolean} [first=true] - If `false`, traversal order is reversed.
     * @returns {Each<*>} An iterable of all items stored in nodes matching the query.
     *
     * @example
     * // Store items under sequences
     * clf.add(['a','b'], 'item1');
     * clf.add(['a','c'], 'item2');
     *
     * // Get items under 'a' prefix
     * const items = clf.get(['a']);
     * console.log([...items]); // ['item1', 'item2']
     */
    get(query, first = true) {
        return Each.else(
            this
                .descendants(query, first)
                .sthen(node => first? node.class: node.class.reverse())
        );
    }

    /**
     * Returns all items contained in this `Classifier` in **reverse order**.
     *
     * This is equivalent to calling `get([], false)`, i.e., retrieving all items
     * from the tree in reverse traversal order.
     *
     * @returns {Each<*>} Iterable of all stored items, reversed.
     */
    reverse() {
        return this.get([], false);
    }

    /**
     * Returns the first or last stored item in this subtree.
     *
     * - `first = true` → returns the first item in traversal order
     * - `first = false` → returns the last item
     *
     * @param {boolean} [first=true] - Whether to retrieve the first (`true`) or last (`false`) item.
     * @returns {*|undefined} The first/last item, or `undefined` if the tree is empty.
     */
    peek(first = true) {
        return this.get([], first).what();
    }

    /**
     * Returns the path from the root node to this node.
     *
     * @returns {Path<Classifier>} Absolute path containing all nodes from the root (excluded) to this node (included).
     */
    path() {
        return this.parent
            ? this.parent.path().add(this)
            : new Path();
    }

    /**
     * Iterates over all stored items in this subtree.
     *
     * Equivalent to `get([], true)`.
     *
     * @generator
     * @yields {*} Each item in the classifier, in traversal order.
     */
    *[Symbol.iterator]() {
        yield* this.get([], true);
    }

    /**
     * Returns the child classifier nodes.
     *
     * By default, children are returned in arbitrary iteration order
     * (the natural order of `Map.prototype.values()`).
     *
     * If `sorted` is `true`, the children are returned in the order defined
     * by `this.sortedKeys`, which stores the classifier's sorted key sequence.
     *
     * ## Invariants
     * - Every key in `this.sortedKeys` MUST correspond to an existing child
     *   in `this.keyToChild`.  
     * - Therefore, calling this method with `sorted = true` will *never*
     *   return `undefined` elements.  
     *   If that happens, it indicates an internal consistency error elsewhere.
     *
     * ## Notes
     * - No filtering is performed: missing children are treated as fatal
     *   structural violations and will propagate errors naturally.
     *
     * @param {boolean} [sorted=false]
     *     Whether to return children in sorted key order.
     *
     * @returns {Each<Classifier>}
     *     An `Each` iterable of child classifier nodes.
     */
    children(sorted = false) {
        return sorted
            ? this.sortedKeys.sthen(key => this.keyToChild.get(key))
            : Each.as(this.keyToChild.values());
    }

    // ---------------------------------------------------------------------
    // Traversal
    // ---------------------------------------------------------------------

    /**
     * Performs a depth-first traversal of all **descendant classifier nodes**
     * starting from this node. The node itself is *not* included.
     *
     * This is the primitive traversal operation used internally by
     * `keys()`, `values()`, and `entries()`.
     *
     * ---
     * ### Prefix filtering (`query`)
     *
     * A `query` is an array of expected child keys at each depth:
     *
     * - `query[d]` = required key at depth `d`
     * - `undefined` = wildcard → all keys here are accepted
     * - If `query.length === 0`, the traversal is unrestricted
     *
     * Matching proceeds depth-by-depth as the traversal descends.
     *
     * ---
     * ### Traversal order
     *
     * - `first = true` → children visited in sorted ascending key order
     * - `first = false` → children visited in reversed order
     *
     * ---
     *
     * @param {Array<*>} [query=[]]
     *     Optional prefix constraint. `undefined` acts as a wildcard.
     *
     * @param {boolean} [first=true]
     *     Whether to traverse children in normal (`true`) or reversed (`false`) order.
     *
     * @param {number} [index=0]
     *     Internal recursion depth. Users should normally ignore this.
     *
     * @returns {Each<Classifier>}
     *     An `Each` iterable yielding all matching descendant nodes,
     *     in depth-first search order.
     *
     * @example
     * // Iterate all descendants
     * for (const node of clf.descendants()) {
     *     console.log(node.key);
     * }
     *
     * @example
     * // Prefix-filtered traversal (e.g., only under ['A', *, 'C'])
     * const nodes = clf.descendants(['A', undefined, 'C']);
     * console.log(nodes.toArray());
     */
    descendants(query = [], first = true, index = 0) {
        if (!Array.isArray(query)) query = Each.as(query).toArray(); 
        const self = this;
        const got = {
            *[Symbol.iterator]() {
                const keys = first ? self.sortedKeys : [...self.sortedKeys].reverse();
                const matchKey = query[index];

                for (const key of keys) {
                    if (matchKey !== undefined && key !== matchKey) continue;

                    const child = self.keyToChild.get(key);
                    if (!child) continue;

                    yield child;
                    yield* child.descendants(query, first, index + 1);
                }
            }
        }
        return Each.as(got);
    }

    /**
     * Returns an iterable over all key-paths in the classifier.
     *
     * A “key-path” is an array of keys describing the path from the root
     * to a node containing at least one stored item.
     *
     * @returns {Each<Array<*>>}
     *     An `Each` iterable yielding arrays of keys.
     *
     * @example
     * for (const keys of clf.keys()) {
     *     console.log(keys);
     * }
     */
    keys() {
        const self = this;
        const got = {
            *[Symbol.iterator]() {
                for (const node of self.descendants()) {
                    if (node.nin > 0) yield node.path().toArray().map(next => next.key);
                }
            }
        };
        return Each.as(got);
    }

    /**
     * Returns an iterable over **all items** stored anywhere in this
     * classifier subtree.
     *
     * Items are drawn from the equivalence class stored at each node.
     *
     * @returns {Each<*>}
     *     An `Each` iterable yielding all stored items.
     *
     * @example
     * for (const value of clf.values()) {
     *     console.log(value);
     * }
     */
    values() {
        const self = this;
        const got = {
            *[Symbol.iterator]() {
                for (const node of self.descendants()) {
                    for (const item of node.class) yield item;
                }
            }
        };

        return Each.as(got);
    }    

    /**
     * Returns an iterable of `[keys, item]` pairs, similar to `Map.prototype.entries()`.
     *
     * - `keys` is the full key-path from the root to the node.
     * - `item` is one element stored in that node's class.
     *
     * Only nodes with positive weight (`nin > 0`) are included.
     *
     * @returns {Each<Array<Array<*>, *>>}
     *     An `Each` iterable yielding `[keys, item]` pairs.
     *
     * @example
     * for (const [keys, value] of clf.entries()) {
     *     console.log(keys, value);
     * }
     */
    entries() {
        const self = this;
        const got = {
            *[Symbol.iterator]() {
                for (const node of self.descendants()) {
                    if (node.nin <= 0) continue;
                    const keys = node.path().toArray().map(next => next.key);
                    for (const item of node.class) {
                        yield [keys, item];
                    }
                }
            }
        };

        return Each.as(got);
    }  
}