mirror of
https://github.com/moku-project/Moku.git
synced 2026-06-13 09:19:56 -05:00
Chore: Attempted De-Dupe Patch #1 & Alternative Thumbnails
This commit is contained in:
+104
-41
@@ -1,58 +1,107 @@
|
||||
/**
|
||||
* Session-level request cache.
|
||||
* Session-level request cache — v3.
|
||||
*
|
||||
* Key design decisions (v1, preserved):
|
||||
* Key design decisions (preserved from v1/v2):
|
||||
* - Stores the Promise itself — concurrent callers await the same fetch (no thundering herd).
|
||||
* - On real errors the entry is evicted so the next call retries.
|
||||
* - AbortErrors do NOT evict — the request was cancelled by the user, not failed.
|
||||
* This is critical: if we evicted on abort, rapid open/close would drain the browser's
|
||||
* connection pool (Chromium allows only 6 concurrent connections to the same origin).
|
||||
* - Subscribers are notified when a key is explicitly cleared (for reactive invalidation).
|
||||
* - AbortErrors do NOT evict — cancellation ≠ failure.
|
||||
* - Subscribers are notified when a key is explicitly cleared or updated.
|
||||
*
|
||||
* v2 additions:
|
||||
* - TTL-aware get(): stale entries are re-fetched automatically (default 5 min).
|
||||
* Pass Infinity to pin an entry for the session (source list, extension list).
|
||||
* - getPageSet(): lightweight page-number tracker for multi-page browse sessions.
|
||||
* Mirrors Suwayomi's CACHE_PAGES_KEY pattern so GenreDrillPage / Search TagTab
|
||||
* can resume a session without re-fetching pages already in memory.
|
||||
* - Stable multi-tag cache keys: tag arrays are sorted before joining so
|
||||
* ["Action","Romance"] and ["Romance","Action"] share the same bucket.
|
||||
* v3 additions:
|
||||
* - cache.set(): direct write without a fetcher — for optimistic updates and
|
||||
* post-mutation cache patching. Notifies subscribers immediately.
|
||||
* - Invalidation groups: tag a cache key with one or more group strings.
|
||||
* cache.clearGroup("library") clears ALL keys tagged with "library" in one call.
|
||||
* This replaces the pattern of manually calling cache.clear() on every related key.
|
||||
* - Subscriber notifications on set() — reactive components re-render when the
|
||||
* cache is updated, not just when it's cleared.
|
||||
* - cache.update(): atomically patch a cached value (read → transform → write).
|
||||
*/
|
||||
|
||||
/**
 * One cache slot. The promise itself is stored (not the resolved value) so
 * that callers arriving while a fetch is in flight share the same request.
 */
interface Entry<T> {
  /** In-flight or settled result for the key. */
  promise: Promise<T>;
  /** `Date.now()` timestamp (ms since epoch) taken when the entry was stored. */
  fetchedAt: number;
}
|
||||
|
||||
/** cacheKey → cached entry. */
const store = new Map<string, Entry<unknown>>();
/** cacheKey → callbacks to invoke when that key changes or is cleared. */
const subs = new Map<string, Set<() => void>>();
/** groupTag → Set<cacheKey> */
const groups = new Map<string, Set<string>>();

/** Default revalidation window: 5 min (matches Suwayomi's browse-page TTL). */
export const DEFAULT_TTL_MS = 5 * 60 * 1_000;
|
||||
|
||||
/** Invoke every callback currently subscribed to `key`; no-op when there are none. */
function notify(key: string) {
  const listeners = subs.get(key);
  if (!listeners) return;
  listeners.forEach((listener) => listener());
}
|
||||
|
||||
export const cache = {
|
||||
/**
 * Return the cached promise for `key`, starting a new fetch when the key is
 * absent or its entry is at least `ttl` ms old.
 *
 * The promise is stored before it settles, so callers that arrive while the
 * fetch is in flight receive the same promise.
 *
 * @param key     Cache key.
 * @param fetcher Called only on a miss or a stale entry.
 * @param ttl     Maximum entry age in ms. Pass `Infinity` to pin for the session.
 * @param group   Invalidation group tag(s) to register `key` under. Registration
 *                happens only when a fetch is actually started.
 * @returns The cached or freshly started promise. It rejects with the fetcher's
 *          error; entries rejected with an `AbortError` stay cached.
 */
get<T>(
  key: string,
  fetcher: () => Promise<T>,
  ttl: number = DEFAULT_TTL_MS,
  group?: string | string[],
): Promise<T> {
  const existing = store.get(key) as Entry<T> | undefined;
  if (existing && Date.now() - existing.fetchedAt < ttl) return existing.promise;

  const promise: Promise<T> = fetcher().catch((err) => {
    // Only evict on real failures, not user cancellations — and only while the
    // stored entry is still this fetch. Otherwise a slow failing request would
    // delete a newer entry written by set() or by a re-fetch after clear().
    if (err?.name !== "AbortError" && store.get(key)?.promise === promise) {
      store.delete(key);
    }
    return Promise.reject(err);
  });

  store.set(key, { promise, fetchedAt: Date.now() });

  // Register in invalidation groups
  if (group) {
    const tags = Array.isArray(group) ? group : [group];
    for (const tag of tags) {
      if (!groups.has(tag)) groups.set(tag, new Set());
      groups.get(tag)!.add(key);
    }
  }

  // Notify subscribers once the fetch resolves (reactive update on new data).
  // The trailing catch keeps a failed fetch from surfacing as an unhandled
  // rejection on this side chain; callers still see the rejection on `promise`.
  promise.then(() => notify(key)).catch(() => {});

  return promise;
},
|
||||
|
||||
/**
 * Store `value` under `key` without running a fetcher (optimistic updates,
 * post-mutation patching). The value is wrapped in an already-resolved
 * promise, the key is added to each given group, and subscribers of `key`
 * are notified synchronously before this method returns.
 */
set<T>(key: string, value: T, group?: string | string[]) {
  store.set(key, { promise: Promise.resolve(value), fetchedAt: Date.now() });

  if (group) {
    for (const tag of Array.isArray(group) ? group : [group]) {
      const members = groups.get(tag) ?? new Set<string>();
      members.add(key);
      groups.set(tag, members);
    }
  }

  notify(key);
},
|
||||
|
||||
/**
 * Patch the value cached under `key` by running `fn` on it.
 * If the key doesn't exist, does nothing.
 *
 * The patched promise replaces the entry immediately and the entry's age is
 * reset; subscribers are notified once the patched value resolves.
 * If the previous promise rejects or `fn` throws, the previous entry is put
 * back (unless something else has overwritten the key in the meantime) so a
 * failing patch cannot leave a rejected promise in the cache.
 */
update<T>(key: string, fn: (prev: T) => T) {
  const existing = store.get(key) as Entry<T> | undefined;
  if (!existing) return;

  const next = existing.promise.then(fn);
  const patched: Entry<T> = { promise: next, fetchedAt: Date.now() };
  store.set(key, patched);

  next
    .then(
      () => notify(key),
      () => {
        // Roll back only if the slot still holds our patched entry.
        if (store.get(key) === patched) store.set(key, existing);
      },
    )
    .catch(() => {});
},
|
||||
|
||||
/** Whether an entry is stored under `key`; the entry's age is not considered. */
has(key: string): boolean {
  const present = store.has(key);
  return present;
},
|
||||
|
||||
/** How old (ms) a cached entry is, or undefined if absent. */
|
||||
ageOf(key: string): number | undefined {
|
||||
const e = store.get(key);
|
||||
return e ? Date.now() - e.fetchedAt : undefined;
|
||||
@@ -60,15 +109,30 @@ export const cache = {
|
||||
|
||||
/** Remove `key` from the cache and notify its subscribers exactly once. */
clear(key: string) {
  store.delete(key);
  notify(key);
},
|
||||
|
||||
/**
 * Clear all keys belonging to an invalidation group.
 * e.g. cache.clearGroup("library") clears "library", "all_manga_unfiltered", etc.
 *
 * Does nothing when no key has been registered under `tag`.
 */
clearGroup(tag: string) {
  const keys = groups.get(tag);
  if (!keys) return;

  // Detach the group and drop every entry BEFORE notifying. A subscriber that
  // reacts by calling get(..., group) re-registers into a fresh set; deleting
  // the tag afterwards would silently discard that new membership, and a
  // later store.delete() could wipe an entry the subscriber just re-fetched.
  groups.delete(tag);
  for (const key of keys) store.delete(key);
  for (const key of keys) notify(key);
},
|
||||
|
||||
/**
 * Drop every cached entry and every group registration, then notify the
 * subscribers of each key that was cached. Each key is notified once, after
 * the cache is already empty.
 */
clearAll() {
  // Snapshot first: store.clear() would leave nothing to iterate.
  const allKeys = [...store.keys()];
  store.clear();
  groups.clear();
  for (const key of allKeys) notify(key);
},
|
||||
|
||||
/** Subscribe to cache invalidation for a key. Returns unsubscribe fn. */
|
||||
subscribe(key: string, cb: () => void): () => void {
|
||||
if (!subs.has(key)) subs.set(key, new Set());
|
||||
subs.get(key)!.add(cb);
|
||||
@@ -78,24 +142,24 @@ export const cache = {
|
||||
|
||||
// ── Cache key constants ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Tags for bulk invalidation.
 * Example: cache.clearGroup(CACHE_GROUPS.LIBRARY) drops every key that was
 * registered under the library tag in one call.
 */
export const CACHE_GROUPS = {
  /** library + all_manga_unfiltered */
  LIBRARY: "g:library",
  /** sources list + per-source page caches */
  SOURCES: "g:sources",
} as const;
|
||||
|
||||
export const CACHE_KEYS = {
|
||||
LIBRARY: "library",
|
||||
SOURCES: "sources",
|
||||
POPULAR: "popular",
|
||||
LIBRARY: "library",
|
||||
ALL_MANGA: "all_manga_unfiltered",
|
||||
SOURCES: "sources",
|
||||
POPULAR: "popular",
|
||||
GENRE: (genre: string) => `genre:${genre}`,
|
||||
MANGA: (id: number) => `manga:${id}`,
|
||||
CHAPTERS: (id: number) => `chapters:${id}`,
|
||||
|
||||
/**
|
||||
* Stable key for a browse session's page-number set.
|
||||
* Tag arrays are sorted so order never creates duplicate buckets —
|
||||
* ["Action","Romance"] and ["Romance","Action"] share one key.
|
||||
*
|
||||
* Examples:
|
||||
* CACHE_KEYS.sourceMangaPages("src123", "POPULAR")
|
||||
* CACHE_KEYS.sourceMangaPages("src123", "SEARCH", "naruto")
|
||||
* CACHE_KEYS.sourceMangaPages("src123", "SEARCH", ["Action","Romance"])
|
||||
*/
|
||||
sourceMangaPages(
|
||||
sourceId: string,
|
||||
type: "POPULAR" | "LATEST" | "SEARCH",
|
||||
@@ -105,7 +169,6 @@ export const CACHE_KEYS = {
|
||||
return `pages:${sourceId}:${type}:${q}`;
|
||||
},
|
||||
|
||||
/** Per-page result key. Always pair with sourceMangaPages(). */
|
||||
sourceMangaPage(
|
||||
sourceId: string,
|
||||
type: "POPULAR" | "LATEST" | "SEARCH",
|
||||
|
||||
+87
-7
@@ -30,16 +30,60 @@ export function dedupeSources(sources: Source[], preferredLang: string): Source[
|
||||
// ── Manga deduplication ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Normalizes a title for fuzzy matching:
 *   - Lowercases
 *   - Strips common parenthesised tags: "(Official)", "(Web Comic)",
 *     "(Webtoon)", "(Manhwa)", "(Manhua)"
 *   - Replaces everything that is not a letter, combining mark, digit or
 *     whitespace with a space. The class is Unicode-aware, so non-Latin titles
 *     keep their characters instead of collapsing to an empty string.
 *   - Collapses whitespace and trims
 *   - Strips one leading article: "the ", "a ", "an "
 *
 *   "The Solo Leveling: Official Comic" → "solo leveling official comic"
 *   "Solo Leveling (Web Comic)"         → "solo leveling"
 *
 * @param title Raw title as reported by a source.
 * @returns The normalized key; may be empty when the title contains no
 *          letters or digits at all.
 */
export function normalizeTitle(title: string): string {
  return (
    title
      .toLowerCase()
      .replace(/\((?:official|web comic|webtoon|manhwa|manhua)\)/g, "")
      .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
      .replace(/\s+/g, " ")
      .trim()
      // Runs after trimming so leading whitespace or punctuation cannot hide
      // the article. Requires a following word, so a bare "The" is kept.
      .replace(/^(?:the|a|an) /, "")
  );
}
|
||||
|
||||
/**
 * Builds a short fingerprint from a description — first 120 chars, normalized.
 * Used as a secondary dedup signal when titles differ but the series is the same.
 *
 * Normalization lowercases, replaces everything that is not a letter,
 * combining mark, digit or whitespace with a space (Unicode-aware, so
 * non-Latin descriptions keep their text), collapses whitespace and trims.
 *
 * @param desc Raw description; may be missing.
 * @returns The fingerprint, or null if the description is absent or its
 *          normalized form is too short to be a reliable signal (< 40 chars).
 */
function descFingerprint(desc: string | null | undefined): string | null {
  if (!desc) return null;
  const norm = desc
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (norm.length < 40) return null;
  return norm.slice(0, 120);
}
|
||||
|
||||
/**
 * Deduplicates manga by normalized title OR description fingerprint, keeping the
 * first occurrence. Runs in a single O(n) pass — no nested loops.
 *
 * Use this when merging results across sources. Same series from different source
 * variants (e.g. MangaDex EN + Asura Scans) will be collapsed.
 *
 * The kept entry is the first one seen, so prefer passing library manga first so
 * the richer/preferred entry survives.
 *
 * Details:
 *   - When title normalization leaves nothing, the lowercased, trimmed raw title
 *     is used as the key instead. A completely blank title is never used as a
 *     key, so unrelated blank-titled items are not merged with each other.
 *   - A dropped duplicate still registers its own title and description keys,
 *     so a later item that matches only the dropped one is dropped too. This
 *     keeps the result in line with how groupDuplicates() builds its groups.
 *
 * @param items Candidates in priority order.
 * @returns A new array holding the first item of every detected series.
 */
export function dedupeMangaByTitle<T extends { id: number; title: string; description?: string | null }>(items: T[]): T[] {
  const seenTitles = new Set<string>();
  const seenDescs = new Set<string>();
  const out: T[] = [];
  for (const m of items) {
    const tk = normalizeTitle(m.title) || m.title.trim().toLowerCase();
    const dk = descFingerprint(m.description);

    const isDuplicate =
      (tk !== "" && seenTitles.has(tk)) || (dk !== null && seenDescs.has(dk));

    if (tk !== "") seenTitles.add(tk);
    if (dk !== null) seenDescs.add(dk);
    if (!isDuplicate) out.push(m);
  }
  return out;
}
|
||||
@@ -57,3 +101,39 @@ export function dedupeMangaById<T extends { id: number }>(items: T[]): T[] {
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Groups items that share a normalized title or description fingerprint.
 * Returns an array of groups — single-member groups are non-duplicates,
 * multi-member groups are the same series from different sources.
 *
 * Matching is transitive: if an item shares its title with one group and its
 * description with another, the two groups are merged into one.
 * Groups are returned in order of their first member, and members keep their
 * input order. When title normalization leaves nothing, the lowercased,
 * trimmed raw title is used as the key; a completely blank title is not used
 * as a key.
 *
 * Used by MangaPreview to show alternate thumbnails for merged entries.
 *
 * @param items Items to partition.
 * @returns Every input item, in exactly one group.
 */
export function groupDuplicates<T extends { id: number; title: string; description?: string | null }>(items: T[]): T[][] {
  // Union-find over item indices: parent[i] === i marks a root.
  const parent = items.map((_, i) => i);

  const find = (i: number): number => {
    let root = i;
    while (parent[root] !== root) root = parent[root]!;
    // Path compression: point every node on the walked path at the root.
    let cur = i;
    while (parent[cur] !== root) {
      const next = parent[cur]!;
      parent[cur] = root;
      cur = next;
    }
    return root;
  };

  const union = (a: number, b: number): void => {
    const ra = find(a);
    const rb = find(b);
    if (ra === rb) return;
    // The earlier index always becomes the root, so a group's root is its
    // first-seen member — this is what keeps the output order stable.
    if (ra < rb) parent[rb] = ra;
    else parent[ra] = rb;
  };

  // key → index of the first item that carried it
  const firstByTitle = new Map<string, number>();
  const firstByDesc = new Map<string, number>();
  const link = (index: Map<string, number>, key: string, i: number): void => {
    const first = index.get(key);
    if (first === undefined) index.set(key, i);
    else union(first, i);
  };

  items.forEach((m, i) => {
    const tk = normalizeTitle(m.title) || m.title.trim().toLowerCase();
    const dk = descFingerprint(m.description);
    if (tk !== "") link(firstByTitle, tk, i);
    if (dk !== null) link(firstByDesc, dk, i);
  });

  // Collect members per root; roots are met before their other members.
  const byRoot = new Map<number, T[]>();
  const out: T[][] = [];
  items.forEach((m, i) => {
    const root = find(i);
    const group = byRoot.get(root);
    if (group) {
      group.push(m);
    } else {
      const created = [m];
      byRoot.set(root, created);
      out.push(created);
    }
  });
  return out;
}
|
||||
|
||||
Reference in New Issue
Block a user