Chore: Attempted De-Dupe Patch #1 & Alternative Thumbnails

This commit is contained in:
Youwes09
2026-03-19 21:39:51 -05:00
parent deb8a5ee02
commit b772b94c6c
8 changed files with 349 additions and 68 deletions
+104 -41
View File
@@ -1,58 +1,107 @@
/**
* Session-level request cache.
* Session-level request cache — v3.
*
* Key design decisions (v1, preserved):
* Key design decisions (preserved from v1/v2):
* - Stores the Promise itself — concurrent callers await the same fetch (no thundering herd).
* - On real errors the entry is evicted so the next call retries.
* - AbortErrors do NOT evict — the request was cancelled by the user, not failed.
* This is critical: if we evicted on abort, rapid open/close would drain the browser's
* connection pool (Chromium allows only 6 concurrent connections to the same origin).
* - Subscribers are notified when a key is explicitly cleared (for reactive invalidation).
* - AbortErrors do NOT evict — cancellation ≠ failure.
* - Subscribers are notified when a key is explicitly cleared or updated.
*
* v2 additions:
* - TTL-aware get(): stale entries are re-fetched automatically (default 5 min).
* Pass Infinity to pin an entry for the session (source list, extension list).
* - getPageSet(): lightweight page-number tracker for multi-page browse sessions.
* Mirrors Suwayomi's CACHE_PAGES_KEY pattern so GenreDrillPage / Search TagTab
* can resume a session without re-fetching pages already in memory.
* - Stable multi-tag cache keys: tag arrays are sorted before joining so
* ["Action","Romance"] and ["Romance","Action"] share the same bucket.
* v3 additions:
* - cache.set(): direct write without a fetcher — for optimistic updates and
* post-mutation cache patching. Notifies subscribers immediately.
* - Invalidation groups: tag a cache key with one or more group strings.
* cache.clearGroup("library") clears ALL keys tagged with "library" in one call.
* This replaces the pattern of manually calling cache.clear() on every related key.
* - Subscriber notifications on set() — reactive components re-render when the
* cache is updated, not just when it's cleared.
* - cache.update(): atomically patch a cached value (read → transform → write).
*/
/** One cached request: the shared promise plus the moment it was stored. */
interface Entry<T> {
  /** Promise handed to every caller of the key; it may still be pending. */
  promise: Promise<T>;
  /** `Date.now()` at the time the entry was written (ms since epoch). */
  fetchedAt: number;
}
/** cacheKey → cached entry. */
const store = new Map<string, Entry<unknown>>();
/** cacheKey → callbacks to run when that key is written, patched or cleared. */
const subs = new Map<string, Set<() => void>>();
/** groupTag → Set<cacheKey> */
const groups = new Map<string, Set<string>>();
/** Default revalidation window: 5 min (matches Suwayomi's browse-page TTL). */
export const DEFAULT_TTL_MS = 5 * 60 * 1_000;
/** Run every callback currently subscribed to `key`; a key with no subscribers is a no-op. */
function notify(key: string) {
  const listeners = subs.get(key);
  if (listeners === undefined) return;
  listeners.forEach((listener) => {
    listener();
  });
}
export const cache = {
/**
* Return a cached promise.
* Re-fetches automatically once the entry is older than `ttl` ms.
* Pass `Infinity` to cache for the entire session (e.g. source/extension lists).
* Return a cached promise. Re-fetches once older than `ttl` ms.
* Pass `Infinity` to pin for the session.
*/
get<T>(key: string, fetcher: () => Promise<T>, ttl: number = DEFAULT_TTL_MS): Promise<T> {
/**
 * Return the promise cached under `key`, starting `fetcher` when there is no
 * entry or the entry is at least `ttl` ms old.
 *
 * @param key     Cache key.
 * @param fetcher Called only when a (re)fetch is needed.
 * @param ttl     Maximum entry age in ms; `Infinity` never expires.
 * @param group   Optional invalidation tag(s) the key is registered under.
 * @returns The shared promise; it rejects with whatever `fetcher` rejected with.
 */
get<T>(
  key: string,
  fetcher: () => Promise<T>,
  ttl: number = DEFAULT_TTL_MS,
  group?: string | string[],
): Promise<T> {
  const cached = store.get(key) as Entry<T> | undefined;
  if (cached) {
    const age = Date.now() - cached.fetchedAt;
    if (age < ttl) return cached.promise;
  }

  const pending: Promise<T> = fetcher().catch((err) => {
    // A cancelled request keeps its entry; any other failure drops it so the
    // next call retries.
    const cancelled = err?.name === "AbortError";
    if (!cancelled) store.delete(key);
    return Promise.reject(err);
  });
  store.set(key, { promise: pending, fetchedAt: Date.now() });

  // Record the key under each invalidation tag.
  if (group) {
    for (const tag of ([] as string[]).concat(group)) {
      let members = groups.get(tag);
      if (!members) {
        members = new Set<string>();
        groups.set(tag, members);
      }
      members.add(key);
    }
  }

  // Tell subscribers when fresh data has arrived; failures are not announced.
  pending.then(() => notify(key)).catch(() => {});
  return pending;
},
/**
 * Store `value` under `key` without running a fetcher, then notify the key's
 * subscribers synchronously.
 *
 * @param key   Cache key to overwrite.
 * @param value Value to cache; it is wrapped in an already-resolved promise.
 * @param group Optional invalidation tag(s) the key is registered under.
 */
set<T>(key: string, value: T, group?: string | string[]) {
  store.set(key, { promise: Promise.resolve(value), fetchedAt: Date.now() });

  if (group) {
    const tags = typeof group === "string" ? [group] : group;
    tags.forEach((tag) => {
      const members = groups.get(tag) ?? new Set<string>();
      if (!groups.has(tag)) groups.set(tag, members);
      members.add(key);
    });
  }

  notify(key);
},
/**
 * Patch a cached value: the stored promise is replaced by one that resolves to
 * `fn(previous value)`, and the entry's timestamp is reset to now.
 * Subscribers are notified once the patched value has resolved.
 *
 * If the key doesn't exist, does nothing.
 *
 * If the patch fails (for example `fn` throws), the entry is evicted so the
 * next `get()` refetches instead of replaying the rejection until the TTL
 * expires. AbortErrors are left in place, matching `get()`'s policy.
 *
 * @param key Cache key to patch.
 * @param fn  Maps the previously cached value to its replacement.
 */
update<T>(key: string, fn: (prev: T) => T) {
  const existing = store.get(key) as Entry<T> | undefined;
  if (!existing) return;
  const next = existing.promise.then(fn);
  store.set(key, { promise: next, fetchedAt: Date.now() });
  next
    .then(
      () => notify(key),
      (err) => {
        // Only evict our own entry: a later set()/get()/update() may already
        // have replaced it, and that newer entry must survive.
        const stillCurrent = store.get(key)?.promise === next;
        if (stillCurrent && err?.name !== "AbortError") store.delete(key);
      },
    )
    .catch(() => {});
},
/** Whether an entry is currently stored under `key` (its age is not checked). */
has(key: string): boolean {
  return store.has(key);
},
/** How old (ms) a cached entry is, or undefined if absent. */
ageOf(key: string): number | undefined {
const e = store.get(key);
return e ? Date.now() - e.fetchedAt : undefined;
@@ -60,15 +109,30 @@ export const cache = {
/**
 * Remove `key` from the cache and notify its subscribers exactly once.
 * Notification goes through `notify()` only; also walking `subs` inline
 * would run every callback a second time.
 */
clear(key: string) {
  store.delete(key);
  notify(key);
},
/**
 * Clear all keys belonging to an invalidation group.
 * e.g. cache.clearGroup(CACHE_GROUPS.LIBRARY) clears every key that was
 * registered under that tag via get()/set().
 *
 * Every member is removed before any subscriber runs, so a callback never
 * sees a half-cleared group. The group is detached first, so a callback that
 * re-fetches under the same tag registers into a fresh group rather than one
 * that is about to be deleted.
 *
 * @param tag Group tag to clear; unknown tags are ignored.
 */
clearGroup(tag: string) {
  const members = groups.get(tag);
  if (!members) return;
  groups.delete(tag);
  for (const key of members) store.delete(key);
  for (const key of members) notify(key);
},
/**
 * Drop every cached entry and every invalidation group, then notify each
 * subscriber exactly once.
 *
 * All state is cleared before the first callback runs, so a callback that
 * re-fetches does not have its new group membership wiped afterwards.
 */
clearAll() {
  store.clear();
  groups.clear();
  // Snapshot the keys: callbacks may subscribe or unsubscribe while we iterate.
  for (const key of [...subs.keys()]) notify(key);
},
/** Subscribe to changes for a key: the callback fires when the key is cleared, written, patched, or its fetch resolves. Returns unsubscribe fn. */
subscribe(key: string, cb: () => void): () => void {
if (!subs.has(key)) subs.set(key, new Set());
subs.get(key)!.add(cb);
@@ -78,24 +142,24 @@ export const cache = {
// ── Cache key constants ───────────────────────────────────────────────────────
/**
 * Tags for the `group` parameter of cache.get() / cache.set().
 * Passing one to cache.clearGroup() — e.g. cache.clearGroup(CACHE_GROUPS.LIBRARY) —
 * clears every key registered under it in a single call.
 */
export const CACHE_GROUPS = {
  /** library + all_manga_unfiltered */
  LIBRARY: "g:library",
  /** sources list + per-source page caches */
  SOURCES: "g:sources",
} as const;
export const CACHE_KEYS = {
LIBRARY: "library",
SOURCES: "sources",
POPULAR: "popular",
LIBRARY: "library",
ALL_MANGA: "all_manga_unfiltered",
SOURCES: "sources",
POPULAR: "popular",
GENRE: (genre: string) => `genre:${genre}`,
MANGA: (id: number) => `manga:${id}`,
CHAPTERS: (id: number) => `chapters:${id}`,
/**
* Stable key for a browse session's page-number set.
* Tag arrays are sorted so order never creates duplicate buckets —
* ["Action","Romance"] and ["Romance","Action"] share one key.
*
* Examples:
* CACHE_KEYS.sourceMangaPages("src123", "POPULAR")
* CACHE_KEYS.sourceMangaPages("src123", "SEARCH", "naruto")
* CACHE_KEYS.sourceMangaPages("src123", "SEARCH", ["Action","Romance"])
*/
sourceMangaPages(
sourceId: string,
type: "POPULAR" | "LATEST" | "SEARCH",
@@ -105,7 +169,6 @@ export const CACHE_KEYS = {
return `pages:${sourceId}:${type}:${q}`;
},
/** Per-page result key. Always pair with sourceMangaPages(). */
sourceMangaPage(
sourceId: string,
type: "POPULAR" | "LATEST" | "SEARCH",
+87 -7
View File
@@ -30,16 +30,60 @@ export function dedupeSources(sources: Source[], preferredLang: string): Source[
// ── Manga deduplication ───────────────────────────────────────────────────────
/**
* Deduplicates manga by title (case-insensitive), keeping the first occurrence.
* Use this when merging results across sources — eliminates the same series
* appearing multiple times in grids from different source variants.
* Normalizes a title for fuzzy matching:
* - Lowercases and trims
* - Strips common subtitle suffixes: "(Official)", "(Web Comic)", etc.
* - Removes all non-alphanumeric characters (punctuation, dashes, colons)
* - Strips leading articles: "the ", "a ", "an "
* - Collapses whitespace
*
* "The Solo Leveling: Official Comic" → "solo leveling official comic"
* "Solo Leveling (Web Comic)" → "solo leveling" (the parenthesised suffix is removed entirely)
*/
export function dedupeMangaByTitle<T extends { id: number; title: string }>(items: T[]): T[] {
const seen = new Set<string>();
/**
 * Normalizes a title into a key for fuzzy matching.
 *
 * Steps, in order:
 *  1. Unicode NFKC + lowercase, so full-width and decomposed forms compare equal.
 *  2. Drop the tags "(official)", "(web comic)", "(webtoon)", "(manhwa)", "(manhua)".
 *  3. Replace everything that is not a letter, combining mark, digit or
 *     whitespace (in any script) with a space.
 *  4. Collapse whitespace and trim.
 *  5. Strip one leading article ("the", "a", "an"). This runs after trimming,
 *     so leading spaces or quotes cannot shield the article.
 *
 * If the title contains no letters or digits at all, the lowercased,
 * whitespace-collapsed original is returned, so symbol-only titles do not all
 * map to the same empty key.
 *
 * @param title Raw title as delivered by a source.
 * @returns The normalized key; empty only when `title` is blank.
 */
export function normalizeTitle(title: string): string {
  const lowered = title.normalize("NFKC").toLowerCase();
  const normalized = lowered
    .replace(/\(official\)|\(web comic\)|\(webtoon\)|\(manhwa\)|\(manhua\)/g, "")
    // Unicode-aware: an ASCII-only class would erase every non-Latin title.
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .replace(/\s+/g, " ")
    .trim()
    .replace(/^(?:the|a|an)\s+/, "");
  if (normalized !== "") return normalized;
  return lowered.replace(/\s+/g, " ").trim();
}
/**
 * Builds a short fingerprint from a description — first 120 chars, normalized.
 * Used as a secondary dedup signal when titles differ but the series is the same.
 *
 * Normalization: Unicode NFKC, lowercase, every character that is not a letter,
 * combining mark, digit or whitespace (in any script) becomes a space, then
 * whitespace is collapsed and trimmed. The character class is Unicode-aware so
 * non-Latin descriptions still produce a fingerprint.
 *
 * @param desc Raw description; may be missing.
 * @returns The fingerprint, or null when `desc` is empty or its normalized form
 *          is shorter than 40 characters (too short to be a reliable signal).
 */
function descFingerprint(desc: string | null | undefined): string | null {
  if (!desc) return null;
  const norm = desc
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (norm.length < 40) return null;
  return norm.slice(0, 120);
}
/**
 * Deduplicates manga by normalized title OR description fingerprint, keeping the
 * first occurrence. Runs in a single pass over `items` using set lookups.
 *
 * Use this when merging results across sources. The kept entry is the first one
 * seen, so pass preferred entries (e.g. library manga) first.
 *
 * Details:
 * - The keys of a skipped duplicate are still recorded, so a later item that
 *   matches only that duplicate's title or description is skipped too. This
 *   gives one kept item per group that groupDuplicates() would form.
 * - A blank normalized title is not used as a match key, so items without a
 *   usable title are never collapsed on the title alone.
 *
 * @param items Candidates in priority order; the array is not mutated.
 * @returns A new array containing the first item of every duplicate set.
 */
export function dedupeMangaByTitle<T extends { id: number; title: string; description?: string | null }>(items: T[]): T[] {
  const seenTitles = new Set<string>();
  const seenDescs = new Set<string>();
  const out: T[] = [];
  for (const m of items) {
    const tk = normalizeTitle(m.title);
    const dk = descFingerprint(m.description);
    const isDuplicate =
      (tk !== "" && seenTitles.has(tk)) || (dk !== null && seenDescs.has(dk));
    // Record both keys even for duplicates so aliases chain to the kept entry.
    if (tk !== "") seenTitles.add(tk);
    if (dk !== null) seenDescs.add(dk);
    if (!isDuplicate) out.push(m);
  }
  return out;
}
@@ -57,3 +101,39 @@ export function dedupeMangaById<T extends { id: number }>(items: T[]): T[] {
}
return out;
}
/**
 * Groups items that share a normalized title or description fingerprint.
 * Returns an array of groups — single-member groups are non-duplicates,
 * multi-member groups are the same series from different sources.
 *
 * Used by MangaPreview to show alternate thumbnails for merged entries.
 *
 * Details:
 * - Groups are returned in the order their first member appears in `items`;
 *   members keep their input order.
 * - An item joins the group of its title key if one exists, otherwise the group
 *   of its description key. Any key it carries that is still unclaimed is then
 *   pointed at that group.
 * - A blank normalized title is not used as a key, so items without a usable
 *   title are not lumped into one group.
 *
 * @param items Candidates to group; the array is not mutated.
 * @returns One array per detected series.
 */
export function groupDuplicates<T extends { id: number; title: string; description?: string | null }>(items: T[]): T[][] {
  const titleMap = new Map<string, T[]>();
  const descMap = new Map<string, T[]>();
  // Collected at creation time, so groups without a title key are not lost
  // and no second de-duplication pass is needed.
  const out: T[][] = [];

  for (const m of items) {
    const tk = normalizeTitle(m.title);
    const dk = descFingerprint(m.description);

    const byTitle = tk !== "" ? titleMap.get(tk) : undefined;
    const byDesc = dk !== null ? descMap.get(dk) : undefined;
    let group = byTitle ?? byDesc;

    if (group) {
      group.push(m);
    } else {
      group = [m];
      out.push(group);
    }

    if (tk !== "" && !titleMap.has(tk)) titleMap.set(tk, group);
    if (dk !== null && !descMap.has(dk)) descMap.set(dk, group);
  }

  return out;
}