mirror of
https://github.com/moku-project/Moku.git
synced 2026-06-13 09:19:56 -05:00
Fix: Futile Attempt to Implement Image-Dedupe (#55)
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
import { store, linkManga } from "@store/state.svelte";
|
||||
import type { Manga } from "@types";
|
||||
|
||||
export function autoLinkLibrary(focal: Manga, allManga: Manga[]): Promise<number> {
|
||||
return new Promise((resolve) => {
|
||||
const worker = new Worker(
|
||||
new URL("./autoLinkWorker.ts", import.meta.url),
|
||||
{ type: "module" },
|
||||
);
|
||||
|
||||
worker.onmessage = (e: MessageEvent<number[]>) => {
|
||||
const matches = e.data;
|
||||
for (const id of matches) linkManga(focal.id, id);
|
||||
worker.terminate();
|
||||
resolve(matches.length);
|
||||
};
|
||||
|
||||
worker.onerror = () => { worker.terminate(); resolve(0); };
|
||||
|
||||
worker.postMessage({
|
||||
focalTitle: focal.title,
|
||||
focalId: focal.id,
|
||||
allManga: allManga.map(m => ({ id: m.id, title: m.title })),
|
||||
linkedIds: store.settings.mangaLinks?.[focal.id] ?? [],
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
interface WorkerMsg {
|
||||
focalTitle: string;
|
||||
focalId: number;
|
||||
allManga: { id: number; title: string }[];
|
||||
linkedIds: number[];
|
||||
}
|
||||
|
||||
function titleSimilarity(a: string, b: string): number {
|
||||
const norm = (s: string) =>
|
||||
s.toLowerCase().replace(/[^a-z0-9\s]/g, "").split(/\s+/).filter(Boolean);
|
||||
const wa = new Set(norm(a));
|
||||
const wb = new Set(norm(b));
|
||||
if (!wa.size || !wb.size) return 0;
|
||||
const intersection = [...wa].filter(w => wb.has(w)).length;
|
||||
return intersection / new Set([...wa, ...wb]).size;
|
||||
}
|
||||
|
||||
self.onmessage = (e: MessageEvent<WorkerMsg>) => {
|
||||
const { focalTitle, focalId, allManga, linkedIds } = e.data;
|
||||
const matches: number[] = [];
|
||||
|
||||
for (const m of allManga) {
|
||||
if (m.id === focalId) continue;
|
||||
if (linkedIds.includes(m.id)) continue;
|
||||
if (titleSimilarity(focalTitle, m.title) >= 0.65) matches.push(m.id);
|
||||
}
|
||||
|
||||
self.postMessage(matches);
|
||||
};
|
||||
@@ -0,0 +1,54 @@
|
||||
const THUMB_SIZE = 16;
|
||||
const DUPE_THRESH = 0.12;
|
||||
|
||||
const hashCache = new Map<string, Uint8ClampedArray>();
|
||||
|
||||
function toGray(data: Uint8ClampedArray, pixels: number): Uint8ClampedArray {
|
||||
const gray = new Uint8ClampedArray(pixels);
|
||||
for (let i = 0; i < pixels; i++) {
|
||||
const o = i * 4;
|
||||
gray[i] = (data[o] * 299 + data[o + 1] * 587 + data[o + 2] * 114) / 1000;
|
||||
}
|
||||
return gray;
|
||||
}
|
||||
|
||||
function loadThumb(url: string): Promise<Uint8ClampedArray> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const img = new Image();
|
||||
img.crossOrigin = "anonymous";
|
||||
img.onload = () => {
|
||||
const canvas = document.createElement("canvas");
|
||||
canvas.width = canvas.height = THUMB_SIZE;
|
||||
const ctx = canvas.getContext("2d")!;
|
||||
ctx.drawImage(img, 0, 0, THUMB_SIZE, THUMB_SIZE);
|
||||
resolve(toGray(ctx.getImageData(0, 0, THUMB_SIZE, THUMB_SIZE).data, THUMB_SIZE * THUMB_SIZE));
|
||||
};
|
||||
img.onerror = reject;
|
||||
img.src = url;
|
||||
});
|
||||
}
|
||||
|
||||
function similarity(a: Uint8ClampedArray, b: Uint8ClampedArray): number {
|
||||
let diff = 0;
|
||||
for (let i = 0; i < a.length; i++) diff += Math.abs(a[i] - b[i]);
|
||||
return diff / (a.length * 255);
|
||||
}
|
||||
|
||||
export async function getHash(url: string): Promise<Uint8ClampedArray | null> {
|
||||
if (hashCache.has(url)) return hashCache.get(url)!;
|
||||
try {
|
||||
const thumb = await loadThumb(url);
|
||||
hashCache.set(url, thumb);
|
||||
return thumb;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function areDuplicates(a: Uint8ClampedArray, b: Uint8ClampedArray): boolean {
|
||||
return similarity(a, b) <= DUPE_THRESH;
|
||||
}
|
||||
|
||||
export function clearHashCache(): void {
|
||||
hashCache.clear();
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
import { store } from "@store/state.svelte";
|
||||
import { searchWithScore } from "@core/algorithms/search";
|
||||
import { getHash, areDuplicates } from "@core/cover/coverHash";
|
||||
|
||||
type CoverManga = { id: number; thumbnailUrl: string; source?: { displayName: string } | null };
|
||||
|
||||
export type CoverCandidate = {
|
||||
mangaId: number;
|
||||
url: string;
|
||||
label: string;
|
||||
isActive: boolean;
|
||||
};
|
||||
|
||||
const FUZZY_SCORE_THRESHOLD = 0.65;
|
||||
|
||||
function normalizeUrl(url: string): string {
|
||||
try {
|
||||
const u = new URL(url);
|
||||
u.search = "";
|
||||
return u.href.toLowerCase();
|
||||
} catch {
|
||||
return url.toLowerCase();
|
||||
}
|
||||
}
|
||||
|
||||
export function resolvedCover(mangaId: number, ownUrl: string): string {
|
||||
return store.settings.mangaPrefs?.[mangaId]?.coverUrl ?? ownUrl;
|
||||
}
|
||||
|
||||
function fuzzyMatchIds(
|
||||
mangaId: number,
|
||||
title: string,
|
||||
mangaById: Map<number, CoverManga & { title: string }>,
|
||||
): number[] {
|
||||
const results = searchWithScore(
|
||||
[...mangaById.values()].filter(m => m.id !== mangaId),
|
||||
title,
|
||||
m => m.title,
|
||||
);
|
||||
return results
|
||||
.filter(r => r.score >= FUZZY_SCORE_THRESHOLD)
|
||||
.map(r => r.item.id);
|
||||
}
|
||||
|
||||
export function coverCandidatesSync(
|
||||
mangaId: number,
|
||||
title: string,
|
||||
ownUrl: string,
|
||||
mangaById: Map<number, CoverManga & { title: string }>,
|
||||
): CoverCandidate[] {
|
||||
const linkedIds = store.getLinkedMangaIds(mangaId);
|
||||
const fuzzyIds = fuzzyMatchIds(mangaId, title, mangaById);
|
||||
const current = store.settings.mangaPrefs?.[mangaId]?.coverUrl ?? ownUrl;
|
||||
|
||||
const allIds = Array.from(new Set([...linkedIds, ...fuzzyIds]));
|
||||
|
||||
const raw: { mangaId: number; url: string; label: string }[] = [
|
||||
{ mangaId, url: ownUrl, label: "This source" },
|
||||
...allIds.flatMap(id => {
|
||||
const m = mangaById.get(id);
|
||||
return m ? [{ mangaId: m.id, url: m.thumbnailUrl, label: m.source?.displayName ?? `ID ${m.id}` }] : [];
|
||||
}),
|
||||
];
|
||||
|
||||
const seen = new Set<string>();
|
||||
return raw
|
||||
.filter(c => {
|
||||
const key = normalizeUrl(c.url);
|
||||
if (seen.has(key)) return false;
|
||||
seen.add(key);
|
||||
return true;
|
||||
})
|
||||
.map(c => ({ ...c, isActive: normalizeUrl(c.url) === normalizeUrl(current) }));
|
||||
}
|
||||
|
||||
export async function dedupeByImage(candidates: CoverCandidate[]): Promise<CoverCandidate[]> {
|
||||
const hashes = await Promise.all(candidates.map(c => getHash(c.url)));
|
||||
|
||||
const groups: number[][] = [];
|
||||
|
||||
for (let i = 0; i < candidates.length; i++) {
|
||||
const hi = hashes[i];
|
||||
const existing = hi
|
||||
? groups.find(g => { const hj = hashes[g[0]]; return hj ? areDuplicates(hi, hj) : false; })
|
||||
: undefined;
|
||||
if (existing) existing.push(i);
|
||||
else groups.push([i]);
|
||||
}
|
||||
|
||||
return groups.map(group => {
|
||||
const active = group.find(i => candidates[i].isActive) ?? group[0];
|
||||
const labels = [...new Set(group.map(i => candidates[i].label))];
|
||||
return { ...candidates[active], label: labels.join(" · ") };
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
export { getHash, areDuplicates, clearHashCache } from "./coverHash";
|
||||
export { resolvedCover, coverCandidatesSync, dedupeByImage } from "./coverResolver";
|
||||
export type { CoverCandidate } from "./coverResolver";
|
||||
export { autoLinkLibrary } from "./autoLink";
|
||||
Reference in New Issue
Block a user