Attempt to improve analysis: read tracks and artists from all story clusters, and resolve entity names from any member's data

This commit is contained in:
Daniel Bulant 2026-05-25 18:35:25 +02:00
parent ff733b9774
commit 2154c0b6d1
No known key found for this signature in database
4 changed files with 126 additions and 21 deletions

View file

@ -1,6 +1,12 @@
import { describe, expect, it } from "vitest";
import { getQuestionRange, getReleaseYearRange } from "../question-utils";
import type { PartyAnalytics } from "../question-utils";
import {
getQuestionRange,
getReleaseYearRange,
getTopClusterArtists,
getTopClusterTracks,
} from "../question-utils";
describe("question range helpers", () => {
it("normalizes inverted generic ranges", () => {
@ -25,3 +31,34 @@ describe("question range helpers", () => {
expect(range.max).toBeLessThanOrEqual(2026);
});
});
describe("analytics cluster helpers", () => {
  it("returns usable tracks and artists across all story clusters", () => {
    // Two clusters; the second one also carries blank-named entries that
    // the helpers are expected to leave out of their results.
    const analytics: PartyAnalytics = {
      storyClusters: [
        {
          tracks: [
            { name: "Shared Track", artists: [{ name: "Shared Artist" }] },
          ],
          artists: [{ name: "Shared Artist" }],
        },
        {
          tracks: [
            { name: "Solo Track", artists: [{ name: "Solo Artist" }] },
            { name: "" },
          ],
          artists: [{ name: "Solo Artist" }, { name: "" }],
        },
      ],
    };

    // Tracks come back as objects, so compare by name only.
    const trackNames = getTopClusterTracks(analytics).map(({ name }) => name);
    expect(trackNames).toEqual(["Shared Track", "Solo Track"]);

    // Artists come back already reduced to their names.
    const artistNames = getTopClusterArtists(analytics);
    expect(artistNames).toEqual(["Shared Artist", "Solo Artist"]);
  });
});

View file

@ -39,6 +39,7 @@ export type AnalyticsTrack = {
albumName?: string;
memberScores?: { userId: string; score: number }[];
};
/** Artist entry carrying only a `name`; the element type collected by `getAllClusterArtists`. */
type AnalyticsArtist = { name: string };
type QuestionLike = {
text: string;
questionKey?: string;
@ -180,17 +181,13 @@ function normalizeQuestionKey(value: string): string {
}
/**
 * Lists the artist names found in the party's story clusters.
 *
 * Despite the "top" in its name, this reads every cluster rather than only
 * the first one. Collection is delegated to `getAllClusterArtists`, which
 * drops entries without a usable name and keeps one entry per distinct name.
 *
 * @param analytics - Party analytics whose `storyClusters` are scanned.
 * @returns Artist names in the order they are first encountered.
 */
export function getTopClusterArtists(analytics: PartyAnalytics): string[] {
  return getAllClusterArtists(analytics).map((artist) => artist.name);
}
/**
 * Returns the usable tracks found in the party's story clusters.
 *
 * Despite the "top" in its name, this reads every cluster rather than only
 * the first one. Collection is delegated to `getAllClusterTracks`, which
 * skips tracks without a usable name and de-duplicates repeated tracks.
 *
 * @param analytics - Party analytics whose `storyClusters` are scanned.
 * @returns A newly built array of tracks in first-encountered order.
 */
export function getTopClusterTracks(
  analytics: PartyAnalytics,
): AnalyticsTrack[] {
  return getAllClusterTracks(analytics);
}
export function pickRelevantTrack(
@ -338,9 +335,9 @@ async function collectSongCandidates({
);
push(peopleSong);
const topClusterTracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])]
.filter((track) => isUsableText(track.name))
.sort((a, b) => getTrackScore(b) - getTrackScore(a));
const topClusterTracks = getAllClusterTracks(analytics).sort(
(a, b) => getTrackScore(b) - getTrackScore(a),
);
for (const track of topClusterTracks) {
const song = await resolveQuestionSong(db, analytics, {
@ -400,12 +397,9 @@ async function resolveSongFromMentionedPeople(
if (userIds.length === 0) return null;
const tracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])]
.filter((track) => isUsableText(track.name))
.sort(
(a, b) =>
getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
);
const tracks = getAllClusterTracks(analytics).sort(
(a, b) => getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
);
for (const track of tracks) {
const song = await resolveQuestionSong(db, analytics, {
@ -484,6 +478,43 @@ function getMemberTrackScore(
}, 0);
}
/**
 * Gathers the tracks of every story cluster into a single de-duplicated list.
 *
 * Tracks whose name fails `isUsableText` are skipped. Two tracks count as the
 * same when their name, album name and "|"-joined artist names all match; the
 * first occurrence wins and later ones are ignored.
 *
 * @param analytics - Party analytics; a missing `storyClusters` yields `[]`.
 * @returns A new array on each call, ordered by first appearance.
 */
function getAllClusterTracks(analytics: PartyAnalytics): AnalyticsTrack[] {
  // A Map iterates in insertion order, so values() reproduces the order in
  // which each distinct track was first seen.
  const firstByIdentity = new Map<string, AnalyticsTrack>();

  for (const cluster of analytics?.storyClusters ?? []) {
    for (const candidate of cluster.tracks ?? []) {
      const title = candidate.name;
      if (!isUsableText(title)) continue;

      const artistNames =
        candidate.artists?.map((credit) => credit.name).join("|") ?? "";
      const identity = [title, candidate.albumName ?? "", artistNames].join(
        "::",
      );

      if (!firstByIdentity.has(identity)) {
        firstByIdentity.set(identity, candidate);
      }
    }
  }

  return Array.from(firstByIdentity.values());
}
/**
 * Gathers the artists of every story cluster into a single list with one
 * entry per distinct name.
 *
 * Artists whose name fails `isUsableText` are skipped; when a name repeats,
 * only the first artist object carrying it is kept.
 *
 * @param analytics - Party analytics; a missing `storyClusters` yields `[]`.
 * @returns A new array on each call, ordered by first appearance.
 */
function getAllClusterArtists(analytics: PartyAnalytics): AnalyticsArtist[] {
  const knownNames = new Set<string>();
  const collected: AnalyticsArtist[] = [];

  for (const cluster of analytics?.storyClusters ?? []) {
    for (const entry of cluster.artists ?? []) {
      const { name } = entry;
      // Keep the entry only when it has a usable name not recorded yet.
      if (isUsableText(name) && !knownNames.has(name)) {
        knownNames.add(name);
        collected.push(entry);
      }
    }
  }

  return collected;
}
export function isUsableText(
value: string | null | undefined,
): value is string {

View file

@ -239,6 +239,40 @@ describe("PartyAnalysisWorkflow", () => {
});
describe("analyzeParty - story clusters", () => {
it("keeps names for entities that only appear for later members", async () => {
  // Seed an album, artist and track that are attached to user B only.
  const seeded = await seedPartyWithTwoSimilarUsers();
  const albumForB = await createAlbum("User B Unique Album");
  const artistForB = await createArtist("User B Unique Artist");
  const trackForB = await createTrack("User B Unique Track", albumForB.id, [
    artistForB.id,
  ]);
  await addTopTrack(seeded.userIdB, trackForB.id, 2);
  await addTopArtist(seeded.userIdB, artistForB.id, 2);

  const { storyClusters } = await partyAnalysisWorkflow.analyzeParty(
    seeded.partyId,
  );

  // Flatten across clusters: the assertion does not care which cluster the
  // entity lands in, only that its name survived.
  const clusterTracks = storyClusters.flatMap(({ tracks }) => tracks);
  const clusterArtists = storyClusters.flatMap(({ artists }) => artists);

  const expectedTrack = expect.objectContaining({
    id: trackForB.id,
    name: "User B Unique Track",
  });
  expect(clusterTracks).toContainEqual(expectedTrack);

  const expectedArtist = expect.objectContaining({
    id: artistForB.id,
    name: "User B Unique Artist",
  });
  expect(clusterArtists).toContainEqual(expectedArtist);
});
it("sorts clusters with all-member cluster first", async () => {
const { partyId, sharedTrackId } = await seedPartyWithTwoSimilarUsers();

View file

@ -469,8 +469,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
const result = new Map<string, TrackEntityScore>();
for (const [trackId, memberScores] of entityMap) {
const firstData = memberData.values().next().value;
const trackInfo = firstData?.tracks.get(trackId) ?? {
const trackInfo = Array.from(memberData.values())
.find((data) => data.tracks.has(trackId))
?.tracks.get(trackId) ?? {
name: "",
artists: [],
albumName: undefined,
@ -525,8 +526,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
}
}
const firstData = memberData.values().next().value;
const artistInfo = firstData?.artists.get(artistId) ?? { name: "" };
const artistInfo = Array.from(memberData.values())
.find((data) => data.artists.has(artistId))
?.artists.get(artistId) ?? { name: "" };
result.set(artistId, {
id: artistId,
@ -869,7 +871,8 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
genres.sort(
(a, b) =>
b.memberCount - a.memberCount ||
b.memberScores.length - a.memberScores.length,
b.memberScores.reduce((total, member) => total + member.score, 0) -
a.memberScores.reduce((total, member) => total + member.score, 0),
);
// Return top genres that are shared by at least 2 members