attempt to improve analysis

This commit is contained in:
Daniel Bulant 2026-05-25 18:35:25 +02:00
parent ff733b9774
commit 2154c0b6d1
No known key found for this signature in database
4 changed files with 126 additions and 21 deletions

View file

@ -1,6 +1,12 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { getQuestionRange, getReleaseYearRange } from "../question-utils"; import type { PartyAnalytics } from "../question-utils";
import {
getQuestionRange,
getReleaseYearRange,
getTopClusterArtists,
getTopClusterTracks,
} from "../question-utils";
describe("question range helpers", () => { describe("question range helpers", () => {
it("normalizes inverted generic ranges", () => { it("normalizes inverted generic ranges", () => {
@ -25,3 +31,34 @@ describe("question range helpers", () => {
expect(range.max).toBeLessThanOrEqual(2026); expect(range.max).toBeLessThanOrEqual(2026);
}); });
}); });
describe("analytics cluster helpers", () => {
  // The fixture spreads its data over two clusters and plants an empty-named
  // track and artist in the second one; neither may show up in the results.
  it("returns usable tracks and artists across all story clusters", () => {
    const analytics: PartyAnalytics = {
      storyClusters: [
        {
          tracks: [
            { name: "Shared Track", artists: [{ name: "Shared Artist" }] },
          ],
          artists: [{ name: "Shared Artist" }],
        },
        {
          tracks: [
            { name: "Solo Track", artists: [{ name: "Solo Artist" }] },
            { name: "" },
          ],
          artists: [{ name: "Solo Artist" }, { name: "" }],
        },
      ],
    };

    // Tracks come back as objects, so compare by name only.
    const trackNames = getTopClusterTracks(analytics).map(({ name }) => name);
    expect(trackNames).toEqual(["Shared Track", "Solo Track"]);

    // Artists are already returned as plain name strings.
    const artistNames = getTopClusterArtists(analytics);
    expect(artistNames).toEqual(["Shared Artist", "Solo Artist"]);
  });
});

View file

@ -39,6 +39,7 @@ export type AnalyticsTrack = {
albumName?: string; albumName?: string;
memberScores?: { userId: string; score: number }[]; memberScores?: { userId: string; score: number }[];
}; };
type AnalyticsArtist = { name: string };
type QuestionLike = { type QuestionLike = {
text: string; text: string;
questionKey?: string; questionKey?: string;
@ -180,17 +181,13 @@ function normalizeQuestionKey(value: string): string {
} }
export function getTopClusterArtists(analytics: PartyAnalytics): string[] { export function getTopClusterArtists(analytics: PartyAnalytics): string[] {
return (analytics?.storyClusters?.[0]?.artists ?? []).map( return getAllClusterArtists(analytics).map((artist) => artist.name);
(artist) => artist.name,
);
} }
export function getTopClusterTracks( export function getTopClusterTracks(
analytics: PartyAnalytics, analytics: PartyAnalytics,
): AnalyticsTrack[] { ): AnalyticsTrack[] {
return (analytics?.storyClusters?.[0]?.tracks ?? []).filter((track) => return getAllClusterTracks(analytics);
isUsableText(track.name),
);
} }
export function pickRelevantTrack( export function pickRelevantTrack(
@ -338,9 +335,9 @@ async function collectSongCandidates({
); );
push(peopleSong); push(peopleSong);
const topClusterTracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])] const topClusterTracks = getAllClusterTracks(analytics).sort(
.filter((track) => isUsableText(track.name)) (a, b) => getTrackScore(b) - getTrackScore(a),
.sort((a, b) => getTrackScore(b) - getTrackScore(a)); );
for (const track of topClusterTracks) { for (const track of topClusterTracks) {
const song = await resolveQuestionSong(db, analytics, { const song = await resolveQuestionSong(db, analytics, {
@ -400,12 +397,9 @@ async function resolveSongFromMentionedPeople(
if (userIds.length === 0) return null; if (userIds.length === 0) return null;
const tracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])] const tracks = getAllClusterTracks(analytics).sort(
.filter((track) => isUsableText(track.name)) (a, b) => getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
.sort( );
(a, b) =>
getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
);
for (const track of tracks) { for (const track of tracks) {
const song = await resolveQuestionSong(db, analytics, { const song = await resolveQuestionSong(db, analytics, {
@ -484,6 +478,43 @@ function getMemberTrackScore(
}, 0); }, 0);
} }
/**
 * Collects the usable tracks from every story cluster, keeping only the first
 * occurrence of each distinct track.
 *
 * A track is skipped when its name fails `isUsableText`. Two tracks are
 * treated as the same entry when their name, album name and ordered list of
 * artist names all match. The returned array is created fresh on each call,
 * so callers may sort it in place.
 *
 * @param analytics - Party analytics; a nullish value or missing
 *   `storyClusters` yields an empty array.
 * @returns The deduplicated tracks, in cluster order.
 */
function getAllClusterTracks(analytics: PartyAnalytics): AnalyticsTrack[] {
  const tracks: AnalyticsTrack[] = [];
  const seen = new Set<string>();

  for (const cluster of analytics?.storyClusters ?? []) {
    for (const track of cluster.tracks ?? []) {
      if (!isUsableText(track.name)) continue;

      // JSON-encode the identity tuple rather than joining its parts with
      // "::" and "|": a delimiter that also occurs inside a name (artists
      // ["A|B"] vs ["A", "B"], or an album called "X::Y") would otherwise
      // make two different tracks share one key and drop the second.
      const key = JSON.stringify([
        track.name,
        track.albumName ?? "",
        track.artists?.map((artist) => artist.name) ?? [],
      ]);
      if (seen.has(key)) continue;

      seen.add(key);
      tracks.push(track);
    }
  }

  return tracks;
}
/**
 * Gathers the artists of every story cluster into one list without repeats.
 *
 * Artists whose name fails `isUsableText` are left out. When the same name
 * occurs more than once, the first artist object encountered (in cluster
 * order) is the one that is kept.
 *
 * @param analytics - Party analytics; a nullish value or missing
 *   `storyClusters` yields an empty array.
 * @returns The unique artists, ordered by first appearance.
 */
function getAllClusterArtists(analytics: PartyAnalytics): AnalyticsArtist[] {
  const candidates = (analytics?.storyClusters ?? []).flatMap(
    (cluster) => cluster.artists ?? [],
  );

  // A Map iterates in insertion order, so it doubles as the "seen" set and
  // the ordered result list.
  const firstByName = new Map<string, AnalyticsArtist>();
  for (const candidate of candidates) {
    const usable = isUsableText(candidate.name);
    if (usable && !firstByName.has(candidate.name)) {
      firstByName.set(candidate.name, candidate);
    }
  }

  return Array.from(firstByName.values());
}
export function isUsableText( export function isUsableText(
value: string | null | undefined, value: string | null | undefined,
): value is string { ): value is string {

View file

@ -239,6 +239,40 @@ describe("PartyAnalysisWorkflow", () => {
}); });
describe("analyzeParty - story clusters", () => { describe("analyzeParty - story clusters", () => {
it("keeps names for entities that only appear for later members", async () => {
  // Arrange: attach an album/artist/track to user B only, so these entities
  // are not part of the other seeded member's top lists.
  const { partyId, userIdB } = await seedPartyWithTwoSimilarUsers();
  const album = await createAlbum("User B Unique Album");
  const artist = await createArtist("User B Unique Artist");
  const track = await createTrack("User B Unique Track", album.id, [
    artist.id,
  ]);
  await addTopTrack(userIdB, track.id, 2);
  await addTopArtist(userIdB, artist.id, 2);

  // Act
  const { storyClusters } = await partyAnalysisWorkflow.analyzeParty(partyId);

  // Assert: somewhere in the clusters the new entities appear with their
  // real names rather than a blank placeholder.
  const expectedTrack = expect.objectContaining({
    id: track.id,
    name: "User B Unique Track",
  });
  const expectedArtist = expect.objectContaining({
    id: artist.id,
    name: "User B Unique Artist",
  });

  expect(storyClusters.flatMap(({ tracks }) => tracks)).toContainEqual(
    expectedTrack,
  );
  expect(storyClusters.flatMap(({ artists }) => artists)).toContainEqual(
    expectedArtist,
  );
});
it("sorts clusters with all-member cluster first", async () => { it("sorts clusters with all-member cluster first", async () => {
const { partyId, sharedTrackId } = await seedPartyWithTwoSimilarUsers(); const { partyId, sharedTrackId } = await seedPartyWithTwoSimilarUsers();

View file

@ -469,8 +469,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
const result = new Map<string, TrackEntityScore>(); const result = new Map<string, TrackEntityScore>();
for (const [trackId, memberScores] of entityMap) { for (const [trackId, memberScores] of entityMap) {
const firstData = memberData.values().next().value; const trackInfo = Array.from(memberData.values())
const trackInfo = firstData?.tracks.get(trackId) ?? { .find((data) => data.tracks.has(trackId))
?.tracks.get(trackId) ?? {
name: "", name: "",
artists: [], artists: [],
albumName: undefined, albumName: undefined,
@ -525,8 +526,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
} }
} }
const firstData = memberData.values().next().value; const artistInfo = Array.from(memberData.values())
const artistInfo = firstData?.artists.get(artistId) ?? { name: "" }; .find((data) => data.artists.has(artistId))
?.artists.get(artistId) ?? { name: "" };
result.set(artistId, { result.set(artistId, {
id: artistId, id: artistId,
@ -869,7 +871,8 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
genres.sort( genres.sort(
(a, b) => (a, b) =>
b.memberCount - a.memberCount || b.memberCount - a.memberCount ||
b.memberScores.length - a.memberScores.length, b.memberScores.reduce((total, member) => total + member.score, 0) -
a.memberScores.reduce((total, member) => total + member.score, 0),
); );
// Return top genres that are shared by at least 2 members // Return top genres that are shared by at least 2 members