attempt to improve analysis
This commit is contained in:
parent
ff733b9774
commit
2154c0b6d1
4 changed files with 126 additions and 21 deletions
|
|
@ -1,6 +1,12 @@
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
import { getQuestionRange, getReleaseYearRange } from "../question-utils";
|
import type { PartyAnalytics } from "../question-utils";
|
||||||
|
import {
|
||||||
|
getQuestionRange,
|
||||||
|
getReleaseYearRange,
|
||||||
|
getTopClusterArtists,
|
||||||
|
getTopClusterTracks,
|
||||||
|
} from "../question-utils";
|
||||||
|
|
||||||
describe("question range helpers", () => {
|
describe("question range helpers", () => {
|
||||||
it("normalizes inverted generic ranges", () => {
|
it("normalizes inverted generic ranges", () => {
|
||||||
|
|
@ -25,3 +31,34 @@ describe("question range helpers", () => {
|
||||||
expect(range.max).toBeLessThanOrEqual(2026);
|
expect(range.max).toBeLessThanOrEqual(2026);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("analytics cluster helpers", () => {
|
||||||
|
it("returns usable tracks and artists across all story clusters", () => {
|
||||||
|
const analytics: PartyAnalytics = {
|
||||||
|
storyClusters: [
|
||||||
|
{
|
||||||
|
tracks: [
|
||||||
|
{ name: "Shared Track", artists: [{ name: "Shared Artist" }] },
|
||||||
|
],
|
||||||
|
artists: [{ name: "Shared Artist" }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
tracks: [
|
||||||
|
{ name: "Solo Track", artists: [{ name: "Solo Artist" }] },
|
||||||
|
{ name: "" },
|
||||||
|
],
|
||||||
|
artists: [{ name: "Solo Artist" }, { name: "" }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
expect(getTopClusterTracks(analytics).map((track) => track.name)).toEqual([
|
||||||
|
"Shared Track",
|
||||||
|
"Solo Track",
|
||||||
|
]);
|
||||||
|
expect(getTopClusterArtists(analytics)).toEqual([
|
||||||
|
"Shared Artist",
|
||||||
|
"Solo Artist",
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ export type AnalyticsTrack = {
|
||||||
albumName?: string;
|
albumName?: string;
|
||||||
memberScores?: { userId: string; score: number }[];
|
memberScores?: { userId: string; score: number }[];
|
||||||
};
|
};
|
||||||
|
type AnalyticsArtist = { name: string };
|
||||||
type QuestionLike = {
|
type QuestionLike = {
|
||||||
text: string;
|
text: string;
|
||||||
questionKey?: string;
|
questionKey?: string;
|
||||||
|
|
@ -180,17 +181,13 @@ function normalizeQuestionKey(value: string): string {
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getTopClusterArtists(analytics: PartyAnalytics): string[] {
|
export function getTopClusterArtists(analytics: PartyAnalytics): string[] {
|
||||||
return (analytics?.storyClusters?.[0]?.artists ?? []).map(
|
return getAllClusterArtists(analytics).map((artist) => artist.name);
|
||||||
(artist) => artist.name,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getTopClusterTracks(
|
export function getTopClusterTracks(
|
||||||
analytics: PartyAnalytics,
|
analytics: PartyAnalytics,
|
||||||
): AnalyticsTrack[] {
|
): AnalyticsTrack[] {
|
||||||
return (analytics?.storyClusters?.[0]?.tracks ?? []).filter((track) =>
|
return getAllClusterTracks(analytics);
|
||||||
isUsableText(track.name),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function pickRelevantTrack(
|
export function pickRelevantTrack(
|
||||||
|
|
@ -338,9 +335,9 @@ async function collectSongCandidates({
|
||||||
);
|
);
|
||||||
push(peopleSong);
|
push(peopleSong);
|
||||||
|
|
||||||
const topClusterTracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])]
|
const topClusterTracks = getAllClusterTracks(analytics).sort(
|
||||||
.filter((track) => isUsableText(track.name))
|
(a, b) => getTrackScore(b) - getTrackScore(a),
|
||||||
.sort((a, b) => getTrackScore(b) - getTrackScore(a));
|
);
|
||||||
|
|
||||||
for (const track of topClusterTracks) {
|
for (const track of topClusterTracks) {
|
||||||
const song = await resolveQuestionSong(db, analytics, {
|
const song = await resolveQuestionSong(db, analytics, {
|
||||||
|
|
@ -400,12 +397,9 @@ async function resolveSongFromMentionedPeople(
|
||||||
|
|
||||||
if (userIds.length === 0) return null;
|
if (userIds.length === 0) return null;
|
||||||
|
|
||||||
const tracks = [...(analytics?.storyClusters?.[0]?.tracks ?? [])]
|
const tracks = getAllClusterTracks(analytics).sort(
|
||||||
.filter((track) => isUsableText(track.name))
|
(a, b) => getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
|
||||||
.sort(
|
);
|
||||||
(a, b) =>
|
|
||||||
getMemberTrackScore(b, userIds) - getMemberTrackScore(a, userIds),
|
|
||||||
);
|
|
||||||
|
|
||||||
for (const track of tracks) {
|
for (const track of tracks) {
|
||||||
const song = await resolveQuestionSong(db, analytics, {
|
const song = await resolveQuestionSong(db, analytics, {
|
||||||
|
|
@ -484,6 +478,43 @@ function getMemberTrackScore(
|
||||||
}, 0);
|
}, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getAllClusterTracks(analytics: PartyAnalytics): AnalyticsTrack[] {
|
||||||
|
const tracks: AnalyticsTrack[] = [];
|
||||||
|
const seen = new Set<string>();
|
||||||
|
|
||||||
|
for (const cluster of analytics?.storyClusters ?? []) {
|
||||||
|
for (const track of cluster.tracks ?? []) {
|
||||||
|
if (!isUsableText(track.name)) continue;
|
||||||
|
const key = [
|
||||||
|
track.name,
|
||||||
|
track.albumName ?? "",
|
||||||
|
track.artists?.map((artist) => artist.name).join("|") ?? "",
|
||||||
|
].join("::");
|
||||||
|
if (seen.has(key)) continue;
|
||||||
|
seen.add(key);
|
||||||
|
tracks.push(track);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tracks;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAllClusterArtists(analytics: PartyAnalytics): AnalyticsArtist[] {
|
||||||
|
const artists: AnalyticsArtist[] = [];
|
||||||
|
const seen = new Set<string>();
|
||||||
|
|
||||||
|
for (const cluster of analytics?.storyClusters ?? []) {
|
||||||
|
for (const artist of cluster.artists ?? []) {
|
||||||
|
if (!isUsableText(artist.name)) continue;
|
||||||
|
if (seen.has(artist.name)) continue;
|
||||||
|
seen.add(artist.name);
|
||||||
|
artists.push(artist);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return artists;
|
||||||
|
}
|
||||||
|
|
||||||
export function isUsableText(
|
export function isUsableText(
|
||||||
value: string | null | undefined,
|
value: string | null | undefined,
|
||||||
): value is string {
|
): value is string {
|
||||||
|
|
|
||||||
|
|
@ -239,6 +239,40 @@ describe("PartyAnalysisWorkflow", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("analyzeParty - story clusters", () => {
|
describe("analyzeParty - story clusters", () => {
|
||||||
|
it("keeps names for entities that only appear for later members", async () => {
|
||||||
|
const { partyId, userIdB } = await seedPartyWithTwoSimilarUsers();
|
||||||
|
const uniqueAlbum = await createAlbum("User B Unique Album");
|
||||||
|
const uniqueArtist = await createArtist("User B Unique Artist");
|
||||||
|
const uniqueTrack = await createTrack(
|
||||||
|
"User B Unique Track",
|
||||||
|
uniqueAlbum.id,
|
||||||
|
[uniqueArtist.id],
|
||||||
|
);
|
||||||
|
await addTopTrack(userIdB, uniqueTrack.id, 2);
|
||||||
|
await addTopArtist(userIdB, uniqueArtist.id, 2);
|
||||||
|
|
||||||
|
const result = await partyAnalysisWorkflow.analyzeParty(partyId);
|
||||||
|
const allTracks = result.storyClusters.flatMap(
|
||||||
|
(cluster) => cluster.tracks,
|
||||||
|
);
|
||||||
|
const allArtists = result.storyClusters.flatMap(
|
||||||
|
(cluster) => cluster.artists,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(allTracks).toContainEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
id: uniqueTrack.id,
|
||||||
|
name: "User B Unique Track",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
expect(allArtists).toContainEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
id: uniqueArtist.id,
|
||||||
|
name: "User B Unique Artist",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("sorts clusters with all-member cluster first", async () => {
|
it("sorts clusters with all-member cluster first", async () => {
|
||||||
const { partyId, sharedTrackId } = await seedPartyWithTwoSimilarUsers();
|
const { partyId, sharedTrackId } = await seedPartyWithTwoSimilarUsers();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -469,8 +469,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
|
||||||
const result = new Map<string, TrackEntityScore>();
|
const result = new Map<string, TrackEntityScore>();
|
||||||
|
|
||||||
for (const [trackId, memberScores] of entityMap) {
|
for (const [trackId, memberScores] of entityMap) {
|
||||||
const firstData = memberData.values().next().value;
|
const trackInfo = Array.from(memberData.values())
|
||||||
const trackInfo = firstData?.tracks.get(trackId) ?? {
|
.find((data) => data.tracks.has(trackId))
|
||||||
|
?.tracks.get(trackId) ?? {
|
||||||
name: "",
|
name: "",
|
||||||
artists: [],
|
artists: [],
|
||||||
albumName: undefined,
|
albumName: undefined,
|
||||||
|
|
@ -525,8 +526,9 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const firstData = memberData.values().next().value;
|
const artistInfo = Array.from(memberData.values())
|
||||||
const artistInfo = firstData?.artists.get(artistId) ?? { name: "" };
|
.find((data) => data.artists.has(artistId))
|
||||||
|
?.artists.get(artistId) ?? { name: "" };
|
||||||
|
|
||||||
result.set(artistId, {
|
result.set(artistId, {
|
||||||
id: artistId,
|
id: artistId,
|
||||||
|
|
@ -869,7 +871,8 @@ export class PartyAnalysisWorkflow extends ConfiguredInstance {
|
||||||
genres.sort(
|
genres.sort(
|
||||||
(a, b) =>
|
(a, b) =>
|
||||||
b.memberCount - a.memberCount ||
|
b.memberCount - a.memberCount ||
|
||||||
b.memberScores.length - a.memberScores.length,
|
b.memberScores.reduce((total, member) => total + member.score, 0) -
|
||||||
|
a.memberScores.reduce((total, member) => total + member.score, 0),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Return top genres that are shared by at least 2 members
|
// Return top genres that are shared by at least 2 members
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue