// /api/discover/cluster — Feature 4: Semantic Clustering (pure TF-IDF)
// Reads recent algo_feed_snapshots (last 12 hours, newest first, max 300 rows) and clusters by topic family

import { NextResponse } from 'next/server';
import pool from '../../../../lib/db';
import { clusterTitles } from '../../../../lib/cluster';
import '../../../../lib/discoverDb';

/**
 * GET handler for /api/discover/cluster.
 *
 * Loads recently crawled feed rows (`title`, `channel`) from
 * `algo_feed_snapshots` and passes them to `clusterTitles`.
 *
 * Response bodies:
 *  - `{ clusters: [], message }` when fewer than 5 rows were found.
 *  - `{ clusters, total }` otherwise, where `total` is the number of rows
 *    handed to the clusterer.
 *  - `{ error }` with status 500 when the query or the clustering throws.
 */
export async function GET() {
    try {
        // Pull recent crawled titles from DB (last 12 hours), newest first,
        // capped at 300 rows.
        const [rows] = await pool.execute<any[]>(
            `SELECT title, channel FROM algo_feed_snapshots
             WHERE crawled_at > NOW() - INTERVAL 12 HOUR
             ORDER BY crawled_at DESC LIMIT 300`
        );

        // Too few rows to cluster: return an empty result with a hint
        // instead of calling the clusterer.
        if (rows.length < 5) {
            return NextResponse.json({ clusters: [], message: 'Not enough data — run the Crawl first.' });
        }

        // NOTE(review): the second argument is presumably a minimum cluster
        // size or similar threshold — confirm against lib/cluster.
        const clusters = clusterTitles(rows, 2);

        return NextResponse.json({ clusters, total: rows.length });

    } catch (err: unknown) {
        console.error('[Cluster]', err);
        // A thrown value is not guaranteed to be an Error. Reading `.message`
        // off an arbitrary value can yield undefined (the `error` key is then
        // dropped from the JSON body) or throw outright for null/undefined.
        const message = err instanceof Error ? err.message : String(err);
        return NextResponse.json({ error: message }, { status: 500 });
    }
}
