#!/usr/bin/env python3
"""Ingest new signals from Cycle 1 - Healthcare, Manufacturing, Education."""
import sqlite3, os

# Absolute path of the SQLite database this script writes to (it uses the
# raw_signals and saas_opportunities tables).
DB = os.path.join("/root/workspace", "saas_opportunities.db")

# sqlite3.connect() silently creates a brand-new empty database when the file
# does not exist; the script would then die later with a confusing
# "no such table" error and leave a stray empty file behind. Fail fast instead.
if not os.path.isfile(DB):
    raise FileNotFoundError(f"SQLite database not found: {DB}")

conn = sqlite3.connect(DB)
# Return rows as sqlite3.Row so columns can be read by name (e.g. row["id"]).
conn.row_factory = sqlite3.Row

# --- NEW RAW SIGNALS ---
# One tuple per signal. Field order must match the column list of the
# INSERT INTO raw_signals statement that consumes this list:
#   (source, url, title, text, detected_pain_point, persona, workaround, severity)
# `detected_pain_point` (index 4) is the text later keyword-matched when
# promoting signals to opportunities.
# NOTE(review): severity values here range 3-5; presumably a 1-5 scale — confirm
# against the schema.
signals = [
    # === HEALTHCARE ===
    ("reddit", "https://reddit.com/r/PriorAuthorization/",
     "r/PriorAuthorization - Prior auth high stress",
     "Prior authorization is high-stress. Dealing with insurance companies and patient demands daily. How long do people typically stay in these roles?",
     "Medical staff waste hours on prior authorization phone calls and paperwork instead of patient care",
     "Medical biller / Prior auth specialist",
     "Phone calls, fax, manual paperwork", 5),

    ("reddit", "https://reddit.com/r/healthcare/",
     "r/healthcare - Claim denials rising 2026",
     "40% of medical billers reported an increase in denials over the past year. 50% of denied claims never get reprocessed. Cost is $25 per claim to rework.",
     "Medical practices lose revenue from rising claim denials and costly rework",
     "Medical practice owner / billing manager",
     "Manual rework, expensive clearinghouse software", 5),

    ("healthcare_industry", "https://riviahealth.com/top-5-pain-points-medical-practices",
     "Healthcare - No-show appointments cost $150B/year",
     "Missed appointments cost US providers an estimated $150 billion annually. No-shows disrupt schedules and waste staff productivity.",
     "Medical practices lose $150B/year to patient no-shows without effective automated reminders",
     "Medical practice manager",
     "Manual reminder calls, postcards", 4),

    # === MANUFACTURING ===
    ("reddit", "https://reddit.com/r/Machinists/comments/1p1sr5n/",
     "r/Machinists - ERPs too heavy for small shops",
     "The ERPs that actually stick on the shop floor are the ones that keep scheduling + inventory + work orders in one place without forcing a bunch of extra data entry. Most ERPs looked great in demos but day to day the floor was still living in spreadsheets.",
     "Small machine shops live in spreadsheets because ERPs are too expensive and complex",
     "Small machine shop owner / manager",
     "Spreadsheets, expensive ERP (Odoo/Katana)", 4),

    ("reddit", "https://reddit.com/r/CNC/comments/1nn1igt/",
     "r/CNC - Biggest pain point at your shop",
     "Biggest bottleneck is lack of trained people. They get apprentices but they lack fundamentals. They can get machines faster than they can find operators. Clients change priorities constantly, machines break at worst times.",
     "CNC shops cannot find skilled operators and lose productivity from constant schedule changes",
     "CNC shop owner / production manager",
     "Manual scheduling, poaching talent", 4),

    ("manufacturing_industry", "https://www.steckermachine.com/blog/maximize-machine-shop-inventory",
     "Manufacturing - Cutting tool inventory nightmare",
     "Small job shops tie up capital equivalent to a machine purchase in cutting tool inventory. No good system exists for small shops to track tooling.",
     "Small machine shops tie up huge capital in unmanaged cutting tool inventory",
     "Small machine shop owner",
     "Manual spreadsheets, pegboards, overstocking", 4),

    # === EDUCATION ===
    ("reddit", "https://reddit.com/r/StudentTeaching/comments/1rof1y9/",
     "r/StudentTeaching - Lesson planning too time-draining",
     "Making slides that don't look terrible. Creating a worksheet that matches what she's covering. Writing an exit ticket. Then doing it all again next week for a completely different topic. The repetitive work should not take as long as it does in 2026.",
     "Teachers spend every Sunday evening creating slides, worksheets, and exit tickets from scratch",
     "Primary/secondary school teacher",
     "PowerPoint, manual worksheet creation, Canva", 5),

    ("education_industry", "https://skolera.com/en/blog/tracking-student-progress-software",
     "Education - Student progress tracking fragmentation",
     "Teachers use manual follow-up notebooks and paper attendance records. No unified tool for tracking student academic and behavioral progress across subjects.",
     "Teachers waste time on manual attendance and progress tracking across fragmented tools",
     "Teacher / School administrator",
     "Paper records, manual spreadsheets, multiple logins", 3),

    ("reddit", "https://reddit.com/r/teaching/",
     "r/teaching - Parent communication drain",
     "Sending individual updates to 30+ parents per class, multiple classes. No easy way to batch communication with personalization.",
     "Teachers spend hours on parent communication with no bulk personalization tool",
     "School teacher",
     "Individual emails, newsletters, class dojo", 3),
]

# --- PROMOTE TO OPPORTUNITIES ---
# One tuple per opportunity:
#   (vertical, value_proposition, complexity_score, potential_score,
#    distribution_score, monetization_score, keywords)
# `keywords` are substrings; an opportunity is linked to the first signal from
# this run whose detected_pain_point contains any of them (case-insensitive).
opportunities = [
    ("Healthcare/Admin", "Prior Auth Automator - AI agent that handles prior authorization calls and paperwork",
     4, 4, 4, 5, ["prior", "authorization", "insurance"]),
    ("Healthcare/Billing", "Claim Denial Rescue - automated denial analysis and reprocessing engine",
     4, 3, 3, 5, ["denial", "claim", "reprocess"]),
    ("Healthcare/Ops", "No-Show Buster - multi-channel automated reminder system with 2-way confirmation",
     5, 5, 4, 4, ["no-show", "appointment", "reminder"]),
    ("Manufacturing/ERP", "Shop Floor Lite - lightweight scheduling + inventory + work orders for small machine shops",
     4, 3, 4, 4, ["erp", "shop", "spreadsheet"]),
    ("Manufacturing/Ops", "CNC Schedule Optimizer - automated job scheduling with machine availability and skill matching",
     4, 4, 3, 4, ["cnc", "schedule", "operator"]),
    ("Manufacturing/Inventory", "Tool Crib - cutting tool inventory tracking with usage analytics for small shops",
     4, 4, 3, 3, ["tool", "inventory", "cutting"]),
    ("Education/Tools", "Lesson Plan Copilot - AI generates slides, worksheets, and exit tickets from curriculum standards",
     5, 4, 4, 4, ["lesson", "slide", "worksheet"]),
    ("Education/Admin", "Student Progress Hub - unified attendance, grades, behavior tracking for teachers",
     3, 4, 4, 3, ["progress", "tracking", "attendance"]),
    ("Education/Comms", "Parent Pulse - bulk personalized parent updates with translation and scheduling",
     3, 5, 4, 3, ["parent", "communication", "update"]),
]

try:
    # `with conn` commits if the block succeeds and rolls back if anything
    # raises, so a failed run never leaves a half-ingested batch behind.
    with conn:
        # --- INSERT RAW SIGNALS ---
        # Track (id, lowercased pain point) for every row inserted in THIS run.
        # Promotion below matches only against these, never against older
        # unprocessed rows that merely happen to contain a generic keyword
        # such as "tool" or "claim".
        candidates = []
        for s in signals:
            cur = conn.execute(
                "INSERT INTO raw_signals (source, url, title, text, detected_pain_point, persona, workaround, severity) VALUES (?,?,?,?,?,?,?,?)",
                s,
            )
            # s[4] is detected_pain_point (see the column list above).
            # NOTE(review): assumes raw_signals.id is the INTEGER PRIMARY KEY
            # (rowid alias), so lastrowid equals the new row's id — confirm
            # against the schema.
            candidates.append((cur.lastrowid, s[4].lower()))

        # --- LINK EACH OPPORTUNITY TO ONE NEW SIGNAL ---
        for vertical, value_prop, complexity, potential, distribution, monetization, keywords in opportunities:
            needles = [kw.lower() for kw in keywords]
            match = next(
                (c for c in candidates if any(n in c[1] for n in needles)),
                None,
            )
            if match is None:
                # Surface the miss instead of silently dropping the opportunity.
                print(f"WARN no new signal matched opportunity: {value_prop}")
                continue

            # Consume the signal so no other opportunity can claim it too.
            candidates.remove(match)
            sig_id = match[0]
            conn.execute(
                "INSERT INTO saas_opportunities (signal_id, vertical, value_proposition, complexity_score, potential_score, distribution_score, monetization_score, status) VALUES (?,?,?,?,?,?,?,'validated')",
                (sig_id, vertical, value_prop, complexity, potential, distribution, monetization),
            )
            conn.execute("UPDATE raw_signals SET processed=1 WHERE id=?", (sig_id,))

    # Report table totals after the transaction has been committed.
    count_signals = conn.execute("SELECT COUNT(*) FROM raw_signals").fetchone()[0]
    count_opps = conn.execute("SELECT COUNT(*) FROM saas_opportunities").fetchone()[0]
    print(f"OK {count_signals} raw_signals total")
    print(f"OK {count_opps} opportunities total")
finally:
    # Always release the database handle, including on the error path.
    conn.close()
