quantfolio/api_server.py at main · Claricexu/quantfolio · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Quantfolio API Server
==============================
FastAPI server that wraps the Lite and Pro ML models for real-time predictions.

Endpoints:
  GET  /api/predict/{symbol}   — single-ticker prediction with SVR
  GET  /api/movers             — daily scan of all symbols, sorted by % change
  GET  /api/symbols            — list available symbol universe
  GET  /                       — serves the React dashboard (index.html)

Setup:
  pip install fastapi uvicorn apscheduler
  (plus all finance_model_v2 deps — see requirements.txt)

Run:
  python api_server.py
  → opens http://localhost:8000
"""

import sys
# Force UTF-8 stdout/stderr on Windows — cp1252 default crashes on Unicode
# characters in print/log output. errors='replace' means a truly unsupported
# char becomes '?' rather than raising. See DEVELOPMENT.md "Encoding".
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8', errors='replace')

import os
import json
import time
import threading
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager

# Load .env (if present) before any os.environ reads below.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass  # dotenv is optional — env vars still work without it

from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware

# Import the model engine
from finance_model_v2 import (
    predict_ticker,
    predict_ticker_compare,
    daily_scan,
    daily_scan_both,
    get_all_symbols,
    get_strategy_mode,
    backtest_multi_strategy,
    SYMBOL_UNIVERSE,
    CACHE_DIR,
    ETF_TICKERS,
    HAS_LGBM,
    _ensure_cache_dir,
)

# Round 7c verification round 8: canonical (sector, industry_group, industry)
# classifier. Imported at module load — leaf module, stdlib-only, no SEC or
# screener dependency, so it's safe to use even if HAS_SCREENER is False.
from classifier import classify as _classify_symbol

# ─── Optional: fundamental screener (Good Firm Framework) ────────────────────
# Loads edgar_fetcher + fundamental_screener. Failure here does NOT affect the
# three existing tabs — endpoints below simply return 503 if unavailable.
try:
    # fundamental_screener import kept as a module-availability gate even
    # though Bucket 2 serves /api/screener from the CSV via verdict_provider.
    # run_full_screen stays importable for the leaders rebuild subprocess.
    from fundamental_screener import run_full_screen as _screener_run_full  # noqa: F401
    from edgar_fetcher import (
        fetch_all as _edgar_fetch_all,
        get_db as _edgar_get_db,
        load_tickers_from_csv as _edgar_load_tickers,
    )
    # Bucket 2 (2026-04-21): CSV-backed unified verdict reader. All three
    # tabs (Lookup, Daily Report, Leader Detector) flow through this module
    # so they can never disagree on what verdict/reason a symbol has.
    import verdict_provider as _verdict_provider
    HAS_SCREENER = True
except Exception as _screener_err:
    print(f"[Screener] Not available: {_screener_err}")
    HAS_SCREENER = False

# =============================================================================
# CONFIGURATION
# =============================================================================

# TCP port for the HTTP server; the module docstring advertises
# http://localhost:8000.
PORT = 8000
# "0.0.0.0" is the IPv4 wildcard address.
# NOTE(review): presumably handed to the server launcher so it listens on
# every interface — confirm at the call site, which is outside this section.
HOST = "0.0.0.0"
# Absolute path of the "frontend" directory located next to this source file.
FRONTEND_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "frontend")

# =============================================================================
# EMAIL ALERTS — configured via .env (see .env.example)
# =============================================================================
# To use Gmail: go to https://myaccount.google.com/apppasswords and generate
# an App Password (requires 2-Step Verification enabled). Put it in SMTP_PASSWORD
# in your local .env file — do NOT hardcode here.
# Master switch: only "1", "true", "yes" or "on" (case-insensitive, surrounding
# whitespace ignored) enable alerts; anything else — including unset — disables.
SMTP_ENABLED  = os.environ.get("SMTP_ENABLED", "false").strip().lower() in ("1", "true", "yes", "on")
SMTP_SERVER   = os.environ.get("SMTP_SERVER", "smtp.gmail.com")
# int() runs at import time, so a non-numeric (or empty) SMTP_PORT value raises
# ValueError while this module is being imported.
SMTP_PORT     = int(os.environ.get("SMTP_PORT", "587"))
# Login identity; _send_alert_email also uses it as the From header and the
# envelope sender.
SMTP_USER     = os.environ.get("SMTP_USER", "")
SMTP_PASSWORD = os.environ.get("SMTP_PASSWORD", "")
# Comma-separated recipient list; entries are stripped and blanks dropped, so an
# unset or empty ALERT_TO yields [].
ALERT_TO      = [e.strip() for e in os.environ.get("ALERT_TO", "").split(",") if e.strip()]
# Subject prefix; _render_alert_email appends the BUY/SELL counts and the date.
ALERT_SUBJECT = os.environ.get("ALERT_SUBJECT", "Quantfolio Signal Brief")


# best_strategy keys that validate a single-model BUY coming from Pro / Lite
# respectively (consulted by _classify_alert).
_BUY_VALIDATING_KEYS_PRO  = ('pro_buyonly', 'pro_full')
_BUY_VALIDATING_KEYS_LITE = ('lite_buyonly', 'lite_full')
# best_strategy keys under which _classify_alert suppresses a single-model SELL
# with the "SELL gate active" reason.
_SELL_GATE_KEYS           = ('buyhold', 'lite_buyonly', 'pro_buyonly')


def _row_signals(r):
    """Extract (lite_sig, pro_sig) from a daily-report row, defaulting to HOLD."""
    v2 = r.get('v2') or {}
    v3 = r.get('v3') or {}
    lite_sig = (v2.get('signal') or 'HOLD').upper()
    pro_sig  = (v3.get('signal') or 'HOLD').upper()
    return lite_sig, pro_sig


def _classify_alert(lite_sig, pro_sig, best_key):
    """Decide whether a row qualifies for a BUY or SELL email alert.

    Returns ('BUY'|'SELL', path_label) when the row qualifies, or
    (None, suppression_reason) otherwise. The path label / reason string is
    suitable for direct logging — see the [Alert] log lines emitted in
    ``_send_signal_alerts``. Pure function: no globals, no I/O — so this is
    table-tested in tests/unit/test_signal_alerts.py.

    Rules (Round 8b + Decision A — consensus SELL ungated):
      BUY fires when ANY of:
        (a) Both Lite and Pro signaled BUY (consensus).
        (b) Only Pro signaled BUY (Lite=HOLD) AND best ∈ {pro_buyonly, pro_full}.
        (c) Only Lite signaled BUY (Pro=HOLD) AND best ∈ {lite_buyonly, lite_full}.
      SELL fires when ANY of:
        (a) Both Lite and Pro signaled SELL (consensus) — fires regardless of
            best_strategy. Decision A: dual-model agreement is its own
            validation; backtest historical preference for buy-and-hold no
            longer suppresses a strong current consensus.
        (b) best == pro_full AND Pro=SELL (Lite=HOLD).
        (c) best == lite_full AND Lite=SELL (Pro=HOLD).
        Single-model SELL on a non-validating best_strategy (buyhold,
        lite_buyonly, pro_buyonly, mismatched full-signal model, or null)
        is still suppressed — only the consensus path lost its gate.
      Conflict (Lite/Pro disagree BUY vs SELL) → never fires.
      best_strategy null/missing → only path (a) on either side can fire.
    """
    if lite_sig == 'HOLD' and pro_sig == 'HOLD':
        return None, 'no model signals'
    if {lite_sig, pro_sig} == {'BUY', 'SELL'}:
        return None, f'model conflict (Lite={lite_sig} Pro={pro_sig})'

    # BUY paths
    if lite_sig == 'BUY' and pro_sig == 'BUY':
        return 'BUY', 'consensus (Lite+Pro)'
    if pro_sig == 'BUY' and lite_sig == 'HOLD':
        if best_key is None:
            return None, 'best_strategy null/missing'
        if best_key in _BUY_VALIDATING_KEYS_PRO:
            return 'BUY', f'pro-only validated_by={best_key}'
        return None, f"best_strategy={best_key} doesn't validate signal direction"
    if lite_sig == 'BUY' and pro_sig == 'HOLD':
        if best_key is None:
            return None, 'best_strategy null/missing'
        if best_key in _BUY_VALIDATING_KEYS_LITE:
            return 'BUY', f'lite-only validated_by={best_key}'
        return None, f"best_strategy={best_key} doesn't validate signal direction"

    # SELL paths
    # Decision A: consensus SELL (both models) bypasses the hard gate. The
    # dual-model agreement is itself the validation — we no longer suppress
    # a strong present-tape consensus on backtest historical preference.
    if lite_sig == 'SELL' and pro_sig == 'SELL':
        return 'SELL', 'consensus (Lite+Pro)'
    if pro_sig == 'SELL' or lite_sig == 'SELL':
        # Single-model SELL: backtest evidence still gates. The "SELL gate
        # active" reason now only reaches single-model rows where the best
        # strategy never sells (buyhold / *_buyonly) — consensus SELLs above
        # already returned. Mismatched-full-signal and null-best fall through
        # to the generic "doesn't validate signal direction" / "null/missing"
        # branches for log clarity.
        if best_key in _SELL_GATE_KEYS:
            return None, f'SELL gate active (best_strategy={best_key})'
        if best_key == 'pro_full' and pro_sig == 'SELL':
            return 'SELL', 'pro-full-signal'
        if best_key == 'lite_full' and lite_sig == 'SELL':
            return 'SELL', 'lite-full-signal'
        if best_key is None:
            return None, 'best_strategy null/missing'
        return None, f"best_strategy={best_key} doesn't validate signal direction"

    return None, 'no model signals'


def _classify_report_alerts(report, *, log_prefix="[Alert]"):
    """Run the alert classifier over every row in a daily report.

    Returns ``(buys, sells)`` lists of report rows that qualify under
    ``_classify_alert``. ``log_prefix`` lets callers distinguish scheduled
    vs manual triggers in the log stream — the per-row "BUY path=...",
    "SELL path=...", and "suppressed:" lines all carry the same prefix.

    Pure over the report payload + the best-strategy lookup; safe to call
    from both the scheduled hot path and the manual-trigger HTTP handler.
    Per PATTERNS.md P-4 there is exactly ONE rule path — both callers
    flow through ``_classify_alert`` here, never a parallel implementation.
    """
    if not report or 'data' not in report:
        return [], []
    rows = report['data'] or []
    best_map = _get_best_strategy_map()
    buys, sells = [], []
    for r in rows:
        sym = r.get('symbol', '?')
        lite_sig, pro_sig = _row_signals(r)
        bs = best_map.get(sym)
        best_key = bs.get('key') if bs else None
        verdict, reason = _classify_alert(lite_sig, pro_sig, best_key)
        if verdict == 'BUY':
            print(f"{log_prefix} {sym} BUY: path={reason}")
            buys.append(r)
        elif verdict == 'SELL':
            print(f"{log_prefix} {sym} SELL: path={reason}")
            sells.append(r)
        else:
            print(f"{log_prefix} {sym} suppressed: {reason}")
    return buys, sells


def _render_alert_email(buys, sells, report):
    """Build ``(subject, text_body, html_body)`` for the signal-brief email.

    Single renderer for both the scheduled and the manual alert paths, which
    keeps the plain-text / HTML parity invariant from Round 8b in one place.

    Args:
        buys: Report rows that qualified for a BUY alert.
        sells: Report rows that qualified for a SELL alert.
        report: Daily-report payload. ``report['summary']['total_symbols']``
            is required; ``market_sentiment`` falls back to ``'N/A'``.

    Returns:
        ``(subject, text_body, html_body)`` — three strings.

    Raises:
        KeyError: if ``report`` has no ``summary`` / ``total_symbols`` or a
            row lacks ``symbol``, ``current_price`` or ``consensus_signal``.

    A missing or null ``pct_change`` on a model payload renders as +0.00%
    instead of aborting the whole email.
    """
    date_str = datetime.now().strftime('%B %d, %Y')
    best_map = _get_best_strategy_map()

    # Round 8b: peer median SVR per row, sourced from the screener CSV via
    # verdict_provider. Already float-coerced; None for ETFs / tickers
    # without an industry_group bucket. Loaded once, outside the row loops.
    screener_idx = {}
    if HAS_SCREENER:
        try:
            screener_idx = _verdict_provider.load_screener_index()
        except Exception as exc:
            print(f"[Alert] Peer median SVR lookup unavailable: {exc}")

    def _best_str(sym):
        b = best_map.get(sym)
        return b['name'] if b else ''

    def _svr_str(a):
        svr = a.get('svr')
        return f"{svr:.1f}x" if svr is not None else ''

    def _peer_svr_str(sym):
        row = screener_idx.get(sym)
        if not row:
            return ''
        psvr = row.get('peer_median_svr')
        return f"{psvr:.1f}x" if psvr is not None else ''

    def _pct(a, model_key):
        # Predicted % change for one model; 0 when the payload or value is
        # absent/null (JSON round-trips turn NaN into null).
        model = a.get(model_key)
        if not model:
            return 0
        pct = model.get('pct_change')
        return 0 if pct is None else pct

    def _text_section(title, alerts):
        # Plain-text block for one direction; empty list when nothing fired.
        if not alerts:
            return []
        out = [f"{title} ({len(alerts)}):", "-" * 60]
        for a in alerts:
            sym = a['symbol']
            bs = _best_str(sym)
            sv = _svr_str(a)
            psv = _peer_svr_str(sym)
            out.append(f"  {sym:<6}  Price: ${a['current_price']:<10}  "
                       f"Lite: {_pct(a, 'v2'):+.2f}%  Pro: {_pct(a, 'v3'):+.2f}%"
                       f"{'  Best: ' + bs if bs else ''}"
                       f"{'  SVR: ' + sv if sv else ''}"
                       f"{'  Peer SVR: ' + psv if psv else ''}")
        out.append("")
        return out

    summary = report['summary']
    total_symbols = summary['total_symbols']
    sentiment = summary.get('market_sentiment', 'N/A')

    # Plain text version
    lines = [f"Quantfolio Signal Brief — {date_str}", "=" * 50, ""]
    lines.extend(_text_section("BUY SIGNALS", buys))
    lines.extend(_text_section("SELL SIGNALS", sells))
    lines.append(f"Total scanned: {total_symbols} symbols")
    lines.append(f"Market sentiment: {sentiment}")
    lines.append("")
    lines.append("— Quantfolio (auto-generated, do not reply)")
    text_body = "\n".join(lines)

    # HTML version (nicer in most email clients)
    def _row(a, color):
        bs = _best_str(a['symbol'])
        sv = _svr_str(a)
        psv = _peer_svr_str(a['symbol'])
        return (f'<tr><td style="padding:6px 12px;font-weight:700">{a["symbol"]}</td>'
                f'<td style="padding:6px 12px">${a["current_price"]}</td>'
                f'<td style="padding:6px 12px;color:{color};font-weight:700">'
                f'{a["consensus_signal"]}</td>'
                f'<td style="padding:6px 12px">{_pct(a, "v2"):+.2f}%</td>'
                f'<td style="padding:6px 12px">{_pct(a, "v3"):+.2f}%</td>'
                f'<td style="padding:6px 12px;font-size:12px">{bs or "—"}</td>'
                f'<td style="padding:6px 12px;font-size:12px">{sv or "—"}</td>'
                f'<td style="padding:6px 12px;font-size:12px">{psv or "—"}</td></tr>')

    # BUY rows (green) first, then SELL rows (red) — same order as the text part.
    rows_html = "".join(
        [_row(a, "#22c55e") for a in buys] + [_row(a, "#ef4444") for a in sells]
    )

    html_body = f"""
    <div style="font-family:Arial,sans-serif;max-width:700px;margin:0 auto;color:#1e293b">
      <h2 style="color:#0f172a;border-bottom:2px solid #2d8b8b;padding-bottom:8px">
        Quantfolio Signal Brief — {date_str}
      </h2>
      <p style="color:#475569;font-size:14px">
        <strong>{len(buys)}</strong> BUY and <strong>{len(sells)}</strong> SELL
        high-conviction signals.
      </p>
      <table style="border-collapse:collapse;width:100%;font-size:14px;margin:16px 0">
        <tr style="background:#f1f5f9;font-weight:600;font-size:12px;text-transform:uppercase;color:#64748b">
          <th style="padding:8px 12px;text-align:left">Symbol</th>
          <th style="padding:8px 12px;text-align:left">Price</th>
          <th style="padding:8px 12px;text-align:left">Signal</th>
          <th style="padding:8px 12px;text-align:left">Lite</th>
          <th style="padding:8px 12px;text-align:left">Pro</th>
          <th style="padding:8px 12px;text-align:left">Best Strategy</th>
          <th style="padding:8px 12px;text-align:left">SVR</th>
          <th style="padding:8px 12px;text-align:left">Peer SVR</th>
        </tr>
        {rows_html}
      </table>
      <p style="color:#94a3b8;font-size:12px;margin-top:24px">
        Scanned {total_symbols} symbols &bull;
        Sentiment: {sentiment} &bull;
        Auto-generated by Quantfolio
      </p>
    </div>"""

    subject = f"{ALERT_SUBJECT} — {len(buys)} BUY, {len(sells)} SELL ({date_str})"
    return subject, text_body, html_body


def _send_alert_email(subject, text_body, html_body):
    """Send the rendered signal brief through the configured SMTP server.

    Builds a multipart/alternative message (plain text first, HTML second),
    upgrades the connection with STARTTLS, logs in as ``SMTP_USER`` and sends
    to every address in ``ALERT_TO``.

    Args:
        subject: Subject line.
        text_body: Plain-text part.
        html_body: HTML part.

    Raises:
        RuntimeError: no recipients are configured (``ALERT_TO`` is empty).
        smtplib.SMTPException / OSError: transport failure — propagated so
            callers can surface a structured error to the UI. A connect or
            read that exceeds the socket timeout surfaces as an ``OSError``
            subclass.
    """
    if not ALERT_TO:
        raise RuntimeError("no recipients configured")
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"]    = SMTP_USER
    msg["To"]      = ", ".join(ALERT_TO)
    msg.attach(MIMEText(text_body, "plain"))
    msg.attach(MIMEText(html_body, "html"))
    # Without an explicit timeout smtplib blocks for as long as the OS allows,
    # so an unresponsive mail server could stall the calling thread (the
    # scheduled daily-scan thread among them) indefinitely.
    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT, timeout=30) as server:
        server.starttls()
        server.login(SMTP_USER, SMTP_PASSWORD)
        server.sendmail(SMTP_USER, ALERT_TO, msg.as_string())


def _send_signal_alerts(report):
    """Email any backtest-validated BUY/SELL signals after a dual report.

    Scheduled-trigger wrapper. Contract: this function NEVER raises — a
    failure while classifying rows, rendering the email or talking to the
    SMTP server must not crash the daily-scan thread. Every failure is
    logged as ``[Alert] Email failed: ...`` and swallowed. (The manual-trigger
    HTTP handler uses the same classifier + renderer + sender but surfaces
    errors to its caller instead.)

    Does nothing when ``SMTP_ENABLED`` is false. Returns None.
    """
    if not SMTP_ENABLED:
        return
    try:
        # Classification and rendering sit inside the try as well: a
        # malformed report (e.g. missing 'summary') would otherwise escape
        # and break the never-raises contract.
        buys, sells = _classify_report_alerts(report, log_prefix="[Alert]")
        if not buys and not sells:
            print("[Alert] No backtest-validated signals today — no email sent.")
            return
        subject, text_body, html_body = _render_alert_email(buys, sells, report)
        _send_alert_email(subject, text_body, html_body)
        print(f"[Alert] Email sent to {', '.join(ALERT_TO)} — {len(buys)} BUY, {len(sells)} SELL.")
    except Exception as e:
        print(f"[Alert] Email failed: {e}")


# Daily movers cache (in-memory, refreshed on schedule + on-demand).
#   data          — list of per-symbol result records from the latest scan
#   generated_at  — ISO timestamp written by _run_daily_scan, or a
#                   filename-derived tag when restored by
#                   _load_latest_scan_from_disk
#   is_running    — True while _run_daily_scan is in progress; a second call
#                   made during that time returns immediately
# _run_daily_scan additionally stores a "model_version" key after a
# successful scan.
_movers_cache = {
    "data": [],
    "generated_at": None,
    "is_running": False,
}
# Held by _run_daily_scan and _load_latest_scan_from_disk around their
# _movers_cache updates.
_movers_lock = threading.Lock()


# =============================================================================
# SCHEDULED DAILY SCAN  (4:05 PM US Eastern each weekday)
# =============================================================================

def _run_daily_scan(version=None):
    """Scan the whole symbol universe and publish the result to the movers cache.

    Background task. A call made while another scan is still in progress
    returns immediately. Each symbol is predicted with ``predict_ticker``;
    per-symbol failures are logged and skipped. When at least one prediction
    succeeded, the results are sorted by ``pct_change`` (descending), written
    to ``daily_scan_<version>_<timestamp>.csv`` / ``.json`` under
    ``CACHE_DIR`` and stored in ``_movers_cache``. Any other error is logged
    as ``[SCAN ERROR]``; the ``is_running`` flag is always cleared on exit.

    Args:
        version: Model version forwarded to ``predict_ticker``; ``None`` is
            labelled ``'auto'`` in logs, filenames and the cache.
    """
    # Claim the "running" flag atomically, or bail out if it is already held.
    with _movers_lock:
        if _movers_cache["is_running"]:
            return
        _movers_cache["is_running"] = True

    ver_label = (version or 'auto').upper()
    print(f"\n[{datetime.now():%Y-%m-%d %H:%M}] Starting daily scan ({ver_label})…")
    try:
        from finance_model_v2 import get_all_symbols as _get_syms
        symbols = _get_syms()
        results = []
        for done, sym in enumerate(symbols, start=1):
            try:
                outcome = predict_ticker(sym, cache_dir=CACHE_DIR, verbose=False, version=version)
                if "error" not in outcome:
                    results.append(outcome)
            except Exception as e:
                print(f"  [scan] {sym}: {e}")
            # Progress heartbeat every ten symbols.
            if done % 10 == 0:
                print(f"  [{done}/{len(symbols)}] scanned…")

        if not results:
            # Nothing usable — leave the previous cache untouched.
            return

        import pandas as pd
        frame = pd.DataFrame(results)
        frame = frame.sort_values('pct_change', ascending=False).reset_index(drop=True)
        records = json.loads(frame.to_json(orient='records'))

        # Persist CSV + JSON snapshots; the model version is part of the filename.
        ver_tag = version or 'auto'
        stamp = datetime.now().strftime('%Y%m%d_%H%M')
        base_path = os.path.join(CACHE_DIR, f"daily_scan_{ver_tag}_{stamp}")
        csv_path = base_path + ".csv"
        json_path = base_path + ".json"
        _ensure_cache_dir(CACHE_DIR)
        frame.to_csv(csv_path, index=False)
        frame.to_json(json_path, orient='records', indent=2)
        print(f"  Saved: {csv_path}")

        with _movers_lock:
            _movers_cache.update(
                data=records,
                generated_at=datetime.now().isoformat(),
                model_version=ver_tag,
            )
        print(f"[{datetime.now():%Y-%m-%d %H:%M}] Scan complete — {len(records)} symbols ({ver_label}).\n")
    except Exception as exc:
        print(f"[SCAN ERROR] {exc}")
    finally:
        with _movers_lock:
            _movers_cache["is_running"] = False


def _start_scheduler():
    """Register the recurring background jobs with APScheduler and start it.

    Jobs (all evaluated in the ``America/New_York`` zone, ``max_instances=1``,
    ``replace_existing=True``):
      * ``daily_dual_report`` — Mon–Fri 16:05 → ``_run_dual_report``.
      * ``quarterly_leader_rebuild`` — 15th of Feb/May/Aug/Nov at 02:00 →
        ``_leaders_rebuild_worker``.
      * ``biweekly_backtest_refresh`` — every Friday 21:00 →
        ``_biweekly_backtest_refresh_job``.

    After starting, prints the next three fire times of every job as a
    startup audit trail (best-effort: a probe failure is only logged).
    If an ImportError occurs (APScheduler not installed), prints
    manual-trigger hints and schedules nothing. Returns None either way.
    """
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
        scheduler = BackgroundScheduler()
        # One canonical IANA zone name for every job. The legacy alias
        # 'US/Eastern' names the same zone but is a backward-compatibility
        # link that some tz databases package separately; the fire-time probe
        # below already uses 'America/New_York'.
        eastern = 'America/New_York'
        # Options shared by all three jobs. max_instances=1 stops overlap if a
        # prior (possibly late-fired) run is still going.
        shared = dict(timezone=eastern, replace_existing=True, max_instances=1)

        # Full Lite+Pro report at 4:05 PM Eastern (Yahoo Finance updates ~4:01 PM).
        # misfire_grace_time=14400 (4h): laptop-host scheduler can sleep/lock
        # right through 16:05; APScheduler's 1-second default silently dropped
        # every fire after April 2026 (server.err.log: "Run time of job
        # _run_dual_report was missed by 0:40:28"). 4h covers a typical
        # afternoon away from the desk. Band-Aid until cron-off-laptop.
        scheduler.add_job(
            _run_dual_report,
            'cron',
            day_of_week='mon-fri',
            hour=16, minute=5,
            id='daily_dual_report',
            misfire_grace_time=14400,
            **shared,
        )
        # Quarterly Leader Detector rebuild (Phase 1.0 → 1.4): Feb/May/Aug/Nov
        # 15 at 2 AM Eastern. These dates fall ~2 weeks after the typical 10-Q
        # filing deadline (40 days after quarter-end), so SEC XBRL data for
        # the most-recent quarter is available. Cold run ~3.5h; warm reruns
        # (checkpointed + 90-day XBRL TTL) ~10 min. misfire_grace_time=14400
        # mirrors the daily job — laptop sleep at 2 AM is the rule, not the
        # exception.
        scheduler.add_job(
            _leaders_rebuild_worker,
            'cron',
            month='2,5,8,11', day=15,
            hour=2, minute=0,
            id='quarterly_leader_rebuild',
            misfire_grace_time=14400,
            **shared,
        )
        # Round 8d: biweekly backtest refresh. Cron fires every Friday 9 PM ET;
        # the wrapper applies a parity check against BACKTEST_REFRESH_REFERENCE_WEEK
        # so only every-other-Friday actually runs. misfire_grace_time=3600
        # tolerates short server-restart windows.
        scheduler.add_job(
            _biweekly_backtest_refresh_job,
            'cron',
            day_of_week='fri',
            hour=21, minute=0,
            id='biweekly_backtest_refresh',
            misfire_grace_time=3600,
            **shared,
        )
        scheduler.start()
        print("[Scheduler] Daily Lite+Pro report → 4:05 PM EST, Mon–Fri (auto after market close).")
        print("[Scheduler] Quarterly Leader Detector rebuild → Feb/May/Aug/Nov 15 at 2 AM EST.")
        print("[Scheduler] Biweekly backtest refresh → 9 PM ET, every other Friday.")
        # Startup probe: log the next 3 fire times for each registered job
        # so a server restart leaves an audit trail of "when did this think
        # it was going to fire next." Pairs with the Round 8b/post-misfire
        # fix — the empirical evidence for the dropped-fires bug came from
        # comparing the saved daily_scan_*.json mtimes against the expected
        # 16:05 ET cadence; this probe makes that comparison observable
        # without grepping the cache directory.
        try:
            now = datetime.now(ZoneInfo(eastern))
            for job in scheduler.get_jobs():
                fires = []
                prev = None
                for _ in range(3):
                    # First call searches forward from `now`; later calls
                    # search forward from the previously found fire time.
                    nxt = job.trigger.get_next_fire_time(prev, now if prev is None else prev)
                    if nxt is None:
                        break
                    fires.append(nxt)
                    prev = nxt
                fires_str = ", ".join(f.strftime("%Y-%m-%d %H:%M %Z") for f in fires) if fires else "(none)"
                print(f"[Scheduler] {job.id} next fires: {fires_str}")
        except Exception as exc:
            print(f"[Scheduler] next-fires probe failed: {exc}")
    except ImportError:
        print("[Scheduler] apscheduler not installed — no auto-scheduling.")
        print("  Install: pip install apscheduler")
        print("  Manual trigger: GET /api/report?refresh=true  (daily report)")
        print("  Manual trigger: POST /api/leaders/rebuild     (leader rebuild)")


def _load_latest_scan_from_disk():
    """On startup, load the most recent ``daily_scan_*.json`` into the cache.

    Scan files are named ``daily_scan_<tag>_<YYYYMMDD>_<HHMM>.json``. The
    newest file is chosen by the timestamp embedded in the name, NOT by plain
    filename order: the version tag precedes the timestamp, so a lexical sort
    would rank e.g. an old ``v3`` scan above a newer ``auto`` one. Files whose
    name carries no recognisable timestamp rank below all timestamped files
    and are ordered by name among themselves.

    On success ``_movers_cache['data']`` holds the parsed JSON and
    ``generated_at`` holds the filename without prefix and extension
    (e.g. ``auto_20260421_1605``). Finding no files is silent; any error is
    logged as ``[Startup] No cached scan: ...`` and swallowed so startup is
    never blocked. Returns None.
    """
    prefix, suffix = "daily_scan_", ".json"

    def _recency_key(name):
        # (timestamp, name): zero-padded YYYYMMDD_HHMM strings sort
        # chronologically; the name breaks ties deterministically.
        pieces = name[:-len(suffix)].rsplit("_", 2)
        stamp = ""
        if (len(pieces) == 3
                and len(pieces[1]) == 8 and pieces[1].isdigit()
                and len(pieces[2]) == 4 and pieces[2].isdigit()):
            stamp = f"{pieces[1]}_{pieces[2]}"
        return stamp, name

    try:
        scan_files = [
            name for name in os.listdir(CACHE_DIR)
            if name.startswith(prefix) and name.endswith(suffix)
        ]
        if scan_files:
            newest = max(scan_files, key=_recency_key)
            with open(os.path.join(CACHE_DIR, newest), encoding="utf-8") as fh:
                data = json.load(fh)
            with _movers_lock:
                _movers_cache["data"] = data
                _movers_cache["generated_at"] = newest.replace(
                    prefix, "").replace(suffix, "")
            print(f"[Startup] Loaded {len(data)} symbols from {newest}")
    except Exception as exc:
        print(f"[Startup] No cached scan: {exc}")


# =============================================================================
# FASTAPI APP
# =============================================================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: do all startup work, then yield to FastAPI.

    Startup order: ensure the cache directory exists, start the background
    scheduler, restore the most recent scan from disk, then print two
    best-effort diagnostics (backtest-library coverage and screener-CSV
    schema). Neither diagnostic can abort startup — failures are printed and
    ignored. Nothing runs after the ``yield``.
    """

    def _report_strategy_coverage():
        # Round 8b: surface backtest-library coverage so the alert engine's
        # validation step is observable. Single-model BUY/SELL paths need a
        # populated best_strategy entry — coverage gaps explain "expected
        # fire, nothing happened" cases without grepping per-ticker logs.
        try:
            covered = len(_get_best_strategy_map())
            universe = len(get_all_symbols())
            share = 0 if not universe else covered * 100 // universe
            print(f"[Scheduler] best_strategy_map populated: {covered} of {universe} tickers ({share}%)")
        except Exception as exc:
            print(f"[Scheduler] best_strategy_map probe failed: {exc}")

    def _warn_if_screener_csv_outdated():
        # Bucket 2: warn once if screener_results.csv predates the tests_json /
        # dealbreakers_json columns. Non-fatal — verdict_provider copes with
        # the missing columns by rendering dashes.
        if not HAS_SCREENER:
            return
        try:
            ok, missing = _verdict_provider.csv_has_required_columns()
            if ok:
                return
            message = "".join((
                "[startup] screener_results.csv missing ",
                "/".join(missing),
                " columns — test-dot row and flag chips will render ",
                "as dashes until next screener run. Regenerate with: ",
                "python fundamental_screener.py --universe ",
                "universe_prescreened.csv --csv-out screener_results.csv",
            ))
            print(message)
        except Exception as exc:  # never block startup on this
            print(f"[startup] verdict_provider column check failed: {exc}")

    _ensure_cache_dir(CACHE_DIR)
    _start_scheduler()
    _load_latest_scan_from_disk()
    _report_strategy_coverage()
    _warn_if_screener_csv_outdated()
    yield


# ASGI application object. `lifespan` supplies the startup work (cache dir,
# scheduler, cached-scan load, diagnostics).
app = FastAPI(
    title="Quantfolio",
    version="2.0",
    lifespan=lifespan,
)

# CORS is fully open: any origin, HTTP method and request header is allowed.
# NOTE(review): acceptable for a local dashboard; restrict allow_origins
# before exposing this server beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# ─── API Routes ───


def _coerce_finite_float(raw):
    """Return ``float(raw)`` when it yields a finite number, otherwise ``None``.

    Unparseable input and non-finite results (NaN, +/-infinity) both map to
    ``None``. Non-finite values must not reach the response payload: the
    handlers return it through ``JSONResponse``, which refuses to serialise
    them.
    """
    import math  # function-scope import keeps the helper self-contained

    try:
        number = float(raw)
    except (TypeError, ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def _inject_classifier_fields(result: dict, symbol: str) -> dict:
    """Round 7c verification round 8 — overlay classifier-derived
    (sector, industry_group, industry) onto a /api/predict[-compare] result
    so Ticker Lookup shows the same canonical labels as Leader Detector.

    Resolution order:
      1. Look up SIC from the screener CSV via verdict_provider (cheap —
         mtime-keyed in-process cache; only re-reads CSV on file change).
      2. Call classifier.classify(symbol, sic, yahoo_industry).
      3. If classifier returns a non-Unknown sector (i.e. ticker is in
         TICKER_OVERRIDES OR the SIC matched a range), OVERWRITE result's
         sector + industry, and set industry_group. Override tickers are
         keyed on the symbol so they win regardless of SIC.
      4. If classifier returns Unknown (ETFs, off-list stocks with no SEC
         filings), KEEP Yahoo's sector/industry — wright's review:
         "'Unknown' on the verdict card for an ETF is a worse UX
         regression than showing Yahoo's slightly-different taxonomy."
         Leave industry_group absent so the frontend renders an em-dash.

    The same screener row is also used to surface two numeric fields:
      * ``pe_trailing`` (Round 7c-2) for Ticker Lookup's P/E card.
      * ``peer_median_svr`` (Round 8a Phase 3) so the live SVR card can
        annotate the peer median next to the live value.
    Either may arrive as a raw CSV string, so both are coerced to float.
    A value that is missing or empty leaves ``result`` untouched; a value
    that is unparseable or non-finite (NaN / inf) is stored as ``None``.

    No changes to finance_model_v2.py per the round constraint — the model
    result is mutated only here in the API layer.

    Args:
        result: Prediction payload; mutated in place.
        symbol: Ticker symbol; upper-cased for the screener lookup.

    Returns:
        The same ``result`` dict that was passed in.
    """
    sic = None
    if HAS_SCREENER:
        try:
            row = _verdict_provider.load_screener_index().get(symbol.upper())
            if row:
                sic = row.get("sic")
                for column in ("pe_trailing", "peer_median_svr"):
                    raw = row.get(column)
                    # Absent/blank cells leave any model-provided value alone.
                    if raw not in (None, ""):
                        result[column] = _coerce_finite_float(raw)
        except Exception:
            # Deliberate best-effort: the screener overlay is optional and a
            # failure here must never break the prediction response.
            pass
    sec, ig, ind = _classify_symbol(symbol, sic, result.get("industry"))
    if sec != "Unknown":
        result["sector"] = sec
        result["industry_group"] = ig
        result["industry"] = ind
    return result


@app.get("/api/predict/{symbol}")
async def api_predict(symbol: str, version: str = None, strategy: str = None,
                      weight_rf: float = 0.8, weight_xgb: float = 0.2,
                      rolling_window: int = None):
    """
    Full ensemble prediction for a single ticker.

    Query params:
      ?version=v3              — Pro (stacking) or Lite (RF+XGB)
      ?strategy=auto           — auto (ETF→full, stock→buy_only), full, buy_only
      ?weight_rf=0.8&weight_xgb=0.2  — Lite model weights
      ?rolling_window=504             — training window (omit or 0 for all data)

    Unrecognised ``version`` / ``strategy`` values are not rejected: version
    falls back to ``None`` and strategy to ``'auto'``. The two weights are
    rescaled to sum to 1 whenever their sum is positive.

    Raises:
      HTTPException 400 — symbol is not alphanumeric or longer than 6 chars
      HTTPException 500 — ``predict_ticker`` raised
      HTTPException 404 — the prediction result carries an ``"error"`` key
    """
    symbol = symbol.upper().strip()
    if len(symbol) > 6 or not symbol.isalnum():
        raise HTTPException(400, "Invalid ticker symbol")

    # Selector validation: anything outside the known sets gets the default.
    ver = None if version not in ('v2', 'v3') else version
    strat = 'auto' if strategy not in ('auto', 'full', 'buy_only') else strategy

    # A missing, zero or negative window means "train on all data".
    rw = None
    if rolling_window and rolling_window > 0:
        rw = rolling_window

    # Normalise the Lite weights; a non-positive sum is passed through as-is.
    weight_sum = weight_rf + weight_xgb
    if weight_sum > 0:
        weight_rf = weight_rf / weight_sum
        weight_xgb = weight_xgb / weight_sum

    try:
        result = predict_ticker(
            symbol,
            cache_dir=CACHE_DIR,
            verbose=False,
            version=ver,
            strategy=strat,
            weight_rf=weight_rf,
            weight_xgb=weight_xgb,
            rolling_window=rw,
        )
    except Exception as exc:
        raise HTTPException(500, f"Prediction failed: {exc}")

    if "error" in result:
        raise HTTPException(404, result["error"])

    # Enrich with the best backtest strategy (None when the symbol has none)
    # and the classifier-derived sector labels.
    result['best_strategy'] = _get_best_strategy_map().get(symbol)
    _inject_classifier_fields(result, symbol)
    return JSONResponse(result)


def _is_cached_report_acceptable(report_timestamp: datetime, now: datetime | None = None) -> bool:
    """Return True if the cached report is the freshest available.

    Daily Report scheduler runs Mon-Fri at 4:05pm EST. A cached report is
    acceptable when no scheduled run has occurred between the cache timestamp
    and now — so Friday's report stays valid through Monday 4:05pm.
    """
    if now is None:
        now = datetime.now(tz=ZoneInfo("America/New_York"))

    age = now - report_timestamp
    # Clock-skew guard: a future-dated cache (NTP skew, manual clock change)
    # would otherwise short-circuit to "fresh forever". Reject and let the
    # next scheduled run rebuild it.
    if age.total_seconds() < 0:
        return False
    # Within 22h is always fresh — short-circuit to skip the schedule walk.
    if age.total_seconds() < 22 * 3600:
        return True
    return not _scheduled_run_occurred_between(report_timestamp, now)


def _scheduled_run_occurred_between(start: datetime, end: datetime) -> bool:
    """Did a Mon-Fri 4:05pm EST scheduled Daily Report run occur in (start, end]?

    Timezone-aware inputs are converted to America/New_York before the walk,
    so the 16:05 anchor and the weekday test refer to New York wall-clock
    time whatever zone the caller's datetimes carry (for example a UTC
    timestamp read back from a report file). Naive inputs are used as given.

    Args:
        start: Exclusive lower bound of the interval.
        end: Inclusive upper bound of the interval.

    Returns:
        True when at least one weekday 16:05 slot lies in ``(start, end]``;
        False otherwise, including when ``end <= start``.
    """
    if end <= start:
        return False

    if start.tzinfo is not None:
        start = start.astimezone(ZoneInfo("America/New_York"))
    if end.tzinfo is not None:
        end = end.astimezone(ZoneInfo("America/New_York"))

    # First candidate slot: 16:05 on start's date, or the following day when
    # start is already at or past that slot.
    cursor = start.replace(hour=16, minute=5, second=0, microsecond=0)
    if cursor <= start:
        cursor = cursor + timedelta(days=1)
    while cursor <= end:
        if cursor.weekday() < 5:  # Mon-Fri
            return True
        cursor = cursor + timedelta(days=1)
    return False


def _get_cached_compare_result(symbol):
    """
    Fast-path lookup of ``symbol`` in the cached daily report.

    The daily report stores full `predict_ticker_compare` results, so a hit
    can be served instantly instead of rebuilding both models. The cache is
    honoured until the next scheduled 4:05pm EST run (weekend-aware), as
    decided by `_is_cached_report_acceptable`.

    Returns a shallow copy of the matching entry with `cached_from_report=True`
    and `cached_at` added, or None when the report is missing, stale, has an
    unparseable timestamp, or does not contain the symbol.
    """
    def _snapshot():
        # Read both fields under the lock so they belong to the same report.
        with _report_lock:
            return _report_cache.get("generated_at"), _report_cache.get("data")

    gen_at, data = _snapshot()

    # Cold-start fallback: nothing in memory yet, but a report may exist on
    # disk. The loader fills _report_cache under its own lock, so take a
    # fresh snapshot afterwards.
    if not (gen_at and data):
        _load_latest_report_from_disk()
        gen_at, data = _snapshot()
        if not (gen_at and data):
            return None

    try:
        gen_dt = datetime.fromisoformat(gen_at)
    except Exception:
        return None

    # Weekend-aware freshness check.
    if not _is_cached_report_acceptable(gen_dt):
        return None

    # The payload is either a dict wrapping the entries under 'data' or the
    # entry list itself.
    entries = data.get('data', []) if isinstance(data, dict) else data
    match = next((item for item in entries if item.get('symbol') == symbol), None)
    if match is None:
        return None
    return {**match, 'cached_from_report': True, 'cached_at': gen_at}


@app.get("/api/predict-compare/{symbol}")
async def api_predict_compare(symbol: str, strategy: str = None, refresh: bool = False):
    """
    Run BOTH Lite and Pro models on a single ticker.
    Returns side-by-side predictions with consensus signal.

    Query params:
      ?strategy=auto    — auto (ETF→full, stock→buy_only), full, buy_only
      ?refresh=true     — bypass the daily-report cache, run fresh prediction

    Raises:
      HTTPException 400 — symbol is not alphanumeric or longer than 6 chars
      HTTPException 500 — ``predict_ticker_compare`` raised
      HTTPException 404 — the comparison result carries an ``"error"`` key
    """
    symbol = symbol.upper().strip()
    if len(symbol) > 6 or not symbol.isalnum():
        raise HTTPException(400, "Invalid ticker symbol")

    # Fast-path: serve the entry from the cached daily report when there is
    # one. Restricted to the default auto strategy, because the report was
    # generated with auto strategy selection per symbol; an explicit override
    # needs a fresh computation.
    wants_auto = strategy is None or strategy == 'auto'
    if wants_auto and not refresh:
        cached = _get_cached_compare_result(symbol)
        if cached is not None:
            cached['best_strategy'] = _get_best_strategy_map().get(symbol)
            _inject_classifier_fields(cached, symbol)
            return JSONResponse(cached)

    # Unknown strategy names fall back to 'auto' rather than erroring.
    strat = 'auto' if strategy not in ('auto', 'full', 'buy_only') else strategy
    try:
        result = predict_ticker_compare(
            symbol, cache_dir=CACHE_DIR, verbose=False, strategy=strat
        )
    except Exception as exc:
        raise HTTPException(500, f"Comparison failed: {exc}")

    if "error" in result:
        raise HTTPException(404, result["error"])

    # Enrich with the best backtest strategy (None when the symbol has none)
    # and the classifier-derived sector labels.
    result['best_strategy'] = _get_best_strategy_map().get(symbol)
    _inject_classifier_fields(result, symbol)
    return JSONResponse(result)


# ─── Dual-model report cache ───
# In-process state for the dual-model daily report, shared between request
# handlers and the background scan thread.
_report_cache = {
    # Report payload: the scan result, or the JSON loaded back from disk.
    "data": None,
    # ISO-8601 timestamp string recorded alongside "data".
    "generated_at": None,
    # True while _run_dual_report() is executing; prevents overlapping scans.
    "is_running": False,
}
# Held while _report_cache is updated or snapshotted.
_report_lock = threading.Lock()


def _run_dual_report():
    """Background task: run the dual-model scan and cache its result.

    Only one scan runs at a time: the ``is_running`` flag is claimed under
    the lock and a call that finds it already set returns immediately. On a
    truthy scan result the cache is refreshed, stamped with the current
    America/New_York time, and signal alerts are sent. Any exception is
    printed rather than propagated, and the flag is always cleared on exit.
    """
    # Claim the run slot atomically; bail out if another scan owns it.
    with _report_lock:
        already_running = _report_cache["is_running"]
        if not already_running:
            _report_cache["is_running"] = True
    if already_running:
        return

    print(f"\n[{datetime.now():%Y-%m-%d %H:%M}] Starting dual-model report…")
    try:
        report = daily_scan_both(cache_dir=CACHE_DIR)
        if not report:
            # Empty result: keep whatever is cached (finally still runs).
            return
        with _report_lock:
            _report_cache["data"] = report
            _report_cache["generated_at"] = datetime.now(ZoneInfo("America/New_York")).isoformat()
        print(f"[{datetime.now():%Y-%m-%d %H:%M}] Dual report complete — {report['summary']['total_symbols']} symbols.\n")
        # Send email alert if any high-confidence signals found
        _send_signal_alerts(report)
    except Exception as exc:
        print(f"[DUAL REPORT ERROR] {exc}")
    finally:
        with _report_lock:
            _report_cache["is_running"] = False


@app.get("/api/report")
async def api_report(refresh: bool = False):
    """
    Dual-model daily report.
    GET /api/report               — return cached report
    GET /api/report?refresh=true  — trigger fresh dual-model scan

    A refresh request never waits for the scan: it answers immediately with
    the currently cached payload and a status of ``scan_started`` or
    ``scan_in_progress``. The plain request answers ``ok`` and also includes
    the best-strategy map.
    """
    if refresh:
        with _report_lock:
            already_running = _report_cache["is_running"]
            snap_data = _report_cache["data"]
            snap_at = _report_cache["generated_at"]

        if already_running:
            return JSONResponse({
                "status": "scan_in_progress",
                "message": "Dual-model scan already running.",
                "data": snap_data,
                "generated_at": snap_at,
            })

        # Fire-and-forget; the daemon thread does not keep the process alive.
        threading.Thread(target=_run_dual_report, daemon=True).start()
        return JSONResponse({
            "status": "scan_started",
            # C-11: banner band — keep in sync with DAILY_REPORT_EST in
            # frontend/index.html and USER_GUIDE.md Parts 4 & 11.
            "message": "Dual-model report started. This may take 25-55 minutes.",
            "data": snap_data,
            "generated_at": snap_at,
        })

    # Empty cache: try the on-disk report first. Called OUTSIDE the lock
    # because the loader acquires it itself.
    if _report_cache["data"] is None:
        _load_latest_report_from_disk()

    with _report_lock:
        snap_data = _report_cache["data"]
        snap_at = _report_cache["generated_at"]

    payload = {
        "status": "ok",
        "data": snap_data,
        "generated_at": snap_at,
        "best_strategies": _get_best_strategy_map(),
    }
    return JSONResponse(payload)


def _load_latest_report_from_disk():
    """On startup/first request, load the most recent dual_report_*.json.

    Picks the lexicographically last ``dual_report_*.json`` in ``CACHE_DIR``
    and stores its payload and a generation timestamp in ``_report_cache``.
    Does nothing when no such file exists. Never raises: any failure is
    printed and the cache is left untouched.

    Timestamp resolution (C-12 — the stored value must be a parseable ISO
    string, otherwise the frontend shows "Invalid Date"):
      1. ``summary.generated_at`` from the file, when it is a string that
         ``datetime.fromisoformat`` accepts. A naive value is localized to
         America/New_York (pre-Round 8a reports stored naive timestamps) so
         the freshness check works on non-EST machines.
      2. Otherwise — missing, wrong type, or unparseable — the file's mtime
         in America/New_York. The filename is deliberately NOT used.
      3. ``None`` if even the mtime cannot be read.

    The payload may be a dict (with an optional ``summary``) or a bare list
    of entries; anything else is treated as a failed load.
    """
    try:
        report_files = sorted(
            name for name in os.listdir(CACHE_DIR)
            if name.startswith("dual_report_") and name.endswith(".json")
        )
        if not report_files:
            return
        newest = report_files[-1]
        latest = os.path.join(CACHE_DIR, newest)
        with open(latest) as f:
            data = json.load(f)
        if not isinstance(data, (dict, list)):
            raise ValueError(f"unexpected payload type {type(data).__name__} in {newest}")

        # Only a dict payload can carry an embedded summary timestamp.
        summary = data.get('summary') if isinstance(data, dict) else None
        embedded = summary.get('generated_at') if isinstance(summary, dict) else None

        gen_at = None
        if isinstance(embedded, str) and embedded:
            try:
                parsed = datetime.fromisoformat(embedded)
            except ValueError:
                parsed = None  # unparseable -> fall through to mtime
            if parsed is not None:
                if parsed.tzinfo is None:
                    gen_at = parsed.replace(tzinfo=ZoneInfo("America/New_York")).isoformat()
                else:
                    gen_at = embedded  # already aware: keep the stored string
        if not gen_at:
            try:
                gen_at = datetime.fromtimestamp(
                    os.path.getmtime(latest), tz=ZoneInfo("America/New_York")
                ).isoformat()
            except Exception:
                gen_at = None

        with _report_lock:
            _report_cache["data"] = data
            _report_cache["generated_at"] = gen_at
        print(f"[Startup] Loaded dual report from {newest}")
    except Exception as exc:
        print(f"[Startup] No cached dual report: {exc}")


@app.post("/api/alerts/send-manual")
async def api_alerts_send_manual():
    """Round 8c: re-send the signal-brief email from the current on-disk
    report. Does NOT regenerate the report — purely uses the cached payload
    (which was either freshly produced by the 4:05 PM EST scheduled run or
    loaded from disk on startup).

    Reuses ``_classify_report_alerts`` + ``_render_alert_email`` +
    ``_send_alert_email`` so the manual path goes through the SAME rule and
    rendering code as the scheduled trigger (PATTERNS.md P-4 — one rule
    path, no parallel implementation).

    Returns ``{success, recipients_sent, alert_count: {buy, sell}, error?}``.
    """
    # Lazy-load from disk if cold start hasn't populated the cache yet —