"""
=====================================================================
Benchmarking Dimensionality Reduction: The Epistemology of Embeddings
=====================================================================

This example demonstrates the "Advanced Exploration and Benchmarking" pillar of the
``coco_pipe`` strategic vision. We move beyond "looking good" and use rigorous
metrics (Trustworthiness, Continuity, LCMC) to quantify embedding distortion.

We compare PCA (Linear) and UMAP (Non-linear) on the classic "S-Curve" manifold,
a structure that is inherently 2D but embedded in 3D.
"""

# %%
# Imports
# -------

import os

# Prevent multiprocessing segfaults on macOS by limiting threads.
# These assignments must run BEFORE the numerical libraries below are
# imported: the OpenMP runtime and numexpr read their thread-count variables
# when they are first loaded, so setting them after the imports may be ignored.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["LOKY_MAX_CPU_COUNT"] = "1"
os.environ["NUMEXPR_MAX_THREADS"] = "1"

import matplotlib.pyplot as plt  # noqa: E402
from sklearn.datasets import make_s_curve  # noqa: E402

from coco_pipe.dim_reduction import DimReduction  # noqa: E402
from coco_pipe.viz.dim_reduction import plot_embedding  # noqa: E402

# %%
# 1. Generate Ground Truth Manifold
# ---------------------------------
# The S-Curve is a standard benchmark. It has intrinsic dimension 2.
# We generate 1000 points.

# Sample the manifold. ``make_s_curve`` returns the 3D coordinates together
# with a per-sample scalar (the position along the curve, per scikit-learn),
# which is reused as the point colour in every plot of this example.
n_points = 1000
X, color = make_s_curve(n_samples=n_points, random_state=42)

# Draw the raw 3D point cloud with the coco_pipe plotting helper, using all
# three input coordinates as plot axes.
fig, ax = plot_embedding(
    X,
    dims=(0, 1, 2),
    labels=color,
    label_kind="continuous",
    cmap="viridis",
    s=20,
    title="Ground Truth: S-Curve Manifold",
)
# Fix the camera angle for the 3D view.
ax.view_init(elev=10, azim=-70)
plt.show()

# %%
# 2. Compare Embeddings
# ---------------------
# We will embed this 3D data into 2D using PCA and UMAP, then quantify
# the distortion.

# One reducer per method under comparison; both target a 2-component embedding.
reducers = dict(
    PCA=DimReduction("PCA", n_components=2),
    UMAP=DimReduction("UMAP", n_components=2, n_neighbors=15, min_dist=0.1),
)

# Maps method name -> {"embedding": <2D coordinates>, "scores": <score payload>}
results = {}

for name in reducers:
    print(f"Running {name}...")
    reducer = reducers[name]
    X_emb = reducer.fit_transform(X)

    # Score the embedding against the original data. The returned payload is
    # stored untouched and unpacked later, when the figure titles are built.
    scores = reducer.score(X_emb, X=X)

    results[name] = dict(embedding=X_emb, scores=scores)

# %%
# 3. Visualize and Quantify
# -------------------------
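# We plot the 2D embeddings side-by-side, each annotated with its quality
# scores.
#
# - **Trustworthiness**: High means neighbors in 2D are real neighbors in 3D
#   (No spurious clusters).
# - **Continuity**: High means 3D neighbors are preserved in 2D (No tearing).
# - **LCMC** (Local Continuity Meta-Criterion): Overlap between the
#   nearest-neighbor sets in 3D and in 2D, corrected for chance. Higher is
#   better.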

# One panel per benchmarked method, so the layout follows the contents of
# ``results`` instead of assuming exactly two entries. ``squeeze=False`` keeps
# the returned axes array 2-D even for a single panel; ``ravel()`` flattens it
# to a plain sequence of Axes. With two methods this is the same 14x6 figure.
n_panels = max(len(results), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(7 * n_panels, 6), squeeze=False)
axes = axes.ravel()

# A metric that is absent from the score payload is rendered as "nan" rather
# than a made-up 0.000, which would be indistinguishable from a real (and
# terrible) score.
missing = float("nan")

for ax, (name, res) in zip(axes, results.items()):
    X_emb = res["embedding"]
    scores = res["scores"]

    # Extract metrics from the structured payload
    m = scores.get("metrics", {})
    trust = m.get("trustworthiness", missing)
    cont = m.get("continuity", missing)
    lcmc = m.get("lcmc", missing)

    # Multi-line panel title: method name followed by one metric per line.
    title = "\n".join(
        [
            f"{name}",
            f"Trustworthiness: {trust:.3f}",
            f"Continuity: {cont:.3f}",
            f"LCMC: {lcmc:.3f}",
        ]
    )

    # Use the coco_pipe plotting function, drawing into the prepared panel
    plot_embedding(
        X_emb,
        labels=color,
        title=title,
        cmap="viridis",
        label_kind="continuous",
        s=20,
        alpha=0.7,
        ax=ax,
    )

    # Embedding coordinates carry no physical meaning, so hide the ticks.
    ax.axis("tight")
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

# %%
# Interpretation
# --------------
# - **PCA**: Should have high **Continuity** (it folds the S-curve onto itself,
#   keeping neighbors together) but lower **Trustworthiness** (distant points
#   overlap in the projection, creating false neighbors).
# - **UMAP**: Should have high **Trustworthiness** and **Continuity** as it
#   unrolls the manifold, preserving the local neighborhood structure without
#   creating false overlaps.
#
# This quantitative assessment is superior to simply saying "UMAP looks better."