"""Helix IR command-line interface."""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Optional
import typer
from rich.console import Console
from rich.table import Table
from rich.text import Text
# Root Typer application; every ``@app.command`` below registers on it.
app = typer.Typer(
    add_completion=False,
    help="Helix IR — Schema inference, normalization, DDL generation.",
    name="helix-ir",
)

# Diagnostics go to stderr so they never mix with payload written to stdout.
err_console = Console(stderr=True)
# Regular (stdout) console used for tables, status lines and generated output.
console = Console()
# -------------------------------------------------------------------------
# infer command
# -------------------------------------------------------------------------
@app.command("infer")
def cmd_infer(
    input_file: Path = typer.Argument(..., help="Path to JSON/NDJSON/Parquet file"),
    name: str = typer.Option("inferred", "--name", "-n", help="Schema name"),
    sample_size: int = typer.Option(2000, "--sample-size", "-s"),
    seed: Optional[int] = typer.Option(None, "--seed"),
    no_pii: bool = typer.Option(False, "--no-pii", help="Disable PII detection"),
    locale: str = typer.Option("in", "--locale", "-l", help="PII locale: in/us/eu/all"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Write schema JSON here"),
    dialect: str = typer.Option("duckdb", "--dialect", "-d", help="DDL dialect for preview"),
) -> None:
    """Infer a schema from a JSON/NDJSON/Parquet file."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: pick a source by file extension (".parquet" -> ParquetSource,
    # anything else -> JSONSource), run `infer` over `source.read()`, print
    # the resulting schema as a table and, when --output is given, write
    # `schema.to_json()` to that path as indented JSON.
    #
    # Exits with status 1 if building the source or inferring the schema
    # raises.
    #
    # NOTE(review): `dialect` is accepted but not used anywhere in this
    # command; it is kept so the CLI interface stays unchanged.
    from rich.markup import escape

    from helix_ir.infer import infer
    from helix_ir.sources.json_source import JSONSource
    from helix_ir.sources.parquet_source import ParquetSource

    # User-controlled text (paths, exception messages) is escaped before being
    # embedded in a Rich markup string; otherwise "[...]" sequences would be
    # parsed as style tags and silently dropped or raise MarkupError.
    console.print(f"[bold cyan]Inferring schema from[/] {escape(str(input_file))} ...")
    try:
        # Source construction is inside the try so a failure there is
        # reported like any other inference error instead of a traceback.
        if input_file.suffix.lower() == ".parquet":
            source = ParquetSource(str(input_file))
        else:
            source = JSONSource(str(input_file))
        schema = infer(
            source.read(),
            name=name,
            sample_size=sample_size,
            seed=seed,
            detect_pii=not no_pii,
            pii_locale=locale,
        )
    except Exception as e:
        err_console.print(f"[bold red]Error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    _print_schema_table(schema)

    if output:
        schema_json = schema.to_json()
        # Explicit encoding keeps the file independent of the platform locale.
        output.write_text(json.dumps(schema_json, indent=2), encoding="utf-8")
        console.print(f"\n[green]Schema written to[/] {escape(str(output))}")
# -------------------------------------------------------------------------
# ddl command
# -------------------------------------------------------------------------
@app.command("ddl")
def cmd_ddl(
    schema_file: Path = typer.Argument(..., help="Path to schema JSON file"),
    dialect: str = typer.Option("duckdb", "--dialect", "-d"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
    if_not_exists: bool = typer.Option(True, "--if-not-exists/--no-if-not-exists"),
) -> None:
    """Generate DDL SQL from a schema JSON file."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: load the schema JSON, compile it with `compile_ddl` for the
    # requested dialect, then either write the SQL to --output or print it to
    # stdout. Exits with status 1 if loading or compiling raises.
    from rich.markup import escape

    from helix_ir.ddl import DDLOptions, compile_ddl
    from helix_ir.schema.schema import Schema

    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        data = json.loads(schema_file.read_text(encoding="utf-8"))
        schema = Schema.from_json(data)
    except Exception as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        err_console.print(f"[bold red]Error loading schema:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    opts = DDLOptions(if_not_exists=if_not_exists)
    try:
        script = compile_ddl(schema, dialect=dialect, options=opts)
    except Exception as e:
        err_console.print(f"[bold red]DDL error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    sql = script.to_sql()
    if output:
        output.write_text(sql, encoding="utf-8")
        console.print(f"[green]DDL written to[/] {escape(str(output))}")
    else:
        # Emit the SQL verbatim: no markup parsing (bracketed identifiers
        # would be swallowed), no highlighting, and no wrapping at the
        # console width (which would insert newlines when piped to a file).
        console.print(sql, markup=False, highlight=False, soft_wrap=True)
# -------------------------------------------------------------------------
# normalize command
# -------------------------------------------------------------------------
@app.command("normalize")
def cmd_normalize(
    schema_file: Path = typer.Argument(..., help="Path to schema JSON file"),
    strategy: str = typer.Option("1nf", "--strategy", "-s", help="1nf/mongo/inline_small"),
    dialect: str = typer.Option("duckdb", "--dialect", "-d"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
) -> None:
    """Normalize a schema and generate multi-table DDL."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: load the schema JSON, build a normalization plan with the chosen
    # strategy, report the plan's table names and foreign-key count, compile
    # the plan to DDL and write it to --output or stdout. Exits with status 1
    # if loading, normalizing or compiling raises.
    from rich.markup import escape

    from helix_ir.ddl import DDLOptions, compile_ddl
    from helix_ir.normalize import normalize
    from helix_ir.schema.schema import Schema

    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        data = json.loads(schema_file.read_text(encoding="utf-8"))
        schema = Schema.from_json(data)
    except Exception as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        err_console.print(f"[bold red]Error loading schema:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    # `strategy` is free-form user input, so report a failure here the same
    # way as the load/compile steps instead of dumping a traceback.
    try:
        plan = normalize(schema, strategy=strategy)
    except Exception as e:
        err_console.print(f"[bold red]Normalization error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    table_list = ", ".join(plan.table_names())
    console.print(f"[bold]Tables:[/] {escape(table_list)}")
    console.print(f"[bold]Foreign keys:[/] {len(plan.foreign_keys)}")

    opts = DDLOptions()
    try:
        script = compile_ddl(plan, dialect=dialect, options=opts)
    except Exception as e:
        err_console.print(f"[bold red]DDL error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    sql = script.to_sql()
    if output:
        output.write_text(sql, encoding="utf-8")
        console.print(f"[green]DDL written to[/] {escape(str(output))}")
    else:
        # Emit the SQL verbatim: no markup parsing, highlighting, or wrapping
        # at the console width.
        console.print(sql, markup=False, highlight=False, soft_wrap=True)
# -------------------------------------------------------------------------
# diff command
# -------------------------------------------------------------------------
@app.command("diff")
def cmd_diff(
    old_schema: Path = typer.Argument(..., help="Path to old schema JSON"),
    new_schema: Path = typer.Argument(..., help="Path to new schema JSON"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
    show_safe: bool = typer.Option(True, "--safe/--no-safe"),
) -> None:
    """Diff two schemas and display changes."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: load both schema JSON files, diff them, render one table row per
    # change (rows with severity "safe" are skipped under --no-safe), print
    # the per-severity summary, and exit with status 1 when the diff reports
    # breaking changes. Also exits with status 1 if either schema fails to
    # load.
    #
    # NOTE(review): `output` is accepted but never used by this command; it
    # is kept so the CLI interface stays unchanged.
    from rich.markup import escape

    from helix_ir.diff import diff
    from helix_ir.schema.schema import Schema

    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        old = Schema.from_json(json.loads(old_schema.read_text(encoding="utf-8")))
        new = Schema.from_json(json.loads(new_schema.read_text(encoding="utf-8")))
    except Exception as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        err_console.print(f"[bold red]Error loading schemas:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    schema_diff = diff(old, new)
    if not schema_diff:
        console.print("[green]No changes detected.[/]")
        return

    # Plain-string titles and cells are parsed as Rich markup, so anything
    # derived from the schemas is escaped; otherwise text such as
    # "list[int64]" would lose its bracketed part.
    tbl = Table(title=escape(f"Schema Diff: {old.name} → {new.name}"))
    tbl.add_column("Path", style="cyan")
    tbl.add_column("Kind")
    tbl.add_column("Severity")
    tbl.add_column("Description")

    severity_colors = {"safe": "green", "risky": "yellow", "breaking": "red"}
    for change in schema_diff.changes:
        if not show_safe and change.severity == "safe":
            continue
        color = severity_colors.get(change.severity, "white")
        tbl.add_row(
            escape(str(change.path)),
            escape(change.kind),
            Text(change.severity.upper(), style=f"bold {color}"),
            escape(change.description),
        )
    console.print(tbl)

    summary = schema_diff.summary()
    console.print(
        f"[green]{summary['safe']} safe[/], "
        f"[yellow]{summary['risky']} risky[/], "
        f"[red]{summary['breaking']} breaking[/]"
    )

    # A non-zero exit status lets scripts gate on breaking changes.
    if schema_diff.has_breaking_changes:
        raise typer.Exit(1)
# -------------------------------------------------------------------------
# lineage command
# -------------------------------------------------------------------------
@app.command("lineage")
def cmd_lineage(
    schema_file: Path = typer.Argument(..., help="Path to schema JSON"),
    format: str = typer.Option("dot", "--format", "-f", help="dot/openlineage"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
    strategy: str = typer.Option("1nf", "--strategy", "-s"),
) -> None:
    """Generate lineage graph from schema normalization."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: load the schema JSON, normalize it with the chosen strategy, then
    # render `plan.lineage` either via `to_dot()` ("dot") or as indented JSON
    # of `to_openlineage()` ("openlineage"). The result goes to --output or
    # stdout. Exits with status 1 on a load/normalize failure or an unknown
    # format.
    #
    # `format` shadows the builtin, but the parameter name is part of the
    # command's interface and is therefore left unchanged.
    from rich.markup import escape

    from helix_ir.normalize import normalize
    from helix_ir.schema.schema import Schema

    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        data = json.loads(schema_file.read_text(encoding="utf-8"))
        schema = Schema.from_json(data)
    except Exception as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        err_console.print(f"[bold red]Error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    # `strategy` is free-form user input; report a failure cleanly instead of
    # dumping a traceback.
    try:
        plan = normalize(schema, strategy=strategy)
    except Exception as e:
        err_console.print(f"[bold red]Error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    if format == "dot":
        result = plan.lineage.to_dot()
    elif format == "openlineage":
        result = json.dumps(plan.lineage.to_openlineage(), indent=2)
    else:
        err_console.print(f"[bold red]Unknown format:[/] {escape(format)}")
        raise typer.Exit(1)

    if output:
        output.write_text(result, encoding="utf-8")
        console.print(f"[green]Lineage written to[/] {escape(str(output))}")
    else:
        # The payload must reach stdout verbatim. With markup enabled Rich
        # would treat bracketed spans (e.g. DOT attribute lists such as
        # "[shape=box]") as style tags and drop them, and without soft_wrap
        # it would break long lines at the console width.
        console.print(result, markup=False, highlight=False, soft_wrap=True)
# -------------------------------------------------------------------------
# test command
# -------------------------------------------------------------------------
@app.command("test")
def cmd_test(
    schema_file: Path = typer.Argument(..., help="Path to schema JSON"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
    sensitivity: float = typer.Option(1.5, "--sensitivity"),
) -> None:
    """Generate data quality tests from a schema."""
    # NOTE: the docstring above is the command's --help text; keep it short.
    #
    # Flow: load the schema JSON, call `generate_tests` with the given
    # sensitivity, print a count and a table of the generated tests, and, when
    # --output is given, write them to that path as an indented JSON list.
    # Exits with status 1 if the schema fails to load.
    from rich.markup import escape

    from helix_ir.schema.schema import Schema
    from helix_ir.test.generator import generate_tests

    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        data = json.loads(schema_file.read_text(encoding="utf-8"))
        schema = Schema.from_json(data)
    except Exception as e:
        # Escape so brackets in the message are not parsed as Rich markup.
        err_console.print(f"[bold red]Error:[/] {escape(str(e))}")
        raise typer.Exit(1) from e

    tests = generate_tests(schema, sensitivity=sensitivity)
    console.print(f"[bold]Generated {len(tests)} tests[/]")

    tbl = Table(title="Generated Tests")
    tbl.add_column("Name", style="cyan")
    tbl.add_column("Kind")
    tbl.add_column("Severity")
    tbl.add_column("Description")
    for test in tests:
        # Plain-string cells are parsed as Rich markup; escape them so
        # bracketed text in names/descriptions is shown literally.
        tbl.add_row(
            escape(test.name),
            escape(test.kind),
            escape(test.severity),
            escape(test.description),
        )
    console.print(tbl)

    if output:
        test_data = [
            {
                "name": t.name,
                "path": str(t.path),
                "kind": t.kind,
                "severity": t.severity,
                "description": t.description,
                "sql_template": t.sql_template,
                "metadata": t.metadata,
            }
            for t in tests
        ]
        output.write_text(json.dumps(test_data, indent=2), encoding="utf-8")
        console.print(f"[green]Tests written to[/] {escape(str(output))}")
# -------------------------------------------------------------------------
# Helpers
# -------------------------------------------------------------------------
def _print_schema_table(schema: "Schema") -> None:
    """Print a schema as a rich table.

    One row is emitted per ``(name, type)`` pair yielded by ``schema.fields``,
    with columns for the field name, ``arrow_type``, null ratio (as a
    percentage), semantic label, PII class (bold red when set) and
    cardinality estimate (blank when falsy).

    Args:
        schema: Object exposing ``name`` and an iterable ``fields`` of
            ``(field_name, type_info)`` pairs.
    """
    from rich.markup import escape

    # Plain strings given to a Table (title and cells) are parsed as Rich
    # markup. Schema-derived text is escaped so bracketed parts are shown
    # literally -- e.g. a type rendered as "timestamp[us]" would otherwise
    # lose its "[us]" suffix (assumes arrow_type stringifies like a pyarrow
    # type -- TODO confirm).
    tbl = Table(title=escape(f"Schema: {schema.name}"))
    tbl.add_column("Field", style="cyan")
    tbl.add_column("Type")
    tbl.add_column("Null%")
    tbl.add_column("Semantic")
    tbl.add_column("PII")
    tbl.add_column("Cardinality")

    for fname, ht in schema.fields:
        tbl.add_row(
            escape(fname),
            escape(str(ht.arrow_type)),
            f"{ht.null_ratio:.1%}",
            escape(ht.semantic or ""),
            # Text instances are rendered as-is (no markup parsing).
            Text(ht.pii_class, style="bold red") if ht.pii_class else "",
            str(ht.cardinality_estimate) if ht.cardinality_estimate else "",
        )
    console.print(tbl)
# Script entry point: run the Typer application when executed directly.
if __name__ == "__main__":
    app()