Quick Start
Installation
pip install helix-ir
# Optional extras -- quoted so shells that glob on [...] (e.g. zsh) pass the name through intact
pip install "helix-ir[mongo]"     # MongoDB source
pip install "helix-ir[postgres]"  # PostgreSQL source
pip install "helix-ir[kafka]"     # Kafka source
Hello World: Infer → Normalize → DDL
from helix_ir import infer, normalize, compile_ddl
from helix_ir.sources import JsonLinesSource
# Stream documents out of a JSON Lines file
docs = JsonLinesSource("orders.jsonl").stream()

# Infer a schema named "orders" from the stream
schema = infer(
    docs,
    name="orders",
    sample_size=2000,
    detect_pii=True,
)
print(schema)

# Normalize the inferred schema into a relational plan ("1nf" strategy)
plan = normalize(schema, strategy="1nf")
print(f"{len(plan.tables)} tables, {len(plan.foreign_keys)} foreign keys")

# Compile the plan for the Postgres dialect and print the resulting SQL
ddl = compile_ddl(plan, dialect="postgres")
sql_text = ddl.to_sql()
print(sql_text)
Transformation Pipeline
from helix_ir import col
from helix_ir.sources import JsonLinesSource
from helix_ir.transform import Table
# Wrap each JSON Lines file in a Table
orders = Table.from_source(JsonLinesSource("orders.jsonl"))
items = Table.from_source(JsonLinesSource("items.jsonl"))

# Step 1: left-join items onto orders using the shared "order_id" column
joined = orders.join(items, on="order_id", how="left")

# Step 2: add a month bucket and a per-row line total
enriched = joined.with_columns(
    month=col("created_at").date_trunc("month"),
    line_total=col("qty") * col("unit_price"),
)

# Step 3: aggregate per month. The `orders=` keyword below only names the
# output column; it is unrelated to the `orders` table defined above.
# NOTE(review): this runs after the join, so count() on "order_id" may be
# counting joined rows (one per item) rather than distinct orders -- confirm
# against helix_ir's count() semantics.
monthly = enriched.group_by("month").agg(
    revenue=col("line_total").sum(),
    orders=col("order_id").count(),
)

# Step 4: order the result by month and show the SQL generated for Postgres
order_summaries = monthly.sort("month")
print(order_summaries.to_sql(dialect="postgres"))