Quick Start

Installation

pip install helix-ir

# Optional extras
pip install helix-ir[mongo]     # MongoDB source
pip install helix-ir[postgres]  # PostgreSQL source
pip install helix-ir[kafka]     # Kafka source

Hello World: Infer → Normalize → DDL

from helix_ir import infer, normalize, compile_ddl
from helix_ir.sources import JsonLinesSource

# Point at a nested source
source = JsonLinesSource("orders.jsonl")
docs = source.stream()

# Infer a typed schema
schema = infer(docs, name="orders", sample_size=2000, detect_pii=True)
print(schema)

# Decompose into a relational plan
plan = normalize(schema, strategy="1nf")
print(f"{len(plan.tables)} tables, {len(plan.foreign_keys)} foreign keys")

# Compile to Postgres DDL
ddl = compile_ddl(plan, dialect="postgres")
print(ddl.to_sql())

Transformation Pipeline

from helix_ir import col
from helix_ir.sources import JsonLinesSource
from helix_ir.transform import Table

orders = Table.from_source(JsonLinesSource("orders.jsonl"))
items  = Table.from_source(JsonLinesSource("items.jsonl"))

order_summaries = (
    orders
      .join(items, on="order_id", how="left")
      .with_columns(
          month      = col("created_at").date_trunc("month"),
          line_total = col("qty") * col("unit_price"),
      )
      .group_by("month")
      .agg(revenue=col("line_total").sum(), orders=col("order_id").count())
      .sort("month")
)

print(order_summaries.to_sql(dialect="postgres"))