Pytest templates and patterns for ETL pipeline testing - unit, integration, data quality.
View on GitHub: majesticlabs-dev/majestic-marketplace
majestic-data
January 24, 2026
Select agents to install to:
npx add-skill https://github.com/majesticlabs-dev/majestic-marketplace/blob/main/plugins/majestic-data/skills/testing-patterns/SKILL.md -a claude-code --skill testing-patterns
Installation paths:
.claude/skills/testing-patterns/
# Testing Patterns
Pytest templates for comprehensive ETL pipeline testing.
## Unit Tests - Transform Functions
```python
# tests/test_transforms.py
import pytest
import pandas as pd
from pipeline.transforms import clean_email, calculate_total, categorize_customer
class TestCleanEmail:
    """Unit tests for the ``clean_email`` transform."""

    def test_lowercase(self):
        """Mixed-case input comes back fully lower-cased."""
        cleaned = clean_email("John@Example.COM")
        assert cleaned == "john@example.com"

    def test_strip_whitespace(self):
        """Surrounding whitespace is removed from the address."""
        cleaned = clean_email(" john@example.com ")
        assert cleaned == "john@example.com"

    def test_invalid_returns_none(self):
        """A string that is not an email address yields ``None``."""
        cleaned = clean_email("not-an-email")
        assert cleaned is None

    def test_null_input(self):
        """``None`` passes through as ``None`` rather than raising."""
        cleaned = clean_email(None)
        assert cleaned is None
class TestCalculateTotal:
    """Unit tests for the ``calculate_total`` transform."""

    @pytest.fixture
    def order_items(self):
        """Line items for two orders: order 1 has two lines, order 2 has one."""
        return pd.DataFrame({
            'order_id': [1, 1, 2],
            'quantity': [2, 3, 1],
            'unit_price': [10.0, 5.0, 100.0],
        })

    def test_sums_correctly(self, order_items):
        """Every order in the fixture gets the expected total.

        Expected values are quantity * unit_price summed per order:
        order 1 -> 2*10.0 + 3*5.0 = 35.0, order 2 -> 1*100.0 = 100.0.
        """
        result = calculate_total(order_items)

        for order_id, expected in [(1, 35.0), (2, 100.0)]:
            matches = result.loc[result['order_id'] == order_id, 'total']
            # Fail with a readable message instead of an IndexError when the
            # order is missing from the output altogether.
            assert not matches.empty, f"no total produced for order {order_id}"
            # Totals are floats; compare with a tolerance, not exact equality.
            assert matches.iloc[0] == pytest.approx(expected)

    def test_handles_empty(self):
        """An input frame with the right columns but no rows yields no rows."""
        empty = pd.DataFrame(columns=['order_id', 'quantity', 'unit_price'])
        result = calculate_total(empty)
        assert len(result) == 0
class TestCategorizeCustomer:
    """Unit tests for the ``categorize_customer`` tier assignment."""

    # (total_spent, expected tier) pairs; the values sit on both edges of
    # each tier so an off-by-one in a threshold is caught.
    TIER_CASES = [
        (0, 'bronze'),
        (99, 'bronze'),
        (100, 'silver'),
        (999, 'silver'),
        (1000, 'gold'),
        (9999, 'gold'),
        (10000, 'platinum'),
    ]

    @pytest.mark.parametrize("total_spent,expected", TIER_CASES)
    def test_tiers(self, total_spent, expected):
        """Each spend amount maps to its expected tier name."""
        tier = categorize_customer(total_spent)
        assert tier == expected
```
## Integration Tests - Full Pipeline
```python
# tests/test_pipeline.py
import pytest
from pipeline import OrdersPipeline
from tests.fixtures import generate_orders_fixture
class TestOrdersPipeline:
@pytest.fixture
def pipeline(self, tmp_path):
return OrdersPipeline(
source_path=tmp_path / "source",
target_path=tmp_path