Handle messy CSVs with encoding detection, delimiter inference, and malformed row recovery.
View on GitHub: majesticlabs-dev/majestic-marketplace
majestic-data
January 24, 2026
npx add-skill https://github.com/majesticlabs-dev/majestic-marketplace/blob/main/plugins/majestic-data/skills/csv-wrangler/SKILL.md -a claude-code --skill csv-wrangler

Installation path: .claude/skills/csv-wrangler/

# CSV-Wrangler
Patterns for handling real-world messy CSV files.
## Encoding Detection
```python
import chardet
import pandas as pd


def detect_encoding(file_path: str, sample_size: int = 10000) -> str | None:
    """Detect file encoding from a sample of raw bytes."""
    with open(file_path, 'rb') as f:
        raw = f.read(sample_size)
    result = chardet.detect(raw)
    return result['encoding']


def read_with_encoding(path: str) -> pd.DataFrame:
    """Read CSV with auto-detected encoding."""
    encoding = detect_encoding(path)
    # Common fallback chain (skip None if chardet could not decide)
    encodings = [enc for enc in (encoding, 'utf-8', 'latin-1', 'cp1252') if enc]
    for enc in encodings:
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeDecodeError:
            continue
    # Last resort: ignore undecodable bytes
    return pd.read_csv(path, encoding='utf-8', encoding_errors='ignore')
```
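A quick usage sketch; the path `data/export.csv` is an illustrative placeholder, not part of the skill:

```python
path = "data/export.csv"  # hypothetical path, for illustration only
print(detect_encoding(path))  # e.g. 'Windows-1252'
df = read_with_encoding(path)
print(df.shape)
```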
## Delimiter Detection
```python
import csv


def detect_delimiter(file_path: str) -> str:
    """Detect CSV delimiter from a file sample."""
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        sample = f.read(4096)
    sniffer = csv.Sniffer()
    try:
        dialect = sniffer.sniff(sample, delimiters=',;\t|')
        return dialect.delimiter
    except csv.Error:
        # Fall back to counting occurrences and picking the most common
        counts = {d: sample.count(d) for d in [',', ';', '\t', '|']}
        return max(counts, key=counts.get)


def read_with_delimiter_detection(path: str) -> pd.DataFrame:
    """Read CSV with auto-detected delimiter."""
    delimiter = detect_delimiter(path)
    return pd.read_csv(path, sep=delimiter)
```
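The encoding and delimiter helpers compose naturally. A minimal sketch, assuming both functions above are in scope; the `read_messy_csv` name is an illustrative choice, not part of the skill:

```python
def read_messy_csv(path: str) -> pd.DataFrame:
    """Sketch: detect encoding and delimiter, then load with pandas."""
    encoding = detect_encoding(path) or 'utf-8'
    delimiter = detect_delimiter(path)
    return pd.read_csv(path, encoding=encoding, sep=delimiter)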
## Handling Malformed Rows
```python
def read_with_error_handling(path: str) -> tuple[pd.DataFrame, list[dict]]:
    """Read CSV, capturing malformed rows separately."""
    good_rows = []
    bad_rows = []
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        reader = csv.reader(f)
        header = next(reader)
        expected_cols = len(header)
        for line_num, row in enumerate(reader, start=2):
            if len(row) == expected_cols:
                good_rows.append(row)
            else:
                # Keep the malformed row and its line number for later review
                bad_rows.append({'line': line_num, 'row': row})
    df = pd.DataFrame(good_rows, columns=header)
    return df, bad_rows
```
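Usage sketch, again with a hypothetical file path:

```python
df, bad = read_with_error_handling("data/export.csv")  # hypothetical path
print(f"recovered {len(df)} rows, {len(bad)} malformed")
for item in bad[:5]:
    print(item['line'], item['row'])  # inspect the first few rejected rows
```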