Skip to content
dev

Configuration Reference

Complete reference for all Aether configuration options.

Configuration Schema

yaml
services:
  torch:
    base_url: string
    username: string
    password: string
    extraction_timeout: duration  # default: PT30M
    polling_interval: duration    # default: PT5S
    max_polling_interval: duration # default: PT30S
    file_ready_retries: integer    # default: 10
    file_ready_interval: duration  # default: PT10S

  dimp:
    url: string
    bundle_split_threshold_mb: integer   # 1-100, default: 10

  flattening:
    service_url: string
    lookup_path: string
    formats: [string]                    # ["csv"]
    timeout: duration                    # default: PT30M
    batch_size_mb: integer               # default: 500

  send:
    send_as: string                      # "direct_resource_load", "transfer_load", or "s3_upload"
    url: string                          # required for FHIR modes; ignored for s3_upload
    batch_size: integer                  # 0-1000, default: 100 (direct_resource_load only)
    auth:                                # FHIR auth, or proxy auth in s3_upload mode
      username: string
      password: string
      oauth_issuer_uri: string
      oauth_client_id: string
      oauth_client_secret: string
    transfer:                            # transfer_load only
      project_identifier: string
      organization_identifier: string
    s3:                                  # s3_upload only
      bucket: string                     # required
      region: string                     # required
      access_key_id: string              # required
      secret_access_key: string          # required
      endpoint: string                   # custom S3-compatible endpoint
      use_path_style: boolean            # default: false
      timeout: duration                  # default: PT30M

  validation:
    url: string
    max_concurrent_requests: integer   # default: 4
    bundle_chunk_size_mb: integer      # default: 10
    fail_on_error: boolean             # default: true

  local_import:
    dir: string

  crtdl_preprocessing:
    enabled: boolean                       # default: false
    enrichments_path: string               # Path to external JSON file
    enrichments:                           # Inline enrichment rules
      - group_reference: string
        create_if_not_exists:              # Optional: create group if not in CRTDL
          group_name: string
        attributes_to_add:
          - attribute_ref: string
            must_have: boolean
            linked_groups: [string]        # Profile URLs, resolved to group IDs

pipeline:
  enabled_steps: [string]

retry:
  max_attempts: integer                  # 1-10, default: 5
  initial_backoff_ms: integer            # default: 1000
  max_backoff_ms: integer                # default: 30000

tls:
  ca_cert_path: string                   # PEM bundle of additional trusted certs
  insecure_skip_verify: boolean          # default: false

compression:
  enabled: boolean                       # default: true
  level: string                          # fastest, default, better, best

jobs_dir: string                         # default: ./jobs

Services

TORCH

TORCH server for FHIR data extraction.

yaml
services:
  torch:
    base_url: "https://torch.example.org"
    username: "${TORCH_USER}"
    password: "${TORCH_PASSWORD}"
    extraction_timeout: PT30M
    polling_interval: PT5S
    max_polling_interval: PT30S
| Option | Type | Default | Description |
|---|---|---|---|
| `base_url` | string | - | TORCH server URL (required if torch step enabled) |
| `username` | string | - | Authentication username |
| `password` | string | - | Authentication password |
| `extraction_timeout` | duration | PT30M | Max wait time for extraction. Also serves as the safety net for transient polling errors — polling retries until this timeout is exceeded. |
| `polling_interval` | duration | PT5S | Initial status check interval |
| `max_polling_interval` | duration | PT30S | Max interval (exponential backoff cap) |
| `file_ready_retries` | int | 10 | Number of retries for file availability check |
| `file_ready_interval` | duration | PT10S | Interval between file availability checks |

DIMP

DIMP pseudonymization service.

yaml
services:
  dimp:
    url: "http://dimp:32861"
    bundle_split_threshold_mb: 10
| Option | Type | Default | Description |
|---|---|---|---|
| `url` | string | - | DIMP server root URL (required if dimp step enabled). Do not include /fhir — the client appends it. |
| `bundle_split_threshold_mb` | int | 10 | Split Bundles larger than this (1-100 MB) |

Flattening

fhir-flattener service for FHIR to CSV transformation.

yaml
services:
  flattening:
    service_url: "http://fhir-flattener:8000"
    lookup_path: "/config/flatten-lookup.json"
    formats:
      - csv
    timeout: PT30M
| Option | Type | Default | Description |
|---|---|---|---|
| `service_url` | string | - | fhir-flattener service URL |
| `lookup_path` | string | - | Path to lookup table file |
| `formats` | []string | ["csv"] | Output formats |
| `timeout` | duration | PT30M | Request timeout |
| `batch_size_mb` | int | 500 | Total memory budget in MB, divided across attribute groups (0 = use default) |

Send

Destination server or object store for uploading processed data. Mode is selected via send_as.

Direct Resource Load

Upload FHIR resources directly to a FHIR server.

yaml
services:
  send:
    send_as: "direct_resource_load"
    url: "https://fhir-server.example.com"
    batch_size: 100
    auth:
      username: "${FHIR_USER}"
      password: "${FHIR_PASSWORD}"

Transfer Load

Package files for DSF-based transfer.

yaml
services:
  send:
    send_as: "transfer_load"
    url: "https://transfer.example.com"
    auth:
      oauth_issuer_uri: "${OAUTH_ISSUER}"
      oauth_client_id: "${OAUTH_CLIENT}"
      oauth_client_secret: "${OAUTH_SECRET}"
    transfer:
      project_identifier: "MII-PROJECT"
      organization_identifier: "your-org.example.de"

S3 Upload

Upload files to an S3-compatible bucket (AWS S3, MinIO, Ceph).

yaml
services:
  send:
    send_as: "s3_upload"
    s3:
      bucket: "${S3_BUCKET}"
      region: "eu-central-1"
      access_key_id: "${AWS_ACCESS_KEY_ID}"
      secret_access_key: "${AWS_SECRET_ACCESS_KEY}"
      # endpoint: "http://minio.example.com:9000"
      # use_path_style: true
      # timeout: PT30M
| Option | Type | Default | Description |
|---|---|---|---|
| `send_as` | string | - | direct_resource_load, transfer_load, or s3_upload |
| `url` | string | - | FHIR server root URL — required for FHIR modes, ignored for s3_upload. Do not include /fhir; the client appends it. |
| `batch_size` | int | 100 | Resources per transaction (direct_resource_load only, 0-1000) |

Authentication (choose one for FHIR modes):

| Option | Description |
|---|---|
| `auth.username` + `auth.password` | Basic authentication |
| `auth.oauth_issuer_uri` + `auth.oauth_client_id` + `auth.oauth_client_secret` | OAuth2 client credentials |

In s3_upload mode the auth block is optional and used only as upstream proxy authentication (basic auth via Proxy-Authorization); the S3 API itself is authenticated via s3.access_key_id / s3.secret_access_key.

Transfer settings (transfer_load mode only):

| Option | Description |
|---|---|
| `transfer.project_identifier` | MII project identifier |
| `transfer.organization_identifier` | Organization identifier |

S3 settings (s3_upload mode only):

| Option | Type | Default | Description |
|---|---|---|---|
| `s3.bucket` | string | - | Target bucket name (required) |
| `s3.region` | string | - | AWS region, e.g. eu-central-1 (required) |
| `s3.access_key_id` | string | - | S3 access key (required) |
| `s3.secret_access_key` | string | - | S3 secret key (required) |
| `s3.endpoint` | string | - | Custom endpoint URL (MinIO, Ceph, etc.). Leave empty for AWS S3. |
| `s3.use_path_style` | bool | false | Use path-style addressing (required for MinIO and many S3-compatible stores) |
| `s3.timeout` | duration | PT30M | Per-request timeout |

Validation

FHIR validation service for data quality checks.

yaml
services:
  validation:
    url: "http://validator:8080/fhir"
    max_concurrent_requests: 4
    bundle_chunk_size_mb: 10
    fail_on_error: true
| Option | Type | Default | Description |
|---|---|---|---|
| `url` | string | - | Validation service URL (required if validation step enabled) |
| `max_concurrent_requests` | int | 4 | Concurrent validation requests |
| `bundle_chunk_size_mb` | int | 10 | Bundle chunk size for batching resources (MB) |
| `fail_on_error` | bool | true | Stop pipeline when validation finds data quality errors |

When fail_on_error is true (default), the pipeline stops after the validation step completes with errors. When false, validation reports are written but the pipeline continues.

Local Import

Default directory for local FHIR imports.

yaml
services:
  local_import:
    dir: "/data/fhir"
| Option | Type | Description |
|---|---|---|
| `dir` | string | Default import directory (overridable with --dir flag) |

CRTDL Preprocessing

Enriches CRTDL documents with additional attributes before sending to TORCH. This is required when using DIMP pseudonymization, which needs certain identifier attributes (e.g., Patient.identifier) to be present in the CRTDL extraction query.

yaml
services:
  crtdl_preprocessing:
    enabled: true
    enrichments:
      - group_reference: "https://www.medizininformatik-initiative.de/fhir/core/modul-person/StructureDefinition/PatientPseudonymisiert"
        create_if_not_exists:
          group_name: "PatientPseudonymisiert"
        attributes_to_add:
          - attribute_ref: "Patient.identifier"
            must_have: false
| Option | Type | Default | Description |
|---|---|---|---|
| `enabled` | bool | false | Enable CRTDL preprocessing |
| `enrichments_path` | string | - | Path to external JSON enrichment file |
| `enrichments` | list | - | Inline enrichment rules (mutually exclusive with enrichments_path) |

Enrichment rule options:

| Option | Type | Description |
|---|---|---|
| `group_reference` | string | Profile URL of the CRTDL attribute group to enrich (required) |
| `create_if_not_exists.group_name` | string | If group is missing from CRTDL, create it with this name |
| `attributes_to_add[].attribute_ref` | string | FHIR attribute reference to add (required) |
| `attributes_to_add[].must_have` | bool | Whether the attribute is required for extraction |
| `attributes_to_add[].linked_groups` | []string | Profile URLs to resolve to group IDs for cross-references |

External JSON file format:

When using enrichments_path, the file uses camelCase field names:

json
[
  {
    "groupReference": "https://www.medizininformatik-initiative.de/fhir/core/modul-person/StructureDefinition/PatientPseudonymisiert",
    "createIfNotExists": {
      "groupName": "PatientPseudonymisiert"
    },
    "attributesToAdd": [
      {
        "attributeRef": "Patient.identifier",
        "mustHave": false
      }
    ]
  }
]

A shorter syntax is also supported for group creation:

json
{
  "groupReference": "https://example.org/fhir/StructureDefinition/Patient",
  "addGroupIfNotExists": true,
  "attributesToAdd": [
    {"attributeRef": "Patient.identifier", "mustHave": false}
  ]
}

When addGroupIfNotExists is true, the group name is automatically derived from the last segment of the profile URL (e.g., "Patient" from the URL above). Use createIfNotExists with an explicit groupName if you need a custom name.

Note: addGroupIfNotExists and createIfNotExists are mutually exclusive. Unknown fields in the JSON file will produce an error.

Pipeline

yaml
pipeline:
  enabled_steps:
    - local_import
    - dimp
    - flattening
| Option | Type | Default | Description |
|---|---|---|---|
| `enabled_steps` | []string | - | Pipeline steps to execute in order |

Available steps:

| Step | Description |
|---|---|
| `torch` | Import via TORCH (requires CRTDL) |
| `local_import` | Import from local directory |
| `http_import` | Import from HTTP URL |
| `dimp` | Pseudonymize via DIMP |
| `wait` | Pause for manual inspection |
| `flattening` | Transform to CSV (requires CRTDL) |
| `send` | Upload to destination server or object store |
| `validation` | Validate FHIR data against profiles |

Rules:

  • One import step must be first (torch, local_import, or http_import)
  • A wait step cannot be the first step, and two wait steps cannot be consecutive
  • Flattening requires CRTDL input

Retry

yaml
retry:
  max_attempts: 5
  initial_backoff_ms: 1000
  max_backoff_ms: 30000
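| Option | Type | Default | Range | Description |
|---|---|---|---|---|
| `max_attempts` | int | 5 | 1-10 | Max retry attempts for transient errors |
| `initial_backoff_ms` | int | 1000 | - | Initial backoff delay (milliseconds) |
| `max_backoff_ms` | int | 30000 | - | Max backoff delay (milliseconds) |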

Exponential backoff: wait = min(initial_backoff_ms * 2^attempt, max_backoff_ms)

TLS

Trust custom or internal certificates and, when needed, disable verification entirely. Applied to every outgoing HTTP client (TORCH, DIMP, validation, flattening, send, HTTP import).

yaml
tls:
  ca_cert_path: "/path/to/certs.pem"
  insecure_skip_verify: false
| Option | Type | Default | Description |
|---|---|---|---|
| `ca_cert_path` | string | - | PEM bundle of additional CA or server certificates to trust. System CAs remain trusted alongside these. Supports ${ENV} substitution. |
| `insecure_skip_verify` | bool | false | Skip TLS verification entirely. Development/testing only. |

Compression

yaml
compression:
  enabled: true
  level: "default"
| Option | Type | Default | Description |
|---|---|---|---|
| `enabled` | bool | true | Enable zstd compression |
| `level` | string | "default" | Compression level |

Compression levels:

| Level | Speed | Ratio | Use Case |
|---|---|---|---|
| fastest | ~500 MB/s | ~3-4x | Large datasets, CPU-constrained |
| default | ~200 MB/s | ~4-5x | Balanced (recommended) |
| better | ~100 MB/s | ~5-6x | Storage-constrained |
| best | ~50 MB/s | ~6-7x | Archival |

Output files use .ndjson.zst extension when enabled. Aether auto-detects and reads both compressed and uncompressed files.

Jobs Directory

yaml
jobs_dir: "./jobs"

Directory for job state and data files.

Environment Variables

All string values support environment variable substitution using the ${VAR_NAME} syntax:

yaml
services:
  torch:
    username: "${TORCH_USERNAME}"
    password: "${TORCH_PASSWORD}"
  send:
    url: "${FHIR_SERVER_URL}"

Example Configurations

TORCH + DIMP

yaml
services:
  torch:
    base_url: "https://torch.hospital.org"
    username: "${TORCH_USER}"
    password: "${TORCH_PASS}"
  dimp:
    url: "http://dimp:32861"

pipeline:
  enabled_steps:
    - torch
    - dimp

jobs_dir: "./jobs"

Local Import with Flattening

yaml
services:
  local_import:
    dir: "/data/fhir"
  dimp:
    url: "http://dimp:32861"
  flattening:
    service_url: "http://fhir-flattener:8000"
    lookup_path: "/config/lookup.json"

pipeline:
  enabled_steps:
    - local_import
    - dimp
    - flattening

compression:
  enabled: true
  level: "default"

jobs_dir: "./jobs"

Full Pipeline with Send

yaml
services:
  torch:
    base_url: "https://torch.hospital.org"
    username: "${TORCH_USER}"
    password: "${TORCH_PASS}"
  dimp:
    url: "http://dimp:32861"
  send:
    send_as: "transfer_load"
    url: "https://transfer.mii.de"
    auth:
      oauth_issuer_uri: "${OAUTH_ISSUER}"
      oauth_client_id: "${OAUTH_CLIENT}"
      oauth_client_secret: "${OAUTH_SECRET}"
    transfer:
      project_identifier: "MII-PROJECT"
      organization_identifier: "hospital.example.de"

pipeline:
  enabled_steps:
    - torch
    - dimp
    - send

retry:
  max_attempts: 5

compression:
  enabled: true

jobs_dir: "/data/aether/jobs"

Next Steps

Healthcare data integration made simple