Apache Iceberg
Apache Iceberg connector for writing to Iceberg tables.
Connection Profile
- catalog (required): Catalog configuration. One of:
  - REST catalog (type: "rest"):
    - url (string, required): Base URL for the REST catalog
    - warehouse (string, optional): The warehouse to connect to
    - token (string, optional): Authentication token
  - Glue catalog (type: "glue"):
    - glue_catalog_config (required):
      - region (string, required): AWS region
      - warehouse (string, optional): S3 warehouse path (e.g., s3://bucket/warehouse)
      - access_key_id (string, optional): AWS access key ID
      - secret_access_key (string, optional): AWS secret access key
      - endpoint (string, optional): Custom AWS endpoint
Connection Table
- type (string, required): "sink"
- sink_table_config (required): Sink table configuration
  - table_name (string, required): Table name
  - namespace (string, optional): Table namespace
  - location_path (string, optional): Data file location
  - rolling_policy (object, optional): File rolling policy
    - file_size_bytes (integer, optional): Files are rolled after reaching this size in bytes
    - interval_seconds (integer, optional): Seconds to wait before rolling to a new file
    - inactivity_seconds (integer, optional): Seconds of inactivity before rolling to a new file
  - file_naming (object, optional): Filename prefix/suffix and strategy
    - prefix (string, optional): Filename prefix
    - suffix (string, optional): Filename suffix (overrides the default .parquet)
    - strategy (string, optional): "serial" | "uuid" | "uuidV7" | "ulid"
  - multipart (object, optional): Multipart upload tuning for object stores
    - target_part_size_bytes (integer, optional): Target size per part (min 5242880)
    - max_parts (integer, optional): Max parts in a multipart upload
  - partitioning (object, optional): Data partitioning configuration
    - time_pattern (string, optional): Pattern of the date string
    - fields (array, optional): Partition field configurations
      - name (string, required): Field to partition by
      - transform (string, optional): "identity" | "hour" | "year" | "month"
    - shuffle_by_partition (object, required): Partition shuffle settings
      - enabled (boolean, optional): Enable partition key shuffling
  - storage_options (object, optional): See the FileSystem connector for the full list of options
JSON Schema Reference
Connection Profile Schema
{
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "catalog": {
      "oneOf": [
        {
          "type": "object",
          "description": "REST catalog connector for Apache Iceberg.",
          "additionalProperties": false,
          "properties": {
            "type": {
              "type": "string",
              "const": "rest"
            },
            "url": {
              "type": "string",
              "format": "uri",
              "description": "Base URL for the REST catalog",
              "examples": [
                "http://localhost:8001/iceberg"
              ]
            },
            "warehouse": {
              "type": "string",
              "description": "The Warehouse to connect to",
              "examples": [
                "16ba210d70caae96ecb1f6e17afe6f3b_my-bucket"
              ]
            },
            "token": {
              "type": "string",
              "format": "var-str",
              "description": "Authentication token"
            }
          },
          "required": [
            "type",
            "url"
          ]
        },
        {
          "type": "object",
          "description": "AWS Glue catalog connector for Apache Iceberg.",
          "additionalProperties": false,
          "properties": {
            "type": {
              "type": "string",
              "const": "glue"
            },
            "glue_catalog_config": {
              "type": "object",
              "properties": {
                "region": {
                  "type": "string",
                  "description": "AWS region"
                },
                "warehouse": {
                  "type": "string",
                  "description": "S3 warehouse path",
                  "examples": [
                    "s3://my-bucket/warehouse"
                  ]
                },
                "access_key_id": {
                  "type": "string",
                  "description": "AWS access key ID"
                },
                "secret_access_key": {
                  "type": "string",
                  "description": "AWS secret access key"
                },
                "endpoint": {
                  "type": "string",
                  "description": "Custom AWS endpoint"
                }
              },
              "required": [
                "region"
              ]
            }
          },
          "required": [
            "type",
            "glue_catalog_config"
          ]
        }
      ]
    }
  },
  "required": [
    "catalog"
  ]
}
Connection Table Schema
{
  "type": "object",
  "description": "Iceberg sink definition.",
  "additionalProperties": false,
  "properties": {
    "type": {
      "type": "string",
      "const": "sink"
    },
    "sink_table_config": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "table_name": {
          "type": "string",
          "title": "Table Name",
          "description": "Table name"
        },
        "namespace": {
          "type": "string",
          "title": "Namespace",
          "description": "Table namespace"
        },
        "location_path": {
          "type": "string",
          "title": "Location Path",
          "description": "Data file location"
        },
        "storage_options": {
          "type": "object",
          "title": "Storage Options",
          "description": "See the FileSystem connector docs for the full list of options",
          "additionalProperties": {
            "type": "string"
          }
        },
        "rolling_policy": {
          "type": "object",
          "title": "File Rolling Policy",
          "description": "Rolling policy for file sinks (when & why to close a file and open a new one).",
          "additionalProperties": false,
          "properties": {
            "file_size_bytes": {
              "type": "integer",
              "title": "File Size",
              "description": "Files will be rolled after reaching this number of bytes",
              "format": "uint64",
              "minimum": 0
            },
            "interval_seconds": {
              "type": "integer",
              "title": "Interval Seconds",
              "description": "Number of seconds to wait before rolling over to a new file",
              "format": "uint64",
              "minimum": 1
            },
            "inactivity_seconds": {
              "type": "integer",
              "title": "Inactivity Seconds",
              "description": "Number of seconds of inactivity to wait before rolling over to a new file",
              "format": "uint64",
              "minimum": 1
            }
          }
        },
        "file_naming": {
          "type": "object",
          "title": "File Naming",
          "description": "Controls filename prefix/suffix and strategy.",
          "additionalProperties": false,
          "properties": {
            "prefix": {
              "type": "string",
              "title": "Filename Prefix",
              "description": "The prefix to use in file name. i.e prefix-<uuid>.parquet"
            },
            "suffix": {
              "type": "string",
              "title": "Filename Suffix",
              "description": "This will overwrite the default file suffix. i.e .parquet, use with caution"
            },
            "strategy": {
              "type": "string",
              "title": "Filename Strategy",
              "description": "Filename generation strategy.",
              "enum": [
                "serial",
                "uuid",
                "uuidV7",
                "ulid"
              ]
            }
          }
        },
        "multipart": {
          "type": "object",
          "title": "Multipart Upload Settings",
          "description": "Multipart‑upload tuning for object stores that need it.",
          "additionalProperties": false,
          "properties": {
            "target_part_size_bytes": {
              "type": "integer",
              "title": "Target Part Size",
              "description": "Target size for each part of the multipart upload, in bytes",
              "format": "uint64",
              "minimum": 5242880
            },
            "max_parts": {
              "type": "integer",
              "title": "Max Parts",
              "description": "Maximum number of parts to upload in a multipart upload",
              "format": "uint64",
              "minimum": 1
            }
          }
        },
        "partitioning": {
          "type": "object",
          "description": "Data‑layout partitioning for sinks.",
          "additionalProperties": false,
          "properties": {
            "time_pattern": {
              "type": "string",
              "title": "Time Partition Pattern",
              "description": "The pattern of the date string"
            },
            "fields": {
              "type": "array",
              "title": "Partition Fields",
              "description": "Fields to partition the data by with transformations",
              "items": {
                "type": "object",
                "title": "Partition Field",
                "description": "Partition field configuration",
                "additionalProperties": false,
                "properties": {
                  "name": {
                    "type": "string",
                    "title": "Field Name",
                    "description": "The field to partition by"
                  },
                  "transform": {
                    "type": "string",
                    "title": "Transform",
                    "description": "Transformation to apply (identity, hour, year, month)",
                    "enum": [
                      "identity",
                      "hour",
                      "year",
                      "month"
                    ]
                  }
                },
                "required": [
                  "name"
                ]
              }
            },
            "shuffle_by_partition": {
              "type": "object",
              "title": "Partition shuffle settings",
              "description": "Advanced tuning for hash shuffling of partition keys",
              "additionalProperties": false,
              "properties": {
                "enabled": {
                  "type": "boolean",
                  "title": "Enable partition shuffling",
                  "description": "If enabled, we will shuffle by the partition keys, which can reduce the number of files a sink produces; however this may cause backlog if data is skewed"
                }
              }
            }
          },
          "required": [
            "shuffle_by_partition"
          ]
        }
      },
      "required": [
        "table_name"
      ]
    }
  },
  "required": [
    "type",
    "sink_table_config"
  ]
}