FileSystem
Read or write to a filesystem or object store like S3.
Connection Profile
No connection profile needed.
Connection Table
tableType (required): One of:
- Source:
  - path (string, required): URI of the folder to read from
  - compressionFormat (string, required): "none" | "zstd" | "gzip"
  - regexPattern (string, optional): Regex pattern for files to include
  - storageOptions (object, optional): Storage configuration options
- Sink:
  - path (string, required): URI of the folder to write to
  - storageOptions (object, optional): Storage configuration options
  - rollingPolicy (object, optional): File rolling policy
    - fileSizeBytes (integer, optional): Files are rolled after reaching this size, in bytes
    - intervalSeconds (integer, optional): Seconds to wait before rolling to a new file
    - inactivitySeconds (integer, optional): Seconds of inactivity before rolling to a new file
  - fileNaming (object, optional): Filename prefix/suffix and strategy
    - prefix (string, optional): Filename prefix
    - suffix (string, optional): Filename suffix
    - strategy (string, optional): "serial" | "uuid" | "uuidV7" | "ulid"
  - partitioning (object, optional): Data partitioning configuration
    - timePattern (string, optional): Pattern of the date string
    - fields (array, optional): Partition field configurations
      - name (string, required): Field to partition by
      - transform (string, optional): "identity" | "hour" | "year" | "month"
    - shuffleByPartition (object, required): Partition shuffle settings
      - enabled (boolean, optional): Enable partition key shuffling
  - multipart (object, optional): Multipart upload tuning for object stores
    - targetPartSizeBytes (integer, optional): Target size per part, in bytes (min 5242880)
    - maxParts (integer, optional): Max parts in a multipart upload
- Source:
JSON Schema Reference
Connection Table Schema
{
"type": "object",
"additionalProperties": false,
"properties": {
"tableType": {
"title": "Table Type",
"oneOf": [
{
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string",
"title": "Path",
"description": "URI of the folder to read from"
},
"compressionFormat": {
"type": "string",
"enum": ["none", "zstd", "gzip"],
"title": "Compression format",
"description": "Compression format of the files in the source path"
},
"regexPattern": {
"type": "string",
"title": "File Regex Pattern",
"description": "Regex pattern for files to include in source. Will search everything under the source path."
},
"storageOptions": {
"type": "object",
"additionalProperties": {"type": "string"},
"title": "Storage Options",
"description": "See the FileSystem connector docs for the full list of options"
}
},
"required": ["path", "compressionFormat"],
"title": "Source"
},
{
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string",
"title": "Path",
"description": "URI of the folder to write to"
},
"storageOptions": {
"type": "object",
"additionalProperties": {"type": "string"},
"title": "Storage Options",
"description": "See the FileSystem connector docs for the full list of options"
},
"rollingPolicy": {
"type": "object",
"additionalProperties": false,
"properties": {
"fileSizeBytes": {
"type": "integer",
"format": "uint64",
"minimum": 0,
"title": "File Size",
"description": "Files will be rolled after reaching this number of bytes"
},
"intervalSeconds": {
"type": "integer",
"format": "uint64",
"minimum": 1,
"title": "Interval Seconds",
"description": "Number of seconds to wait before rolling over to a new file"
},
"inactivitySeconds": {
"type": "integer",
"format": "uint64",
"minimum": 1,
"title": "Inactivity Seconds",
"description": "Number of seconds of inactivity to wait before rolling over to a new file"
}
},
"title": "File Rolling Policy",
"description": "Rolling policy for file sinks (when & why to close a file and open a new one)."
},
"fileNaming": {
"type": "object",
"additionalProperties": false,
"properties": {
"prefix": {
"type": "string",
"title": "Filename Prefix",
"description": "The prefix to use in the file name, e.g. prefix-<uuid>.parquet"
},
"suffix": {
"type": "string",
"title": "Filename Suffix",
"description": "This will overwrite the default file suffix (e.g. .parquet); use with caution"
},
"strategy": {
"type": "string",
"enum": ["serial", "uuid", "uuidV7", "ulid"],
"title": "Filename Strategy",
"description": "Filename generation strategy."
}
},
"title": "File Naming",
"description": "Controls filename prefix/suffix and strategy."
},
"partitioning": {
"type": "object",
"additionalProperties": false,
"properties": {
"timePattern": {
"type": "string",
"title": "Time Partition Pattern",
"description": "The pattern of the date string"
},
"fields": {
"type": "array",
"items": {
"title": "Partition Field",
"description": "Partition field configuration",
"type": "object",
"properties": {
"name": {
"title": "Field Name",
"description": "The field to partition by",
"type": "string"
},
"transform": {
"title": "Transform",
"description": "Transformation to apply (identity, hour, year, month)",
"type": "string",
"enum": ["identity", "hour", "year", "month"]
}
},
"additionalProperties": false,
"required": ["name"]
},
"title": "Partition Fields",
"description": "Fields to partition the data by with transformations"
},
"shuffleByPartition": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean",
"title": "Enable partition shuffling",
"description": "If enabled, we will shuffle by the partition keys, which can reduce the number of files a sink produces; however this may cause backlog if data is skewed"
}
},
"description": "Advanced tuning for hash shuffling of partition keys",
"title": "Partition shuffle settings"
}
},
"required": ["shuffleByPartition"],
"description": "Data‑layout partitioning for sinks."
},
"multipart": {
"type": "object",
"additionalProperties": false,
"properties": {
"targetPartSizeBytes": {
"type": "integer",
"format": "uint64",
"minimum": 5242880,
"title": "Target Part Size",
"description": "Target size for each part of the multipart upload, in bytes"
},
"maxParts": {
"type": "integer",
"format": "uint64",
"minimum": 1,
"title": "Max Parts",
"description": "Maximum number of parts to upload in a multipart upload"
}
},
"title": "Multipart Upload Settings",
"description": "Multipart‑upload tuning for object stores that need it."
}
},
"required": ["path"],
"title": "Sink"
}
]
}
},
"required": ["tableType"]
}