Work in Progress: This page is under development. Use the feedback button on the bottom right to help us improve it.

FileSystem

Read or write to a filesystem or object store like S3.

Connection Profile

No connection profile needed.

Connection Table

  • tableType (required): One of:
    • Source:
      • path (string, required): URI of the folder to read from
      • compressionFormat (string, required): "none" | "zstd" | "gzip"
      • regexPattern (string, optional): Regex pattern for files to include
      • storageOptions (object, optional): Storage configuration options
    • Sink:
      • path (string, required): URI of the folder to write to
      • storageOptions (object, optional): Storage configuration options
      • rollingPolicy (object, optional): File rolling policy
        • fileSizeBytes (integer, optional): Files are rolled after reaching this size in bytes
        • intervalSeconds (integer, optional): Seconds to wait before rolling to new file
        • inactivitySeconds (integer, optional): Seconds of inactivity before rolling
      • fileNaming (object, optional): Filename prefix/suffix and strategy
        • prefix (string, optional): Filename prefix
        • suffix (string, optional): Filename suffix
        • strategy (string, optional): "serial" | "uuid" | "uuidV7" | "ulid"
      • partitioning (object, optional): Data partitioning configuration
        • timePattern (string, optional): Pattern of date string
        • fields (array, optional): Partition field configurations
          • name (string, required): Field to partition by
          • transform (string, optional): "identity" | "hour" | "year" | "month"
        • shuffleByPartition (object, required): Partition shuffle settings
          • enabled (boolean, optional): Enable partition key shuffling
      • multipart (object, optional): Multipart upload tuning for object stores
        • targetPartSizeBytes (integer, optional): Target size per part (min 5242880)
        • maxParts (integer, optional): Max parts in multipart upload

JSON Schema Reference

Connection Table Schema

{
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "tableType": {
      "title": "Table Type",
      "oneOf": [
        {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "path": {
              "type": "string",
              "title": "Path",
              "description": "URI of the folder to read from"
            },
            "compressionFormat": {
              "type": "string",
              "enum": ["none", "zstd", "gzip"],
              "title": "Compression format",
              "description": "Compression format of the files in the source path"
            },
            "regexPattern": {
              "type": "string",
              "title": "File Regex Pattern",
              "description": "Regex pattern for files to include in source. Will search everything under the source path."
            },
            "storageOptions": {
              "type": "object",
              "additionalProperties": {"type": "string"},
              "title": "Storage Options",
              "description": "See the FileSystem connector docs for the full list of options"
            }
          },
          "required": ["path", "compressionFormat"],
          "title": "Source"
        },
        {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "path": {
              "type": "string",
              "title": "Path",
              "description": "URI of the folder to write to"
            },
            "storageOptions": {
              "type": "object",
              "additionalProperties": {"type": "string"},
              "title": "Storage Options",
              "description": "See the FileSystem connector docs for the full list of options"
            },
            "rollingPolicy": {
              "type": "object",
              "additionalProperties": false,
              "properties": {
                "fileSizeBytes": {
                  "type": "integer",
                  "format": "uint64",
                  "minimum": 0,
                  "title": "File Size",
                  "description": "Files will be rolled after reaching this number of bytes"
                },
                "intervalSeconds": {
                  "type": "integer",
                  "format": "uint64",
                  "minimum": 1,
                  "title": "Interval Seconds",
                  "description": "Number of seconds to wait before rolling over to a new file"
                },
                "inactivitySeconds": {
                  "type": "integer",
                  "format": "uint64",
                  "minimum": 1,
                  "title": "Inactivity Seconds",
                  "description": "Number of seconds of inactivity to wait before rolling over to a new file"
                }
              },
              "title": "File Rolling Policy",
              "description": "Rolling policy for file sinks (when & why to close a file and open a new one)."
            },
            "fileNaming": {
              "type": "object",
              "additionalProperties": false,
              "properties": {
                "prefix": {
                  "type": "string",
                  "title": "Filename Prefix",
                  "description": "The prefix to use in file name. i.e prefix-<uuid>.parquet"
                },
                "suffix": {
                  "type": "string",
                  "title": "Filename Suffix",
                  "description": "This will overwrite the default file suffix. i.e .parquet, use with caution"
                },
                "strategy": {
                  "type": "string",
                  "enum": ["serial", "uuid", "uuidV7", "ulid"],
                  "title": "Filename Strategy",
                  "description": "Filename generation strategy."
                }
              },
              "title": "File Naming",
              "description": "Controls filename prefix/suffix and strategy."
            },
            "partitioning": {
              "type": "object",
              "additionalProperties": false,
              "properties": {
                "timePattern": {
                  "type": "string",
                  "title": "Time Partition Pattern",
                  "description": "The pattern of the date string"
                },
                "fields": {
                  "type": "array",
                  "items": {
                    "title": "Partition Field",
                    "description": "Partition field configuration",
                    "type": "object",
                    "properties": {
                      "name": {
                        "title": "Field Name",
                        "description": "The field to partition by",
                        "type": "string"
                      },
                      "transform": {
                        "title": "Transform",
                        "description": "Transformation to apply (identity, hour, year, month)",
                        "type": "string",
                        "enum": ["identity", "hour", "year", "month"]
                      }
                    },
                    "additionalProperties": false,
                    "required": ["name"]
                  },
                  "title": "Partition Fields",
                  "description": "Fields to partition the data by with transformations"
                },
                "shuffleByPartition": {
                  "type": "object",
                  "additionalProperties": false,
                  "properties": {
                    "enabled": {
                      "type": "boolean",
                      "title": "Enable partition shuffling",
                      "description": "If enabled, we will shuffle by the partition keys, which can reduce the number of files a sink produces; however this may cause backlog if data is skewed"
                    }
                  },
                  "description": "Advanced tuning for hash shuffling of partition keys",
                  "title": "Partition shuffle settings"
                }
              },
              "required": ["shuffleByPartition"],
              "description": "Data‑layout partitioning for sinks."
            },
            "multipart": {
              "type": "object",
              "additionalProperties": false,
              "properties": {
                "targetPartSizeBytes": {
                  "type": "integer",
                  "format": "uint64",
                  "minimum": 5242880,
                  "title": "Target Part Size",
                  "description": "Target size for each part of the multipart upload, in bytes"
                },
                "maxParts": {
                  "type": "integer",
                  "format": "uint64",
                  "minimum": 1,
                  "title": "Max Parts",
                  "description": "Maximum number of parts to upload in a multipart upload"
                }
              },
              "title": "Multipart Upload Settings",
              "description": "Multipart‑upload tuning for object stores that need it."
            }
          },
          "required": ["path"],
          "title": "Sink"
        }
      ]
    }
  },
  "required": ["tableType"]
}