Framework/Configurations/SVT/Services/DataFactory.json

{
  "FeatureName": "DataFactory",
  "Reference": "aka.ms/azsdktcp/datafactory",
  "IsManintenanceMode": false,
  "Controls": [
    {
      "ControlID": "Azure_DataFactory_DP_LinkSvc_Encrypt_In_Transit",
      "Description": "Linked Service must use encryption in transit",
      "Id": "DataFactory110",
      "ControlSeverity": "High",
      "Automated": "Yes",
      "MethodName": "CheckDataFactoryLinkedService",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.",
      "Recommendation": "Linked Services used to transfer data between a data source and Azure Data Factory must use encrypted channels to transmit the data. (e.g., for an Azure Storage account the HTTPS endpoint must be specified in the service JSON and, similarly, for SQL Server the JSON must have Encrypt=True in the connection string, etc.). ",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_AuthZ_Grant_Min_Access",
      "Description": "User accounts/roles connecting to data source must have minimum required permissions",
      "Id": "DataFactory120",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Granting minimum access ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "All user accounts/roles which are involved in Azure Data Factory must have minimum required access rights to data sources. (e.g. If the Data Factory is just reading data from the data source then the account employed must use just read-only access.)",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_Config_Lockdown_DMG_Server",
      "Description": "Data Management Gateway (DMG), if used, must be installed on a locked down machine",
      "Id": "DataFactory130",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "The DMG machine is serving as a 'gateway' into the corporate environment allowing endpoint in the cloud access to enteprise data. Using a locked-down, secure baseline configuration ensures that this machine does not get leveraged as an entry point to the downstream data server/service.",
      "Recommendation": "Use Windows Server lockdown templates to minimize the attack surface available. For the most critical scenarios, consider using Code Integrity feature as well.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "Config"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_Deploy_Register_DMG_Securely",
      "Description": "Data Management Gateway (DMG), if used, must be registered in secure way",
      "Id": "DataFactory140",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Data Management Gateway(DMG) tool is required to connect data sources which are hidden behind corpnet/firewall. DMG tool needs to be installed and registered on a machine which has access to data source.The gateway key needs to be exchanged between Azure Portal and DMG tool for registration. Manual handling of this key may impose an operational risk and thus registration of DMG tool must be done via PowerShell.",
      "Recommendation": "Use a PowerShell-based approach to register the tool instead of manually handling the key to minimize operational risk. Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-data-management-gateway#powershell-cmdlets",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "Deploy"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_DP_Rotate_Gateway_Key",
      "Description": "Data Gateway key (on Azure Portal) must be rotated periodically",
      "Id": "DataFactory150",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Periodic key/password rotation is a good security hygiene practice as, over time, it minimizes the likelihood of data loss/compromise which can arise from key theft/brute forcing/recovery attacks.",
      "Recommendation": "Rotate the Data Gateway key every six months or whenever the DMG service account password is renewed in order to reduce risk from brute force key-guessing attacks.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_AuthZ_Use_Svc_Acct_for_DMG",
      "Description": "Linked Service must be setup using a service account when Data Management Gateway is used ",
      "Id": "DataFactory160",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Using a service account with access to the data tier eliminates the need to manage a separate account/connection string that DMG would otherwise need to use to access the backend data.",
      "Recommendation": "Configuration of Linked Service involves credentials (username, password etc.) for the target data source. The service account used to run the DMG may be granted access to the target data source. This can let us leverage integrated authentication and do away with the need to store credentials.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_Audit_Enable_Logging_and_Monitoring",
      "Description": "Monitoring must be enabled for Azure Data Factory",
      "Id": "DataFactory180",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Auditing enables log collection of important system events pertinent to security. Regular monitoring of audit logs can help to detect any suspicious and malicious activity early and respond in a timely manner.",
      "Recommendation": "Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-monitor-manage-app",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "Audit"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFatory_AuthN_DataLakeStore_LinkedService",
      "Description": "Data lake store linked service in Data Factory must be configured with service-to-service authentication",
      "Id": "DataFactory190",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "If user credentials are employed, there may be downtime due to token expiration.",
      "Recommendation": "Use service principal based authentication in the data lake store linked service. Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-azure-datalake-connector#linked-service-properties",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthN"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFatory_BCDR_Multiple_Node_DMG",
      "Description": "To ensure high availability multiple nodes should be configured on DMG",
      "Id": "DataFactory200",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "DMG configured on multiple node ensures that another node is available for moving data even if one of the nodes goes down.",
      "Recommendation": "Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-data-management-gateway-high-availability-scalability#set-up-a-multi-node-gateway",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "BCDR"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_DP_Encrypt_Node_To_Node_Communication",
      "Description": "For multi-node gateway, node to node communication must be encrypted with valid TLS/SSL certificate",
      "Id": "DataFactory210",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.",
      "Recommendation": "For each DMG node following: Go to Data Management Gateway Application --> After Registration --> Settings Tab --> Node-Node Communication Encryption Mode --> Change. Upload a valid certificate which fulfils https://docs.microsoft.com/en-us/azure/data-factory/data-factory-data-management-gateway-high-availability-scalability#tlsssl-certificate-requirements requirements.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_BCDR_DMG_Configuration_Backup",
      "Description": "Use backup feature to keep a snapshot of DMG configurations",
      "Id": "DataFactory220",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Keeping backup of DMG config ensures that there is always a previous snapshot that can be leveraged to restore configurations quickly during a recovery scenario.",
      "Recommendation": "Go to Data Management Gateway Application --> After Registration --> Home Tab --> Generate Backup --> Provide strong password.",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "BCDR"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_DP_Encrypt_Credentials_LinkedService",
      "Description": "Encrypt credentials in Linked Service while configuring Data Factory (Applicable only for Linked Services which require Gateway)",
      "Id": "DataFactory230",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "By using this configuration, credentials will be transmitted in encrypted format to the DMG which connects to the data source. This ensures that on-premise data source is accessible only through DMG as only DMG can decrypt the credential.",
      "Recommendation": "Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-data-management-gateway#encrypting-credentials",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataFactory_BCDR_Config_Fault_Tolerance",
      "Description": "Configure fault tolerance in copy activity to handle incompatible data",
      "Id": "DataFactory240",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "In case of incompatible data flowing through the ADF channel, the default behavior will abort the copy activity. A fault tolerant configuration is helpful to continue copy activity by skipping and logging incorrect data.",
      "Recommendation": "Refer: https://docs.microsoft.com/en-us/azure/data-factory/data-factory-copy-activity-fault-tolerance#configuration",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "BCDR"
      ],
      "Enabled": true
    }
  ]
}
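
As an illustration of Azure_DataFactory_DP_LinkSvc_Encrypt_In_Transit, a minimal sketch of linked service definitions that satisfy the intent of the control, following the public ADF (v1) JSON format; account/server names and secrets are placeholders:

{
  "name": "StorageLinkedService",
  "properties": {
    "type": "AzureStorage",
    "typeProperties": {
      "connectionString": "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>"
    }
  }
}

{
  "name": "AzureSqlLinkedService",
  "properties": {
    "type": "AzureSqlDatabase",
    "typeProperties": {
      "connectionString": "Server=tcp:<server>.database.windows.net,1433;Database=<db>;User ID=<user>@<server>;Password=<password>;Trusted_Connection=False;Encrypt=True;Connection Timeout=30"
    }
  }
}

DefaultEndpointsProtocol=https and Encrypt=True are the specific settings the control text calls out; the rest is routine linked-service boilerplate.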
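
For Azure_DataFactory_AuthN_DataLakeStore_LinkedService, a sketch of a Data Lake Store linked service using service-principal (service-to-service) authentication instead of user credentials, per the linked-service properties doc cited in the control; all values are placeholders:

{
  "name": "AzureDataLakeStoreLinkedService",
  "properties": {
    "type": "AzureDataLakeStore",
    "typeProperties": {
      "dataLakeStoreUri": "https://<accountname>.azuredatalakestore.net/webhdfs/v1",
      "servicePrincipalId": "<service principal id>",
      "servicePrincipalKey": "<service principal key>",
      "tenant": "<tenant id>",
      "subscriptionId": "<subscription id>",
      "resourceGroupName": "<resource group name>"
    }
  }
}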
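
For Azure_DataFactory_BCDR_Config_Fault_Tolerance, a sketch of the copy activity typeProperties fragment that skips and logs incompatible rows instead of aborting, following the fault-tolerance doc cited in the control; the source/sink types, linked service name, and redirect path are illustrative:

"typeProperties": {
  "source": { "type": "BlobSource" },
  "sink": { "type": "SqlSink" },
  "enableSkipIncompatibleRow": true,
  "redirectIncompatibleRowSettings": {
    "linkedServiceName": "BlobStorageLinkedService",
    "path": "redirectcontainer/erroroutput"
  }
}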