Framework/Configurations/SVT/Services/DataLakeAnalytics.json

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
{
  "FeatureName": "DataLakeAnalytics",
  "Reference": "aka.ms/azsktcp/datalakeanalytics",
  "IsMaintenanceMode": false,
  "Controls": [
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Assign_Required_RBAC_To_Creator",
      "Description": "Data Lake Analytics creator must be granted only required Role Based Access Control (RBAC) access on Subscription/Resource Group/Resource",
      "Id": "DataLakeAnalytics110",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Granting minimum access by leveraging RBAC feature ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "Assign 'Owner' privilege only at resource group and default data lake store account scope.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthN_AAD_For_Client_AuthN",
      "Description": "All Data Lake Analytics users/service principals must be authenticated using AAD backed credentials",
      "Id": "DataLakeAnalytics120",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Using the native enterprise directory for authentication ensures that there is a built-in high level of assurance in the user identity established for subsequent access control. All Enterprise subscriptions are automatically associated with their enterprise directory (xxx.onmicrosoft.com) and users in the native directory are trusted for authentication to enterprise subscriptions.",
      "Recommendation": "No action required. ADLA supports only AAD authentication.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "AuthN",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Grant_Min_RBAC_Access",
      "Description": "All users/identities must be granted minimum required permissions using Role Based Access Control (RBAC)",
      "Id": "DataLakeAnalytics130",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckRBACAccess",
      "Rationale": "Granting minimum access by leveraging RBAC feature ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "Assign only the 'Data Lake Analytics Developer' RBAC role to developers who manage U-SQL jobs. Refer: https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-manage-use-portal#manage-users",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "AuthZ",
        "RBAC",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Assign_Least_Privilege_ACL",
      "Description": "Data Lake Analytics developer (user/service principal) must have least required ACLs on Catalog/Database and Data Lake Store file system",
      "Id": "DataLakeAnalytics140",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Granting minimum permissions to users/security groups on data lake store file system by leveraging Access Control List (ACL) feature ensures that U-SQL jobs executed by users/security groups would perform only intended read/write operations. This minimizes exposure of the data in case of user credentials leak or faulty job programming.",
      "Recommendation": "Navigate to Azure Portal --> Data Lake Analytics Account --> Add User Wizard option to add users. Ensure least necessary privileges are granted.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Config_Storage_Datasource_Securely",
      "Description": "Storage account data source must be added securely",
      "Id": "DataLakeAnalytics150",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Setting up storage data source using PowerShell command eliminates the risk of exposing storage account key (because the key itself is not displayed on the GUI at all).",
      "Recommendation": "Set up the storage data source using the PowerShell command 'Add-AzDataLakeAnalyticsDataSource -ResourceGroupName <ResourceGroup> -Account <ADLAAccount> -Blob <StorageAccount> -AccessKey ((Get-AzStorageAccountKey -ResourceGroupName <ResourceGroupOfStorageAccount> -Name <StorageAccountName>)[0].Value)'.",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "Config",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_Connection_Strings",
      "Description": "Secrets to access SQL Azure/SQL VM/SQL Data Warehouse must be securely stored under Catalog database credentials",
      "Id": "DataLakeAnalytics160",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Credentials added in the form of plain text in a U-SQL job have a higher chance of getting compromised. The job code is also likely to be checked into a code repository, making the credential visible to many parties.",
      "Recommendation": "Password of the account used to access SQL Azure must be created as a new catalog credential using the 'New-AzDataLakeAnalyticsCatalogCredential' PS command. Refer: https://docs.microsoft.com/en-us/powershell/module/az.datalakeanalytics/new-azdatalakeanalyticscatalogcredential, https://docs.microsoft.com/en-us/rest/api/datalakeanalytics/catalog/createcredential and https://docs.microsoft.com/en-us/azure/sql-data-warehouse/sql-data-warehouse-load-from-azure-data-lake-store#configure-the-data-source for details.",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "DP",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_SI_USQL_Script_Integrity",
      "Description": "U-SQL script file(s) must be uploaded from a secured/trusted location",
      "Id": "DataLakeAnalytics170",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "ADLA U-SQL File Import wizard does not impose any restrictions on file format nor does it scan uploaded files for security issues. If these files are at an insecure/untrustworthy location, they can be tampered with or infected. This can lead to compromise of the ADLA jobs and data.",
      "Recommendation": "Make sure that the client machine used to upload scripts is secure (Antimalware, patching, etc.). Also, do not upload files that have originated from potentially untrusted sites.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "SI",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Audit_Enable_Diagnostics_Log",
      "Description": "Diagnostics logs must be enabled with a retention period of at least $($this.ControlSettings.Diagnostics_RetentionPeriod_Min) days.",
      "Id": "DataLakeAnalytics180",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckDiagnosticsSettings",
      "Rationale": "Logs should be retained for a long enough period so that activity trail can be recreated when investigations are required in the event of an incident or a compromise. A period of 1 year is typical for several compliance requirements as well.",
      "Recommendation": "You can change the diagnostic settings from the Azure Portal by following the steps given here: https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-archive-diagnostic-logs#archive-diagnostic-logs-using-the-portal",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "Audit",
        "Diagnostics",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_At_Rest",
      "Description": "Sensitive data must be encrypted at rest",
      "Id": "DataLakeAnalytics190",
      "ControlSeverity": "High",
      "Automated": "Yes",
      "MethodName": "CheckEncryptionAtRest",
      "Rationale": "Using this feature ensures that sensitive data is stored encrypted at rest. This minimizes the risk of data loss from physical theft and also helps meet regulatory compliance requirements.",
      "Recommendation": "Default Data Lake Store Account must have encryption enabled. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-security-overview#data-protection",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "DP",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_In_Transit",
      "Description": "Sensitive data must be encrypted in transit",
      "Id": "DataLakeAnalytics200",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.",
      "Recommendation": "No action required. ADLA provides encryption in transit using HTTPS transport layer security.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "DP",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_BCDR_Plan_Default_Data_Lake_Store",
      "Description": "Backup and Disaster Recovery must be planned for the default Data Lake Store account",
      "Id": "DataLakeAnalytics210",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Data Lake Analytics does not offer features to cover backup/disaster recovery out-of-the-box. As a result, when processing critical workloads, a team must have adequate backups of the data and the jobs (catalog, code, etc.).",
      "Recommendation": "Ensure that any critical business/catalog data in the default Data Lake Store has been backed up from a BC-DR standpoint.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "BCDR",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Audit_Review_Logs",
      "Description": "Diagnostic and activity logs for Data Lake Analytics should be reviewed periodically",
      "Id": "DataLakeAnalytics220",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Periodic reviews of diagnostics, activity and audit logs ensure that anomalous activity can be identified early enough instead of after a major compromise.",
      "Recommendation": "Review diagnostic/activity logs to check activities on the resource. Refer: https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-of-diagnostic-logs and https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-activity-logs",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "Audit",
        "DataLakeAnalytics"
      ],
      "Enabled": true
    }
  ]
}