Framework/Configurations/SVT/Services/DataLakeAnalytics.json

{
  "FeatureName": "DataLakeAnalytics",
  "Reference": "aka.ms/azsdktcp/datalakeanalytics",
  "IsManintenanceMode": false,
  "Controls": [
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Assign_Required_RBAC_To_Creator",
      "Description": "Data Lake Analytics creator must be granted only required Role Based Access Control (RBAC) access on Subscription/Resource Group/Resource",
      "Id": "DataLakeAnalytics110",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Granting minimum access by leveraging RBAC feature ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "Assign 'Owner' privilege only at resource group and default data lake store account scope.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthN_AAD_For_Client_AuthN",
      "Description": "All Data Lake Analytics users/service principal must be authenticated using AAD backed credentials",
      "Id": "DataLakeAnalytics120",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Using the native enterprise directory for authentication ensures that there is a built-in high level of assurance in the user identity established for subsequent access control. All Enterprise subscriptions are automatically associated with their enterprise directory (xxx.onmicrosoft.com) and users in the native directory are trusted for authentication to enterprise subscriptions.",
      "Recommendation": "No action required. ADLA supports only AAD authentication.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "AuthN"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Grant_Min_RBAC_Access",
      "Description": "All users/identities must be granted minimum required permissions using Role Based Access Control (RBAC)",
      "Id": "DataLakeAnalytics130",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckRBACAccess",
      "Rationale": "Granting minimum access by leveraging RBAC feature ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "Assign only the 'Data Lake Analytics Developer' RBAC role to developers who manage U-SQL jobs. Refer: https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-manage-use-portal#manage-users",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "AuthZ",
        "RBAC"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_AuthZ_Assign_Least_Privilege_ACL",
      "Description": "Data Lake Analytics developer (user/service principal) must have least required ACLs on Catalog/Database and Data Lake Store file system",
      "Id": "DataLakeAnalytics140",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Granting minimum permissions to users/security groups on data lake store file system by leveraging Access Control List (ACL) feature ensures that U-SQL jobs executed by users/security groups would perform only intended read/write operations. This minimizes exposure of the data in case of user credentials leak or faulty job programming.",
      "Recommendation": "Navigate to Azure Portal --> Data Lake Analytics Account --> Add User Wizard option to add users. Ensure least necessary privileges are granted.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Config_Storage_Datasource_Securely",
      "Description": "Storage account data source must be added securely",
      "Id": "DataLakeAnalytics150",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Setting up storage data source using PowerShell command eliminates the risk of exposing storage account key (because the key itself is not displayed on the GUI at all).",
      "Recommendation": "Setup storage data source using PowerShell command 'Add-AzureRmDataLakeAnalyticsDataSource -ResourceGroupName <ResourceGroup> -Account <ADLAAccount> -AzureBlob <StorageAccount> -AccessKey ((Get-AzureRmStorageAccountKey -ResourceGroupName <ResourceGroupOfStorageAccount> -Name <StorageAccountName>).Key1",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "Config"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_Connection_Strings",
      "Description": "Secrets to access SQL Azure/SQL VM/SQL Data Warehouse must be securely stored under Catalog database credentials",
      "Id": "DataLakeAnalytics160",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Credentials added in form of plain text in U-SQL job have higher chances of getting compromised. The job code is also likely check in into a code repository so the credential is visible to many parties.",
      "Recommendation": "Password of the account used to access SQL Azure must be created as a new catalog secret using the 'New-AzureRmDataLakeAnalyticsCatalogSecret' PS command. Refer: https://docs.microsoft.com/en-us/powershell/module/azurerm.datalakeanalytics/new-azurermdatalakeanalyticscatalogsecret. Then create credential object for this catalog secret using the U-SQL command 'CREATE CREDENTIAL', Refer: https://docs.microsoft.com/en-us/azure/sql-data-warehouse/sql-data-warehouse-load-from-azure-data-lake-store#configure-the-data-source",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_SI_USQL_Script_Integrity",
      "Description": "U-SQL script file(s) must be uploaded from a secured/trusted location",
      "Id": "DataLakeAnalytics170",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "ADLA U-SQL File Import wizard does not impose any restrictions on file format nor does it scan uploaded files for security issues. If these files are at an insecure/untrustworthy location, they can be tampered/infested. This can lead to compromise of the ADLA jobs and data.",
      "Recommendation": "Make sure that the client machine used to upload scripts is secure (Antimalware, patching, etc.). Also, do not upload files that have originated from potentially untrusted sites.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "SI"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Audit_Enable_Diagnostics_Log",
      "Description": "Diagnostics logs must be enabled with a retention period of at least $($this.ControlSettings.Diagnostics_RetentionPeriod_Min) days.",
      "Id": "DataLakeAnalytics180",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckDiagnosticsSettings",
      "Rationale": "Logs should be retained for a long enough period so that activity trail can be recreated when investigations are required in the event of an incident or a compromise. A period of 1 year is typical for several compliance requirements as well.",
      "Recommendation": "Run command: Set-AzureRmDiagnosticSetting -ResourceId {'ResourceId'} -Enable `$true -StorageAccountId '{StorageAccountId}' -RetentionInDays $($this.ControlSettings.Diagnostics_RetentionPeriod_Min) -RetentionEnabled `$true",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "Audit",
        "Diagnostics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_At_Rest",
      "Description": "Sensitive data must be encrypted at rest",
      "Id": "DataLakeAnalytics190",
      "ControlSeverity": "High",
      "Automated": "Yes",
      "MethodName": "CheckEncryptionAtRest",
      "Rationale": "Using this feature ensures that sensitive data is stored encrypted at rest. This minimizes the risk of data loss from physical theft and also helps meet regulatory compliance requirements.",
      "Recommendation": "Default Data Lake Store Account must have encryption enabled. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-security-overview#data-protection",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_DP_Encrypt_In_Transit",
      "Description": "Sensitive data must be encrypted in transit",
      "Id": "DataLakeAnalytics200",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.",
      "Recommendation": "No action required. ADLA provides encryption in transit using HTTPS transport layer security.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_BCDR_Plan_Default_Data_Lake_Store",
      "Description": "Backup and Disaster Recovery must be planned for the default Data Lake Store account",
      "Id": "DataLakeAnalytics210",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Data Lake Analytics does not offer features to cover backup/disaster recovery out-of-the-box. As a result, when processing critical workloads, a team must have adequate backups of the data and the jobs (catalog, code, etc.).",
      "Recommendation": "Ensure that any critical business/catalog data in the default Data Lake Store has been backed up from a BC-DR standpoint.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "BCDR"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeAnalytics_Audit_Review_Logs",
      "Description": "Diagnostic and activity logs for Data Lake Analytics should be reviewed periodically",
      "Id": "DataLakeAnalytics220",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Periodic reviews of diagnostics, activity and audit logs ensures that anomalous activity can be identified early enough instead of after a major compromise.",
      "Recommendation": "Review diagnostic/activity logs to check activities on the resource. Refer: https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-of-diagnostic-logs and https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-activity-logs",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "Audit"
      ],
      "Enabled": true
    }
  ]
}
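
Example usage: the PowerShell commands referenced by controls DataLakeAnalytics150, DataLakeAnalytics160, and DataLakeAnalytics180, assembled into a minimal sketch. It assumes the AzureRM module; all resource names (myRG, myadla, mystorage, myserver, mydb) are hypothetical placeholders, so substitute your own.

# Control DataLakeAnalytics150: register a Blob storage data source without
# surfacing the account key in the portal GUI.
# Note: AzureRM 1.4+ returns a key list from Get-AzureRmStorageAccountKey, so
# index into it; on older module versions use '.Key1' as in the control text.
$key = (Get-AzureRmStorageAccountKey -ResourceGroupName "myRG" -Name "mystorage")[0].Value
Add-AzureRmDataLakeAnalyticsDataSource -ResourceGroupName "myRG" -Account "myadla" `
    -AzureBlob "mystorage" -AccessKey $key

# Control DataLakeAnalytics160: store the SQL Azure login as a catalog secret
# rather than embedding it in U-SQL. Parameter names follow the cmdlet reference
# linked in the control; verify them against your module version.
$cred = Get-Credential -UserName "sqluser" -Message "SQL Azure password"
New-AzureRmDataLakeAnalyticsCatalogSecret -Account "myadla" -DatabaseName "mydb" `
    -Secret $cred -Host "myserver.database.windows.net" -Port 1433
# A U-SQL job then references the secret through a credential object, e.g.:
#   CREATE CREDENTIAL MyCred WITH USER_NAME = "sqluser", IDENTITY = "<secret name>";

# Control DataLakeAnalytics180: enable diagnostics logs with retention
# (365 days shown; use at least Diagnostics_RetentionPeriod_Min).
$adlaId = "/subscriptions/<subscription id>/resourceGroups/myRG/providers/Microsoft.DataLakeAnalytics/accounts/myadla"
Set-AzureRmDiagnosticSetting -ResourceId $adlaId -Enabled $true `
    -StorageAccountId "<storage account resource id>" `
    -RetentionEnabled $true -RetentionInDays 365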