Framework/Configurations/SVT/Services/DataLakeStore.json

{
  "FeatureName": "DataLakeStore",
  "Reference": "aka.ms/azsdktcp/datalakestore",
  "IsManintenanceMode": false,
  "Controls": [
    {
      "ControlID": "Azure_DataLakeStore_AuthN_AAD_For_Client_AuthN",
      "Description": "All users/applications are authenticated using Azure Active Directory (AAD) based credentials",
      "Id": "DataLakeStore110",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Using the native enterprise directory for authentication ensures that there is a built-in high level of assurance in the user identity established for subsequent access control. All Enterprise subscriptions are automatically associated with their enterprise directory (xxx.onmicrosoft.com) and users in the native directory are trusted for authentication to enterprise subscriptions.",
      "Recommendation": "No action required. ADLS supports only AAD authentication.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "AuthN"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_AuthZ_Grant_Min_RBAC_Access",
      "Description": "All users/identities must be granted minimum required permissions using Role Based Access Control (RBAC)",
      "Id": "DataLakeStore120",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckRBACAccess",
      "Rationale": "Granting minimum access by leveraging RBAC feature ensures that users are granted just enough permissions to perform their tasks. This minimizes exposure of the resources in case of user/service account compromise.",
      "Recommendation": "Assign 'Owner' role to Data Lake Store creator at resource group scope. Refer: https://docs.microsoft.com/en-us/azure/active-directory/role-based-access-control-configure",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "AuthZ",
        "RBAC"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_AuthZ_Assign_ACLs_On_FileSystem",
      "Description": "Access to Data Lake Store file system must be limited by using appropriate Access Control List (ACL). The 'Other' group must not have any access",
      "Id": "DataLakeStore130",
      "ControlSeverity": "High",
      "Automated": "Yes",
      "MethodName": "CheckACLAccess",
      "Rationale": "Using appropriate ACLs ensures that data in ADLS is protected and accessible only to the entities with a legitimate need to access it.",
      "Recommendation": "Use PS command 'Set-AzureRmDataLakeStoreItemAcl [-Account] <String> [-Path] <DataLakeStorePathInstance> [-Acl] <DataLakeStoreItemAcl>'. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-secure-data#a-namefilepermissionsaassign-users-or-security-group-as-acls-to-the-azure-data-lake-store-file-system",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_AuthZ_Enable_Firewall",
      "Description": "Firewall should be enabled on Data Lake Store",
      "Id": "DataLakeStore140",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckFirewall",
      "Rationale": "Using the firewall feature ensures that access to the data or the service is restricted to a specific set/group of clients. While this may not be feasible in all scenarios, when it can be used, it provides an extra layer of access control protection for critical assets.",
      "Recommendation": "Enable firewall and add rules for specific IP/IP ranges. Do not add the IP range $($this.ControlSettings.UniversalIPRange)) as it means open access for all IPs. Refer: https://docs.microsoft.com/en-us/powershell/module/azurerm.datalakestore/add-azurermdatalakestorefirewallrule",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_DP_Use_AdlCopy_Securely",
      "Description": "AdlCopy tool must be used securly while copying data from storage blobs to Data Lake Store",
      "Id": "DataLakeStore150",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.�Incautious use of storage key in�AdlCopy command may result into exposure of the key to unauthorized users.",
      "Recommendation": "Use HTTPS URL for blob storage endpoint.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_AuthZ_Use_SP_For_ADLS_Access",
      "Description": "Clients such as web jobs, standalone apps should use a service principal identity to access Data Lake Store",
      "Id": "DataLakeStore170",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Using a 'user' account should be avoided because, in general, a user account will likely have broader set of privileges to enterprise assets. Using a dedicated SPN ensures that the SPN does not have permissions beyond the ones specifically granted for the given scenario.",
      "Recommendation": "Create a service principal and use ACLs on ADLS to grant it the least required access. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-secure-data#a-namefilepermissionsaassign-users-or-security-group-as-acls-to-the-azure-data-lake-store-file-system",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "AuthZ"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_DP_Encrypt_At_Rest",
      "Description": "Sensitive data must be encrypted at rest",
      "Id": "DataLakeStore180",
      "ControlSeverity": "High",
      "Automated": "Yes",
      "MethodName": "CheckEncryptionAtRest",
      "Rationale": "Using this feature ensures that sensitive data is stored encrypted at rest. This minimizes the risk of data loss from physical theft and also helps meet regulatory compliance requirements.",
      "Recommendation": "Enable encryption while creating Data Lake Store. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-security-overview#data-protection",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_DP_Encrypt_In_Transit",
      "Description": "Sensitive data must be encrypted in transit",
      "Id": "DataLakeStore190",
      "ControlSeverity": "High",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Use of HTTPS ensures server/service authentication and protects data in transit from network layer man-in-the-middle, eavesdropping, session-hijacking attacks.",
      "Recommendation": "No action required. ADLS provides encryption in transit using HTTPS transport layer security.",
      "Tags": [
        "SDL",
        "Information",
        "Manual",
        "DP"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_Audit_Enable_Diagnostics_Log",
      "Description": "Diagnostics logs must be enabled with a retention period of at least $($this.ControlSettings.Diagnostics_RetentionPeriod_Min) days.",
      "Id": "DataLakeStore200",
      "ControlSeverity": "Medium",
      "Automated": "Yes",
      "MethodName": "CheckDiagnosticsSettings",
      "Rationale": "Logs should be retained for a long enough period so that activity trail can be recreated when investigations are required in the event of an incident or a compromise. A period of 1 year is typical for several compliance requirements as well.",
      "Recommendation": "Run command: Set-AzureRmDiagnosticSetting -ResourceId {'ResourceId'} -Enable `$true -StorageAccountId '{StorageAccountId}' -RetentionInDays $($this.ControlSettings.Diagnostics_RetentionPeriod_Min) -RetentionEnabled `$true",
      "Tags": [
        "SDL",
        "TCP",
        "Automated",
        "Audit",
        "Diagnostics"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_Audit_Review_Logs",
      "Description": "Diagnostic logs for Data Lake Store should be reviewed periodically",
      "Id": "DataLakeStore210",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Periodic reviews of diagnostics, activity and audit logs ensures that anomalous activity can be identified early enough instead of after a major compromise.",
      "Recommendation": "Review diagnostic/activity logs to check activities on the resource. Refer: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-diagnostic-logs and https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-activity-logs",
      "Tags": [
        "SDL",
        "Best Practice",
        "Manual",
        "Audit"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_BCDR_Plan",
      "Description": "Backup and Disaster Recovery must be planned for Data Lake Store",
      "Id": "DataLakeStore220",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Data Lake Analytics does not offer features to cover backup/disaster recovery out-of-the-box. As a result, when processing critical workloads, a team must have adequate backups of the data.",
      "Recommendation": "Ensure the critical business data in the Data Lake Store has been backed up from a BC-DR standpoint.",
      "Tags": [
        "SDL",
        "TCP",
        "Manual",
        "BCDR"
      ],
      "Enabled": true
    },
    {
      "ControlID": "Azure_DataLakeStore_Config_Cleanup_Data",
      "Description": "Data in Data Lake Store should be cleaned up using file retention",
      "Id": "DataLakeStore230",
      "ControlSeverity": "Medium",
      "Automated": "No",
      "MethodName": "",
      "Rationale": "Data should not be retained for periods longer than required for business use case scenarios. Purging/cleaning up data periodically minimizes risk from compromise while also helping limit the costs of maintaining it.",
      "Recommendation": "Set expiry date by navigating to file in ADLS data explorer and the 'Set Expiry' property or use PS Command 'Set-AzureRmDataLakeStoreItemExpiry [-Account] <String> [-Path] <DataLakeStorePathInstance> [[-Expiration] <DateTimeOffset>]'",
      "Tags": [
          "SDL",
        "Best Practice",
        "Manual",
        "Config"
      ],
      "Enabled": true
    }
  ]
}