functions/Invoke-DbaDbPiiScan.ps1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
function Invoke-DbaDbPiiScan {
    <#
    .SYNOPSIS
        Command to return any columns that could potentially contain PII (Personal Identifiable Information)
 
    .DESCRIPTION
        This command will go through the tables in your database and asses each column.
        It will first check the columns names if it was named in such a way that it would indicate PII.
        The next thing that it will do is pattern recognition by looking into the data from the table.
 
    .PARAMETER SqlInstance
        The target SQL Server instance or instances.
 
    .PARAMETER SqlCredential
        Login to the target instance using alternative credentials. Windows and SQL Authentication supported. Accepts credential objects (Get-Credential)
 
    .PARAMETER Database
        Databases to process through
 
    .PARAMETER Table
        Table(s) to process. By default all the tables will be processed
 
    .PARAMETER Column
        Column(s) to process. By default all the columns will be processed
 
    .PARAMETER Country
        Filter out the patterns and known types for one or more countries
 
    .PARAMETER CountryCode
        Filter out the patterns and known types for one or more country code
 
    .PARAMETER SampleCount
        Amount of rows to sample to make an assessment. The default is 100
 
    .PARAMETER KnownNamesFile
        Points to a file containing the custom known names
 
    .PARAMETER PatternsFile
        Points to a file containing the custom patterns
 
    .PARAMETER ExcludeDefaultKnownNames
        Excludes the default known names
 
    .PARAMETER ExcludeDefaultPatterns
        Excludes the default patterns
 
    .PARAMETER ExcludeTable
        Exclude certain tables
 
    .PARAMETER ExcludeColumn
        Exclude certain columns
 
    .PARAMETER Force
        Forcefully execute commands when needed
 
    .PARAMETER WhatIf
        If this switch is enabled, no actions are performed but informational messages will be displayed that explain what would happen if the command were to run.
 
    .PARAMETER Confirm
        If this switch is enabled, you will be prompted for confirmation before executing any operations that change state.
 
    .PARAMETER EnableException
        By default, when something goes wrong we try to catch it, interpret it and give you a friendly warning message.
        This avoids overwhelming you with "sea of red" exceptions, but is inconvenient because it basically disables advanced scripting.
        Using this switch turns this "nice by default" feature off and enables you to catch exceptions with your own try/catch.
 
    .NOTES
        Tags: Data Masking, Database, Personal Information, GDPR
        Author: Sander Stad (@sqlstad, sqlstad.nl)
 
        Website: https://dbatools.io
        Copyright: (c) 2018 by dbatools, licensed under MIT
        License: MIT https://opensource.org/licenses/MIT
 
    .LINK
        https://dbatools.io/Invoke-DbaDbPiiScan
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1 -Database db1
 
        Scan the database db1 on instance sql1
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1, sql2 -Database db1, db2
 
        Scan multiple databases on multiple instances
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1 -Database db2 -ExcludeColumn firstname
 
        Scan database db2 but exclude the column firstname
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1 -Database db2 -CountryCode US
 
        Scan database db2 but only apply data patterns used for the United States
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1 -Database db1 -PatternsFile c:\pii\patterns.json
 
        Scans db1 on instance sql1 with additional custom patterns
 
    .EXAMPLE
        Invoke-DbaDbPiiScan -SqlInstance sql1 -Database db1 -PatternsFile c:\pii\patterns.json -ExcludeDefaultPatterns
 
        Scans db1 on instance sql1 with additional custom patterns, excluding the default patterns
    #>

    [CmdLetBinding()]
    param (
        [DbaInstanceParameter[]]$SqlInstance,
        [PSCredential]$SqlCredential,
        [string[]]$Database,
        [string[]]$Table,
        [string[]]$Column,
        [string[]]$Country,
        [string[]]$CountryCode,
        [string[]]$ExcludeTable,
        [string[]]$ExcludeColumn,
        [int]$SampleCount = 100,
        [string]$KnownNamesFile,
        [string]$PatternsFile,
        [switch]$ExcludeDefaultKnownNames,
        [switch]$ExcludeDefaultPatterns,
        [switch]$EnableException
    )

    begin {
        # Initialize the arrays
        $knownNames = @()
        $patterns = @()

        # Get the known names
        if (-not $ExcludeDefaultKnownNames) {
            try {
                $knownNameFilePath = Resolve-Path -Path "$script:PSModuleRoot\bin\datamasking\pii-knownnames.json"
                $knownNames += Get-Content -Path $knownNameFilePath -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
            } catch {
                Stop-Function -Message "Couldn't parse known names file" -ErrorRecord $_
                return
            }
        }

        # Get the patterns
        if (-not $ExcludeDefaultPatterns) {
            try {
                $patternFilePath = Resolve-Path -Path "$script:PSModuleRoot\bin\datamasking\pii-patterns.json"
                $patterns = Get-Content -Path $patternFilePath -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
            } catch {
                Stop-Function -Message "Couldn't parse pattern file" -ErrorRecord $_
                return
            }
        }

        # Get custom known names and patterns
        if ($KnownNamesFile) {
            if (Test-Path -Path $KnownNamesFile) {
                try {
                    $knownNames += Get-Content -Path $KnownNamesFile -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
                } catch {
                    Stop-Function -Message "Couldn't parse known types file" -ErrorRecord $_ -Target $KnownNamesFile
                    return
                }
            } else {
                Stop-Function -Message "Couldn't not find known names file" -Target $KnownNamesFile
            }
        }

        if ($PatternsFile) {
            if (Test-Path -Path $PatternsFile) {
                try {
                    $patterns += Get-Content -Path $PatternsFile -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
                } catch {
                    Stop-Function -Message "Couldn't parse patterns file" -ErrorRecord $_ -Target $PatternsFile
                    return
                }
            } else {
                Stop-Function -Message "Couldn't not find patterns file" -Target $PatternsFile
            }
        }

        # Check parameters
        if (-not $SqlInstance) {
            Stop-Function -Message "Please enter a SQL Server instance" -Category InvalidArgument
        }

        if (-not $Database) {
            Stop-Function -Message "Please enter a database" -Category InvalidArgument
        }

        # Filter the patterns
        if ($Country.Count -ge 1) {
            $patterns = $patterns | Where-Object Country -in $Country
        }

        if ($CountryCode.Count -ge 1) {
            $patterns = $patterns | Where-Object CountryCode -in $CountryCode
        }
    }

    process {
        if (Test-FunctionInterrupt) {
            return
        }

        $results = @()

        # Loop through the instances
        foreach ($instance in $SqlInstance) {

            # Try to connect to the server
            try {
                $server = Connect-SqlInstance -SqlInstance $instance -SqlCredential $sqlcredential -MinimumVersion 9
            } catch {
                Stop-Function -Message "Error occurred while establishing connection to $instance" -Category ConnectionError -ErrorRecord $_ -Target $instance -Continue
            }

            $progressActivity = "Scanning databases for PII"
            $progressId = 1

            # Loop through the databases
            foreach ($dbName in $Database) {

                $progressTask = "Scanning Database $dbName"
                Write-Progress -Id $progressId -Activity $progressActivity -Status $progressTask

                # Get the database object
                $db = $server.Databases[$($dbName)]

                # Filter the tables if needed
                if ($Table) {
                    $tables = $db.Tables | Where-Object Name -in $Table
                } else {
                    $tables = $db.Tables
                }

                if ($ExcludeTable) {
                    $tables = $tables | Where-Object Name -notin $ExcludeTable
                }

                # Filter the tables based on the column
                if ($Column) {
                    $tables = $tables | Where-Object { $_.Columns.Name -in $Column }
                }

                $tableNumber = 1
                $progressStatusText = '"Table $($tableNumber.ToString().PadLeft($($tables.Count).Count.ToString().Length)) of $($tables.Count) | Scanning tables for database $dbName"'
                $progressStatusBlock = [ScriptBlock]::Create($progressStatusText)


                # Loop through the tables
                foreach ($tableobject in $tables) {
                    Write-Message -Level Verbose -Message "Scanning table [$($tableobject.Schema)].[$($tableobject.Name)]"

                    $progressTask = "Scanning columns and data"
                    Write-Progress -Id $progressId -Activity $progressActivity -Status (& $progressStatusBlock) -CurrentOperation $progressTask -PercentComplete ($tableNumber / $($tables.Count) * 100)

                    # Get the columns
                    if ($Column) {
                        $columns = $tableobject.Columns | Where-Object Name -in $Column
                    } else {
                        $columns = $tableobject.Columns
                    }

                    if ($ExcludeColumn) {
                        $columns = $columns | Where-Object Name -notin $ExcludeColumn
                    }

                    # Loop through the columns
                    foreach ($columnobject in $columns) {
                        if ($columnobject.DataType.Name -eq "geography") {
                            if ($null -eq ($results | Where-Object { $_.Database -eq $dbName -and $_.Schema -eq $tableobject.Schema -and $_.Table -eq $tableobject.Name -and $_.Column -eq $columnobject.Name })) {
                                # Add the results
                                $results += [pscustomobject]@{
                                    ComputerName   = $db.Parent.ComputerName
                                    InstanceName   = $db.Parent.ServiceName
                                    SqlInstance    = $db.Parent.DomainInstanceName
                                    Database       = $dbName
                                    Schema         = $tableobject.Schema
                                    Table          = $tableobject.Name
                                    Column         = $columnobject.Name
                                    "PII-Category" = "Location"
                                    "PII-Name"     = "Geography"
                                    FoundWith      = "DataType"
                                }
                            }
                        } else {
                            if ($knownNames.Count -ge 1) {
                                # Check if the results not already contain a similar object
                                if ($null -eq ($results | Where-Object { $_.Database -eq $dbName -and $_.Schema -eq $tableobject.Schema -and $_.Table -eq $tableobject.Name -and $_.Column -eq $columnobject.Name })) {
                                    # Go through the first check to see if any column is found with a known type
                                    foreach ($knownName in $knownNames) {
                                        foreach ($pattern in $knownName.Pattern) {
                                            if ($columnobject.Name -match $pattern ) {
                                                # Add the results
                                                $results += [pscustomobject]@{
                                                    ComputerName   = $db.Parent.ComputerName
                                                    InstanceName   = $db.Parent.ServiceName
                                                    SqlInstance    = $db.Parent.DomainInstanceName
                                                    Database       = $dbName
                                                    Schema         = $tableobject.Schema
                                                    Table          = $tableobject.Name
                                                    Column         = $columnobject.Name
                                                    "PII-Category" = $knownName.Category
                                                    "PII-Name"     = $knownName.Name
                                                    FoundWith      = "KnownName"
                                                }
                                            }
                                        }
                                    }
                                    $knownName = $null
                                }
                            } else {
                                Write-Message -Level Verbose -Message "No known names found to perform check on"
                            }
                        } # End for each column


                        if ($patterns.Count -ge 1) {
                            # Check if the results not already contain a similar object
                            if ($null -eq ($results | Where-Object { $_.Database -eq $dbName -and $_.Schema -eq $tableobject.Schema -and $_.Table -eq $tableobject.Name -and $_.Column -eq $columnobject.Name })) {
                                # Setup the query
                                $query = "SELECT TOP($SampleCount) $($columnobject.Name) FROM [$($tableobject.Schema)].[$($tableobject.Name)]"

                                # Get the data
                                $dataset = @()

                                try {
                                    $dataset += Invoke-DbaQuery -SqlInstance $instance -SqlCredential $SqlCredential -Database $dbName -Query $query -EnableException
                                } catch {
                                    $errormessage = $_.Exception.Message.ToString()
                                    Stop-Function -Message "Error executing query $($tableobject.Schema).$($tableobject.Name): $errormessage" -Target $updatequery -Continue -ErrorRecord $_
                                }

                                # Check if there is any data
                                if ($dataset.Count -ge 1) {

                                    # Loop through the patterns
                                    foreach ($patternobject in $patterns) {

                                        # If there is a result from the match
                                        if ($dataset.$($columnobject.Name) -match $patternobject.Pattern) {
                                            # Add the results
                                            $results += [pscustomobject]@{
                                                ComputerName   = $db.Parent.ComputerName
                                                InstanceName   = $db.Parent.ServiceName
                                                SqlInstance    = $db.Parent.DomainInstanceName
                                                Database       = $dbName
                                                Schema         = $tableobject.Schema
                                                Table          = $tableobject.Name
                                                Column         = $columnobject.Name
                                                "PII-Category" = $patternobject.category
                                                "PII-Name"     = $patternobject.Name
                                                FoundWith      = "Pattern"
                                            }
                                        }
                                        $patternobject = $null
                                    }
                                } else {
                                    Write-Message -Message "Table $($tableobject.Name) does not contain any rows" -Level Verbose
                                }
                            }
                        } else {
                            Write-Message -Level Verbose -Message "No patterns found to perform check on"
                        }
                    }

                    $tableNumber++

                } # End for each table
            } # End for each database
        } # End for each instance

        # Return the results
        return $results

    } # End process
}