functions/Find-DbaSimilarTable.ps1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
function Find-DbaSimilarTable {
<#
.SYNOPSIS
Returns all tables/views that are similar in structure by comparing the column names of matching and matched tables/views
 
.DESCRIPTION
This function can either run against specific databases or all databases searching all/specific tables and views including in system databases.
    Typically one would use this to find for example archive version(s) of a table whose structures are similar.
    This can also be used to find tables/views that are very similar to a given table/view structure to see where a table/view might be used.
  
    More information can be found here: https://sqljana.wordpress.com/2017/03/31/sql-server-find-tables-with-similar-table-structure/
 
.PARAMETER SqlInstance
SQLServer name or SMO object representing the SQL Server to connect to. This can be a collection and receive pipeline input
 
.PARAMETER SqlCredential
PSCredential object to connect as. If not specified, current Windows login will be used.
 
.PARAMETER Database
The database(s) to process - this list is auto-populated from the server. If unspecified, all databases will be processed.
 
.PARAMETER ExcludeDatabase
The database(s) to exclude - this list is auto-populated from the server
 
.PARAMETER SchemaName
If you are looking in a specific schema whose table structures is to be used as reference structure, provide the name of the schema.
    If no schema is provided, looks at all schemas
 
.PARAMETER TableName
If you are looking in a specific table whose structure is to be used as reference structure, provide the name of the table.
    If no table is provided, looks at all tables
    If the table name exists in multiple schemas, all of them would qualify
 
.PARAMETER ExcludeViews
By default, views are included. You can exclude them by setting this switch to $false
    This excludes views in both matching and matched list
 
.PARAMETER IncludeSystemDatabases
By default system databases are ignored but you can include them within the search using this parameter
 
.PARAMETER MatchPercentThreshold
The minimum percentage of column names that should match between the matching and matched objects.
    Entries with no matches are eliminated
 
.PARAMETER EnableException
  By default, when something goes wrong we try to catch it, interpret it and give you a friendly warning message.
  This avoids overwhelming you with "sea of red" exceptions, but is inconvenient because it basically disables advanced scripting.
  Using this switch turns this "nice by default" feature off and enables you to catch exceptions with your own try/catch.
   
.NOTES
Author: Jana Sattainathan (@SQLJana - http://sqljana.wordpress.com)
 
Website: https://dbatools.io
Copyright: (C) Chrissy LeMaire, clemaire@gmail.com
License: GNU GPL v3 https://opensource.org/licenses/GPL-3.0
 
.LINK
https://dbatools.io/Find-DbaSimilarTable
 
.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01
 
Searches all user database tables and views for each, returns all tables or views with their matching tables/views and match percent
 
.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks
 
Searches AdventureWorks database and lists tables/views and their corresponding matching tables/views with match percent
 
.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -SchemaName HumanResource
 
Searches AdventureWorks database and lists tables/views in the HumanResource schema with their corresponding matching tables/views with match percent
 
.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -SchemaName HumanResource -Table Employee
 
Searches AdventureWorks database and lists tables/views in the HumanResource schema and table Employee with its corresponding matching tables/views with match percent
 
.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -MatchPercentThreshold 60
 
Searches AdventureWorks database and lists all tables/views with its corresponding matching tables/views with match percent greater than or equal to 60
 
 
#>

    [CmdletBinding()]
    Param (
        [parameter(Position = 0, Mandatory = $true, ValueFromPipeline = $True)]
        [Alias("ServerInstance", "SqlServer", "SqlServers")]
        [DbaInstanceParameter[]]$SqlInstance,
        [PSCredential]$SqlCredential,
        [Alias("Databases")]
        [object[]]$Database,
        [object[]]$ExcludeDatabase,
        [string]$SchemaName,
        [string]$TableName,
        [switch]$ExcludeViews,
        [switch]$IncludeSystemDatabases,
        [int]$MatchPercentThreshold,
        [switch][Alias('Silent')]$EnableException
    )
    
    begin {
        $everyServerVwCount = 0
        
        $sqlSelect = "WITH ColCountsByTable
                AS
                (
                      SELECT
                            c.TABLE_CATALOG,
                            c.TABLE_SCHEMA,
                            c.TABLE_NAME,
                            COUNT(1) AS Column_Count
                      FROM INFORMATION_SCHEMA.COLUMNS c
                      GROUP BY
                            c.TABLE_CATALOG,
                            c.TABLE_SCHEMA,
                            c.TABLE_NAME
                )
                SELECT
                      100 * COUNT(c2.COLUMN_NAME) /*Matching_Column_Count*/ / MIN(ColCountsByTable.Column_Count) /*Column_Count*/ AS MatchPercent,
                      DENSE_RANK() OVER(ORDER BY c.TABLE_CATALOG, c.TABLE_SCHEMA, c.TABLE_NAME) TableNameRankInDB,
                      c.TABLE_CATALOG AS DatabaseName,
                      c.TABLE_SCHEMA AS SchemaName,
                      c.TABLE_NAME AS TableName,
                   t.TABLE_TYPE AS TableType,
                      MIN(ColCountsByTable.Column_Count) AS ColumnCount,
                      c2.TABLE_CATALOG AS MatchingDatabaseName,
                      c2.TABLE_SCHEMA AS MatchingSchemaName,
                      c2.TABLE_NAME AS MatchingTableName,
                   t2.TABLE_TYPE AS MatchingTableType,
                      COUNT(c2.COLUMN_NAME) AS MatchingColumnCount
                FROM INFORMATION_SCHEMA.TABLES t
                      INNER JOIN INFORMATION_SCHEMA.COLUMNS c
                            ON t.TABLE_CATALOG = c.TABLE_CATALOG
                                  AND t.TABLE_SCHEMA = c.TABLE_SCHEMA
                                  AND t.TABLE_NAME = c.TABLE_NAME
                      INNER JOIN ColCountsByTable
                            ON t.TABLE_CATALOG = ColCountsByTable.TABLE_CATALOG
                                  AND t.TABLE_SCHEMA = ColCountsByTable.TABLE_SCHEMA
                                  AND t.TABLE_NAME = ColCountsByTable.TABLE_NAME
                      LEFT OUTER JOIN INFORMATION_SCHEMA.COLUMNS c2
                            ON t.TABLE_NAME != c2.TABLE_NAME
                                  AND c.COLUMN_NAME = c2.COLUMN_NAME
                      LEFT JOIN INFORMATION_SCHEMA.TABLES t2
                            ON c2.TABLE_NAME = t2.TABLE_NAME"

        
        $sqlWhere = "
                WHERE "

        
        $sqlGroupBy = "
                GROUP BY
                      c.TABLE_CATALOG,
                      c.TABLE_SCHEMA,
                      c.TABLE_NAME,
                   t.TABLE_TYPE,
                      c2.TABLE_CATALOG,
                      c2.TABLE_SCHEMA,
                      c2.TABLE_NAME,
                   t2.TABLE_TYPE "

        
        $sqlHaving = "
                HAVING
                    /*Match_Percent should be greater than 0 at minimum!*/
                    "

        
        $sqlOrderBy = "
                ORDER BY
                      MatchPercent DESC"

        
        
        $sql = ''
        $wherearray = @()
        
        if ($ExcludeViews) {
            $wherearray += " (t.TABLE_TYPE <> 'VIEW' AND t2.TABLE_TYPE <> 'VIEW') "
        }
        
        if ($SchemaName) {
            $wherearray += (" (c.TABLE_SCHEMA = '{0}') " -f $SchemaName.Replace("'", "''")) #Replace single quotes with two single quotes!
        }
        
        if ($TableName) {
            $wherearray += (" (c.TABLE_NAME = '{0}') " -f $TableName.Replace("'", "''")) #Replace single quotes with two single quotes!
            
        }
        
        if ($wherearray.length -gt 0) {
            $sqlWhere = "$sqlWhere " + ($wherearray -join " AND ")
        }
        else {
            $sqlWhere = ""
        }
        
        
        $matchThreshold = 0
        if ($MatchPercentThreshold) {
            $matchThreshold = $MatchPercentThreshold
        }
        else {
            $matchThreshold = 0
        }
        
        $sqlHaving += (" (100 * COUNT(c2.COLUMN_NAME) / MIN(ColCountsByTable.Column_Count) >= {0}) " -f $matchThreshold)
        
        
        
        $sql = "$sqlSelect $sqlWhere $sqlGroupBy $sqlHaving $sqlOrderBy"
        
        Write-Message -Level Debug -Message $sql
        
    }
    
    process {
        foreach ($Instance in $SqlInstance) {
            
            try {
                $server = Connect-SqlInstance -SqlInstance $instance -SqlCredential $SqlCredential -MinimumVersion 9
            }
            catch {
                Stop-Function -Message "Failure" -Category ConnectionError -ErrorRecord $_ -Target $instance -Continue
            }
            
            
            #Use IsAccessible instead of Status -eq 'normal' because databases that are on readable secondaries for AG or mirroring replicas will cause errors to be thrown
            if ($IncludeSystemDatabases) {
                $dbs = $server.Databases | Where-Object { $_.IsAccessible -eq $true }
            }
            else {
                $dbs = $server.Databases | Where-Object { $_.IsAccessible -eq $true -and $_.IsSystemObject -eq $false }
            }
            
            if ($Database) {
                $dbs = $server.Databases | Where-Object Name -In $Database
            }
            
            if ($ExcludeDatabase) {
                $dbs = $dbs | Where-Object Name -NotIn $ExcludeDatabase
            }
            
            
            $totalCount = 0
            $dbCount = $dbs.count
            foreach ($db in $dbs) {
                
                Write-Message -Level Verbose -Message "Searching on database $db"
                $rows = $db.Query($sql)
                
                foreach ($row in $rows) {
                    [PSCustomObject]@{
                        ComputerName       = $server.NetName
                        InstanceName       = $server.ServiceName
                        SqlInstance       = $server.DomainInstanceName
                        Table           = "$($row.DatabaseName).$($row.SchemaName).$($row.TableName)"
                        MatchingTable       = "$($row.MatchingDatabaseName).$($row.MatchingSchemaName).$($row.MatchingTableName)"
                        MatchPercent       = $row.MatchPercent
                        OriginalDatabaseName = $row.DatabaseName
                        OriginalSchemaName = $row.SchemaName
                        OriginalTableName  = $row.TableName
                        OriginalTableNameRankInDB = $row.TableNameRankInDB
                        OriginalTableType  = $row.TableType
                        OriginalColumnCount = $row.ColumnCount
                        MatchingDatabaseName = $row.MatchingDatabaseName
                        MatchingSchemaName = $row.MatchingSchemaName
                        MatchingTableName  = $row.MatchingTableName
                        MatchingTableType  = $row.MatchingTableType
                        MatchingColumnCount = $row.MatchingColumnCount
                    }
                }
                
                $vwCount = $vwCount + $rows.Count
                $totalCount = $totalCount + $rows.Count
                $everyServerVwCount = $everyServerVwCount + $rows.Count
                
                Write-Message -Level Verbose -Message "Found $vwCount tables/views in $db"
            }
            
            Write-Message -Level Verbose -Message "Found $totalCount total tables/views in $dbCount databases"
        }
    }
    end {
        Write-Message -Level Verbose -Message "Found $everyServerVwCount total tables/views"
    }
}