functions/Find-DbaSimilarTable.ps1

function Find-DbaSimilarTable {
<#
.SYNOPSIS
Returns all tables/views that are similar in structure by comparing the column names of matching and matched tables/views

.DESCRIPTION
This function can either run against specific databases or all databases searching all/specific tables and views including in system databases.
    Typically one would use this to find for example archive version(s) of a table whose structures are similar.
    This can also be used to find tables/views that are very similar to a given table/view structure to see where a table/view might be used.

    More information can be found here: https://sqljana.wordpress.com/2017/03/31/sql-server-find-tables-with-similar-table-structure/

.PARAMETER SqlInstance
SQLServer name or SMO object representing the SQL Server to connect to. This can be a collection and receive pipeline input

.PARAMETER SqlCredential
PSCredential object to connect as. If not specified, current Windows login will be used.

.PARAMETER Database
The database(s) to process - this list is auto-populated from the server. If unspecified, all databases will be processed.
    Databases that are not accessible are skipped even when named explicitly.

.PARAMETER ExcludeDatabase
The database(s) to exclude - this list is auto-populated from the server

.PARAMETER SchemaName
If you are looking in a specific schema whose table structures are to be used as reference structures, provide the name of the schema.
    If no schema is provided, looks at all schemas

.PARAMETER TableName
If you are looking in a specific table whose structure is to be used as reference structure, provide the name of the table.
    If no table is provided, looks at all tables
    If the table name exists in multiple schemas, all of them would qualify

.PARAMETER ExcludeViews
By default, views are included. Specify this switch to exclude them.
    This excludes views in both matching and matched list

.PARAMETER IncludeSystemDatabases
By default system databases are ignored but you can include them within the search using this parameter

.PARAMETER MatchPercentThreshold
The minimum percentage of column names that should match between the matching and matched objects. Defaults to 0.
    Entries with no matches are eliminated

.PARAMETER Silent
Use this switch to disable any kind of verbose messages

.NOTES
Author: Jana Sattainathan (@SQLJana - http://sqljana.wordpress.com)

Website: https://dbatools.io
Copyright: (C) Chrissy LeMaire, clemaire@gmail.com
License: GNU GPL v3 https://opensource.org/licenses/GPL-3.0

.LINK
https://dbatools.io/Find-DbaSimilarTable

.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01

Searches all user database tables and views for each, returns all tables or views with their matching tables/views and match percent

.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks

Searches AdventureWorks database and lists tables/views and their corresponding matching tables/views with match percent

.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -SchemaName HumanResource

Searches AdventureWorks database and lists tables/views in the HumanResource schema with their corresponding matching tables/views with match percent

.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -SchemaName HumanResource -TableName Employee

Searches AdventureWorks database and lists tables/views in the HumanResource schema and table Employee with its corresponding matching tables/views with match percent

.EXAMPLE
Find-DbaSimilarTable -SqlInstance DEV01 -Database AdventureWorks -MatchPercentThreshold 60

Searches AdventureWorks database and lists all tables/views with its corresponding matching tables/views with match percent greater than or equal to 60

#>

    [CmdletBinding()]
    Param (
        [parameter(Position = 0, Mandatory = $true, ValueFromPipeline = $True)]
        [Alias("ServerInstance", "SqlServer", "SqlServers")]
        [DbaInstanceParameter[]]$SqlInstance,
        [PSCredential]$SqlCredential,
        [Alias("Databases")]
        [object[]]$Database,
        [object[]]$ExcludeDatabase,
        [string]$SchemaName,
        [string]$TableName,
        [switch]$ExcludeViews,
        [switch]$IncludeSystemDatabases,
        [int]$MatchPercentThreshold,
        [switch]$Silent
    )

    begin {
        # Running total of result rows across every instance received from the pipeline.
        $everyServerVwCount = 0

        # The statement is assembled once here and reused for every database in the process block.
        # c/t   = the reference table/view and its columns
        # c2/t2 = any other table/view that has a column with the same name
        $sqlSelect = "WITH ColCountsByTable
                AS
                (
                      SELECT
                            c.TABLE_CATALOG,
                            c.TABLE_SCHEMA,
                            c.TABLE_NAME,
                            COUNT(1) AS Column_Count
                      FROM INFORMATION_SCHEMA.COLUMNS c
                      GROUP BY
                            c.TABLE_CATALOG,
                            c.TABLE_SCHEMA,
                            c.TABLE_NAME
                )
                SELECT
                      100 * COUNT(c2.COLUMN_NAME) /*Matching_Column_Count*/ / MIN(ColCountsByTable.Column_Count) /*Column_Count*/ AS MatchPercent,
                      DENSE_RANK() OVER(ORDER BY c.TABLE_CATALOG, c.TABLE_SCHEMA, c.TABLE_NAME) TableNameRankInDB,
                      c.TABLE_CATALOG AS DatabaseName,
                      c.TABLE_SCHEMA AS SchemaName,
                      c.TABLE_NAME AS TableName,
                      t.TABLE_TYPE AS TableType,
                      MIN(ColCountsByTable.Column_Count) AS ColumnCount,
                      c2.TABLE_CATALOG AS MatchingDatabaseName,
                      c2.TABLE_SCHEMA AS MatchingSchemaName,
                      c2.TABLE_NAME AS MatchingTableName,
                      t2.TABLE_TYPE AS MatchingTableType,
                      COUNT(c2.COLUMN_NAME) AS MatchingColumnCount
                FROM INFORMATION_SCHEMA.TABLES t
                      INNER JOIN INFORMATION_SCHEMA.COLUMNS c
                            ON t.TABLE_CATALOG = c.TABLE_CATALOG
                                  AND t.TABLE_SCHEMA = c.TABLE_SCHEMA
                                  AND t.TABLE_NAME = c.TABLE_NAME
                      INNER JOIN ColCountsByTable
                            ON t.TABLE_CATALOG = ColCountsByTable.TABLE_CATALOG
                                  AND t.TABLE_SCHEMA = ColCountsByTable.TABLE_SCHEMA
                                  AND t.TABLE_NAME = ColCountsByTable.TABLE_NAME
                      LEFT OUTER JOIN INFORMATION_SCHEMA.COLUMNS c2
                            ON t.TABLE_NAME != c2.TABLE_NAME
                                  AND c.COLUMN_NAME = c2.COLUMN_NAME
                      /*Join on the full three-part name: joining on TABLE_NAME alone multiplies rows
                        (and therefore the match counts) when the same name exists in several schemas*/
                      LEFT JOIN INFORMATION_SCHEMA.TABLES t2
                            ON c2.TABLE_CATALOG = t2.TABLE_CATALOG
                                  AND c2.TABLE_SCHEMA = t2.TABLE_SCHEMA
                                  AND c2.TABLE_NAME = t2.TABLE_NAME"

        $sqlGroupBy = "
                GROUP BY
                      c.TABLE_CATALOG,
                      c.TABLE_SCHEMA,
                      c.TABLE_NAME,
                      t.TABLE_TYPE,
                      c2.TABLE_CATALOG,
                      c2.TABLE_SCHEMA,
                      c2.TABLE_NAME,
                      t2.TABLE_TYPE "

        # $MatchPercentThreshold is an [int], so it is 0 when the caller did not supply it.
        # The explicit COUNT(...) > 0 drops the LEFT JOIN groups where nothing matched, which
        # a bare '>= 0' comparison would otherwise let through.
        $sqlHaving = "
                HAVING
                    /*Match_Percent should be greater than 0 at minimum!*/
                    COUNT(c2.COLUMN_NAME) > 0
                    AND (100 * COUNT(c2.COLUMN_NAME) / MIN(ColCountsByTable.Column_Count) >= {0}) " -f $MatchPercentThreshold

        $sqlOrderBy = "
                ORDER BY
                      MatchPercent DESC"

        # Collect the optional filters, then AND them together into a WHERE clause (if any).
        $wherearray = @()

        if ($ExcludeViews) {
            $wherearray += " (t.TABLE_TYPE <> 'VIEW' AND t2.TABLE_TYPE <> 'VIEW') "
        }

        if ($SchemaName) {
            # Double up single quotes so the value cannot break out of the SQL string literal
            $wherearray += (" (c.TABLE_SCHEMA = '{0}') " -f $SchemaName.Replace("'", "''"))
        }

        if ($TableName) {
            # Double up single quotes so the value cannot break out of the SQL string literal
            $wherearray += (" (c.TABLE_NAME = '{0}') " -f $TableName.Replace("'", "''"))
        }

        if ($wherearray.Length -gt 0) {
            $sqlWhere = "
                WHERE  " + ($wherearray -join " AND ")
        }
        else {
            $sqlWhere = ""
        }

        $sql = "$sqlSelect $sqlWhere $sqlGroupBy $sqlHaving $sqlOrderBy"

        Write-Message -Level Debug -Message $sql
    }

    process {
        foreach ($Instance in $SqlInstance) {

            try {
                $server = Connect-SqlInstance -SqlInstance $instance -SqlCredential $SqlCredential -MinimumVersion 9
            }
            catch {
                Stop-Function -Message "Failure" -Category ConnectionError -ErrorRecord $_ -Target $instance -Continue
            }

            #Use IsAccessible instead of Status -eq 'normal' because databases that are on readable secondaries for AG or mirroring replicas will cause errors to be thrown
            if ($Database) {
                # Explicitly named databases (system ones included) are honoured,
                # but the accessibility check still applies to them.
                $dbs = $server.Databases | Where-Object { $_.IsAccessible -eq $true -and $_.Name -in $Database }
            }
            elseif ($IncludeSystemDatabases) {
                $dbs = $server.Databases | Where-Object { $_.IsAccessible -eq $true }
            }
            else {
                $dbs = $server.Databases | Where-Object { $_.IsAccessible -eq $true -and $_.IsSystemObject -eq $false }
            }

            if ($ExcludeDatabase) {
                $dbs = $dbs | Where-Object Name -NotIn $ExcludeDatabase
            }

            $totalCount = 0
            # @() so that zero or one database still yields a reliable count
            $dbCount = @($dbs).Count
            foreach ($db in $dbs) {

                Write-Message -Level Verbose -Message "Searching on database $db"
                $rows = $db.Query($sql)

                foreach ($row in $rows) {
                    [PSCustomObject]@{
                        ComputerName              = $server.NetName
                        InstanceName              = $server.ServiceName
                        SqlInstance               = $server.DomainInstanceName
                        Table                     = "$($row.DatabaseName).$($row.SchemaName).$($row.TableName)"
                        MatchingTable             = "$($row.MatchingDatabaseName).$($row.MatchingSchemaName).$($row.MatchingTableName)"
                        MatchPercent              = $row.MatchPercent
                        OriginalDatabaseName      = $row.DatabaseName
                        OriginalSchemaName        = $row.SchemaName
                        OriginalTableName         = $row.TableName
                        OriginalTableNameRankInDB = $row.TableNameRankInDB
                        OriginalTableType         = $row.TableType
                        OriginalColumnCount       = $row.ColumnCount
                        MatchingDatabaseName      = $row.MatchingDatabaseName
                        MatchingSchemaName        = $row.MatchingSchemaName
                        MatchingTableName         = $row.MatchingTableName
                        MatchingTableType         = $row.MatchingTableType
                        MatchingColumnCount       = $row.MatchingColumnCount
                    }
                }

                # Count for THIS database only; the running totals are kept separately so the
                # per-database message does not report an ever-growing cumulative number.
                $vwCount = @($rows).Count
                $totalCount += $vwCount
                $everyServerVwCount += $vwCount

                Write-Message -Level Verbose -Message "Found $vwCount tables/views in $db"
            }

            Write-Message -Level Verbose -Message "Found $totalCount total tables/views in $dbCount databases"
        }
    }
    end {
        Write-Message -Level Verbose -Message "Found $everyServerVwCount total tables/views"
    }
}