DuplicateFinder.ps1
# One group of byte-identical files (same size and same content hash).
#   FileSize    - length in bytes shared by every file in the group.
#                 64-bit so that files larger than 2 GiB do not overflow.
#   Count       - number of files in the group.
#   ListOfFiles - the file objects, in discovery order.
#   hashOfFiles - the hash string of each file, index-aligned with ListOfFiles.
class UniqueSize {
    [int64]$FileSize
    [int32]$Count
    [System.Collections.Generic.List[System.Object]]$ListOfFiles
    [System.Collections.Generic.List[System.Object]]$hashOfFiles

    UniqueSize() {
        $this.Count = 0
        $this.ListOfFiles = [System.Collections.Generic.List[System.Object]]::new()
        $this.hashOfFiles = [System.Collections.Generic.List[System.Object]]::new()
    }
}

# Overall flow:
#   1. Generate a file list grouped by size and drop sizes that are unique.
#   2. Hash every file inside each remaining size group.
#   3. Group by hash and drop hashes that are unique.
#   4. Emit one UniqueSize object per duplicated hash.
#   5. Hand the list to one of the clean-up modes.

function FindDuplicates() {
    <#
    .DESCRIPTION
    Scans the current directory recursively and returns one UniqueSize object
    for every set of two or more files that share both length and file hash.
    Files that cannot be hashed are skipped with a warning.
    #>

    # Only files whose size occurs more than once can possibly be duplicates,
    # so hashing is limited to those.
    $sizeGroups = Get-ChildItem -File -Recurse |
        Sort-Object { $_.Length } |
        Group-Object -Property Length |
        Where-Object { $_.Count -gt 1 }

    $duplicateGroups = [System.Collections.Generic.List[System.Object]]::new()

    foreach ($sizeGroup in $sizeGroups) {
        # Pair each file with its hash so the two can never get out of step.
        $hashedFiles = foreach ($file in $sizeGroup.Group) {
            $hashInfo = $file | Get-FileHash -ErrorAction SilentlyContinue
            if ($hashInfo -and $hashInfo.Hash) {
                [pscustomobject]@{ File = $file; Hash = $hashInfo.Hash }
            }
            else {
                Write-Warning ("Skipping file that could not be hashed: {0}" -f $file.FullName)
            }
        }
        if (-not $hashedFiles) {
            continue
        }

        # Always build the result per hash. Copying the whole size group would
        # pull in same-sized files with different content.
        $hashGroups = $hashedFiles |
            Group-Object -Property Hash |
            Where-Object { $_.Count -gt 1 }

        foreach ($hashGroup in $hashGroups) {
            $duplicateGroup = [UniqueSize]::new()
            $duplicateGroup.FileSize = $sizeGroup.Group[0].Length
            foreach ($entry in $hashGroup.Group) {
                $duplicateGroup.ListOfFiles.Add($entry.File)
                $duplicateGroup.hashOfFiles.Add($entry.Hash)
            }
            $duplicateGroup.Count = $duplicateGroup.ListOfFiles.Count
            $duplicateGroups.Add($duplicateGroup)
        }
    }

    return $duplicateGroups
}

# Helper: shortened hash (first 20 characters plus "...") used in every listing.
function Get-DuplicateHashPreview() {
    param($DuplicateGroup)
    return $DuplicateGroup.hashOfFiles[0].Substring(0,20)+"..."
}

# Helper: writes the deletion to the pipeline and DeletedFilesLog.txt, then removes the file.
function Remove-DuplicateFile() {
    param($File, $HashPreview)
    "This has ben deleted {0,-20} {1} {2}" -f $File.Name, $HashPreview, $File.DirectoryName |
        Tee-Object -Append DeletedFilesLog.txt
    $File | Remove-Item
}

# Helper: prints the closing banner shared by all clean-up modes.
function Write-DeletionSummary() {
    Write-host "============================================================================================================="
    Write-host " You can find summary of file that has been deleted in DeletedFilesLog.txt"
    Write-host "============================================================================================================="
}

function CleanUpManual() {
    <#
    .DESCRIPTION
    For every duplicate group, lists the files with an index, asks which index
    to keep (empty input keeps index 0) and deletes all the others.
    .PARAMETER ListOfDuplicatesGroups
    UniqueSize objects as returned by FindDuplicates.
    #>
    param($ListOfDuplicatesGroups)

    foreach ($nextDuplicateGroup in $ListOfDuplicatesGroups) {
        $files = @($nextDuplicateGroup.ListOfFiles)
        $hashPreview = Get-DuplicateHashPreview $nextDuplicateGroup

        Write-host "============================================================================================================="
        $FirstLineString = "`n[0] {0,-20} {1} {2}" -f $files[0].Name, $hashPreview, $files[0].DirectoryName
        Write-Host $FirstLineString -ForegroundColor Cyan
        for ($index = 1; $index -lt $files.Count; $index++) {
            " [{3}] {0,-20} {1} {2}" -f $files[$index].Name, $hashPreview, $files[$index].DirectoryName, $index
        }

        Write-Host "`nWhich one would you like to keep? Please specify by Nr of the file. `nIf you do not define whichone and hit enter, de default will be kept"

        $selectionPending = $true
        do {
            try {
                # Empty input converts to 0, which is the default choice.
                [int]$FileNr = Read-Host
                # Reject negative numbers as well as too-large ones: a negative
                # index would silently address the list from its end and could
                # end up deleting every copy.
                if (($FileNr -ge 0) -and ($FileNr -lt $files.Count)) {
                    $selectionPending = $false
                }
                else {
                    "Please enter your choise again!"
                }
            }
            catch {
                "Please enter your choise again!"
            }
        } while ($selectionPending)

        for ($index = 0; $index -lt $files.Count; $index++) {
            if ($index -ne $FileNr) {
                Remove-DuplicateFile $files[$index] $hashPreview
            }
        }
    }

    Write-DeletionSummary
}

function CleanUpAuto() {
    <#
    .DESCRIPTION
    For every duplicate group keeps the first file and deletes all the others
    without asking.
    .PARAMETER ListOfDuplicatesGroups
    UniqueSize objects as returned by FindDuplicates.
    #>
    param($ListOfDuplicatesGroups)

    foreach ($nextDuplicateGroup in $ListOfDuplicatesGroups) {
        $files = @($nextDuplicateGroup.ListOfFiles)
        $hashPreview = Get-DuplicateHashPreview $nextDuplicateGroup

        $FirstLine = "This has ben kept {0,-20} {1} {2}" -f $files[0].Name, $hashPreview, $files[0].DirectoryName
        Write-host $FirstLine -ForegroundColor Cyan

        foreach ($nextFileToDelete in ($files | Select-Object -Skip 1)) {
            Remove-DuplicateFile $nextFileToDelete $hashPreview
        }
    }

    Write-DeletionSummary
}

function CleanUpDefaultDir() {
    <#
    .DESCRIPTION
    Asks for one or more protected directories, then for every duplicate group
    keeps all copies located under a protected directory and deletes the rest.
    When a group has no protected copy, the first file is kept instead.
    .PARAMETER ListOfDuplicatesGroups
    UniqueSize objects as returned by FindDuplicates.
    #>
    param($ListOfDuplicatesGroups)

    $separator = [System.IO.Path]::DirectorySeparatorChar

    # Collect the protected directories as wildcard patterns "<absolute dir><sep>*".
    $ReadOnlyDirectoryList = @()
    $InValidPath = $true
    do {
        Write-Host "`nPlease define read-only directories"
        try {
            $ReadOnlyDirectory = Read-Host
            if (Test-Path -PathType Container $ReadOnlyDirectory) {
                # Resolve to absolute paths: the files are compared by their
                # absolute DirectoryName, so a relative entry would never match.
                foreach ($resolved in (Resolve-Path -Path $ReadOnlyDirectory)) {
                    $fullPath = $resolved.ProviderPath
                    if (-not (Test-Path -LiteralPath $fullPath -PathType Container)) {
                        continue
                    }
                    # Escape wildcard characters so directories such as "a[1]"
                    # are matched literally by -like.
                    $escaped = [System.Management.Automation.WildcardPattern]::Escape($fullPath.TrimEnd('\', '/'))
                    $ReadOnlyDirectoryList += $escaped + $separator + "*"
                }

                $YesNoNotValid = $true
                do {
                    Write-host "Do you want to add another Directory?(y/n)"
                    $Answer = Read-Host
                    if ($Answer -match '^\s*(y|yes)\s*$') {
                        $YesNoNotValid = $false
                    }
                    elseif ($Answer -match '^\s*(n|no)\s*$') {
                        $InValidPath = $false
                        $YesNoNotValid = $false
                    }
                    else {
                        write-host "Pelase answer with y or n"
                    }
                } while ($YesNoNotValid)
            }
            else {
                Write-Host "Invalid Path"
            }
        }
        catch {
            Write-Host "Invalid path."
        }
    } while ($InValidPath)

    $ReadOnlyDirectoryList | Format-Table

    foreach ($nextDuplicateGroup in $ListOfDuplicatesGroups) {
        $files = @($nextDuplicateGroup.ListOfFiles)
        $hashPreview = Get-DuplicateHashPreview $nextDuplicateGroup

        $filesToKeep = @()
        $filesToDelete = @()
        foreach ($nextFile in $files) {
            $directoryWithSeparator = $nextFile.DirectoryName + $separator
            $isProtected = $false
            foreach ($nextPath in $ReadOnlyDirectoryList) {
                if ($directoryWithSeparator -like $nextPath) {
                    $isProtected = $true
                    break
                }
            }
            if ($isProtected) {
                $filesToKeep += $nextFile
            }
            else {
                $filesToDelete += $nextFile
            }
        }

        # No protected copy in this group: keep the first file in this case.
        if ($filesToKeep.Count -eq 0) {
            $filesToKeep = @($files[0])
            $filesToDelete = @($files | Select-Object -Skip 1)
        }

        foreach ($nextFileToProtect in $filesToKeep) {
            $FirstLine = "This has ben kept {0,-20} {1} {2}" -f $nextFileToProtect.Name, $hashPreview, $nextFileToProtect.DirectoryName
            Write-host $FirstLine -ForegroundColor Cyan
        }
        foreach ($nextFileToDelete in $filesToDelete) {
            Remove-DuplicateFile $nextFileToDelete $hashPreview
        }
    }

    Write-DeletionSummary
}

function Find-FileDuplicates() {
    <#
    .DESCRIPTION
    This function scans all files in the current folder and its sub folders and
    builds a list of duplicates. By default the list is only displayed.
    Use -AfterBehaviour to choose one of the following options to manage files:
    DisplayOnly / Manual / Auto / DefendedDirectory / Save / ReturnObject

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour DisplayOnly
    This is the default behaviour. Script will only scan and display the result.

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour Manual
    After the scan, script will ask which file to keep for every single
    duplication group.

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour Auto
    After the scan, script will delete all duplicates except the first instance.

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour DefendedDirectory
    After the scan, script will delete all duplicates except copies located in a
    protected folder. For a duplication group with no instance in a protected
    folder, the first instance is kept. Script will ask for the protected
    folders after the scan.

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour Save
    After the scan, script will save the result in an XML file.

    .EXAMPLE
    Find-FileDuplicates -AfterBehaviour ReturnObject
    After the scan, script will return the object list that can be used for
    further manipulation.
    #>
    param(
        [ValidateSet('DisplayOnly','Manual','Auto','DefendedDirectory', 'Save', 'ReturnObject')]
        [Parameter(Mandatory=$false, HelpMessage="Option to choose what script will do after file scan.")]
        [string]$AfterBehaviour="DisplayOnly")

    $ListOfDuplicates = FindDuplicates

    switch ($AfterBehaviour) {
        "DisplayOnly" {
            $ListOfDuplicates
        }
        "ReturnObject" {
            return $ListOfDuplicates
        }
        "Save" {
            Write-host "All object has been exported to FileDuplication.xml"
            return $ListOfDuplicates | Export-CliXML FileDuplication.xml
        }
        "Manual" {
            CleanUpManual $ListOfDuplicates
        }
        "Auto" {
            CleanUpAuto $ListOfDuplicates
        }
        "DefendedDirectory" {
            CleanUpDefaultDir $ListOfDuplicates
        }
    }
}