# Functions/Get-SorensenDiceCoefficient.ps1

function Get-SorensenDiceCoefficient {
    <#
        .SYNOPSIS
            Get the Sorensen Dice Coefficient of two strings.
        .DESCRIPTION
            The Sørensen–Dice index is a statistic used for comparing the similarity of two samples.

            The strings are split into bigrams with Get-NGram (-Size 2), the shared bigrams are
            found with Get-Intersection, and the result is

                2 * (number of shared bigrams) / (bigrams in String1 + bigrams in String2)

            If neither string produces any bigrams the formula would be 0 / 0; in that case the
            function returns 1 when the two strings are identical (after case normalization,
            unless -CaseSensitive is used) and 0 otherwise. If only one of the strings produces
            no bigrams, the result is 0.
        .EXAMPLE
            Get-SorensenDiceCoefficient -String1 'night' -String2 'night'
        .EXAMPLE
            Get-SorensenDiceCoefficient -String1 'night' -String2 'Night' -CaseSensitive
        .OUTPUTS
            System.Double
        .LINK
            http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
            https://communary.wordpress.com/
            https://github.com/gravejester/Communary.PASM
        .NOTES
            Author: Øyvind Kallstad
            Date: 03.11.2014
            Version: 1.0
            Dependencies: Get-NGram, Get-Intersection
    #>

    [CmdletBinding()]
    param (
        # First string to compare.
        [Parameter(Position = 0, Mandatory)]
        [ValidateNotNullOrEmpty()]
        [string] $String1,

        # Second string to compare.
        [Parameter(Position = 1, Mandatory)]
        [ValidateNotNullOrEmpty()]
        [string] $String2,

        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter()]
        [switch] $CaseSensitive
    )

    # handle case insensitivity: normalize both inputs once so that the bigrams
    # (and the identity check below) compare equal regardless of original casing
    if (-not($CaseSensitive)) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    $string1Bigrams = Get-NGram -String $String1 -Size 2
    $string2Bigrams = Get-NGram -String $String2 -Size 2

    $string1BigramCount = $string1Bigrams.count
    $string2BigramCount = $string2Bigrams.count
    $totalBigramCount = $string1BigramCount + $string2BigramCount

    # Neither string produced a bigram: the formula would evaluate to 0 / 0 (NaN).
    # Fall back to a direct comparison. -ceq is used because the strings have
    # already been lower-cased when the comparison is meant to be case-insensitive.
    if ($totalBigramCount -eq 0) {
        if ($String1 -ceq $String2) {
            return [double]1
        }
        return [double]0
    }

    # Only one side has bigrams: nothing can be shared, so the coefficient is 0.
    # Returning here also avoids handing an empty collection to Get-Intersection.
    if (($string1BigramCount -eq 0) -or ($string2BigramCount -eq 0)) {
        return [double]0
    }

    $intersections = (Get-Intersection $string1Bigrams $string2Bigrams -CaseSensitive:$CaseSensitive).count
    Write-Output ([double](2 * $intersections) / [double]$totalBigramCount)
}