# public/ConvertFrom-IngredientText.ps1

function ConvertFrom-IngredientText {
    <#
    .SYNOPSIS
    Parses free-form ingredient text into Quantity / Unit / Item records.

    .DESCRIPTION
    The text is cleaned up (zero-width characters, stray mojibake prefixes in
    front of fraction glyphs, curly quotes, en/em dashes, runs of blanks),
    split into non-empty lines, and each line is parsed as:

        [quantity] [(parenthetical)] [unit] item

    Quantities may be whole or decimal numbers ("2", "1.5"), fractions
    ("1/2"), mixed numbers ("1 1/2", "1-1/2", "1½") or ranges ("2-3",
    "2 - 3"). Unicode fraction glyphs are rewritten as ASCII fractions.
    A parenthetical directly after the quantity, such as "(14 oz)", is kept
    at the front of the item. Lines whose item ends up empty are skipped.

    .PARAMETER Text
    The raw ingredient text, one ingredient per line.

    .OUTPUTS
    One PSCustomObject per parsed line with the string properties Quantity,
    Unit, Item and Original (the cleaned-up line before fraction glyphs were
    rewritten). Nothing is emitted for empty input.

    .EXAMPLE
    ConvertFrom-IngredientText "1 1/2 cups flour`n2 tbsp. cocoa powder"
    #>
    [CmdletBinding()]
    param(
        [string]$Text
    )

    if (-not $Text) { return @() }

    # ------------------------------------------------------------
    # 1) Normalize encoding junk (mojibake) + whitespace
    # ------------------------------------------------------------
    # Every non-ASCII character below is written as a \uXXXX regex escape or
    # a [char] code so the script does not depend on the encoding the .ps1
    # file happens to be saved/read with.

    # Regex class of the supported fraction glyphs:
    # 1/4 1/2 3/4 (U+00BC-U+00BE), 1/3 2/3 (U+2153, U+2154),
    # 1/8 3/8 5/8 7/8 (U+215B-U+215E).
    $fractionClass = '[\u00BC-\u00BE\u2153\u2154\u215B-\u215E]'

    # Glyph code point -> ASCII replacement.
    $fractionGlyphs = @(
        @(0x00BC, '1/4'), @(0x00BD, '1/2'), @(0x00BE, '3/4'),
        @(0x2153, '1/3'), @(0x2154, '2/3'),
        @(0x215B, '1/8'), @(0x215C, '3/8'), @(0x215D, '5/8'), @(0x215E, '7/8')
    )

    $t = $Text

    # Remove zero-width garbage
    $t = $t -replace '[\u200B-\u200D\uFEFF]', ''

    # Drop the stray prefixes that mis-decoded text leaves in front of a
    # fraction glyph: one or more U+00C2 characters, or the pair U+0393 U+00F6.
    $t = $t -replace ('(?:\u00C2+|\u0393\u00F6)(?=' + $fractionClass + ')'), ''

    # Normalize curly quotes / dashes. The quotes must be escapes: PowerShell
    # accepts U+201C/U+201D as string delimiters, so writing them literally
    # inside a double-quoted pattern does not produce the intended class.
    $t = $t -replace '[\u201C\u201D]', '"'
    $t = $t -replace '[\u2018\u2019]', "'"
    $t = $t -replace '[\u2013\u2014]', '-'

    # Normalize newlines + tabs/spaces (including non-breaking spaces)
    $t = $t -replace "\r\n?", "`n"
    $t = $t -replace '[\t \u00A0]+', ' '

    $lines = $t -split "`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ }

    # ------------------------------------------------------------
    # 2) Unit dictionary (extend as you like)
    # ------------------------------------------------------------
    # A trailing period is accepted after any unit, so dotted spellings do
    # not need to be listed separately (the existing ones are harmless).
    $unitMap = @(
        "teaspoon","teaspoons","tsp","tsp.","tsps","t",
        "tablespoon","tablespoons","tbsp","tbsp.","tbsps","T",
        "cup","cups",
        "pint","pints","pt","pt.","pts",
        "quart","quarts","qt","qt.","qts",
        "gallon","gallons","gal","gal.","gals",
        "ml","milliliter","milliliters",
        "l","liter","liters",
        "g","gram","grams",
        "kg","kilogram","kilograms",
        "oz","ounce","ounces",
        "lb","lbs","pound","pounds",
        "pinch","pinches",
        "dash","dashes",
        "clove","cloves",
        "can","cans",
        "package","packages","pkg",
        "stick","sticks",
        "slice","slices",
        "piece","pieces",
        "bunch","bunches"
    )

    # Escape every unit so it is matched literally, longest first.
    $unitAlternation = ($unitMap |
        ForEach-Object { [regex]::Escape($_.TrimEnd('.')) } |
        Select-Object -Unique |
        Sort-Object -Property { $_.Length } -Descending) -join '|'

    # ^(unit1|unit2|...) with an optional trailing period. The unit must not
    # run into another word character or a hyphen, so "tomatoes" and
    # "T-bone" are not split after their first letter.
    $unitRegex = '(?i)^(?<unit>(?:' + $unitAlternation + ')\.?)(?![\w-])'

    # Quantity at the start of a line followed by the rest of the line.
    # Alternatives are ordered most specific first so a range or mixed
    # number is never cut short by the plain-number alternative:
    #   mixed    1 1/2   1-1/2
    #   range    2-3     2 - 3   1.5-2
    #   fraction 1/2
    #   number   2       1.5
    $qtyPattern = '^(?<qty>' +
        '\d+(?:\s+|-)\d+/\d+' + '|' +
        '\d+(?:\.\d+)?\s*-\s*\d+(?:\.\d+)?' + '|' +
        '\d+/\d+' + '|' +
        '\d+(?:\.\d+)?' +
        ')\s+(?<rest>.+)$'

    # ------------------------------------------------------------
    # 3) Helpers
    # ------------------------------------------------------------

    # Rewrites Unicode fraction glyphs as ASCII fractions ("1/2").
    function Convert-FractionGlyphToAscii {
        param([string]$s)

        if (-not $s) { return $s }

        # A glyph glued to a digit is a mixed number: separate the two first,
        # otherwise "1" + glyph-for-1/2 would read as "11/2".
        $s = $s -replace ('(?<=\d)(?=' + $fractionClass + ')'), ' '

        foreach ($pair in $fractionGlyphs) {
            $s = $s.Replace([string][char]$pair[0], $pair[1])
        }
        $s
    }

    # Brings a captured quantity into canonical form.
    function Normalize-Quantity {
        param([string]$q)

        $q = (Convert-FractionGlyphToAscii $q).Trim()

        # Convert "1-1/2" -> "1 1/2"
        $q = $q -replace '^(\d+)-(\d+/\d+)$', '$1 $2'

        # Convert "2 - 3" -> "2-3"
        $q = $q -replace '\s*-\s*', '-'

        $q
    }

    # Splits a leading unit off a string; Unit is '' when there is none.
    function Split-LeadingUnit {
        param([string]$s)

        if ($s -match $unitRegex) {
            return @{
                Unit = $Matches['unit']
                Rest = $s.Substring($Matches[0].Length).Trim()
            }
        }
        @{ Unit = ''; Rest = $s }
    }

    # ------------------------------------------------------------
    # 4) Parse each line into Quantity, Unit, Item
    # ------------------------------------------------------------
    # Example patterns:
    # "1 1/2 cups flour"
    # "1/2 cup milk"
    # "2 tbsp cocoa powder"
    # "salt to taste" (no qty)
    # "1 (14 oz) can tomatoes" (qty + parenthetical + unit + item)
    $results = foreach ($line in $lines) {

        $original = $line
        $line = Convert-FractionGlyphToAscii $line

        $quantity = ''
        $paren    = ''
        $rest     = $line

        if ($line -match $qtyPattern) {
            # Copy both captures before anything else can touch $Matches.
            $qtyText = $Matches['qty']
            $rest    = $Matches['rest'].Trim()

            $quantity = Normalize-Quantity $qtyText

            # If next token is a parenthetical like "(14 oz)" keep it in item
            # but still parse unit after it
            if ($rest -match '^(?<paren>\([^)]+\))\s+(?<after>.+)$') {
                $paren = $Matches['paren'].Trim()
                $rest  = $Matches['after'].Trim()
            }
        }

        # A unit is looked for whether or not a quantity was found
        # (a unit without a quantity is rare).
        $split = Split-LeadingUnit $rest
        $unit  = $split.Unit
        $rest  = $split.Rest

        # Put parenthetical back in front of item if it exists
        $item = if ($paren) { "$paren $rest" } else { $rest }

        # Final cleanup. String interpolation makes this null-safe without
        # the PowerShell 7-only '??' operator.
        $quantity = "$quantity".Trim()
        $unit     = "$unit".Trim()
        $item     = "$item".Trim()

        if (-not $item) { continue }

        [pscustomobject]@{
            Quantity = $quantity
            Unit     = $unit
            Item     = $item
            Original = $original
        }
    }

    @($results)
}