# commands.ps1

function Export-EBBook
{
<#
    .SYNOPSIS
        Exports pages and images into an EPUB ebook.
     
    .DESCRIPTION
        Collects the page and image items it receives (by parameter or from the pipeline),
        writes the EPUB folder layout into a temporary directory
        (mimetype, META-INF/container.xml, OEBPS/content.opf, OEBPS/toc.ncx,
        OEBPS/Styles/Style.css, one XHTML file per page and the image files),
        compresses that directory and stores the result under the resolved .epub path.
        Pages are ordered by their Index property; each page becomes "Chapter <n>"
        in the table of contents.
     
    .PARAMETER Path
        The path to export to.
        May be an existing folder (the file is then named after -Name) or a file path.
        Will ignore the name if an explicit filename was specified.
        The ".epub" extension is appended if missing.
         
    .PARAMETER Name
        The name (title) of the ebook. Will also be used for the filename if a path to a folder was specified.
     
    .PARAMETER Author
        The author to set for the ebook. Defaults to the current user name.
     
    .PARAMETER Publisher
        The publisher of the ebook. Defaults to the current user name.
     
    .PARAMETER CssData
        Custom CSS to use to style the ebook.
        Allows you to tune how the ebook is styled.
        When omitted, the module's data\Common.css is used.
     
    .PARAMETER Page
        The items to compile into an ebook.
        Items whose Type is "Page" become chapters, items whose Type is "Image" are stored as images.
     
    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsIndexPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory | Export-EBBook -Path . -Name ads-best-practices.epub -Author "Friedrich Weinmann" -Publisher "Infernal Press"
     
        Compiles an ebook out of the Active Directory Best Practices.
#>

    [CmdletBinding()]
    param (
        [PsfValidateScript({ Resolve-PSFPath -Path $args[0] -Provider FileSystem -SingleItem -NewChild }, ErrorMessage = "Folder to place the file in must exist!")]
        [string]
        $Path = ".",
        
        [string]
        $Name = "New Book",
        
        [string]
        $Author = $env:USERNAME,
        
        [string]
        $Publisher = $env:USERNAME,
        
        [string]
        $CssData,
        
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        [EbookBuilder.Item[]]
        $Page
    )
    
    begin
    {
        #region Helper functions
        # Writes $Text as UTF-8 without BOM to the file $Path below the folder $Root.
        function Write-File
        {
            [CmdletBinding()]
            param (
                [System.IO.DirectoryInfo]
                $Root,
                
                [string]
                $Path,
                
                [string]
                $Text
            )
            
            $targetFile = Resolve-PSFPath -Path (Join-Path $Root.FullName $Path) -NewChild
            Write-PSFMessage -Level SomewhatVerbose -Message "Writing file: $($Path)"
            $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
            [System.IO.File]::WriteAllText($targetFile, $Text, $utf8NoBom)
        }
        
        # Escapes the XML special characters (& < > " ') so user-supplied text cannot break the markup.
        function ConvertTo-XmlText
        {
            [CmdletBinding()]
            param (
                [string]
                $Text
            )
            
            [System.Security.SecurityElement]::Escape($Text)
        }
        
        # Maps the extension of an image file name to the media type declared in the manifest.
        # Unknown extensions fall back to JPEG, which is what was previously declared for every image.
        function Get-ImageMediaType
        {
            [CmdletBinding()]
            param (
                [string]
                $FileName
            )
            
            $extension = [System.IO.Path]::GetExtension($FileName).TrimStart('.').ToLowerInvariant()
            switch ($extension)
            {
                'png' { 'image/png' }
                'gif' { 'image/gif' }
                'svg' { 'image/svg+xml' }
                'webp' { 'image/webp' }
                default { 'image/jpeg' }
            }
        }
        
        # Builds the manifest <item> lines for all pages (one line per page, joined by newline).
        function ConvertTo-ManifestPageData
        {
            [CmdletBinding()]
            param (
                $Pages
            )
            
            $lines = foreach ($entry in $Pages)
            {
                ' <item id="{0}" href="Text/{0}" media-type="application/xhtml+xml"/>' -f $entry.EbookFileName
            }
            $lines -join "`n"
        }
        
        # Builds the manifest <item> lines for all images (one line per image, joined by newline).
        function ConvertTo-ManifestImageData
        {
            [CmdletBinding()]
            param (
                $Images
            )
            
            $lines = foreach ($entry in $Images)
            {
                ' <item id="{0}" href="Images/{1}" media-type="{2}"/>' -f $entry.ImageID, $entry.FileName, (Get-ImageMediaType -FileName $entry.FileName)
            }
            $lines -join "`n"
        }
        #endregion Helper functions
        
        #region Prepare Resources
        $resolvedPath = Resolve-PSFPath -Path $Path -Provider FileSystem -SingleItem -NewChild
        if (Test-Path $resolvedPath)
        {
            # An existing folder was specified: the book name becomes the file name
            if ((Get-Item $resolvedPath).PSIsContainer) { $resolvedPath = Join-Path $resolvedPath $Name }
        }
        if ($resolvedPath -notlike "*.epub") { $resolvedPath += ".epub" }
        # The archive is first created with a .zip extension and renamed to .epub afterwards
        $zipPath = $resolvedPath -replace 'epub$', 'zip'
        $cssContent = $CssData
        if (-not $cssContent)
        {
            $cssContent = [System.IO.File]::ReadAllText((Resolve-Path "$($script:ModuleRoot)\data\Common.css"), [System.Text.Encoding]::UTF8)
        }
        # Lists instead of arrays: appending with += copies the whole array on every item
        $pages = New-Object 'System.Collections.Generic.List[object]'
        $images = New-Object 'System.Collections.Generic.List[object]'
        #endregion Prepare Resources
    }
    process
    {
        #region Process Input items
        foreach ($item in $Page)
        {
            switch ($item.Type)
            {
                "Page" { $pages.Add($item) }
                "Image" { $images.Add($item) }
            }
        }
        #endregion Process Input items
    }
    end
    {
        #region Assign file names and chapter numbers
        # Each page gets a unique file name and a 1-based chapter number in Index order.
        # The counter is maintained here rather than inside a calculated property:
        # "$id++" as a statement emits no value, which left TocIndex empty.
        $tocIndex = 0
        $pages = @(
            foreach ($pageItem in ($pages | Sort-Object Index))
            {
                $tocIndex++
                $pageItem |
                    Add-Member -NotePropertyName EbookFileName -NotePropertyValue ("{0}.xhtml" -f (New-Guid)) -Force -PassThru |
                    Add-Member -NotePropertyName TocIndex -NotePropertyValue $tocIndex -Force -PassThru
            }
        )
        #endregion Assign file names and chapter numbers
        
        $xmlName = ConvertTo-XmlText -Text $Name
        $xmlAuthor = ConvertTo-XmlText -Text $Author
        $xmlPublisher = ConvertTo-XmlText -Text $Publisher
        
        # GetTempPath() instead of $env:TEMP, which is not defined on every platform
        $tempPath = New-Item -Path ([System.IO.Path]::GetTempPath()) -Name "Ebook-$(Get-Random -Maximum 99999 -Minimum 10000)" -ItemType Directory -Force
        
        #region Container
        Write-File -Root $tempPath -Path 'mimetype' -Text 'application/epub+zip'
        $metaPath = New-Item -Path $tempPath.FullName -Name "META-INF" -ItemType Directory
        # The file must be named container.xml - it points readers to the package document
        Write-File -Root $metaPath -Path 'container.xml' -Text @'
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
    <rootfiles>
        <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
'@
        #endregion Container
        
        $oebpsPath = New-Item -Path $tempPath.FullName -Name "OEBPS" -ItemType Directory
        
        #region content.opf
        # Placeholders: {0} publisher, {1} author, {2} title, {3} page manifest, {4} image manifest, {5} spine
        $contentOpfTemplate = @'
<?xml version="1.0" encoding="utf-8"?>
<package version="2.0" unique-identifier="uuid_id" xmlns="http://www.idpf.org/2007/opf">
  <metadata xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
    <dc:publisher>{0}</dc:publisher>
    <dc:language>en</dc:language>
    <dc:creator opf:role="aut" opf:file-as="{1}">{1}</dc:creator>
    <dc:title opf:file-as="{2}">{2}</dc:title>
  </metadata>
  <manifest>
{3}
{4}
    <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
    <item id="style.css" href="Styles/Style.css" media-type="text/css"/>
  </manifest>
  <spine toc="ncx">
{5}
  </spine>
  <guide/>
</package>
'@
        $spineText = @(
            foreach ($pageItem in $pages) { ' <itemref idref="{0}"/>' -f $pageItem.EbookFileName }
        ) -join "`n"
        $contentOpfText = $contentOpfTemplate -f $xmlPublisher, $xmlAuthor, $xmlName, (ConvertTo-ManifestPageData -Pages $pages), (ConvertTo-ManifestImageData -Images $images), $spineText
        Write-File -Root $oebpsPath -Path 'content.opf' -Text $contentOpfText
        #endregion content.opf
        
        #region TOC.ncx
        # Placeholders: {0} chapter number, {1} page file name
        $navPointTemplate = @'
    <navPoint id="navPoint-{0}" playOrder="{0}">
      <navLabel>
        <text>Chapter {0}</text>
      </navLabel>
      <content src="Text/{1}"/>
    </navPoint>
'@
        $bookMarkText = @(
            foreach ($pageItem in $pages) { $navPointTemplate -f $pageItem.TocIndex, $pageItem.EbookFileName }
        ) -join "`n"
        
        # Placeholders: {0} unique id, {1} title, {2} navigation points
        $tocNcxTemplate = @'
<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
 "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"><ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">
  <head>
    <meta content="{0}" name="dtb:uid"/>
    <meta content="1" name="dtb:depth"/>
    <meta content="0" name="dtb:totalPageCount"/>
    <meta content="0" name="dtb:maxPageNumber"/>
  </head>
  <docTitle>
    <text>{1}</text>
  </docTitle>
  <navMap>
{2}
  </navMap>
</ncx>
'@
        $contentTocNcxText = $tocNcxTemplate -f (New-Guid), $xmlName, $bookMarkText
        Write-File -Root $oebpsPath -Path 'toc.ncx' -Text $contentTocNcxText
        #endregion TOC.ncx
        
        #region Files
        $stylesPath = New-Item -Path $oebpsPath.FullName -Name "Styles" -ItemType Directory
        Write-File -Root $stylesPath -Path 'Style.css' -Text $cssContent
        
        # Placeholders: {0} title, {1} page content (inserted as-is, it already is markup)
        $pageTemplate = @'
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>{0}</title>
  <meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/>
  <link href="../Styles/Style.css" type="text/css" rel="stylesheet"/>
</head>
<body>
{1}
</body>
</html>
'@
        $textPath = New-Item -Path $oebpsPath.FullName -Name 'Text' -ItemType Directory
        foreach ($pageItem in $pages)
        {
            $pageText = $pageTemplate -f $xmlName, $pageItem.Content
            Write-File -Root $textPath -Path $pageItem.EbookFileName -Text $pageText
        }
        #endregion Files
        
        #region Images
        if ($images.Count -gt 0)
        {
            $imagesPath = New-Item -Path $oebpsPath.FullName -Name 'Images' -ItemType Directory
            
            foreach ($image in $images)
            {
                $targetPath = Join-Path $imagesPath.FullName $image.FileName
                [System.IO.File]::WriteAllBytes($targetPath, $image.Data)
            }
        }
        #endregion Images
        
        #region Package
        Get-ChildItem -Path $tempPath.FullName | Compress-Archive -DestinationPath $zipPath
        Rename-Item -Path $zipPath -NewName (Split-Path $resolvedPath -Leaf)
        Remove-Item -Path $tempPath.FullName -Recurse -Force
        #endregion Package
    }
}

function Read-EBMicrosoftDocsIndexPage
{
<#
    .SYNOPSIS
        Converts an index page of a Microsoft Docs into a book.
     
    .DESCRIPTION
        Converts an index page of a Microsoft Docs into a book.
        Reads the index page itself, then resolves every link (<a href="...">) found
        in its content relative to the index page's address and reads each linked page.
        All items produced by Read-EBMicrosoftDocsPage (images and pages) are passed through.
        Links that consist only of a fragment ("#...") are skipped.
     
    .PARAMETER Url
        The Url to the index page.
     
    .PARAMETER StartIndex
        Start Index the pages will begin with.
        Index is what Export-EBBook will use to determine page order.
        The index page receives this value, each linked page the next higher one.
     
    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsIndexPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory
     
        Parses the Active Directory Security Best Practices into page and image objects.
#>

    [CmdletBinding()]
    Param (
        [string]
        $Url,
        
        [int]
        $StartIndex = 0
    )
    
    begin
    {
        $index = $StartIndex
    }
    process
    {
        # Read-EBMicrosoftDocsPage emits the images of a page as well as the page object,
        # so the result is collected and the page picked out explicitly before reading its properties.
        $indexItems = @(Read-EBMicrosoftDocsPage -Url $Url -StartIndex $index)
        $indexItems
        $index++
        
        $indexPage = $indexItems | Where-Object { $_ -is [EbookBuilder.Page] } | Select-Object -First 1
        if (-not $indexPage)
        {
            Write-Warning "No page could be read from $Url, unable to resolve its links."
            return
        }
        
        $links = $indexPage.Content | Select-String '<a href="(.*?)"' -AllMatches | Select-Object -ExpandProperty Matches | ForEach-Object { $_.Groups[1].Value }
        # Split-Path may return backslashes, the address needs forward slashes
        $basePath = (Split-Path $indexPage.SourceName) -replace "\\", "/"
        
        foreach ($link in $links)
        {
            # A fragment-only link points into the current page, not to another document
            if ($link -like '#*') { continue }
            
            # Each leading "../" moves the base address one level up
            $targetBase = $basePath
            $relativeLink = $link
            while ($relativeLink -like "../*")
            {
                $targetBase = (Split-Path $targetBase) -replace "\\", "/"
                $relativeLink = $relativeLink -replace '^\.\./', ''
            }
            Read-EBMicrosoftDocsPage -Url ("{0}/{1}" -f $targetBase, $relativeLink) -StartIndex $index
            $index++
        }
    }
}


function Read-EBMicrosoftDocsPage
{
<#
    .SYNOPSIS
        Parses a web document from the Microsoft documents.
     
    .DESCRIPTION
        Parses a web document from the Microsoft documents.
        Downloads the page, extracts the content of its <main> element, reads title and
        source file from the <h1> heading and the text between the "<content>" comment markers.
        Every <img> found in the content is downloaded and emitted as an EbookBuilder.Image object;
        its reference in the content is rewritten to "../Images/<file name>".
        Finally the page itself is emitted as an EbookBuilder.Page object.
        Documents without a <main> element or without content markers are skipped with a warning.
     
    .PARAMETER Url
        The url of the website to parse.
     
    .PARAMETER StartIndex
        The index of the page. Used for sorting the pages when building the ebook.
        Incremented for each page emitted by the same call.
     
    .EXAMPLE
        PS C:\> Read-EBMicrosoftDocsPage -Url https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/best-practices-for-securing-active-directory
     
        Parses the file of the specified link and converts it into a page.
#>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        [string[]]
        $Url,
        
        [int]
        $StartIndex = 1
    )
    
    begin
    {
        $index = $StartIndex
        # One client for the whole call, disposed in the end block
        $webClient = New-Object System.Net.WebClient
    }
    process
    {
        foreach ($weblink in $Url)
        {
            $data = Invoke-WebRequest -UseBasicParsing -Uri $weblink
            
            #region Extract title and text
            $mainMatch = $data.RawContent | Select-String "(?ms)<main.*?>(.*?)</main>"
            if (-not $mainMatch)
            {
                Write-Warning "No <main> element found in $weblink, skipping."
                continue
            }
            $main = $mainMatch.Matches[0].Groups[1].Value
            
            $source = $null
            $title = ''
            $headerMatch = $main | Select-String '<h1.*?sourceFile="(.*?)".*?>(.*?)</h1>'
            if ($headerMatch)
            {
                $source = $headerMatch.Matches[0].Groups[1].Value
                $title = $headerMatch.Matches[0].Groups[2].Value
            }
            else { Write-Warning "No heading with source file information found in $weblink." }
            
            $textMatch = $main | Select-String '(?ms)<!-- <content> -->(.*?)<!-- </content> -->'
            if (-not $textMatch)
            {
                Write-Warning "No content section found in $weblink, skipping."
                continue
            }
            $text = $textMatch.Matches[0].Groups[1].Value.Trim()
            $content = "<h1>{0}</h1> {1}" -f $title, $text
            #endregion Extract title and text
            
            #region Download images
            $pageUri = [System.Uri]$weblink
            foreach ($imageMatch in ($content | Select-String '(<img.*?src="(.*?)".*?alt="(.*?)".*?>)' -AllMatches).Matches)
            {
                $relativeImagePath = $imageMatch.Groups[2].Value
                $imageName = $imageMatch.Groups[3].Value
                
                try
                {
                    # Resolving against the page address handles plain relative paths, "../" and absolute links alike
                    $imageUri = New-Object System.Uri($pageUri, $relativeImagePath)
                    $imageData = $webClient.DownloadData($imageUri)
                }
                catch
                {
                    Write-Warning "Failed to download image '$relativeImagePath' of $($weblink): $_"
                    continue
                }
                
                $image = New-Object EbookBuilder.Image -Property @{
                    Data = $imageData
                    Name = $imageName
                    TimeCreated = Get-Date
                    # Taken from the path only, so query strings or dots in the host name cannot leak into it
                    Extension = [System.IO.Path]::GetExtension($imageUri.AbsolutePath).TrimStart('.')
                    MetaData = @{ WebLink = $imageUri.AbsoluteUri }
                }
                $image
                $content = $content -replace ([regex]::Escape($relativeImagePath)), "../Images/$($image.FileName)"
            }
            #endregion Download images
            
            New-Object EbookBuilder.Page -Property @{
                Index = $index++
                Name  = $title
                Content = $content
                SourceName = $weblink
                TimeCreated = Get-Date
                MetaData = @{ GithubPath = $source }
            }
        }
    }
    end
    {
        if ($webClient) { $webClient.Dispose() }
    }
}