# Export-TechNetContributionToCSV.ps1

<#PSScriptInfo
.VERSION 1.2
 
.GUID f04db573-8ee4-4b45-9b23-2787dc5522f4
 
.AUTHOR Evgenij Smirnov
 
.COMPANYNAME it-pro-berlin.de
 
.COPYRIGHT
 
.TAGS technet forum contribution community
 
.LICENSEURI
 
.PROJECTURI
 
.ICONURI
 
.EXTERNALMODULEDEPENDENCIES
 
.REQUIREDSCRIPTS
 
.EXTERNALSCRIPTDEPENDENCIES
 
.RELEASENOTES
#>


<#
.SYNOPSIS
Reads the TechNet Forum contribution of a given user into CSV which can be processed by whatever BI you have at your disposal - or by "Measure-TechNetContribution.ps1".
 
.DESCRIPTION
 This script navigates through the forum threads of the specified user and reads his or her contributions: any posts, posts proposed as answer and post upvotes. The results are written to a semicolon-delimited CSV file.
 The following data is stored for each post:
 - Kind of post: "question", "reply", "proposed" (post proposed as answer), "answer" or "other"
 - Number of upvotes
 - Forum
 - Subforum
 - Thread title
 - Post time as displayed in the web page
 - Time stamp of the post calculated from the above (relative times such as "5 hours and 23 minutes ago" are converted to an absolute time)
 - thread URL
 
.PARAMETER UserName
 TechNet User you are exporting the information for. The name needs to be spelled exactly as it is on TechNet.
 
.PARAMETER FilePath
 Full path to the CSV output. The file is written with the standard Set-Content/Out-File cmdlets, therefore the folder structure containing the file must already exist.
 If omitted, a file named "my_technet_contribution.csv" will be created in your TEMP folder.
 
.PARAMETER ForumLanguage
 Possible values are "DE" (default) or "EN". Since the "User X's threads" view will display all threads in any language, the only real difference is the time format in the resulting CSV.
 
.PARAMETER MaxPages
 Restricts the number of pages (most recent threads are read first) that will be read from the TechNet Forum webpage.
 
.EXAMPLE
 Export-TechNetContributionToCSV -UserName "Ken Myer"
 
 Reads all threads that contain a post by Ken Myer
 
.EXAMPLE
 Export-TechNetContributionToCSV -UserName "Ken Myer" -FilePath c:\temp\kens_technet.csv -MaxPages 20
 
 Reads the last 20 pages of Ken's threads (i.e. 400 threads altogether) into the file specified.
 
#>


[CmdletBinding()]
Param(
    # Script parameters. Only -UserName is mandatory; every other parameter has a default.
    # TechNet user whose posts are exported; compared against each post's author with -like.
    [Parameter(Mandatory = $true, Position = 1, valueFromPipeline = $false)][string]$UserName
    # Target CSV file; defaults to a fixed file name inside the TEMP folder.
   ,[Parameter(Mandatory = $false, valueFromPipeline = $false)][string]$FilePath = "$($env:TEMP)\my_technet_contribution.csv"
    # Selects the language-specific start URL, "next page" link text, relative-time words and date format.
   ,[Parameter(Mandatory = $false)][ValidateSet("DE","EN")][string]$ForumLanguage = "DE"
    # Upper bound on the number of thread-list pages to read; the default is effectively "no limit".
   ,[Parameter(Mandatory = $false)][int]$MaxPages = 1000000
)


# Language-specific lookup tables, keyed by the -ForumLanguage value ("DE" or "EN").
$language = $ForumLanguage

# System.Web provides [System.Web.HttpUtility], used for URL-encoding the user name and
# URL-decoding author names. Add-Type replaces the obsolete
# [Reflection.Assembly]::LoadWithPartialName call.
Add-Type -AssemblyName System.Web

# Base URL of the "threads by user" list; the URL-encoded user name is appended to it.
$start_uris = @{
    "DE" = "https://social.technet.microsoft.com/Forums/de-de/user/threads?user="
    "EN" = "https://social.technet.microsoft.com/Forums/en-us/user/threads?user="
}

# Text the "next page" link of the thread list starts with.
$next_tags = @{
    "DE" = "Weiter"
    "EN" = "Next"
}

# String that terminates the user name inside a post's profile-card HTML.
$user_stops = @{
    "DE" = "`""
    "EN" = "&quot;"
}

# Word prefixes used to recognise hours and minutes in relative post times.
$ixs_hours = @{
    "DE" = "Stunde"
    "EN" = "hour"
}
$ixs_minutes = @{
    "DE" = "Minute"
    "EN" = "minute"
}

# Format of the calculated TimeStamp column.
$date_formats = @{
    "DE" = "dd.MM.yyyy HH:mm:ss"
    "EN" = "yyyy-MM-dd HH:mm:ss"
}

# Build the thread-list URL for the requested user and probe it once, so that a wrong
# URL or missing connectivity is reported before any output is written.
$start_uri = "$($start_uris."$language")$([System.Web.HttpUtility]::URLEncode($UserName))"
try {
    # Only success or failure matters here; the response itself is not used.
    $null = Invoke-WebRequest -Uri $start_uri
} catch {
    Write-Host "Could not get threads for user '$UserName' from TechNet.`r`nCheck URL and network connectivity for $start_uri" -ForegroundColor Red
    # Exit with a non-zero code so that callers can detect the failure.
    exit 1
}

# Marker that precedes the URL-encoded author name inside a post's profile-card HTML.
$user_id = "threads?user="

# Pick the entries for the selected forum language from the lookup tables.
$next_tag    = $next_tags.$language
$user_stop   = $user_stops.$language
$date_format = $date_formats.$language
$ix_hours    = $ixs_hours.$language
$ix_minutes  = $ixs_minutes.$language

# Crawl state: current list page URL, rows written so far (for de-duplication),
# and the page / thread counters shown in the progress display.
$list_uri = $start_uri
$replies  = @()
$ipage = $ithr = 0

# Start the output file with the CSV header line (replaces any existing file).
Set-Content -Path $FilePath -Value "MsgKind;Votes;Forum;Subforum;Topic;Date;TimeStamp;ThreadURL" -Encoding UTF8
# Main crawl loop: walks the user's thread list page by page, opens every listed thread
# and appends one CSV row to $FilePath for each post whose author matches $UserName.
# The loop ends when a page has no "next" link, when $MaxPages pages have been read,
# or when a list page cannot be downloaded.
do {
    $ipage++
    Write-Progress -Activity "PAGE $ipage ==> $list_uri"
    try {
        $page = Invoke-WebRequest -Uri $list_uri
    } catch {
        # Without this guard the previous page would stay in $page and be processed again.
        Write-Warning "Could not read thread list page $ipage ($list_uri): $($_.Exception.Message)"
        break
    }

    $html = $page.ParsedHtml
    foreach ($snippet in $html.all.tags("DIV")) {
        # Only DIVs with the class "threadsnippet" describe a thread.
        if (-not $snippet.className) { continue }
        if ($snippet.className.Trim() -ne "threadsnippet") { continue }

        $ithr++

        # Reset the per-thread values so nothing carries over from the previous thread.
        $thread_topic = ""
        $thread_link = $null
        $thread_forum = ""
        $thread_subforum = ""

        # The H3 heading holds the thread title and the link to the thread.
        foreach ($head in $snippet.all.tags("H3")) {
            $thread_topic = $head.innerText.Trim()
            foreach ($link in $head.all.tags("A")) {
                $thread_link = $link.href
            }
        }

        # Forum and subforum names are taken from the "EyebrowElement" DIVs.
        foreach ($div in $snippet.all.tags("DIV")) {
            if (-not $div.className) { continue }
            if ($div.className.Trim() -eq "EyebrowElement forumBreadcrumb") {
                $thread_subforum = $div.innerText.Trim()
            }
            if ($div.className.Trim() -eq "EyebrowElement") {
                $thread_forum = $div.innerText.Trim()
            }
        }

        Write-Progress -Activity "PAGE $ipage ==> $list_uri" -Status "THREAD $ithr ==> $thread_subforum | $thread_topic"
        try {
            $thread = (Invoke-WebRequest -Uri $thread_link).ParsedHTML
        } catch {
            # Skip this thread instead of re-reading the previous thread's posts under
            # the wrong title.
            Write-Warning "Skipping thread '$thread_topic' ($thread_link): $($_.Exception.Message)"
            continue
        }

        # Every post is an LI element whose class starts with "message".
        foreach ($ul in $thread.all.tags("LI")) {
            if (-not $ul.className) { continue }
            $xclass = $ul.className.Trim() -replace " ", ""
            if ($xclass -notlike "message*") { continue }

            switch ($xclass) {
                "message"        { $kind = "reply" }
                "messageroot"    { $kind = "question" }
                "messageanswer"  { $kind = "answer" }
                "messagepropose" { $kind = "proposed" }
                default          { $kind = "other" }
            }

            # Per-post defaults. The author placeholder is a value that is not expected
            # to match any real user name.
            $user_votes = "0"
            $user_date = ""
            $xdate = ""
            $user_name = "Th3r35h0u1dN0tB35ucHU53r@T3chN3t"

            foreach ($uldiv in $ul.all.tags("DIV")) {
                if (-not $uldiv.className) { continue }
                $div_class = $uldiv.className.Trim()

                if ($div_class -eq "date") {
                    $user_date = $uldiv.innerText
                    try {
                        $xdate = Get-Date $user_date -Format $date_format
                    } catch {
                        # Not an absolute date: treat it as a relative time ("N hours M minutes ago")
                        # and subtract the amounts found from the current time.
                        $date_parts = $user_date -split " "
                        $hrs = 0
                        $mins = 0
                        # Start at 1: the number belonging to a unit is the word before it.
                        for ($i = 1; $i -lt $date_parts.Count; $i++) {
                            if ($date_parts[$i] -like "$ix_hours*") { $hrs = [int]($date_parts[$i - 1]) }
                            if ($date_parts[$i] -like "$ix_minutes*") { $mins = [int]($date_parts[$i - 1]) }
                        }
                        $xdate = Get-Date ((Get-Date).AddHours(-$hrs).AddMinutes(-$mins)) -Format $date_format
                    }
                }

                if ($div_class -eq "votenumber") {
                    $user_votes = $uldiv.innerText
                }

                if ($div_class -eq "unified-baseball-card-mini") {
                    # The author name sits URL-encoded between $user_id and $user_stop
                    # in the profile card's HTML.
                    $xcard = $uldiv.outerHTML
                    $start_ix = $xcard.IndexOf($user_id)
                    # IndexOf returns -1 when the marker is missing.
                    if ($start_ix -ge 0) {
                        $start_ix += $user_id.Length
                        $stop_ix = $xcard.IndexOf($user_stop, $start_ix)
                        if ($stop_ix -ge $start_ix) {
                            $user_name = $xcard.Substring($start_ix, ($stop_ix - $start_ix))
                            $user_name = [System.Web.HttpUtility]::URLDecode($user_name)
                        }
                    }
                }
            }

            if ($user_name -like $UserName) {
                # Quote every field and double embedded quotes so that titles containing
                # a double quote do not break the CSV row.
                $fields = @($kind, $user_votes, $thread_forum, $thread_subforum, $thread_topic, $user_date, $xdate, $thread_link)
                $quoted = foreach ($field in $fields) { '"' + ([string]$field -replace '"', '""') + '"' }
                $xreply = $quoted -join ';'

                Write-Progress -Activity "PAGE $ipage ==> $list_uri" -Status "THREAD $ithr ==> $thread_subforum | $thread_topic" -CurrentOperation "$kind : $user_votes"
                # Write each distinct row only once.
                if ($replies -notcontains $xreply) {
                    $replies += $xreply
                    $xreply | Out-File $FilePath -Append -Encoding utf8
                }
            }
        }
    }

    # Find the link to the next list page; the HTML-encoded ampersands in its href are decoded.
    $next_link = $page.Links | where { $_.outerText -like "$next_tag*" }
    $list_uri = $next_link.href -replace "&amp;","&"
    if ($list_uri.Count -gt 1) {
        # Several matching links were found; the second one is used, as before.
        $list_uri = $list_uri[1]
    }
} until (!($next_link) -or ($ipage -ge $MaxPages))