Functions/GenXdev.Queries.Text/Get-WikipediaSummary.cs
|
// ################################################################################
// Part of PowerShell module : GenXdev.Queries.Text
// Original cmdlet filename  : Get-WikipediaSummary.cs
// Original author           : René Vaessen / GenXdev
// Version                   : 2.1.2025
// ################################################################################
// Copyright (c) René Vaessen / GenXdev
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ################################################################################

using System;
using System.Linq;
using System.Management.Automation;

namespace GenXdev.Queries.Text
{
    /// <summary>
    /// <para type="synopsis">
    /// Retrieves a summary of a topic from Wikipedia.
    /// </para>
    ///
    /// <para type="description">
    /// Queries the Wikipedia API to get a concise summary of the specified topic,
    /// removing parenthetical content for improved readability.
    /// </para>
    ///
    /// <para type="description">
    /// PARAMETERS
    /// </para>
    ///
    /// <para type="description">
    /// -Queries &lt;string[]&gt;<br/>
    /// One or more search terms to look up on Wikipedia.<br/>
    /// - <b>Aliases</b>: q, Name, Text, Query<br/>
    /// - <b>Position</b>: 0<br/>
    /// - <b>Mandatory</b>: true<br/>
    /// - <b>ValueFromPipeline</b>: true<br/>
    /// - <b>ValueFromPipelineByPropertyName</b>: true<br/>
    /// - <b>HelpMessage</b>: The query to perform<br/>
    /// </para>
    ///
    /// <example>
    /// <para>Get a Wikipedia summary for PowerShell</para>
    /// <para>This example retrieves a summary of PowerShell from Wikipedia.</para>
    /// <code>
    /// Get-WikipediaSummary -Queries "PowerShell"
    /// </code>
    /// </example>
    ///
    /// <example>
    /// <para>Get Wikipedia summaries for multiple topics</para>
    /// <para>This example retrieves summaries for PowerShell, TypeScript, and C#.</para>
    /// <code>
    /// wikitxt "PowerShell", "Typescript", "C#"
    /// </code>
    /// </example>
    /// </summary>
    [Cmdlet(VerbsCommon.Get, "WikipediaSummary")]
    [Alias("wikitxt")]
    [OutputType(typeof(string))]
    public class GetWikipediaSummaryCommand : PSGenXdevCmdlet
    {
        /// <summary>
        /// A parenthetical is only stripped when its opening parenthesis starts
        /// before this character offset; later parentheses are left untouched.
        /// </summary>
        private const int MaxParenthesisOffset = 150;

        /// <summary>
        /// One or more search terms to look up on Wikipedia.
        /// </summary>
        [Parameter(
            Mandatory = true,
            Position = 0,
            ValueFromRemainingArguments = false,
            ValueFromPipeline = true,
            ValueFromPipelineByPropertyName = true,
            HelpMessage = "The query to perform")]
        [Alias("q", "Name", "Text", "Query")]
        public string[] Queries { get; set; }

        /// <summary>
        /// Begin processing - no initialization is required for this cmdlet.
        /// </summary>
        protected override void BeginProcessing()
        {
        }

        /// <summary>
        /// Removes the first parenthetical remark from the text when it opens
        /// within the first <see cref="MaxParenthesisOffset"/> characters, then
        /// collapses the double space that the removal typically leaves behind.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <returns>
        /// The cleaned text, or the input unchanged when it is null or empty,
        /// has no early opening parenthesis, or the parenthesis is never closed.
        /// </returns>
        private static string RemoveParentheticalContent(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            // Find the first opening parenthesis
            var open = text.IndexOf('(');

            // IndexOf yields -1 when nothing is found; that case must be
            // handled explicitly, otherwise the searches below would throw.
            if (open < 0 || open >= MaxParenthesisOffset)
            {
                return text;
            }

            // A dangling '(' as the very last character: just drop it
            if (open == text.Length - 1)
            {
                return text.Substring(0, open).Replace("  ", " ");
            }

            // Find the matching closing parenthesis
            var close = text.IndexOf(')', open);

            // Unbalanced parenthesis: leave the text alone rather than gluing
            // the prefix onto a second copy of the whole text.
            if (close < 0)
            {
                return text;
            }

            // Keep everything before '(' and everything after ')'
            var result = text.Substring(0, open) + text.Substring(close + 1);

            // Clean up double spaces (two spaces -> one)
            return result.Replace("  ", " ");
        }

        /// <summary>
        /// Process record - for each query, fetches the intro extract from the
        /// English Wikipedia API and writes the cleaned summary to the pipeline.
        /// Writes a warning when no extract is available and a non-terminating
        /// error when the request or response handling throws.
        /// </summary>
        protected override void ProcessRecord()
        {
            // The helper script blocks do not depend on the query, so they are
            // created once per record instead of once per loop iteration.
            var webRequestScript = ScriptBlock.Create(
                "param($url) Microsoft.PowerShell.Utility\\Invoke-WebRequest -Uri $url -MaximumRedirection 20");
            var jsonParseScript = ScriptBlock.Create(
                "param($content) $content | Microsoft.PowerShell.Utility\\ConvertFrom-Json");

            // Process each query in the array
            foreach (var query in Queries)
            {
                // Log the search operation
                WriteVerbose($"Searching Wikipedia for: {query}");

                // Prepare the URL-encoded query
                var urlPart = Uri.EscapeDataString(query.Replace("-", " "));

                // Construct the Wikipedia API URL
                var url = $"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro=1&explaintext=1&titles={urlPart}";

                try
                {
                    // Fetch the response using PowerShell's Invoke-WebRequest
                    var response = webRequestScript.Invoke(url);

                    // Extract the content from the response
                    var content = response[0].Properties["Content"].Value.ToString();

                    // Parse the JSON response using PowerShell's ConvertFrom-Json
                    var data = jsonParseScript.Invoke(content);

                    // Walk query -> pages -> first page -> extract. Any of these
                    // can be absent (e.g. a page entry without an extract), so
                    // every step is null-tolerant and a missing link ends up in
                    // the "no content" warning below instead of a
                    // NullReferenceException.
                    var root = data.FirstOrDefault();
                    var queryObj = root?.Properties["query"]?.Value as PSObject;
                    var pages = queryObj?.Properties["pages"]?.Value as PSObject;

                    // Take the first page entry (there should be only one)
                    var page = pages?.Properties.FirstOrDefault()?.Value as PSObject;
                    var extract = page?.Properties["extract"]?.Value?.ToString();

                    // Check if content was found
                    if (string.IsNullOrEmpty(extract))
                    {
                        WriteWarning($"No Wikipedia content found for '{query}'");
                        continue;
                    }

                    // Log successful content retrieval
                    WriteVerbose("Found content, cleaning up response");

                    try
                    {
                        // Clean up the content by removing parentheticals
                        var cleanedContent = RemoveParentheticalContent(extract);
                        WriteObject(cleanedContent);
                    }
                    catch (Exception)
                    {
                        // Deliberate best-effort: if cleaning fails, return the raw extract
                        WriteVerbose("Failed to clean content, returning raw extract");
                        WriteObject(extract);
                    }
                }
                catch (Exception ex)
                {
                    // Handle errors by writing a PowerShell error record
                    WriteError(new ErrorRecord(
                        ex,
                        "WikipediaQueryFailed",
                        ErrorCategory.InvalidOperation,
                        query));
                }
            }
        }

        /// <summary>
        /// End processing - no cleanup is required for this cmdlet.
        /// </summary>
        protected override void EndProcessing()
        {
        }
    }
}