src/programFrames/exe21sp.cs
|
// Uses AsmResolver to read embedded script resources from a ps12exe-built exe
// and return the original PowerShell script text. Exposed via the exe21sp PowerShell helper. using System; using System.IO; using System.IO.Compression; using System.Text; using AsmResolver.DotNet; using AsmResolver.PE.File; namespace exe21sp { public static class Extractor { /// <summary> /// Extracts the embedded PowerShell script from a ps12exe-built executable. /// </summary> /// <param name="exePath">Full path to the .exe file.</param> /// <returns> /// For normal ps12exe exes: the original PowerShell script from an embedded resource. /// For TinySharp-compiled exes: a synthesized script that prints the captured output string and, /// if applicable, appends an exit statement with the recorded exit code. /// Returns null if the exe is not a ps12exe output or payload cannot be recovered. /// </returns> public static string ExtractScriptFromExe(string exePath) { if (string.IsNullOrEmpty(exePath) || !File.Exists(exePath)) return null; // First, try the standard program frame: embedded main.par resource. var script = TryExtractFromMainPar(exePath); if (script != null) return script; // Fallback: TinySharp-compiled minimal exe (no main.par). return TryExtractFromTinySharp(exePath); } private static string TryExtractFromMainPar(string exePath) { try { var module = ModuleDefinition.FromFile(exePath); foreach (var resource in module.Resources) { if (!resource.IsEmbedded) continue; string name = object.ReferenceEquals(resource.Name, null) ? null : resource.Name.ToString(); if (string.Equals(name, "main.par", StringComparison.OrdinalIgnoreCase)) { var raw = resource.GetData(); if (raw == null || raw.Length == 0) return null; using (var ms = new MemoryStream(raw)) using (var gzip = new GZipStream(ms, CompressionMode.Decompress)) using (var reader = new StreamReader(gzip, Encoding.UTF8)) { return reader.ReadToEnd(); } } // Fallback: some frames may embed the plain script as a .ps1 resource instead of main.par. if (name != null && name.EndsWith(".ps1", StringComparison.OrdinalIgnoreCase)) { var raw = resource.GetData(); if (raw == null || raw.Length == 0) continue; using (var ms = new MemoryStream(raw)) // Detect encoding from BOM when present; default to UTF-8 without BOM. using (var reader = new StreamReader(ms, Encoding.UTF8, detectEncodingFromByteOrderMarks: true)) { return reader.ReadToEnd(); } } } } catch { // Not a valid .NET module or read error. } return null; } private static string TryExtractFromTinySharp(string exePath) { var peFile = PEFile.FromFile(exePath); // Only treat as TinySharp when the PE is a .NET assembly (has CLR header). // Otherwise native exes (e.g. notepad.exe) would yield garbage from .text. if (peFile.OptionalHeader == null) return null; var clrDir = peFile.OptionalHeader.GetDataDirectory(DataDirectoryIndex.ClrDirectory); if (clrDir.Size == 0 || !clrDir.IsPresentInPE) return null; // From here on we consider this a potential TinySharp exe; layout failures must throw. PESection section = null; foreach (var s in peFile.Sections) { if (!object.ReferenceEquals(s.Name, null) && s.Name.ToString() == ".text") { section = s; break; } } if (section == null) throw new InvalidOperationException("TinySharpNoTextSection"); var size = (uint)Math.Min(section.GetPhysicalSize(), 1024 * 1024); if (size == 0) throw new InvalidOperationException("TinySharpTextSectionEmpty"); var sectionReader = peFile.CreateReaderAtFileOffset(section.Offset, size); var raw = sectionReader.ReadBytes((int)sectionReader.Length); if (raw == null || raw.Length == 0) throw new InvalidOperationException("TinySharpCannotReadText"); // Locate the message string by counting ldc.i4 VA references in the CIL region. // TinySharp patches the message address into every MessageBoxW call site (2× for the // two-path MessageBox build, 1× for console builds), while infrastructure strings // (e.g. VerQueryValueW subBlock path) are referenced only once. The most-referenced // VA that maps to actual file content in .text is therefore the message — no content // heuristics needed. string message = FindMessageByVARefCount(raw, peFile.OptionalHeader.ImageBase, section); if (string.IsNullOrEmpty(message)) throw new InvalidOperationException("TinySharpPayloadNotRecovered"); // TinySharp embeds non-zero exit code as CIL: Ldc_I4 (0x20) + 4-byte LE + Ret (0x2A). Find last such sequence. int exitCode = TryDetectTinySharpExitCode(raw); var builder = new StringBuilder(); var escaped = message.Replace("'", "''"); builder.Append("'").Append(escaped).Append("'"); if (exitCode != 0) builder.Append("\nexit ").Append(exitCode); return builder.ToString(); } /// <summary> /// Scans .text for TinySharp main's trailing CIL: Ldc_I4 (0x20) + 4-byte LE exit code + Ret (0x2A). /// Only scans the first 2KB (CIL region); string data at end of .text could otherwise false-match. /// Returns the last matching exit code, or 0 if not found / not plausible. /// </summary> private static int TryDetectTinySharpExitCode(byte[] raw) { const byte CilLdcI4 = 0x20; const byte CilRet = 0x2A; const int MinPlausible = -32768; const int MaxPlausible = 32767; int scanLen = Math.Min(raw.Length - 6, 2048); if (scanLen < 0) return 0; int lastExit = 0; for (int i = 0; i <= scanLen; i++) { if (raw[i] != CilLdcI4 || raw[i + 5] != CilRet) continue; int code = BitConverter.ToInt32(raw, i + 1); if (code >= MinPlausible && code <= MaxPlausible) lastExit = code; } return lastExit; } private static bool IsPrintableAscii(string s) { foreach (var c in s) if (c < 32 || c > 126) return false; return true; } private static bool IsPrintableUnicode(string s) { foreach (var c in s) if (char.IsControl(c) && c != '\r' && c != '\n' && c != '\t') return false; return true; } /// <summary> /// Scans the first 2 KB of .text (the CIL region) for ldc.i4 operands whose value /// is a VA within the physical file content of .text. Counts how many times each /// such VA appears; the most-referenced one is the message string (TinySharp MessageBox /// patches it at every call site — 2×, whereas infra strings like the VerQueryValueW /// subBlock path appear only 1×). No content heuristics are used. /// </summary> private static string FindMessageByVARefCount(byte[] raw, ulong imageBase, PESection section) { ulong textVABase = imageBase + section.Rva; // Parallel arrays instead of Dictionary<> to avoid requiring extra assembly references. // At most a handful of distinct .text VAs appear as ldc.i4 operands in 2 KB of CIL. const int MaxSlots = 64; uint[] vaKeys = new uint[MaxSlots]; int[] vaCounts = new int[MaxSlots]; int slotCount = 0; int cilEnd = Math.Min(raw.Length - 6, 2048); for (int i = 0; i <= cilEnd; i++) { if (raw[i] != 0x20) continue; // ldc.i4 opcode uint operand = (uint)BitConverter.ToInt32(raw, i + 1); // TinySharp imageBase < 2^32, so the ldc.i4 operand IS the full 32-bit VA. ulong va = (imageBase & 0xFFFFFFFF00000000UL) | (ulong)operand; if (va < textVABase) continue; ulong fileOff = va - textVABase; if (fileOff >= (ulong)raw.Length) continue; // BSS/virtual — no file content // Linear search is fine; < 20 distinct candidates expected. int idx = -1; for (int j = 0; j < slotCount; j++) if (vaKeys[j] == operand) { idx = j; break; } if (idx < 0 && slotCount < MaxSlots) { vaKeys[slotCount] = operand; vaCounts[slotCount] = 1; slotCount++; } else if (idx >= 0) vaCounts[idx]++; } // Most-referenced VA = message; tiebreak by lowest file offset (message placed first). int bestCount = 0; ulong bestFileOff = ulong.MaxValue; uint bestOperand = 0; for (int j = 0; j < slotCount; j++) { ulong va = (imageBase & 0xFFFFFFFF00000000UL) | (ulong)vaKeys[j]; ulong fileOff = va - textVABase; if (vaCounts[j] > bestCount || (vaCounts[j] == bestCount && fileOff < bestFileOff)) { bestCount = vaCounts[j]; bestFileOff = fileOff; bestOperand = vaKeys[j]; } } if (bestOperand == 0) return null; int off = (int)bestFileOff; // Distinguish encoding by checking whether the second byte is a null (UTF-16LE pattern). // MessageBox / WriteConsoleW builds use Unicode (raw[off+1] == 0x00 for ASCII-range text). // puts builds use plain ASCII (raw[off+1] is a printable byte, not zero). bool looksUtf16 = (off + 1 < raw.Length && raw[off + 1] == 0); if (looksUtf16) { var msgU = TryReadNullTermUnicode(raw, off); return msgU ?? TryReadNullTermAscii(raw, off); } else { var msgA = TryReadNullTermAscii(raw, off); return msgA ?? TryReadNullTermUnicode(raw, off); } } private static string TryReadNullTermUnicode(byte[] raw, int offset) { if (offset < 0 || offset + 2 > raw.Length) return null; int end = offset; while (end + 1 < raw.Length && (raw[end] != 0 || raw[end + 1] != 0)) end += 2; if (end == offset || end - offset > 8192) return null; var s = Encoding.Unicode.GetString(raw, offset, end - offset); return IsPrintableUnicode(s) ? s : null; } private static string TryReadNullTermAscii(byte[] raw, int offset) { if (offset < 0 || offset >= raw.Length) return null; int end = offset; while (end < raw.Length && raw[end] != 0) end++; if (end == offset || end - offset > 8192) return null; var s = Encoding.ASCII.GetString(raw, offset, end - offset); return IsPrintableAscii(s) ? s : null; } } } |