# SearchGoogleImagesDownload.ps1

<#PSScriptInfo
 
.VERSION 1.0.0
 
.GUID af53cecf-f280-4fdf-ba80-f6fe400058e8
 
.AUTHOR mikko@lavento.com
 
.COMPANYNAME
 
.COPYRIGHT
 
.TAGS Google, imagesearch, download image, image, search
 
.LICENSEURI
 
.PROJECTURI
 
.ICONURI
 
.EXTERNALMODULEDEPENDENCIES
 
.REQUIREDSCRIPTS
 
.EXTERNALSCRIPTDEPENDENCIES
 
.RELEASENOTES
 
 
#>


<#
 
.DESCRIPTION
 Example of how to download the first N image hits from a Google image search. No Google API is needed, because the script sends a direct web request that mimics a browser request.
 
#>
 

Param(
    # Plain-text search term; it is also used as the prefix of every saved file name.
    [string]$SearchPlaintext = "LPS #2",

    # How many image hits to take from the top of the result list.
    [int]$HowManyHits = 4,

    # Folder the pictures are stored in; it is created when it does not exist yet.
    [string]$TargetFolder = "C:\Skriptit\WebImageCrawler\Downloadedpics"
)

#19.7.2019 M.Lavento
#Get first x number of images from Google image search based on Searchterm

# System.Web supplies HttpUtility, which is used to URL-encode the search term.
Add-Type -AssemblyName System.Web

#Convert the search string to its URL-encoded form so it can go into the query string
$SearchItem = [System.Web.HttpUtility]::UrlEncode($SearchPlaintext)

#Make sure the folder that stores the pics exists, then open it
if (-not (Test-Path -Path $TargetFolder)) {
    New-Item -ItemType Directory -Path $TargetFolder
}
Invoke-Item $TargetFolder


#Google image search URL; the URL-encoded search term goes into the as_q field
$searchUrl = "https://www.google.com/search?as_st=y&tbm=isch&as_q=$SearchItem&as_epq=&as_oq=&as_eq=&imgsz=&imgar=&imgc=&imgcolor=&imgtype=&cr=&as_sitesearch=&safe=images&as_filetype=&as_rights="

#Send the request with a desktop Chrome user agent string
$chromeUserAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36'
$searchResponse = Invoke-WebRequest -Uri $searchUrl -UserAgent $chromeUserAgent -UseBasicParsing

#Google: the hits have to be read from the raw content.
#Cut the raw content at every ":" and keep only the pieces that mention .jpg
$colonPieces = $searchResponse.RawContent -split ":"
$jpgPieces = $colonPieces | Where-Object { $_ -like "*.jpg*" }

#Clean up some more: drop the double quotes, cut at every comma,
#keep the pieces that end in .jpg and take the first $HowManyHits of them
$unquotedPieces = $jpgPieces -replace '"', ""
$commaPieces = $unquotedPieces -split ","
$jpgsplit = $commaPieces | Where-Object { $_ -like "*.jpg" } | Select-Object -First $HowManyHits

#Loop and get the images
#Loop over the hits and download each image into $TargetFolder
foreach ($urlhit in $jpgsplit)
{
    #A hit carries no scheme of its own, so build both the http and the https address
    $httpUrl = "http:" + $urlhit
    $httpsUrl = "https:" + $urlhit

    #File name = search term + "_" + last segment of the address
    $leafName = Split-Path -Path $httpUrl -Leaf
    $fileName = $SearchPlaintext + "_" + $leafName

    #Replace every character that is illegal in a file name with a space
    foreach ($invalidChar in [System.IO.Path]::GetInvalidFileNameChars())
    {
        $fileName = $fileName.Replace($invalidChar, [char]' ')
    }
    $path = Join-Path -Path $TargetFolder -ChildPath $fileName

    #Fetch the image: try HTTPS first, because the original scheme of the hit is unknown
    $downloaded = $false
    try
    {
        Invoke-WebRequest -Uri $httpsUrl -OutFile $path -UseBasicParsing -ErrorAction Stop
        $downloaded = $true
    }
    catch
    {
        #Deliberately quiet: the plain HTTP request below is the fallback
        $downloaded = $false
    }

    #Fall back to plain HTTP; a failure here is reported and the loop moves on to the next hit
    if (-not $downloaded)
    {
        Invoke-WebRequest -Uri $httpUrl -OutFile $path -UseBasicParsing -ErrorAction Continue
    }
}