1+ import os
2+ import time
3+ import urllib
4+ import requests
5+ from urllib .parse import quote
6+ import array as arr
7+
8+ class simple_image_download :
9+ def __init__ (self ):
10+ pass
11+
12+ def urls (self , keywords , limit ):
13+ keyword_to_search = [str (item ).strip () for item in keywords .split (',' )]
14+ i = 0
15+ links = []
16+ while i < len (keyword_to_search ):
17+ url = 'https://www.google.com/search?q=' + quote (
18+ keyword_to_search [i ].encode (
19+ 'utf-8' )) + '&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'
20+ raw_html = self ._download_page (url )
21+
22+ end_object = - 1 ;
23+
24+ j = 0
25+ while j < limit :
26+ while (True ):
27+ try :
28+ new_line = raw_html .find ('"https://' , end_object + 1 )
29+ end_object = raw_html .find ('"' , new_line + 1 )
30+
31+ buffor = raw_html .find ('\\ ' , new_line + 1 , end_object )
32+ if buffor != - 1 :
33+ object_raw = (raw_html [new_line + 1 :buffor ])
34+ else :
35+ object_raw = (raw_html [new_line + 1 :end_object ])
36+
37+ if '.jpg' in object_raw or 'png' in object_raw or '.ico' in object_raw or '.gif' in object_raw or '.jpeg' in object_raw :
38+ break
39+
40+ except Exception as e :
41+ print (e )
42+ break
43+
44+ links .append (object_raw )
45+ j += 1
46+
47+ i += 1
48+ return (links )
49+
50+
51+ def download (self , keywords , limit ):
52+ keyword_to_search = [str (item ).strip () for item in keywords .split (',' )]
53+ main_directory = "simple_images/"
54+ i = 0
55+
56+ while i < len (keyword_to_search ):
57+ self ._create_directories (main_directory , keyword_to_search [i ])
58+ url = 'https://www.google.com/search?q=' + quote (
59+ keyword_to_search [i ].encode ('utf-8' )) + '&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'
60+ raw_html = self ._download_page (url )
61+
62+ end_object = - 1 ;
63+
64+ j = 0
65+ while j < limit :
66+ while (True ):
67+ try :
68+ new_line = raw_html .find ('"https://' , end_object + 1 )
69+ end_object = raw_html .find ('"' , new_line + 1 )
70+
71+ buffor = raw_html .find ('\\ ' , new_line + 1 , end_object )
72+ if buffor != - 1 :
73+ object_raw = (raw_html [new_line + 1 :buffor ])
74+ else :
75+ object_raw = (raw_html [new_line + 1 :end_object ])
76+
77+ if '.jpg' in object_raw or 'png' in object_raw or '.ico' in object_raw or '.gif' in object_raw or '.jpeg' in object_raw :
78+ break
79+
80+ except Exception as e :
81+ print (e )
82+ break
83+
84+ path = main_directory + keyword_to_search [i ]
85+
86+ #print(object_raw)
87+
88+ if not os .path .exists (path ):
89+ os .makedirs (path )
90+
91+ filename = str (keyword_to_search [i ]) + "_" + str (j + 1 ) + ".jpg"
92+
93+ try :
94+ r = requests .get (object_raw , allow_redirects = True )
95+ open (os .path .join (path , filename ), 'wb' ).write (r .content )
96+ except Exception as e :
97+ print (e )
98+ j -= 1
99+ j += 1
100+
101+ i += 1
102+
103+
104+ def _create_directories (self , main_directory , name ):
105+ try :
106+ if not os .path .exists (main_directory ):
107+ os .makedirs (main_directory )
108+ time .sleep (0.2 )
109+ path = (name )
110+ sub_directory = os .path .join (main_directory , path )
111+ if not os .path .exists (sub_directory ):
112+ os .makedirs (sub_directory )
113+ else :
114+ path = (name )
115+ sub_directory = os .path .join (main_directory , path )
116+ if not os .path .exists (sub_directory ):
117+ os .makedirs (sub_directory )
118+
119+ except OSError as e :
120+ if e .errno != 17 :
121+ raise
122+ pass
123+ return
124+
125+ def _download_page (self ,url ):
126+
127+ try :
128+ headers = {}
129+ headers ['User-Agent' ] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"
130+ req = urllib .request .Request (url , headers = headers )
131+ resp = urllib .request .urlopen (req )
132+ respData = str (resp .read ())
133+ return respData
134+
135+ except Exception as e :
136+ print (e )
137+ exit (0 )
0 commit comments