URL 주소를 이미 알고 있는 Python을 사용하여 이미지를 로컬로 저장하는 방법은 무엇입니까?
나는 인터넷에 있는 이미지의 URL을 알고 있습니다.
예: Google 로고가 포함된 http://www.digimouth.com/news/media/2011/09/google-logo.jpg.
이제 어떻게 하면 브라우저에서 URL을 열고 파일을 수동으로 저장하지 않고 파이썬을 사용하여 이 이미지를 다운로드할 수 있습니까?
파이썬 2
파일로 저장하기만 하면 되는 경우 보다 간단한 방법은 다음과 같습니다.
import urllib

# Python 2 only: urlretrieve is exposed directly on the urllib module.
image_url = "http://www.digimouth.com/news/media/2011/09/google-logo.jpg"
destination = "local-filename.jpg"  # local path the download is written to
urllib.urlretrieve(image_url, destination)
두 번째 인수는 파일을 저장해야 하는 로컬 경로입니다.
파이썬 3
SergO가 제안했듯이 아래 코드는 Python 3에서 작동해야 합니다.
import urllib.request

# Python 3: urlretrieve is reached through the urllib.request submodule.
image_url = "http://www.digimouth.com/news/media/2011/09/google-logo.jpg"
destination = "local-filename.jpg"  # local path the download is written to
urllib.request.urlretrieve(image_url, destination)
import urllib
from contextlib import closing

# Python 2: read the remote image and write it to a local file.
# Both handles are managed by ``with`` so they are closed even when the read
# or the write fails. ``closing`` is used because the Python 2 response object
# does not implement the context-manager protocol itself.
with closing(urllib.urlopen("http://www.digimouth.com/news/media/2011/09/google-logo.jpg")) as resource:
    with open("file01.jpg", "wb") as output:
        output.write(resource.read())
file01.jpg
이미지가 포함됩니다.
저는 이것을 할 수 있는 스크립트를 작성했고, 그것은 당신이 사용할 수 있도록 제 github에서 사용할 수 있습니다.
BeautifulSoup을 사용하여 웹 사이트에서 이미지를 구문 분석할 수 있습니다. 웹 스크래핑을 많이 할 예정이라면(또는 제 도구를 사용할 예정이라면) 다음 명령으로 설치하는 것을 제안합니다: sudo pip install BeautifulSoup (아래 코드는 bs4를 임포트하므로, 최신 환경에서는 pip install beautifulsoup4를 사용하십시오.)
BeautifulSoup에 대한 정보는 여기에서 확인할 수 있습니다.
편의상 제 코드는 다음과 같습니다.
from bs4 import BeautifulSoup
from urllib2 import urlopen
import urllib
# use this image scraper from the location that
#you want to save scraped images to
def make_soup(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup tree.

    The parser is named explicitly so the result does not depend on which
    optional parsers happen to be installed, and the HTTP response is closed
    as soon as its body has been read.
    """
    from contextlib import closing  # Python 2 responses are not context managers

    with closing(urlopen(url)) as response:
        html = response.read()
    return BeautifulSoup(html, 'html.parser')
def get_images(url):
    """Download every ``<img>`` referenced by the page at *url*.

    Each image is saved into the current working directory under the last
    path segment of its ``src``. Relative ``src`` values are resolved against
    *url* before downloading.

    Returns the list of raw ``src`` values found on the page; an entry is
    ``None`` for an ``<img>`` tag that has no ``src`` attribute.
    """
    from urlparse import urljoin  # Python 2 home of urljoin

    soup = make_soup(url)
    # this is a list of bs4 element tags
    images = soup.findAll('img')
    print(str(len(images)) + " images found.")
    print('Downloading images to current working directory.')
    # compile our unicode list of image links
    image_links = [each.get('src') for each in images]
    for each in image_links:
        if not each:
            # <img> without a src attribute: nothing to download
            continue
        filename = each.split('/')[-1]
        if not filename:
            # src ends with "/": there is no usable file name
            continue
        urllib.urlretrieve(urljoin(url, each), filename)
    return image_links
#a standard call looks like this
#get_images('http://www.wookmark.com')
이 작업은 requests 라이브러리로 수행할 수 있습니다. 페이지를 로드하고 이진 콘텐츠를 파일에 덤프합니다.
import os
import requests

url = 'https://apod.nasa.gov/apod/image/1701/potw1636aN159_HST_2048.jpg'
# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of saving an HTML error page as an image.
page.raise_for_status()

# Reuse the extension found in the URL for the local file name.
f_ext = os.path.splitext(url)[-1]
f_name = 'img{}'.format(f_ext)
with open(f_name, 'wb') as f:
    f.write(page.content)
파이썬 3
urllib.request - URL을 열기 위한 확장 라이브러리
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlretrieve

# image_url and image_local_path are placeholders supplied by the caller.
try:
    urlretrieve(image_url, image_local_path)
except FileNotFoundError as err:
    print(err)  # something wrong with local path
except HTTPError as err:
    print(err)  # something wrong with url
except URLError as err:
    # HTTPError only covers responses the server actually sent; DNS failures
    # and refused connections arrive as the more general URLError.
    print(err)  # server could not be reached
저는 Yup.의 스크립트를 확장해서 새 스크립트를 만들었습니다. 몇 가지를 고쳤습니다. 이제 403: Forbidden 문제를 우회합니다. 이미지 가져오기에 실패해도 충돌하지 않습니다. 손상된 미리 보기를 방지합니다. 올바른 절대 URL을 얻습니다. 더 많은 정보를 출력합니다. 명령줄 인수를 사용하여 실행할 수 있습니다.
# getem.py
# python2 script to download all images in a given url
# use: python getem.py http://url.where.images.are
from bs4 import BeautifulSoup
import urllib2
import shutil
import requests
from urlparse import urljoin
import sys
import time
def make_soup(url):
    """Fetch *url* with a browser-like User-Agent and parse it with html.parser.

    The custom User-Agent header is what lets the request through on sites
    that answer 403 to the default urllib2 agent. The response is closed once
    BeautifulSoup has consumed it.
    """
    from contextlib import closing  # Python 2 responses are not context managers

    req = urllib2.Request(url, headers={'User-Agent' : "Magic Browser"})
    with closing(urllib2.urlopen(req)) as html:
        return BeautifulSoup(html, 'html.parser')
def get_images(url):
    """Download every ``<img>`` on the page at *url* into the working directory.

    Each ``src`` is resolved against *url* to an absolute address and streamed
    to a file named after its last path segment. A failure on one image is
    reported and the loop moves on to the next one.
    """
    soup = make_soup(url)
    images = soup.findAll('img')
    print(str(len(images)) + " images found.")
    print('Downloading images to current working directory.')
    image_links = [each.get('src') for each in images]
    for each in image_links:
        if not each:
            # <img> without a src attribute: nothing to download
            continue
        try:
            filename = each.strip().split('/')[-1].strip()
            src = urljoin(url, each)
            print('Getting: ' + filename)
            response = requests.get(src, stream=True)
            # Do not write an HTML error page to disk as if it were an image.
            response.raise_for_status()
            # Have urllib3 undo gzip/deflate so the saved bytes are the image.
            response.raw.decode_content = True
            # delay to avoid corrupted previews
            time.sleep(1)
            with open(filename, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        except Exception as err:
            # Best effort per image, but unlike a bare ``except`` this still
            # lets KeyboardInterrupt / SystemExit stop the script.
            print(' An error occured. Continuing.')
            print(' ' + str(err))
    print('Done.')
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.exit('use: python getem.py http://url.where.images.are')
    url = sys.argv[1]
    get_images(url)
Python 2 및 Python 3과 함께 작동하는 솔루션:
try:
    # Python 3 keeps urlretrieve in the urllib.request submodule ...
    from urllib.request import urlretrieve
except ImportError:
    # ... while Python 2 exposes it on the top-level urllib module.
    from urllib import urlretrieve

url = "http://www.digimouth.com/news/media/2011/09/google-logo.jpg"
destination = "local-filename.jpg"
urlretrieve(url, destination)
또는 requests라는 추가 의존성이 허용 가능하고 http(s) URL인 경우:
def load_requests(source_url, sink_path):
    """
    Load a file from an URL (e.g. http).

    Parameters
    ----------
    source_url : str
        Where to load the file from.
    sink_path : str
        Where the loaded file is stored.

    Returns
    -------
    bool
        True when the server answered 200 and the body was written to
        ``sink_path``; False otherwise (nothing is written in that case).
    """
    import requests

    # ``with`` releases the streamed connection even if writing fails.
    with requests.get(source_url, stream=True) as r:
        if r.status_code != 200:
            return False
        with open(sink_path, 'wb') as f:
            # Iterating the response directly yields tiny 128-byte chunks;
            # ask for larger ones to cut the number of write calls.
            for chunk in r.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    return True
requests 라이브러리 사용
import requests
import shutil,os
# Browser-like User-Agent sent with every download request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    ),
}

# Images are saved to an "Images" folder under the current working directory.
currentDir = os.getcwd()
path = os.path.join(currentDir, 'Images')
def ImageDl(url):
    """Download *url* into the ``Images`` folder, trying up to five times.

    The file is named after the last path segment of *url*. Every failed
    attempt (an exception or a non-200 status) uses up one try, so the
    function always terminates.

    Returns True once the file has been written, False when all five
    attempts failed.
    """
    filename = url.split('/')[-1]
    # The target folder is not guaranteed to exist yet.
    if not os.path.isdir(path):
        os.makedirs(path)
    for _ in range(5):  # retry 5 times
        try:
            r = requests.get(url, headers=headers, stream=True, timeout=5)
            if r.status_code == 200:
                with open(os.path.join(path, filename), 'wb') as f:
                    # Have urllib3 undo gzip/deflate before copying raw bytes.
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
                print(filename)
                return True
            print('HTTP status {} for {}'.format(r.status_code, url))
        except Exception as e:
            # Deliberately broad: any network or stream error means "retry".
            print(e)
    return False


ImageDl(url)
간단하게 파이썬 wget 모듈을 사용하여 링크를 다운로드할 수 있습니다.
사용법은 아래와 같습니다:
import wget

# wget.download derives the local file name from the URL.
image_url = 'http://www.digimouth.com/news/media/2011/09/google-logo.jpg'
wget.download(image_url)
이것은 매우 짧은 대답입니다.
import urllib

# Python 2: fetch the picture and store it under the given local name.
picture_url = "http://photogallery.sandesh.com/Picture.aspx?AlubumId=422040"
local_name = "Abc.jpg"
urllib.urlretrieve(picture_url, local_name)
파이썬 3용 버전
파이썬 3용 @madprops의 코드를 조정했습니다.
# getem.py
# python3 script to download all images in a given url
# use: python getem.py http://url.where.images.are
from bs4 import BeautifulSoup
import urllib.request
import shutil
import requests
from urllib.parse import urljoin
import sys
import time
def make_soup(url):
    """Fetch *url* with a browser-like User-Agent and parse it with html.parser.

    The custom User-Agent header is what lets the request through on sites
    that answer 403 to the default urllib agent. The response is used as a
    context manager so the connection is closed once BeautifulSoup has
    consumed it.
    """
    req = urllib.request.Request(url, headers={'User-Agent' : "Magic Browser"})
    with urllib.request.urlopen(req) as html:
        return BeautifulSoup(html, 'html.parser')
def get_images(url):
    """Download every ``<img>`` on the page at *url* into the working directory.

    Each ``src`` is resolved against *url* to an absolute address and streamed
    to a file named after its last path segment. A failure on one image is
    reported and the loop moves on to the next one.
    """
    soup = make_soup(url)
    images = soup.find_all('img')
    print(str(len(images)) + " images found.")
    print('Downloading images to current working directory.')
    image_links = [each.get('src') for each in images]
    for each in image_links:
        if not each:
            # <img> without a src attribute: nothing to download
            continue
        try:
            filename = each.strip().split('/')[-1].strip()
            src = urljoin(url, each)
            print('Getting: ' + filename)
            response = requests.get(src, stream=True)
            # Do not write an HTML error page to disk as if it were an image.
            response.raise_for_status()
            # Have urllib3 undo gzip/deflate so the saved bytes are the image.
            response.raw.decode_content = True
            # delay to avoid corrupted previews
            time.sleep(1)
            with open(filename, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        except Exception as err:
            # Best effort per image, but unlike a bare ``except`` this still
            # lets KeyboardInterrupt / SystemExit stop the script.
            print(' An error occured. Continuing.')
            print(' ' + str(err))
    print('Done.')
if __name__ == '__main__':
    # Honour the usage documented in the header (python getem.py <url>);
    # fall back to the original demo site when no argument is given.
    get_images(sys.argv[1] if len(sys.argv) > 1 else 'http://www.wookmark.com')
답이 늦었지만, python>=3.6 환경이라면
dload를 사용할 수 있습니다. 예:
import dload

# Save the remote file locally; the target name is chosen by dload.
logo_url = "http://www.digimouth.com/news/media/2011/09/google-logo.jpg"
dload.save(logo_url)
이미지가 bytes로 필요한 경우
다음을 사용합니다:
# Fetch the image into memory instead of writing it to disk.
logo_url = "http://www.digimouth.com/news/media/2011/09/google-logo.jpg"
img_bytes = dload.bytes(logo_url)
pip3 install dload 명령으로 설치합니다.
requests를 사용하는 Python 3용 새로운 함수:
설명은 코드의 주석에 있습니다. 함수는 바로 사용할 수 있습니다.
import requests
from os import path
def get_image(image_url):
    """
    Get image based on url.

    The image is written to an ``images`` folder next to this script; the
    folder is created when it does not exist yet.

    :param image_url: Full URL of the image to download.
    :return: Image name if everything OK, False otherwise
    """
    import os  # only ``os.path`` is imported at file level

    image_name = path.split(image_url)[1]
    if not image_name:
        # URL ends with "/": there is no file name to save under
        return False
    try:
        image = requests.get(image_url)
    except OSError:  # Little too wide, but work OK, no additional imports needed. Catch all connection problems
        return False
    if image.status_code != 200:  # we could have retrieved error page
        # Previously this path fell through and returned None, not False.
        return False
    base_dir = path.join(path.dirname(path.realpath(__file__)), "images")  # Use your own path or "" to use current working directory.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)
    with open(path.join(base_dir, image_name), "wb") as f:
        f.write(image.content)
    return image_name


get_image("https://apod.nasddfda.gov/apod/image/2003/S106_Mishra_1947.jpg")
이미지를 다운로드하는 가장 쉬운 방법입니다.
import requests
from slugify import slugify

img_url = 'https://apod.nasa.gov/apod/image/1701/potw1636aN159_HST_2048.jpg'
img = requests.get(img_url).content
# Local name is "<slugified url>.<extension taken from the url>".
img_name = slugify(img_url) + '.' + str(img_url).split('.')[-1]
# ``with`` closes the file even if the write fails.
with open(img_name, 'wb') as img_file:
    img_file.write(img)
이미지에 대한 URL을 아직 가지고 있지 않은 경우 gazpacho로 스크래핑할 수 있습니다.
from gazpacho import Soup

base_url = "http://books.toscrape.com"
soup = Soup.get(base_url)
# mode="all" makes find() return a list even for zero or one match; the
# default automatic mode returns a bare Soup object (or None) in those cases,
# which would break the iteration below.
links = [img.attrs["src"] for img in soup.find("img", mode="all")]
그런 다음 앞서 언급한 대로
urllib로 자산을 다운로드합니다:
from pathlib import Path
from urllib.parse import urljoin
from urllib.request import urlretrieve as download

directory = "images"
Path(directory).mkdir(exist_ok=True)

# base_url and links come from the scraping step shown earlier.
link = links[0]
name = link.split("/")[-1]
# urljoin resolves relative, root-relative and already-absolute src values
# correctly, which plain string concatenation does not.
download(urljoin(base_url, link), f"{directory}/{name}")
# import the required libraries from Python
# (os was used below without being imported, which raised NameError)
import os
import pathlib
import urllib.request

# Using pathlib, specify where the image is to be saved
downloads_path = str(pathlib.Path.home() / "Downloads")

# Form a full image path by joining the path to the
# images' new name
picture_path = os.path.join(downloads_path, "new-image.png")
# "/home/User/Downloads/new-image.png"

# Using "urlretrieve()" from urllib.request save the image.
# The URL must carry a scheme; a bare "//host/path" is rejected by urllib
# as an unknown url type.
urllib.request.urlretrieve("https://example.com/image.png", picture_path)
# urlretrieve() takes in 2 arguments
# 1. The URL of the image to be downloaded
# 2. The image new name after download. By default, the image is saved
# inside your current working directory
좋아요, 이것은 제 초보적인 시도입니다. 아마도 완전히 과잉 살상일 겁니다.필요에 따라 업데이트하십시오. 제한 시간을 처리할 수 없기 때문입니다. 하지만 재미로 작동하게 되었습니다.
여기에 나열된 코드: https://github.com/JayRizzo/JayRizzoTools/blob/master/pyImageDownloader.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# =============================================================================
# Created Syst: MAC OSX High Sierra 21.5.0 (17G65)
# Created Plat: Python 3.9.5 ('v3.9.5:0a7dcbdb13', 'May 3 2021 13:17:02')
# Created By : Jeromie Kirchoff
# Created Date: Thu Jun 15 23:31:01 2022 CDT
# Last ModDate: Thu Jun 16 01:41:01 2022 CDT
# =============================================================================
# NOTE: Doesn't work on SVG images at this time.
# I will look into this further: https://stackoverflow.com/a/6599172/1896134
# =============================================================================
import requests # to get image from the web
import shutil # to save it locally
import os # needed
from os.path import exists as filepathexist # check if file paths exist
from os.path import join # joins path for different os
from os.path import expanduser # expands current home
from pyuser_agent import UA # generates random UserAgent
class ImageDownloader(object):
    """URL ImageDownloader.

    Input : Full Image URL
    Output: Image saved to your ~/Pictures/JayRizzoDL folder.

    The download runs immediately on construction: ``__init__`` prepares the
    target folder and then calls ``main``.
    """

    def __init__(self, URL: str, timeout: float = 30.0):
        """Store settings, make sure the target folder exists, then download.

        URL     : full address of the image to fetch.
        timeout : seconds to wait for the server before giving up.
        """
        self.url = URL
        self.timeout = timeout
        self.headers = {"User-Agent" : UA().random}
        self.currentHome = expanduser('~')
        # The trailing "" keeps the trailing separator these paths always had.
        self.desktop = join(self.currentHome, "Desktop", "")
        self.download = join(self.currentHome, "Downloads", "")
        self.pictures = join(self.currentHome, "Pictures", "JayRizzoDL", "")
        self.outfile = ""
        self.filename = ""
        self.response = ""
        self.rawstream = ""
        self.createdfilepath = ""
        self.imgFileName = ""
        # Create the JayRizzoDL folder (and ~/Pictures itself if missing);
        # a plain mkdir fails when the parent folder does not exist.
        os.makedirs(self.pictures, exist_ok=True)
        self.main()

    def getFileNameFromURL(self, URL: str):
        """Parse the URL for the name after the last forward slash."""
        # Use the argument, not self.url, so the helper works for any URL.
        NewFileName = URL.strip().split('/')[-1].strip()
        return NewFileName

    def getResponse(self, URL: str):
        """Stream the URL for the raw data; raises on an HTTP error status."""
        self.response = requests.get(URL,
                                     headers=self.headers,
                                     stream=True,
                                     timeout=self.timeout)
        # Do not let an HTML error page be saved as if it were the image.
        self.response.raise_for_status()
        return self.response

    def gocreateFile(self, name: str, response):
        """Create the file with the raw data in the custom folder.

        Returns the full path of the file that was written.
        """
        self.outfile = join(self.pictures, name)
        # Have urllib3 undo gzip/deflate so the saved bytes are the image.
        response.raw.decode_content = True
        with open(self.outfile, 'wb') as outFilePath:
            shutil.copyfileobj(response.raw, outFilePath)
        return self.outfile

    def main(self):
        """Combine everything: derive the name, fetch, and save the image."""
        self.filename = self.getFileNameFromURL(self.url)
        self.rawstream = self.getResponse(self.url)
        self.createdfilepath = self.gocreateFile(self.filename, self.rawstream)
        print(f"File was created: {self.createdfilepath}")
        return
if __name__ == '__main__':
    # Demo run for direct execution: constructing the object performs the
    # download.
    demo_url = "https://stackoverflow.design/assets/img/logos/so/logo-stackoverflow.png"
    ImageDownloader(demo_url)
가능한 모든 오류를 방지하면서 이미지 파일을 다운로드합니다.
import requests
import validators
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
def is_downloadable(url):
    """Return True when *url* is well-formed and the server answers it.

    Returns False for a malformed URL, an HTTP error status, an unreachable
    host, or a URL scheme urllib cannot open.
    """
    # validators.url() reports failure with a falsy result object rather than
    # the literal False, so test truthiness instead of ``== False``.
    if not validators.url(url):
        return False
    try:
        # Only reachability matters here; close the probe response at once.
        with urlopen(Request(url)):
            return True
    except (URLError, ValueError):
        # URLError also covers HTTPError (its subclass); ValueError is raised
        # for URL types urllib does not recognise.
        return False
# File_data contains the list of addresses for the image files; each entry
# holds a label at index 0 and the image URL at index 1. pth is the
# destination prefix. Both are defined by the surrounding script.
for entry in File_data:
    label, url = entry[0], entry[1]
    try:
        if not is_downloadable(url):
            continue
        r = requests.get(url, allow_redirects=True)
        # The last path segment of the URL becomes the base file name.
        fname = url.rsplit('/', 1)[-1]
        fname = pth + label + "$" + fname  # Destination to save image file
        # ``with`` closes the file handle even if the write fails.
        with open(fname, 'wb') as out_file:
            out_file.write(r.content)
    except Exception as e:
        # Best effort: report the problem and carry on with the next entry.
        print(e)
언급URL : https://stackoverflow.com/questions/8286352/how-to-save-an-image-locally-using-python-whose-url-address-i-already-know
'programing' 카테고리의 다른 글
SQL Server: 새 ID 열을 추가하고 열을 ID로 채우는 방법은 무엇입니까? (0) | 2023.06.24 |
---|---|
고유한 이름을 가진 테이블 생성 (0) | 2023.06.24 |
SQL Server에서 CREATE OR REPLACE VIEW를 작동시키는 방법은 무엇입니까? (0) | 2023.06.24 |
Android 앱에서 Google Firebase 로그아웃 및 사용자 잊기 (0) | 2023.06.24 |
PLSQL Anonymous 블록이 완료되면 출력이 없는 이유는 무엇입니까? (0) | 2023.06.19 |