개발일지/자바

크롤링해서 이미지 파일 다운받기

ZI_CO 2023. 12. 21.

package test;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small crawler example: fetches a movie listing page with jsoup, selects the
 * poster {@code <img>} elements, and saves each image into a local folder.
 */
public class Test01 {

    /**
     * Fetches the listing page and downloads every image matched by the selector.
     *
     * <p>If the page itself cannot be fetched the program reports the error and
     * exits; a failure on one image is reported and the remaining images are
     * still attempted.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Page to crawl
        final String url = "http://www.cgv.co.kr/movies/?lt=1&ft=0";
        final Document doc;

        try {
            // Fetch and parse the page with jsoup
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Without a document there is nothing to select from, so stop here
            // instead of continuing with a null reference.
            System.err.println("Failed to fetch page " + url + ": " + e);
            return;
        }

        // CSS selector for the poster images
        String selector = "li > div.box-image > a > span.thumb-image > img";
        Elements elems = doc.select(selector);

        // Folder the images are saved into
        String destinationFolder = "C:/JO";

        for (Element elem : elems) {
            // absUrl resolves relative and protocol-relative "src" values against
            // the page URL; it yields an empty string when no absolute URL can be built.
            String imageUrl = elem.absUrl("src");
            if (imageUrl.isEmpty()) {
                System.err.println("Image download failed: no usable src on " + elem);
                continue;
            }

            try {
                downloadImage(imageUrl, destinationFolder);
                System.out.println("Image downloaded successfully");
            } catch (IOException e) {
                System.err.println("Image download failed: " + e.getMessage());
            }
        }
    }

    /**
     * Downloads the resource at {@code imageUrl} into {@code destinationFolder},
     * creating the folder if it does not exist and replacing any existing file
     * with the same name.
     *
     * <p>The file name is the last segment of the URL's path, so a query string
     * or fragment never becomes part of the name.
     *
     * @param imageUrl          absolute URL of the image to download
     * @param destinationFolder folder the image is written into
     * @throws IOException if the URL is malformed, no safe file name can be
     *                     derived from it, or the download or write fails
     */
    public static void downloadImage(String imageUrl, String destinationFolder) throws IOException {
        // Create the target folder if it is missing
        Path folderPath = Path.of(destinationFolder).toAbsolutePath().normalize();
        if (!Files.exists(folderPath)) {
            Files.createDirectories(folderPath);
        }

        URL url = new URL(imageUrl);

        // Take the name from the URL path only (no "?query" or "#fragment").
        String urlPath = url.getPath();
        String fileName = urlPath.substring(urlPath.lastIndexOf('/') + 1);
        if (fileName.isEmpty()) {
            throw new IOException("Cannot derive a file name from URL: " + imageUrl);
        }

        // The name comes from remote content: make sure it stays inside the folder
        // (rejects names such as ".." or ones containing platform separators).
        Path destination = folderPath.resolve(fileName).normalize();
        if (destination.equals(folderPath) || !destination.startsWith(folderPath)) {
            throw new IOException("Refusing unsafe file name from URL: " + imageUrl);
        }

        // Files.copy does not close the input stream, so close it here.
        try (var in = url.openStream()) {
            Files.copy(in, destination, StandardCopyOption.REPLACE_EXISTING);
        }
    }
}

'개발일지 > 자바' 카테고리의 다른 글

웹 크롤링 원하는 요소 접근하기 (0)	2023.12.22
Request시 URL 파라미터 아스키코드 디코딩 하는법 (1)	2023.12.22
Base64 인코딩, 디코딩 (0)	2023.12.17
Stream API (0)	2022.11.08
Http통신으로 JSON 객체 여러개 가져오기(리스트), 파싱하기 (0)	2022.10.31

크롤링해서 이미지 파일 다운받기

'개발일지 > 자바' 카테고리의 다른 글

댓글

티스토리툴바