From 794a28398aebc879003f5d45fb6992434ada6b7b Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 29 Mar 2021 12:02:07 +0700 Subject: [PATCH] improved image detection from lightbox gallery a tag --- src/parse_html.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parse_html.py b/src/parse_html.py index 601dab8..efd19a5 100644 --- a/src/parse_html.py +++ b/src/parse_html.py @@ -56,10 +56,16 @@ def get_images(soup, config): img_url_set.add(url) except: continue + # gallery item from a tag + all_a_tag = soup.find_all('a') + for a_tag in all_a_tag: + a_href = a_tag['href'] + if upload_folder in a_href: + img_url_set.add(a_href) # external all_external_css = soup.find_all("link", {"rel": "stylesheet"}) for css_file in all_external_css: - remote_file = all_external_css[0]["href"] + remote_file = css_file["href"] try: remote_css = requests.get(remote_file).text except ConnectionError: