diff --git a/src/parse_html.py b/src/parse_html.py index 601dab8..efd19a5 100644 --- a/src/parse_html.py +++ b/src/parse_html.py @@ -56,10 +56,16 @@ def get_images(soup, config): img_url_set.add(url) except: continue + # gallery item from a tag + all_a_tag = soup.find_all('a') + for a_tag in all_a_tag: + a_href = a_tag['href'] + if upload_folder in a_href: + img_url_set.add(a_href) # external all_external_css = soup.find_all("link", {"rel": "stylesheet"}) for css_file in all_external_css: - remote_file = all_external_css[0]["href"] + remote_file = css_file["href"] try: remote_css = requests.get(remote_file).text except ConnectionError: