From 1d88097ad6d7db30da60cf837f1eda4a1d995b63 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 29 Mar 2021 13:20:43 +0700 Subject: [PATCH] skip fake links --- src/parse_html.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/parse_html.py b/src/parse_html.py index efd19a5..c269b1a 100644 --- a/src/parse_html.py +++ b/src/parse_html.py @@ -59,9 +59,12 @@ def get_images(soup, config): # gallery item from a tag all_a_tag = soup.find_all('a') for a_tag in all_a_tag: - a_href = a_tag['href'] - if upload_folder in a_href: - img_url_set.add(a_href) + try: + a_href = a_tag['href'] + if upload_folder in a_href: + img_url_set.add(a_href) + except KeyError: + continue # external all_external_css = soup.find_all("link", {"rel": "stylesheet"}) for css_file in all_external_css: