diff --git a/archive/main.py b/archive/main.py
index ec35b50..c137c0f 100644
--- a/archive/main.py
+++ b/archive/main.py
@@ -1,7 +1,34 @@
+import os
 from bs4 import BeautifulSoup
+import pymongo
 
-soup = BeautifulSoup(open("messages/messages0.html"), "html.parser")
+# Directory scanned for exported message pages; only *.html entries are parsed.
+DIRECTORY = "messages/"
 
-for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
-    if (str(attch_desc.string) == "Фотография"):
-        print(attch_desc.find_next_sibling().string)
+for filename in os.listdir(DIRECTORY):
+    if not filename.endswith(".html"):
+        continue
+
+    # Hand BeautifulSoup the raw bytes so it detects the page's charset itself
+    # instead of relying on the platform default; `with` closes the handle.
+    with open(os.path.join(DIRECTORY, filename), "rb") as page:
+        soup = BeautifulSoup(page, "html.parser")
+
+    for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
+        if str(attch_desc.string) != "Фотография":
+            continue
+
+        # Look the sibling up once; it supplies both the image text and the
+        # path up to the message header.
+        sibling = attch_desc.find_next_sibling()
+        image = str(sibling.string)
+        head = sibling.parent.parent.parent.find_previous_sibling()
+
+        # Use the header link's text as the name; fall back to the default
+        # when there is no link or the link has no single string.
+        author = head.a
+        name = str((author.string if author is not None else None) or "Толя Копыл")
+
+        # Second ", "-separated piece of the header text, minus the " (ред.)"
+        # marker (presumably the timestamp and an "edited" suffix — confirm).
+        date = str(head.text).split(", ")[1].replace(" (ред.)", "")