"""Scan exported message pages for photo attachments.

Post-patch content of ``archive/main.py`` (commit 5296ced, subject
"Message scraper reads several files").  For every ``.html`` file found in
``DIRECTORY`` the script looks up attachment descriptions marked as photos
and extracts, per attachment, the attachment value, the sender name and the
message date.  The extracted values are not stored or printed yet.
"""
import os

from bs4 import BeautifulSoup
import pymongo  # not used yet; NOTE(review): presumably for storing results later

DIRECTORY = "messages/"

# Text of an attachment description that identifies a photo.
PHOTO_LABEL = "Фотография"
# Sender name used when the message header contains no <a> link.
# NOTE(review): presumably such headers are the archive owner's own
# messages -- confirm against the export format.
DEFAULT_SENDER = "Толя Копыл"
# Suffix removed from the date part of the header text.
EDITED_SUFFIX = " (ред.)"

for filename in os.listdir(DIRECTORY):
    if not filename.endswith(".html"):
        continue

    # Binary mode lets BeautifulSoup detect the document encoding itself
    # instead of depending on the platform default, and ``with`` closes the
    # handle as soon as the page has been parsed.
    with open(os.path.join(DIRECTORY, filename), "rb") as page:
        soup = BeautifulSoup(page, "html.parser")

    for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
        if str(attch_desc.string) != PHOTO_LABEL:
            continue

        # The element right after the description holds the attachment value.
        value_node = attch_desc.find_next_sibling()
        image = str(value_node.string)

        # The message header is the element preceding the value node's
        # great-grandparent.
        head = value_node.parent.parent.parent.find_previous_sibling()

        # Fall back to the default sender when the header has no link or the
        # link has no single string child.
        sender_link = head.a
        if sender_link is not None and sender_link.string:
            name = str(sender_link.string)
        else:
            name = DEFAULT_SENDER

        # Header text is split on ", "; the second piece is taken as the date.
        date = str(head.text).split(", ")[1].replace(EDITED_SUFFIX, "")