🚧 Скрейпер сообщений читает несколько файлов

This commit is contained in:
2021-03-27 01:54:36 +03:00
parent b7011ba4d9
commit 5296ced94e

View File

@@ -1,7 +1,17 @@
import os
from bs4 import BeautifulSoup
import pymongo
# Folder scanned for message pages; every "*.html" file inside it is parsed.
DIRECTORY = "messages/"

# Walk every HTML page and, for each photo attachment, extract the image
# reference plus the sender name and date found in the message header.
for filename in os.listdir(DIRECTORY):
    if filename.endswith(".html"):
        # Close each page as soon as it has been parsed so file handles do
        # not accumulate while the loop walks through the directory.
        # NOTE(review): the file is still decoded with the platform default
        # encoding -- confirm that it matches the charset of the pages.
        with open(os.path.join(DIRECTORY, filename)) as page:
            soup = BeautifulSoup(page, "html.parser")

        for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
            # Only attachments whose description text is exactly this label
            # are processed; everything else is ignored.
            if str(attch_desc.string) == "Фотография":
                # The element right after the description holds the image
                # reference; look it up once and reuse it below.
                sibling = attch_desc.find_next_sibling()
                print(sibling.string)
                image = str(sibling.string)
                # The header is the previous sibling of the element three
                # levels above the attachment's sibling node.
                head = sibling.parent.parent.parent.find_previous_sibling()
                # Use the header link text as the sender name; fall back to
                # the fixed name when there is no link or its text is empty.
                # NOTE(review): presumably link-less headers belong to the
                # archive owner -- confirm.
                anchor = head.a
                sender = anchor.string if anchor is not None else None
                name = str(sender or "Толя Копыл")
                # Header text is split on ", " and the second part is kept as
                # the date, with the " (ред.)" suffix removed.
                # NOTE(review): presumably that suffix marks edited messages.
                date = str(head.text).split(", ")[1].replace(" (ред.)", "")