mirror of
https://github.com/anatolykopyl/vk-bingo.git
synced 2026-03-26 21:04:26 +00:00
🚧 Скрейпер сообщений читает несколько файлов
This commit is contained in:
@@ -1,7 +1,17 @@
|
|||||||
|
import os
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import pymongo
|
||||||
|
|
||||||
soup = BeautifulSoup(open("messages/messages0.html"), "html.parser")
|
DIRECTORY = "messages/"
|
||||||
|
|
||||||
for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
|
for filename in os.listdir(DIRECTORY):
|
||||||
if (str(attch_desc.string) == "Фотография"):
|
if filename.endswith(".html"):
|
||||||
print(attch_desc.find_next_sibling().string)
|
soup = BeautifulSoup(open(DIRECTORY+filename), "html.parser")
|
||||||
|
|
||||||
|
for attch_desc in soup.find_all('div', attrs={'class': 'attachment__description'}):
|
||||||
|
if (str(attch_desc.string) == "Фотография"):
|
||||||
|
image = str(attch_desc.find_next_sibling().string)
|
||||||
|
head = attch_desc.find_next_sibling().parent.parent.parent.find_previous_sibling()
|
||||||
|
name = str(head.a and head.a.string or "Толя Копыл")
|
||||||
|
date = str(head.text).split(", ")[1].replace(" (ред.)", "")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user