43 lines
948 B
Python
43 lines
948 B
Python
import os
|
|
from dotenv import load_dotenv
|
|
from telethon.sync import TelegramClient
|
|
import markovify
|
|
|
|
# Load variables from a local .env file (python-dotenv) before the
# credentials below are read from the environment.
load_dotenv()

# Telegram API credentials; each one is None when its variable is not set.
api_id = os.environ.get('API_ID')
api_hash = os.environ.get('API_HASH')

# Names of the chats that are processed by this script.
chats = ['tg1337const', 'cyberboec']
|
|
|
|
def cleanup(msg):
    """Return *msg* with the channel's subscribe footer and markup removed.

    Every occurrence of the two spellings of the subscribe link is dropped
    first, then the ``**`` and ``__`` markers. The passes run in that fixed
    order and each pass runs once, so a marker that only forms after a later
    pass (e.g. ``*__*`` -> ``**``) is left in place.
    """
    footer_lowercase = '[1337const | подписаться](https://t.me/tg1337const)'
    footer_capitalized = '[1337const | Подписаться](https://t.me/tg1337const)'

    without_footer = msg.replace(footer_lowercase, '').replace(footer_capitalized, '')
    return without_footer.replace('**', '').replace('__', '')
|
|
|
|
|
|
# Build one Markov-chain model per chat and store it as data/<chat>.json.
#
# open() does not create missing directories, so make sure the output
# directory exists up front instead of failing after a full download.
os.makedirs('data', exist_ok=True)

with TelegramClient('bot', api_id, api_hash) as client:
    for chat in chats:
        print(f'Collecting {chat}')

        # Keep only messages that carry text (message.text is falsy for the
        # others) and strip the channel footer/markup from each one.
        texts = [
            cleanup(message.text)
            for message in client.iter_messages(chat)
            if message.text
        ]

        # Nothing to train on: skip this chat rather than handing markovify
        # an empty corpus and losing the remaining chats to an exception.
        if not texts:
            print(f'No text messages in {chat}, skipping')
            continue

        # Join with a newline so that the last word of one message does not
        # fuse with the first word of the next, and so the sentence splitter
        # sees whitespace at every message boundary. A single join also
        # avoids rebuilding the whole string once per message.
        data = '\n'.join(texts)

        # The model is saved uncompiled; the original script had
        # `text_model.compile(inplace=True)` disabled at this point.
        text_model = markovify.Text(data)
        model_json = text_model.to_json()

        # The context manager closes the file even if the write fails.
        with open(f"data/{chat}.json", "w", encoding="utf-8") as f:
            f.write(model_json)
|