Files
politics-mark/create_models.py
Anatoly c0d2b1e35b
All checks were successful
continuous-integration/drone/push Build is passing
Added some more models
2022-04-03 19:41:03 +03:00

46 lines
991 B
Python

import os
from dotenv import load_dotenv
from telethon.sync import TelegramClient
import markovify
# Credentials are read from the process environment after load_dotenv() has
# run (presumably merging values from a local .env file -- see python-dotenv).
# Either value is None when its variable is not set.
load_dotenv()
api_id = os.environ.get('API_ID')
api_hash = os.environ.get('API_HASH')
# Names of the chats whose messages are collected; each one yields its own
# model file named after the chat.
chats = [
    'tg1337const',
    'cyberboec',
    'topor',
    'workBrothers',
    'anekdot_bb',
]
def cleanup(msg):
    """Return ``msg`` with known boilerplate fragments removed.

    The fragments are deleted one after another, in the order listed: the two
    capitalisation variants of the 1337const subscribe link, then every
    literal ``**`` and finally every literal ``__`` (Markdown-style emphasis
    markers). Because removal is sequential, a later fragment can match text
    that only became adjacent after an earlier removal.
    """
    fragments = (
        '[1337const | подписаться](https://t.me/tg1337const)',
        '[1337const | Подписаться](https://t.me/tg1337const)',
        '**',
        '__',
    )
    cleaned = msg
    for fragment in fragments:
        cleaned = cleaned.replace(fragment, '')
    return cleaned
# Build one Markov-chain model per chat and store it as data/<chat>.json.
# Make sure the output directory exists before anything is written to it.
os.makedirs('data', exist_ok=True)

# 'bot' is the session name handed to Telethon; the client stays open for the
# whole `with` block and is closed when the block exits.
with TelegramClient('bot', api_id, api_hash) as client:
    for chat in chats:
        print(f'Collecting {chat}')

        # Collect the cleaned text of every message that has any. Texts are
        # kept in a list and joined once, which avoids the quadratic cost of
        # growing one string with repeated `+`.
        texts = []
        for message in client.iter_messages(chat):
            if not message.text:
                continue
            cleaned = cleanup(message.text)
            if cleaned.strip():
                texts.append(cleaned)

        # An empty corpus gives markovify nothing to build a model from, so
        # skip the chat instead of letting one empty chat abort the whole run.
        if not texts:
            print(f'No text collected from {chat}, skipping')
            continue

        # Separate messages with a newline: gluing them together directly
        # fuses the last word of one message with the first word of the next
        # and hides the message boundary from the sentence splitter.
        data = '\n'.join(texts)

        text_model = markovify.Text(data)
        text_model.compile(inplace=True)
        model_json = text_model.to_json()

        # The context manager closes the file even if the write fails; the
        # encoding is pinned so the output does not depend on the locale.
        with open(f"data/{chat}.json", "w", encoding="utf-8") as f:
            f.write(model_json)