"""Build one Markov-chain model per Telegram channel.

Reads ``API_ID``, ``API_HASH`` and ``CHANNELS`` (whitespace-separated chat
names) from the environment or a ``.env`` file, downloads the text of every
message in each chat, and writes the compiled markovify model to
``data/<chat>.json``.
"""
import os

from dotenv import load_dotenv
from telethon.sync import TelegramClient
import markovify

load_dotenv()

api_id = os.getenv('API_ID')
api_hash = os.getenv('API_HASH')

_channels = os.getenv('CHANNELS')
if _channels is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise SystemExit(
        'CHANNELS is not set; expected a whitespace-separated list of chats'
    )
chats = _channels.split()

# Substrings stripped from every message before it is fed to the model:
# the channel's subscribe-link footer (both capitalisations) and the
# `**` / `__` markup markers.
_UNWANTED_STRINGS = (
    '[1337const | подписаться](https://t.me/tg1337const)',
    '[1337const | Подписаться](https://t.me/tg1337const)',
    '**',
    '__',
)


def cleanup(msg):
    """Return *msg* with every entry of ``_UNWANTED_STRINGS`` removed."""
    for unwanted in _UNWANTED_STRINGS:
        msg = msg.replace(unwanted, '')
    return msg


# Create the output directory up front so a missing folder is not only
# discovered after a whole channel history has already been downloaded.
os.makedirs('data', exist_ok=True)

with TelegramClient('bot', api_id, api_hash) as client:
    for chat in chats:
        print(f'Collecting {chat}')

        # Join messages with a newline so the last word of one message is not
        # fused with the first word of the next; a single join also avoids
        # rebuilding an ever-growing string on every iteration.
        data = '\n'.join(
            cleanup(message.text)
            for message in client.iter_messages(chat)
            if message.text
        )

        text_model = markovify.Text(data)
        text_model.compile(inplace=True)
        model_json = text_model.to_json()

        # The context manager closes the file even if the write fails.
        with open(f"data/{chat}.json", "w", encoding="utf-8") as f:
            f.write(model_json)