This commit is contained in:
42
create_models.py
Normal file
42
create_models.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from telethon.sync import TelegramClient
|
||||
import markovify
|
||||
|
||||
# Load variables from a .env file (if any) before the environment is read.
load_dotenv()

# Telegram API credentials; each is None when its variable is unset.
api_id = os.environ.get('API_ID')
api_hash = os.environ.get('API_HASH')

# Usernames of the channels to build a model for (one output file each).
chats = [
    'tg1337const',
    'cyberboec',
]
|
||||
|
||||
def cleanup(msg):
    """Return *msg* with the channel footer link and emphasis markers removed.

    Every occurrence of each unwanted fragment is deleted. Fragments are
    removed one after another in the order listed, so a later fragment is
    matched against the text left over by the earlier removals.
    """
    fragments = (
        # Subscribe footer, in both capitalizations that occur.
        '[1337const | подписаться](https://t.me/tg1337const)',
        '[1337const | Подписаться](https://t.me/tg1337const)',
        # Markdown bold / italic markers.
        '**',
        '__',
    )

    cleaned = msg
    for fragment in fragments:
        cleaned = cleaned.replace(fragment, '')
    return cleaned
|
||||
|
||||
|
||||
# Build one Markov model per channel and store it as data/<chat>.json.

# Create the output directory up front so that open() below does not fail
# with FileNotFoundError on a fresh checkout.
os.makedirs('data', exist_ok=True)

with TelegramClient('bot', api_id, api_hash) as client:
    for chat in chats:
        print(f'Collecting {chat}')

        # Collect the cleaned text of every message that has text, then join
        # once. The newline separator keeps the last word of one message from
        # fusing with the first word of the next, and a single join avoids
        # rebuilding an ever-growing string on each iteration.
        texts = [
            cleanup(message.text)
            for message in client.iter_messages(chat)
            if message.text
        ]
        data = '\n'.join(texts)

        text_model = markovify.Text(data)
        # NOTE(review): the model is serialized uncompiled; the original had a
        # disabled `text_model.compile(inplace=True)` call at this point.
        model_json = text_model.to_json()

        # The context manager closes the file even if write() raises.
        with open(f"data/{chat}.json", "w", encoding="utf-8") as f:
            f.write(model_json)
|
||||
Reference in New Issue
Block a user