Split models
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2022-04-03 18:58:18 +03:00
parent a2beaed8b0
commit 9ba51c0fd1
2 changed files with 34 additions and 24 deletions

42
create_models.py Normal file
View File

@@ -0,0 +1,42 @@
import os
from dotenv import load_dotenv
from telethon.sync import TelegramClient
import markovify
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Telegram API credentials, read from the environment (None when unset).
api_id = os.environ.get('API_ID')
api_hash = os.environ.get('API_HASH')

# Chats to collect messages from; one model is produced per entry.
chats = ['tg1337const', 'cyberboec']
def cleanup(msg):
    """Return ``msg`` with the unwanted fragments stripped out.

    Every occurrence of each fragment is deleted, in the order listed:
    the two capitalisation variants of the 1337const subscribe link,
    then the ``**`` and ``__`` markers.
    """
    fragments_to_drop = (
        '[1337const | подписаться](https://t.me/tg1337const)',
        '[1337const | Подписаться](https://t.me/tg1337const)',
        '**',
        '__',
    )
    cleaned = msg
    for fragment in fragments_to_drop:
        cleaned = cleaned.replace(fragment, '')
    return cleaned
# Build one Markov model per chat and store it as data/<chat>.json.
# open(..., "w") does not create missing directories, so make sure the
# output directory exists before any model is written.
os.makedirs('data', exist_ok=True)
with TelegramClient('bot', api_id, api_hash) as client:
    for chat in chats:
        print(f'Collecting {chat}')
        # Cleaned text of every message that carries any text at all.
        texts = [
            cleanup(message.text)
            for message in client.iter_messages(chat)
            if message.text
        ]
        if not texts:
            # Nothing to train on; skip rather than hand markovify an
            # empty corpus.
            print(f'No text messages in {chat}, skipping')
            continue
        # Join with newlines so the last word of one message is not fused
        # with the first word of the next one.
        data = '\n'.join(texts)
        text_model = markovify.Text(data)
        # text_model.compile(inplace = True)
        model_json = text_model.to_json()
        # The context manager closes the file even if the write fails.
        with open(f"data/{chat}.json", "w", encoding="utf-8") as f:
            f.write(model_json)