From 9ba51c0fd1ac1b52a06bfa25b1b857ccfa8908a9 Mon Sep 17 00:00:00 2001 From: Anatoly Date: Sun, 3 Apr 2022 18:58:18 +0300 Subject: [PATCH] Split models --- grab_samples.py => create_models.py | 26 +++++++++++------------ main.py | 32 +++++++++++++++++++---------- 2 files changed, 34 insertions(+), 24 deletions(-) rename grab_samples.py => create_models.py (60%) diff --git a/grab_samples.py b/create_models.py similarity index 60% rename from grab_samples.py rename to create_models.py index 3951e61..c606325 100644 --- a/grab_samples.py +++ b/create_models.py @@ -1,6 +1,7 @@ import os from dotenv import load_dotenv from telethon.sync import TelegramClient +import markovify load_dotenv() @@ -19,24 +20,23 @@ def cleanup(msg): '__' ] - try: - for unwanted in unwanted_strings: - msg = msg.replace(unwanted, '') + for unwanted in unwanted_strings: + msg = msg.replace(unwanted, '') - return msg - except TypeError: - return msg + return msg with TelegramClient('bot', api_id, api_hash) as client: - f = open("data/data.txt", "w") - for chat in chats: print(f'Collecting {chat}') + data = '' for message in client.iter_messages(chat): - try: - f.write(cleanup(message.text)) - except: - pass + if (message.text): + data = data + cleanup(message.text) - f.close() + text_model = markovify.Text(data) + # text_model.compile(inplace = True) + model_json = text_model.to_json() + f = open(f"data/{chat}.json", "w") + f.write(model_json) + f.close() diff --git a/main.py b/main.py index 71d0025..fb6d4b1 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import os +import random from dotenv import load_dotenv from telethon.sync import TelegramClient import markovify @@ -15,20 +16,29 @@ api_hash = os.getenv('API_HASH') entity = os.getenv('ENTITY') dry_run = args.dry -with open("data/data.txt", "r") as f: - text = f.read() +def create_message(model): + message = '' -text_model = markovify.Text(text) -message = '' + for i in range(5): + try: + message = message + model.make_sentence() + except TypeError: + pass -for i in range(5): - try: - message = message + text_model.make_sentence() - except TypeError: - pass + return message + +avalible_files = os.listdir('data') +avalible_files.remove('.gitkeep') + +filename = random.choice(avalible_files) +print(f'chosen {filename}') +with open(os.path.join('data', filename), 'r') as f: + model_json = f.read() + +model = markovify.Text.from_json(model_json) if (dry_run): - print(message) + print(create_message(model)) else: with TelegramClient('bot', api_id, api_hash) as client: - client.send_message(entity=entity, message=message) + client.send_message(entity=entity, message=create_message(model))