commit edada2a968871c120b0c64ac9a188c50de5223c7
Author: Anatoly
Date:   Sat Apr 2 22:03:41 2022 +0300

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a7777c6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__
+.vscode
+venv
+.env
+/data/*
+*.session
diff --git a/grab_samples.py b/grab_samples.py
new file mode 100644
index 0000000..edf8804
--- /dev/null
+++ b/grab_samples.py
@@ -0,0 +1,50 @@
+"""Dump the text of every message in a Telegram chat to data/data.txt."""
+import os
+
+from dotenv import load_dotenv
+from telethon.sync import TelegramClient
+
+load_dotenv()
+
+api_id = os.getenv('API_ID')
+api_hash = os.getenv('API_HASH')
+chat = 'tg1337const'
+
+# Subscription footers that are stripped from every message.
+UNWANTED_STRINGS = (
+    '[1337const | подписаться](https://t.me/tg1337const)',
+    '[1337const | Подписаться](https://t.me/tg1337const)',
+)
+
+
+def cleanup(msg):
+    """Return msg with every string in UNWANTED_STRINGS removed.
+
+    Anything that is not a str (for example None) is returned unchanged.
+    """
+    if not isinstance(msg, str):
+        return msg
+
+    for unwanted in UNWANTED_STRINGS:
+        msg = msg.replace(unwanted, '')
+
+    return msg
+
+
+# data/ is git-ignored, so it may be missing on a fresh checkout.
+os.makedirs('data', exist_ok=True)
+
+with TelegramClient('session_name', api_id, api_hash) as client:
+    # Write UTF-8 explicitly: the footers are Cyrillic, so the text cannot
+    # be assumed to fit the platform's default encoding.
+    with open('data/data.txt', 'w', encoding='utf-8') as f:
+        for message in client.iter_messages(chat):
+            text = cleanup(message.text)
+            if not text:
+                # Nothing to write for a message that carries no text.
+                continue
+            try:
+                f.write(text)
+            except UnicodeEncodeError:
+                # Best effort: skip a message that cannot be encoded.
+                pass
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..fc2e926
--- /dev/null
+++ b/main.py
@@ -0,0 +1,13 @@
+"""Print a few sentences generated from the text in data/data.txt."""
+import markovify
+
+# Read as UTF-8 so the corpus decodes the same way on every platform.
+with open("data/data.txt", "r", encoding="utf-8") as f:
+    text = f.read()
+
+text_model = markovify.Text(text)
+
+for i in range(3):
+    # print(text_model.make_short_sentence(280))
+    # make_sentence() returns None when it cannot build a sentence.
+    print(text_model.make_sentence())
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..45f06a2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+markovify
+telethon
+python-dotenv