Commit 2bef2a34 authored by Dom Walden
Browse files

Script to apply random edits to a string.

parent e1967d0b
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html
import nltk
import random
from nltk.tokenize.treebank import TreebankWordDetokenizer
# Text to scramble: paste the input between the triple quotes.
edit = """
<insert text here>
"""

# Build the detokenizer once and reuse it for every line instead of
# constructing a new instance on each loop iteration.
detokenizer = TreebankWordDetokenizer()

# break on new lines
lines = edit.splitlines()
tolines = []
for line in lines:
    # Coin flip per line: either scramble its words or keep it unchanged.
    if random.choice([True, False]):
        # Make random edits to the sentence: split it into tokens, shuffle
        # the tokens in place, then join them back into a single string.
        tokens = nltk.word_tokenize(line)
        random.shuffle(tokens)
        newline = detokenizer.detokenize(tokens)
    else:
        # Leave the sentence as is
        newline = line
    tolines.append(newline)

# Randomise order of sentences/lines/paragraphs
random.shuffle(tolines)
print("\n".join(tolines))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment