技术人生,  编程基础

Python基础:英文拼写检查器

本文使用Python实现一个简单的英文拼写检查器。

程序主要功能描述如下:

  • 从用户输入或指定文件读取文本内容
  • 文本预处理:忽略非英文内容
  • 逐一检查单词拼写(检测单词是否存在于单词库EnglishWords.txt中)
  • 针对错误单词,可由用户选择忽略、问号标记、添加到词典、推荐正确单词等选项
  • 使用字符串相似度(difflib.SequenceMatcher 的匹配比率)作为指标推荐正确单词
  • 生成并输出统计数据
  • 将统计数据和修订后的文本存储到指定文件中
  • 菜单美化:添加边框
  • 错误处理:防止程序因用户输入或文件读写等异常原因崩溃
# --- RyanXin SpellChecker ---
import os
import time
from difflib import SequenceMatcher


class SpellChecker():
	def __init__(self):
		'''start the process of SpellChecker'''

		# initialize English words list
		if not os.path.exists("EnglishWords.txt"):  # file check
			print("* Error: can not find the file \"EnglishWords.txt\".")
			return
		# use the hash set rather than list to speed up matching process
		self.wordList = set(line.rstrip('\n') for line in open("EnglishWords.txt", "r"))

		# initialize user dictionary which was kept in the file
		if os.path.exists("userDictionary.txt"):
			self.dictionary = set(line.rstrip() for line in open("userDictionary.txt", "r") if line.rstrip())
		else:
			self.dictionary = set()

		# main loop starts from here
		print("Welcome to use Spell Checker!" ,end='')
		while True:
			if self.displayMenu() == -1:
				break  # quit
			self.checkingWords()
			self.generateResults()

			while True:
				isRecheck = input("Return to the main menu? (y/n): ").strip().lower()
				if isRecheck in ('yes','y','no','n'):
					break
			if isRecheck[0] == 'n':
				break  # quit

		# store user dictionary
		with open("userDictionary.txt", "w") as f:
			for w in self.dictionary:
				f.write(w + '\n')

	def displayMenu(self):
		''' Main Menu: Work mode selection'''
		while True:
			print()
			# display main menu
			self.formatDisplay("1. Spell check a sentence;\n2. Spell check a file;\n0. Quit.", "Work mode selection menu.")
		
			while True:  # validate user input
				mode = input("What do you want to do: ").strip()
				if mode in ('1','2','0'):
					break
				else:
					print("* Invalid input. Please choose again.")

			if mode == '1':  # Spell check a sentence
				self.texts = input("\nPlease enter a sentence: ").strip()
				if self.texts:  # check empty string
					break
				else:
					print("* The sentence cannot be empty.")
					# return to main menu
					continue
			elif mode == '2':  # Spell check a file
				filename = input("\nPlease enter a filename: ").strip()
				if not filename:  # check empty string
					print("* The file name cannot be empty.")
					continue
				if os.path.exists(filename):  # filename validation
					try:
						with open(filename, 'r') as f:
							self.texts = f.read()
						break
					except:  # possible reading error, e.g. non-text files
						print("* Error: unexpected error occurred when reading your file.")
						continue
				else:
					print("* Error: The file does not exist.")
					continue
			else:  # quit
				return -1
		return 0

	def checkingWords(self):
		'''word checking process'''
		self.start_time = time.perf_counter()

		self.texts = self.texts.split()  # split original input
		self.original_length = len(self.texts)

		self.total_number = 0  # non-empty English words
		self.correct_number = 0  # words spelt correctly
		self.incorrect_number = 0  # words spelt incorrectly
		self.added_number = 0  # words added to the user dictionary
		self.accepted_number = 0  # words changed by the user accepting the suggested word

		# iterate with every word
		for i,word in enumerate(self.texts): 
			raw = self.texts[i]
			# filter out non-alpha characters and convert to lower case letters
			word = ''.join(x for x in word if x.isalpha()).lower()
			# store processed word
			self.texts[i] = word

			if not word:  # skip empty word
				print("\nSkipped non-alpha word \"" + raw + "\". (" + str(i+1) + "/" + str(self.original_length) + ")")
				continue
			
			self.total_number += 1
			print("\nchecking word: \"" + word + "\". (" + str(i+1) + "/" + str(self.original_length) + ")")

			# words in wordlist or user dictionary will be treated as correct
			if word in self.wordList or word in self.dictionary:
				self.correct_number += 1
				print('OK')
			else:
				# deal with incorrect word
				self.handleIncorrectWord(i, word)

		self.end_time = time.perf_counter()

	def handleIncorrectWord(self, i, word):
		'''deal with incorrect word'''
		print("Encountered an incorrect word.")
		self.formatDisplay("--> " + word + "\n\n1. Ignore;\n2. Mark;\n3. Add to dictionary;\n4. Suggest likely correct spelling.")
		while True:  # validate user input
			handle_method = input("Please choose the way to handle the incorrect word: ").strip()
			if handle_method in ('1','2','3','4'):
				break
			else:
				print("* Invalid input. Please choose again.")

		def markWord(word):
			'''mark word and update original texts'''
			marked = '?' + word + '?'
			self.texts[i] = marked
			print("\"" + word + "\" has been marked as \"" + marked + "\".")

		if handle_method == '1':
			# ignore
			self.incorrect_number += 1
			print("\"" + word + "\" has been ignored.")
		elif handle_method == '2':
			# mark
			self.incorrect_number += 1
			markWord(word)
		elif handle_method == '3':
			# add to dictionary
			self.correct_number += 1
			self.dictionary.add(word)
			self.added_number += 1
			print("\"" + word + "\" has been added into the dictionary and will be treated as correct word in the future.")
		else:
			# give suggestion
			suggest_word = ('', 0)  # (suggested word, max matching ratio)
			for w in self.wordList.union(self.dictionary):
				# match with every word in word list and user dictionary
				score = SequenceMatcher(None, word, w).ratio()
				if score > suggest_word[1]:
					suggest_word = (w, score)

			# ask user accept or not
			print("- A possible suggestion is \"" + suggest_word[0] + "\".")
			while True:
				isAccept = input("Accept this suggestion? (y/n): ").strip().lower()
				if isAccept in ('yes','y','no','n'):
					break

			if isAccept[0] == 'y':
				# accept the suggestion
				self.correct_number += 1
				self.accepted_number += 1
				# modify word and update original texts
				self.texts[i] = suggest_word[0]
				print("\"" + word + "\" has been changed to \"" + suggest_word[0] + "\".")
			else:
				# reject the suggestion
				self.incorrect_number += 1
				print("The suggestion has been rejected.")
				# if reject, the word will be marked for user to further check
				markWord(word)

	def generateResults(self):
		# generate summary statistics
		statistics =  "- Original words number: " + str(self.original_length) + ";\n"
		statistics += "- Total number of English words: " + str(self.total_number) + ";\n"
		statistics += "- Words spelt correctly: " + str(self.correct_number) + ";\n"
		statistics += "- Words spelt incorrectly: " + str(self.incorrect_number) + ";\n"
		statistics += "- Words added to the dictionary: " + str(self.added_number) + ";\n"
		statistics += "- Suggested words accepted: " + str(self.accepted_number) + ";\n"
		statistics += "- Spellcheck at: " + time.asctime(time.localtime(time.time()))[4:] + ";\n"
		statistics += "- The amount of time elapsed: {:.2f}s.".format(self.end_time - self.start_time)

		print()
		self.formatDisplay(statistics, "Summary Statistics")

		# generate file with summary statistics and checked texts
		while True:
			try:
				while True:  # ensure file name is not empty
					result_file_name = input("Name of new file with results: ").strip()
					if not result_file_name:  # check empty string
						print("* The file name cannot be empty.")
					else:
						break
				with open(result_file_name, 'w') as f:
					f.write("Spellcheck Statistics\n")
					f.write(statistics)
					f.write('\n\n')
					f.write(' '.join(w for w in self.texts if w))  # skip empty words
					f.write('\n')
				print("File \"" + result_file_name + "\" has been successfully created.\n")
				break
			except:  # possible error: invalid file name characters, etc.
				print("* Error: unexpected error occurred when writing into the file. Please try again.")

	def formatDisplay(self, contents, title=None, width=40):
		'''beautify and display contents with borders'''
		contents = contents.split('\n')
		# the top border
		print("╔" + "═"*width + "╗")
		# display title area (center)
		if title:
			left_distance = (width-len(title))//2
			right_distance = width - len(title) - left_distance
			print("║" + " "*left_distance + title + " "*right_distance + "║")  # center the title
			print("║" + " " + "─"*(width-2) + " " + "║")  # Dividing line
		# display all contents (left align)
		for line in contents:
			print("║" + line + " "*(width-len(line)) + "║")
		# the bottom border
		print("╚" + "═"*width + "╝")


# Script entry point: constructing SpellChecker runs the whole interactive
# session, because its __init__ contains the main loop.
if __name__ == '__main__':
	SpellChecker()
import os
import time
from difflib import SequenceMatcher


class SpellChecker():
	def __init__(self):
		'''start the process of SpellChecker'''

		# initialize English words list
		if not os.path.exists("EnglishWords.txt"):  # file check
			print("* Error: can not find the file \"EnglishWords.txt\".")
			return
		# use the hash set rather than list to speed up matching process
		self.wordList = set(line.rstrip('\n') for line in open("EnglishWords.txt", "r"))

		# initialize user dictionary which was kept in the file
		if os.path.exists("userDictionary.txt"):
			self.dictionary = set(line.rstrip() for line in open("userDictionary.txt", "r") if line.rstrip())
		else:
			self.dictionary = set()

		# main loop starts from here
		print("Welcome to use Spell Checker!" ,end='')
		while True:
			if self.displayMenu() == -1:
				break  # quit
			self.checkingWords()
			self.generateResults()

			while True:
				isRecheck = input("Return to the main menu? (y/n): ").strip().lower()
				if isRecheck in ('yes','y','no','n'):
					break
			if isRecheck[0] == 'n':
				break  # quit

		# store user dictionary
		with open("userDictionary.txt", "w") as f:
			for w in self.dictionary:
				f.write(w + '\n')

	def displayMenu(self):
		''' Main Menu: Work mode selection'''
		while True:
			print()
			# display main menu
			self.formatDisplay("1. Spell check a sentence;\n2. Spell check a file;\n0. Quit.", "Work mode selection menu.")
		
			while True:  # validate user input
				mode = input("What do you want to do: ").strip()
				if mode in ('1','2','0'):
					break
				else:
					print("* Invalid input. Please choose again.")

			if mode == '1':  # Spell check a sentence
				self.texts = input("\nPlease enter a sentence: ").strip()
				if self.texts:  # check empty string
					break
				else:
					print("* The sentence cannot be empty.")
					# return to main menu
					continue
			elif mode == '2':  # Spell check a file
				filename = input("\nPlease enter a filename: ").strip()
				if not filename:  # check empty string
					print("* The file name cannot be empty.")
					continue
				if os.path.exists(filename):  # filename validation
					try:
						with open(filename, 'r') as f:
							self.texts = f.read()
						break
					except:  # possible reading error, e.g. non-text files
						print("* Error: unexpected error occurred when reading your file.")
						continue
				else:
					print("* Error: The file does not exist.")
					continue
			else:  # quit
				return -1
		return 0

	def checkingWords(self):
		'''word checking process'''
		self.start_time = time.perf_counter()

		self.texts = self.texts.split()  # split original input
		self.original_length = len(self.texts)

		self.total_number = 0  # non-empty English words
		self.correct_number = 0  # words spelt correctly
		self.incorrect_number = 0  # words spelt incorrectly
		self.added_number = 0  # words added to the user dictionary
		self.accepted_number = 0  # words changed by the user accepting the suggested word

		for i,word in enumerate(self.texts):  # iterate with every word
			word = self.processWord(word)
			if not word:  # skip empty word
				print("\nSkipped non-alpha word \"" + self.texts[i] + "\". (" + str(i+1) + "/" + str(self.original_length) + ")")
				continue
			
			self.total_number += 1
			print("\nchecking word: \"" + word + "\". (" + str(i+1) + "/" + str(self.original_length) + ")")

			# words in wordlist or user dictionary will be treated as correct
			if word in self.wordList or word in self.dictionary:
				self.correct_number += 1
				print('OK')
			else:
				# deal with incorrect word
				self.handleIncorrectWord(i, word)

		self.end_time = time.perf_counter()

	def processWord(self, word):
		'''extract pure English word'''
		wd = ''.join(x for x in word if x.isalpha())  # filter out non-alpha characters
		wd = wd.lower()  # transform into lower case letters

		if not wd:  # check if wd is empty
			return ''

		# back up non-alpha characters in both sides of the word
		for l in range(len(word)):
			if word[l].isalpha():
				break
		for r in range(len(word)-1, -1, -1):
			if word[r].isalpha():
				break
		self.left_sides = word[:l]
		self.right_sides = word[r+1:]

		return wd  # return processed word

	def handleIncorrectWord(self, i, word):
		'''deal with incorrect word'''
		print("Encountered an incorrect word.")
		self.formatDisplay("--> " + word + "\n\n1. Ignore;\n2. Mark;\n3. Add to dictionary;\n4. Suggest likely correct spelling.")
		while True:  # validate user input
			handle_method = input("Please choose the way to handle the incorrect word: ").strip()
			if handle_method in ('1','2','3','4'):
				break
			else:
				print("* Invalid input. Please choose again.")

		def markWord(word):
			'''mark word and restore non-alpha characters'''
			marked = '?' + word + '?'
			self.texts[i] = self.left_sides + marked + self.right_sides
			print("\"" + word + "\" has been marked as \"" + marked + "\".")

		if handle_method == '1':
			# ignore
			self.incorrect_number += 1
			print("\"" + word + "\" has been ignored.")
		elif handle_method == '2':
			# mark
			self.incorrect_number += 1
			markWord(word)
		elif handle_method == '3':
			# add to dictionary
			self.correct_number += 1
			self.dictionary.add(word)
			self.added_number += 1
			print("\"" + word + "\" has been added into the dictionary and will be treated as correct word in the future.")
		else:
			# give suggestion
			suggest_word = ('', 0)  # (suggested word, max matching ratio)
			for w in self.wordList.union(self.dictionary):
				# match with every word in word list and user dictionary
				score = SequenceMatcher(None, word, w).ratio()
				if score > suggest_word[1]:
					suggest_word = (w, score)

			# ask user accept or not
			print("- A possible suggestion is \"" + suggest_word[0] + "\".")
			while True:
				isAccept = input("Accept this suggestion? (y/n): ").strip().lower()
				if isAccept in ('yes','y','no','n'):
					break

			if isAccept[0] == 'y':
				# accept the suggestion
				self.correct_number += 1
				self.accepted_number += 1
				# modify word and restore non-alpha characters
				self.texts[i] = self.left_sides + suggest_word[0] + self.right_sides
				print("\"" + word + "\" has been changed to \"" + suggest_word[0] + "\".")
			else:
				# reject the suggestion
				self.incorrect_number += 1
				print("The suggestion has been rejected.")
				markWord(word)

	def generateResults(self):
		# generate summary statistics
		statistics =  "- Original words number: " + str(self.original_length) + ";\n"
		statistics += "- Total number of English words: " + str(self.total_number) + ";\n"
		statistics += "- Words spelt correctly: " + str(self.correct_number) + ";\n"
		statistics += "- Words spelt incorrectly: " + str(self.incorrect_number) + ";\n"
		statistics += "- Words added to the dictionary: " + str(self.added_number) + ";\n"
		statistics += "- Suggested words accepted: " + str(self.accepted_number) + ";\n"
		statistics += "- Spellcheck at: " + time.asctime(time.localtime(time.time()))[4:] + ";\n"
		statistics += "- The amount of time elapsed: {:.2f}s.".format(self.end_time - self.start_time)

		print()
		self.formatDisplay(statistics, "Summary Statistics")

		# generate file with summary statistics and checked texts
		while True:
			try:
				while True:  # ensure file name is not empty
					result_file_name = input("Name of new file with results: ").strip()
					if not result_file_name:  # check empty string
						print("* The file name cannot be empty.")
					else:
						break
				with open(result_file_name, 'w') as f:
					f.write("Spellcheck Statistics\n")
					f.write(statistics)
					f.write('\n\n')
					f.write(' '.join(self.texts))
					f.write('\n')
				print("File \"" + result_file_name + "\" has been successfully created.\n")
				break
			except:  # possible error: invalid file name characters, etc.
				print("* Error: unexpected error occurred when writing into the file. Please try again.")

	def formatDisplay(self, contents, title=None, width=40):
		'''beautify and display contents with borders'''
		contents = contents.split('\n')
		# the top border
		print("╔" + "═"*width + "╗")
		# display title area (center)
		if title:
			left_distance = (width-len(title))//2
			right_distance = width - len(title) - left_distance
			print("║" + " "*left_distance + title + " "*right_distance + "║")  # center the title
			print("║" + " " + "─"*(width-2) + " " + "║")  # Dividing line
		# display all contents (left align)
		for line in contents:
			print("║" + line + " "*(width-len(line)) + "║")
		# the bottom border
		print("╚" + "═"*width + "╝")


# Script entry point: constructing SpellChecker runs the whole interactive
# session, because its __init__ contains the main loop.
if __name__ == '__main__':
	SpellChecker()

A WindRunner. VoyagingOne

留言

您的电子邮箱地址不会被公开。 必填项已用 * 标注