# Project 2 - MapReduce Program - Checking for non-English words in song lyrics
import glob
import os
import re

import read_english_dictionary

# Folders that hold the lyric files, resolved against the working directory
# at import time. os.path.join is used instead of a hard-coded "\" so the
# script does not rely on invalid escape sequences or a Windows separator.
paths = [
    os.path.join(os.getcwd(), "McCutchionAndBhowmick_Songs"),
    os.path.join(os.getcwd(), "Weslyan_Songs"),
]

# A file with more than this many whitespace-separated words gets split.
MAX_WORDS_PER_FILE = 5000

# Folder (under the working directory) that receives the split parts.
SPLIT_DIR_NAME = "split_files"


def _write_paragraph_parts(filename, text, split_dir):
    """Write every non-blank paragraph of *text* to its own file in *split_dir*.

    Paragraphs are the pieces of *text* separated by a blank line ("\\n\\n").
    Each part is named after the source file with a 1-based part number
    appended, e.g. ``song.txt`` -> ``song1.txt``, ``song2.txt``, ...

    NOTE(review): two source folders containing the same file name would
    overwrite each other's parts here -- confirm names are unique.
    """
    # glob returns the path including its folder; keep only the bare name so
    # that joining with split_dir cannot be overridden by an absolute path.
    stem = os.path.splitext(os.path.basename(filename))[0]
    paragraphs = (p for p in text.split("\n\n") if p.strip())
    for part_number, paragraph in enumerate(paragraphs, start=1):
        part_name = "{}{}.txt".format(stem, part_number)
        with open(os.path.join(split_dir, part_name), "w") as part_file:
            part_file.write(paragraph)


def split_files(file_paths, max_words=MAX_WORDS_PER_FILE):
    """Split over-long ``*.txt`` files into one file per paragraph.

    Every ``*.txt`` file directly inside each folder of *file_paths* is read.
    A file with more than *max_words* whitespace-separated words is written
    out again, paragraph by paragraph, into the ``split_files`` folder under
    the current working directory. That folder is then registered in the
    module-level ``paths`` list (once). The original file is left untouched.

    Args:
        file_paths: Iterable of full directory paths. A single directory may
            also be given as a plain string (e.g. ``os.getcwd()`` when the
            files sit next to this script).
        max_words: Word count above which a file is split. Defaults to 5000.
    """
    if isinstance(file_paths, str):
        file_paths = [file_paths]

    split_dir = os.path.join(os.getcwd(), SPLIT_DIR_NAME)

    # Iterate over a snapshot: callers commonly pass the global ``paths``,
    # which is appended to below, and a list must not grow while iterated.
    for path in list(file_paths):
        # Looping through all text files in each folder.
        for filename in glob.glob(os.path.join(path, "*.txt")):
            # Read once and reuse the text; a second read() on the same
            # handle would return "" because the file is already consumed.
            with open(filename, "r") as source_file:
                text = source_file.read()

            if len(text.split()) <= max_words:
                continue

            os.makedirs(split_dir, exist_ok=True)
            _write_paragraph_parts(filename, text, split_dir)

            if split_dir not in paths:
                paths.append(split_dir)