#Project 2 - MapReduce Program - Checking for non-English words in song lyrics


import os,glob
import read_english_dictionary
import re

#Directories that hold the song lyrics, resolved against the working directory
#at import time. os.path.join is used instead of hand-written "\" separators so
#the paths are valid on every platform and contain no invalid escape sequences.
paths = [
    os.path.join(os.getcwd(), "McCutchionAndBhowmick_Songs"),
    os.path.join(os.getcwd(), "Weslyan_Songs"),
]

#Enter paths as full directory. If files are in the same folder as check_english.py enter os.getcwd() as "file_paths"
def split_files(file_paths, max_words=5000):
    """Split every over-long ``.txt`` file into one file per paragraph.

    Each ``*.txt`` file found directly inside the given directories is read.
    When it holds more than ``max_words`` whitespace-separated words, its text
    is cut at blank lines and every non-blank paragraph is written to
    ``<cwd>/split_files/<name><n>.txt``, with ``n`` counting up from 1.
    The original file is left untouched.

    Args:
        file_paths: A directory path, or an iterable of directory paths.
        max_words: Word count above which a file is split.

    Side effects:
        Creates the ``split_files`` directory on demand and appends it once
        to the module-level ``paths`` list the first time a file is split.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(file_paths, str):
        file_paths = [file_paths]

    split_dir = os.path.join(os.getcwd(), "split_files")

    # Iterate over a snapshot: callers may pass the module-level ``paths``
    # list, which is appended to below.
    for path in list(file_paths):
        # Never re-split the parts produced by an earlier call.
        if os.path.abspath(path) == os.path.abspath(split_dir):
            continue

        #Looping through all files in each folder
        for filename in glob.glob(os.path.join(path, '*.txt')):
            # Read once and keep the raw text; a second read() on the same
            # handle would return an empty string.
            with open(filename, 'r') as f:
                text = f.read()

            if len(text.split()) <= max_words:
                continue

            os.makedirs(split_dir, exist_ok=True)
            if split_dir not in paths:
                paths.append(split_dir)

            # glob returns the full path; only the bare name may be joined
            # onto split_dir, otherwise the target directory is discarded.
            stem = os.path.splitext(os.path.basename(filename))[0]
            num = 1
            for paragraph in text.split("\n\n"):
                # Runs of blank lines yield empty pieces; skip them.
                if not paragraph.strip():
                    continue
                part_path = os.path.join(split_dir, stem + str(num) + ".txt")
                with open(part_path, "w") as part:
                    part.write(paragraph)
                num += 1