import os

# Folder that is scanned for the source .txt files.
input_folder = 'egw-with-some-comments'  # Where your 62 files are
# Folder that receives the filtered copies (same file names as the input).
output_folder = 'cleaned_files'

# Create the output folder if it doesn't exist. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test and makes
# repeated runs a no-op.
os.makedirs(output_folder, exist_ok=True)

def should_keep(line):
    """Return True if *line* should be copied to the cleaned output.

    Blank (whitespace-only) lines are always kept. Any other line is kept
    only when it has at least as many lowercase characters as uppercase
    ones; characters that are neither (digits, punctuation, spaces) do not
    count either way.
    """
    text = line.strip()
    if text == "":
        return True

    # Running balance: +1 per lowercase character, -1 per uppercase one.
    balance = 0
    for ch in text:
        if ch.islower():
            balance += 1
        elif ch.isupper():
            balance -= 1

    # Non-negative balance means lowercase count >= uppercase count.
    return balance >= 0

# Copy every .txt file from input_folder to output_folder, keeping only the
# lines that should_keep() accepts.
for filename in os.listdir(input_folder):
    if not filename.endswith(".txt"):
        continue

    in_path = os.path.join(input_folder, filename)
    # os.listdir() also returns directories; one whose name ends in ".txt"
    # would make open() raise and abort the whole run, so skip non-files.
    if not os.path.isfile(in_path):
        continue

    out_path = os.path.join(output_folder, filename)

    # Stream the file line-by-line to save memory
    with open(in_path, 'r', encoding='utf-8') as f_in, \
         open(out_path, 'w', encoding='utf-8') as f_out:
        # The generator is consumed lazily, so only one line is held at a time.
        f_out.writelines(line for line in f_in if should_keep(line))

print(f"Done! Cleaned files are in the '{output_folder}' folder.")
