A-A+
批量txt去重程序 支持超大txt文件去重(GB) python脚本
选定文件夹后,程序会对该文件夹内的每个 txt 文件逐个按行去重(每个文件单独处理),使每个文件内不再有重复的行。
下载地址:https://github.com/sysalong/Batch-txt-deduplication/releases/download/v1/txt.exe
程序运行后截图:
import tkinter as tk
from tkinter import filedialog
import os
def select_folder():
    """Ask the user for a folder and show the chosen path in ``folder_label``.

    The label text is written as ``"已选择的文件夹路径: " + path``. If the
    dialog is dismissed without a choice, the label is left untouched so a
    previously selected folder is not replaced by an empty path.
    """
    folder_path = filedialog.askdirectory()
    if not folder_path:
        # askdirectory() yields an empty value when the dialog is cancelled.
        return
    folder_label.config(text="已选择的文件夹路径: " + folder_path)
def _dedupe_lines(source_path, target_path, on_progress=None, progress_every=30):
    """Copy a text file, keeping only the first occurrence of each line.

    Lines are compared without their trailing newline, so a final line that
    lacks one still matches an earlier identical line. Every distinct line is
    held in memory for the duration of the call.

    Args:
        source_path: Path of the UTF-8 text file to read.
        target_path: Path of the file to create (overwritten if it exists).
        on_progress: Optional callable receiving the number of lines read so
            far; invoked once every ``progress_every`` lines.
        progress_every: Number of lines between ``on_progress`` calls.

    Returns:
        The total number of lines read from ``source_path``.

    Raises:
        OSError: If either file cannot be opened, read or written.
        UnicodeDecodeError: If ``source_path`` is not valid UTF-8.
    """
    seen = set()
    line_count = 0
    with open(source_path, 'r', encoding='utf-8') as source, \
            open(target_path, 'w', encoding='utf-8') as target:
        for line_count, line in enumerate(source, start=1):
            key = line.rstrip('\n')
            if key not in seen:
                seen.add(key)
                # No flush per line: the file object's own buffering is what
                # keeps large files fast, and closing the file flushes it.
                target.write(line)
            if on_progress is not None and line_count % progress_every == 0:
                on_progress(line_count)
    return line_count


def process_files():
    """Remove duplicate lines from every ``.txt`` file in the selected folder.

    The folder is read back from the text of ``folder_label``. Each file is
    de-duplicated into a sibling ``<name>.temp`` file, which then replaces the
    original. Progress is shown in ``progress_label`` and completion in
    ``result_label``. If no valid folder has been selected, a message is shown
    in ``result_label`` and nothing is processed.

    Raises:
        OSError: If a file cannot be read, written or replaced.
        UnicodeDecodeError: If a file is not valid UTF-8. The original file is
            left untouched and the partial temp file is removed.
    """
    # The label text is "<prefix>: <path>". Partition on the first ": " so a
    # path that itself contains ": " is kept whole.
    folder_path = folder_label.cget("text").partition(": ")[2]
    if not os.path.isdir(folder_path):
        result_label.config(text="请先选择有效的文件夹!")
        return

    files_list = [
        name for name in os.listdir(folder_path)
        if name.endswith(".txt")
        and os.path.isfile(os.path.join(folder_path, name))
    ]
    total_files = len(files_list)

    for index, name in enumerate(files_list, start=1):
        file_path = os.path.join(folder_path, name)
        output_file_path = file_path + ".temp"

        def report(line_number, file_number=index):
            progress_label.config(
                text="已处理到第{}个文件的第{}行数据!".format(file_number, line_number)
            )
            # This callback blocks the Tk event loop, so pending redraws must
            # be flushed explicitly or the label never visibly changes.
            root.update_idletasks()

        try:
            _dedupe_lines(file_path, output_file_path, report)
        except Exception:
            # Do not leave a half-written temp file next to the original.
            if os.path.exists(output_file_path):
                os.remove(output_file_path)
            raise

        # Swap in the result in one step instead of remove-then-rename, so the
        # original is never missing if the process is interrupted in between.
        os.replace(output_file_path, file_path)
        progress_label.config(text="正在处理文件 {}/{}".format(index, total_files))
        root.update_idletasks()

    result_label.config(text="处理完成!")
def update_window_size():
    """Resize the main window to width 400 and its currently requested height."""
    # Process pending idle tasks first so the requested height is up to date.
    root.update_idletasks()
    needed_height = root.winfo_reqheight()
    root.geometry("400x{}".format(needed_height))
# --- Main window -----------------------------------------------------------
root = tk.Tk()
root.title("文件夹处理工具")
root.geometry("400x200")  # Initial window size; adjusted by update_window_size()

# --- Folder selection: button plus a label that displays the chosen path ---
select_button = tk.Button(root, text="选择文件夹", command=select_folder)
select_button.pack(pady=10, padx=10, side=tk.LEFT)
folder_label = tk.Label(root, text="已选择的文件夹路径: ")
folder_label.pack(pady=5, padx=10, anchor=tk.W)

# --- Start button and the labels process_files() writes its status to ------
process_button = tk.Button(root, text="开始", command=process_files)
process_button.pack(pady=10, padx=10, side=tk.LEFT)
progress_label = tk.Label(root, text="")
progress_label.pack(pady=5, padx=10, anchor=tk.W)
result_label = tk.Label(root, text="")
result_label.pack(pady=5, padx=10, anchor=tk.W)

root.after(100, update_window_size)  # Update window size after widgets are drawn
root.mainloop()

布施恩德可便相知重
微信扫一扫打赏
支付宝扫一扫打赏