python 读写大文件 – 杰力皓博

笔者遇到一个大文件，文件开头的字节全是 0x0A 和 0x00，需要处理一下：去掉开头连续的 0x0A 和 0x00 字节，其余内容原样保留。

import os

def process_large_file(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` without its leading padding.

    Every leading ``0x00`` and ``0x0A`` byte is dropped; from the first
    other byte onward the content is copied unchanged (later ``0x00`` /
    ``0x0A`` bytes are kept).  If the input is empty or consists only of
    those two byte values, the output file is created empty.

    The file is processed in 1 MiB chunks, so memory use stays bounded
    regardless of file size.

    Args:
        input_file: Path of the file to read (opened in binary mode).
        output_file: Path of the file to write.  It is opened with ``'wb'``
            and therefore truncated, so it must not be the same path as
            ``input_file``.
    """
    chunk_size = 1024 * 1024

    with open(input_file, 'rb') as infile, open(output_file, 'wb') as outfile:
        # Strip the leading run of 0x00 / 0x0A chunk by chunk instead of
        # one read() call per byte, which is very slow on long prefixes.
        for chunk in iter(lambda: infile.read(chunk_size), b''):
            remainder = chunk.lstrip(b'\x00\x0A')
            if remainder:
                # First real data found: keep it and stop stripping.
                outfile.write(remainder)
                break

        # Copy the rest of the file verbatim.  At EOF the first read
        # returns b'' and this loop does nothing.
        for chunk in iter(lambda: infile.read(chunk_size), b''):
            outfile.write(chunk)

if __name__ == "__main__":
    # Placeholder paths: point these at the real files before running.
    input_file = "path/to/your/large_input_file.dat"
    output_file = "path/to/your/output_file.dat"

    # Guard first: report a missing input instead of attempting to open it.
    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
    else:
        process_large_file(input_file, output_file)
        print(f"Processing complete. Output written to {output_file}.")

import os

def process_large_file(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` without its leading padding.

    Every leading ``0x00`` and ``0x0A`` byte is dropped; from the first
    other byte onward the content is copied unchanged (later ``0x00`` /
    ``0x0A`` bytes are kept).  If the input is empty or consists only of
    those two byte values, the output file is created empty.

    The file is processed in 1 MiB chunks, so memory use stays bounded
    regardless of file size.

    Args:
        input_file: Path of the file to read (opened in binary mode).
        output_file: Path of the file to write.  It is opened with ``'wb'``
            and therefore truncated, so it must not be the same path as
            ``input_file``.
    """
    chunk_size = 1024 * 1024

    with open(input_file, 'rb') as infile, open(output_file, 'wb') as outfile:
        # Strip the leading run of 0x00 / 0x0A chunk by chunk instead of
        # one read() call per byte, which is very slow on long prefixes.
        for chunk in iter(lambda: infile.read(chunk_size), b''):
            remainder = chunk.lstrip(b'\x00\x0A')
            if remainder:
                # First real data found: keep it and stop stripping.
                outfile.write(remainder)
                break

        # Copy the rest of the file verbatim.  At EOF the first read
        # returns b'' and this loop does nothing.
        for chunk in iter(lambda: infile.read(chunk_size), b''):
            outfile.write(chunk)

if __name__ == "__main__":
    # Placeholder paths: point these at the real files before running.
    input_file = "path/to/your/large_input_file.dat"
    output_file = "path/to/your/output_file.dat"

    # Only process when the input exists; otherwise report it and do nothing.
    if os.path.exists(input_file):
        process_large_file(input_file, output_file)
        print(f"Processing complete. Output written to {output_file}.")
    else:
        print(f"Input file {input_file} does not exist.")

相关文章

发表评论 取消回复

发表评论取消回复