Merge.py
Join the DZone community and get the full member experience.
Join For Free
Merge a number of text files, removing duplicates and sorting the results.
Usage: merge [-e charset] filenames destination
Example: merge folder/*.log list.txt merged.txt
from codecs import open
from getopt import getopt
from glob import glob
from os import linesep
from sys import argv
def main(args=None):
    """Merge text files into one sorted, duplicate-free text file.

    Command line: ``[-e charset] filenames destination``.  Every argument
    between the options and the final one is an input file name; names that
    contain ``*`` are expanded with ``glob``.  The last argument is the file
    the merged lines are written to.

    Args:
        args: argv-style list (program name first).  Defaults to ``sys.argv``
            so a plain ``main()`` call behaves as a command-line entry point.

    Raises:
        SystemExit: with a usage message when fewer than two arguments follow
            the program name or when an unknown option is given.
    """
    # Function-scope imports keep this block self-contained.  io.open behaves
    # the same on Python 2.6+ and Python 3 and, unlike codecs.open (which
    # always works in binary mode), performs universal-newline translation.
    import io
    from getopt import GetoptError

    args = list(argv if args is None else args)
    program = args[0] if args else 'merge'
    usage = 'usage: %s [-e charset] filenames destination' % program
    if len(args) < 3:
        raise SystemExit(usage)

    try:
        options, patterns = getopt(args[1:-1], 'e:')
    except GetoptError as error:
        raise SystemExit('%s\n%s' % (error, usage))
    destination = args[-1]
    charset = dict(options).get('-e', 'utf-8')

    # Expand wildcards from the parsed file arguments only (never from the
    # option values); collecting into a set makes repeated names harmless.
    filenames = set()
    for pattern in patterns:
        if '*' in pattern:
            filenames.update(glob(pattern))
        else:
            filenames.add(pattern)
    filenames = sorted(filenames)  # deterministic processing and report order

    unique_lines = set()
    for name in filenames:
        with io.open(name, 'r', encoding=charset) as source:
            # Iterating the file yields real lines only, so a trailing newline
            # does not add a spurious empty entry, and '\r\n' / '\r' endings
            # have already been normalised to '\n'.
            unique_lines.update(line.rstrip('\n') for line in source)
    result = sorted(unique_lines)

    with io.open(destination, 'w', encoding=charset) as target:
        # Text mode translates '\n' to the platform line separator on write.
        # The u'' literal keeps the data unicode on Python 2 as well.
        target.writelines(line + u'\n' for line in result)

    print('%s = %s (%d lines)'
          % (' + '.join(filenames), destination, len(result)))
# Run the merge only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Opinions expressed by DZone contributors are their own.
Comments