Delete x-line paragraphs from text file with Python -
I have a long text file made of paragraphs of 6 or 7 lines each. I need to take the 7-line paragraphs and write them to one file, then take the 6-line paragraphs and write them to another file — or simply delete the 6-line (or 7-line) paragraphs. Each paragraph is separated by a blank line (or 2 blank lines). Text file example:
firs name last name
address1
address2
note 1
note 2
note3
note 4

first name lastname
add 1
add 2
note2
note3
note4

etc...
I want to use Python 3 on Windows. Any help is welcome. Thanks!
As a welcome to Stack Overflow, and because I think you have searched for more code, I propose the following code.
It verifies that the paragraphs have no more than 7 lines and no fewer than 6 lines. It warns when such paragraphs exist in the source.
You'll remove the prints to have clean code, but with them you can follow the algorithm.
I think there is no bug in it, but don't take that as 100% sure.
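It isn't the only manner, but I chose a way that can be used with all types of files, big or not: iterating one line at a time. Reading the entire file in one pass could also be done, and the content then split into a list of lines or treated with the help of regexes; however, when a file is enormous, reading it all in one time is memory-consuming.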
# Sort the paragraphs of source.txt by length: 6-line paragraphs go to
# six.txt, 7-line paragraphs go to seven.txt, every other paragraph is
# skipped and reported in a warning. Paragraphs are separated by one or more
# blank lines. The source is read one line at a time, so the size of the file
# does not matter.
import itertools


def split_paragraphs(lines, six, seven):
    """Dispatch 6-line and 7-line paragraphs to two writable files.

    Args:
        lines: iterable of text lines (an open text file works).
        six: writable text file receiving the 6-line paragraphs.
        seven: writable text file receiving the 7-line paragraphs.

    Returns:
        A tuple ``(too_long, too_short)``: the number of paragraphs with more
        than 7 lines and with fewer than 6 lines. Those are not written.

    A line is blank when nothing is left after removing its line ending;
    a line holding only spaces therefore counts as content. Every
    paragraph written is followed by one blank line.
    """
    too_long = too_short = 0
    kept = []  # lines of the current paragraph, never more than 7 of them
    size = 0   # real number of lines of the current paragraph

    # The trailing '' acts as a final blank line so that a paragraph ending
    # exactly at end of file is flushed like any other.
    for cnt, line in enumerate(itertools.chain(lines, ['']), 1):
        if line.rstrip('\n\r'):
            size += 1
            if size <= 7:
                # A last line without a line ending still gets one, so the
                # paragraph stays followed by a blank line in the output.
                kept.append(line if line.endswith('\n') else line + '\n')
            continue

        if size == 0:
            # Extra separator line (or blank lines at the start of the file):
            # there is no paragraph to close, and nothing to count.
            continue

        if size == 6:
            six.write(''.join(kept) + '\n')
            print(cnt, 'blank line:', size, 'lines recorded in file six')
        elif size == 7:
            seven.write(''.join(kept) + '\n')
            print(cnt, 'blank line:', size, 'lines recorded in file seven')
        elif size < 6:
            too_short += 1
            print(cnt, 'blank line:', size, 'lines only, paragraph skipped')
        else:
            too_long += 1
            print(cnt, 'blank line:', size, 'lines, paragraph skipped')
        kept = []
        size = 0

    return too_long, too_short


def main():
    """Split source.txt into six.txt and seven.txt, then show the results."""
    with open('source.txt') as fsource, \
            open('six.txt', 'w') as six, open('seven.txt', 'w') as seven:
        exceeding7paragraphs, tinyparagraphs = split_paragraphs(
            fsource, six, seven)

    if exceeding7paragraphs > 0:
        print('\nwarning :'
              '\nthere are ' + str(exceeding7paragraphs) +
              ' paragraphs whose number of lines exceeds 7.')
    if tinyparagraphs > 0:
        print('\nwarning :'
              '\nthere are ' + str(tinyparagraphs) +
              ' paragraphs whose number of lines is less than 6.')

    print('\n====================================================================')
    print('file six\n')
    with open('six.txt') as six:
        print(six.read())

    print('====================================================================')
    print('file seven\n')
    with open('seven.txt') as seven:
        print(seven.read())


if __name__ == '__main__':
    main()
I upvote your question because the problem is not as easy to solve as it seems, and so as not to leave you with one post and one downvote, which is demoralizing for a beginning. Try to make your presentation better next time, as others have said.
.
edit:
Here's a simplified code for a text containing paragraphs of precisely 6 or 7 lines, separated by exactly 1 or 2 blank lines, as stated in the problem's wording:
# Simplified splitter for a well-formed source: every paragraph has exactly
# 6 or 7 lines and paragraphs are separated by blank lines. 6-line paragraphs
# go to six.txt and 7-line paragraphs go to seven.txt.


def split_strict_paragraphs(fsource, six, seven):
    """Dispatch the paragraphs of a well-formed source to two files.

    Args:
        fsource: readable text file; only ``readline()`` is used.
        six: writable text file receiving the 6-line paragraphs.
        seven: writable text file receiving the 7-line paragraphs.

    Returns:
        A tuple ``(six_count, seven_count)`` of paragraphs written.

    The paragraph length is NOT verified: the first six lines are taken as
    is, and only the seventh one is tested for being blank. Use this only on
    a source known to hold paragraphs of exactly 6 or 7 lines.
    """
    six_count = seven_count = 0

    line = fsource.readline()
    # Go to the first non-empty line. Testing `line` as well stops the loop
    # at end of file, so an empty source does not loop forever.
    while line and not line.rstrip('\r\n'):
        line = fsource.readline()

    while line:
        # Here, line is the first line of a paragraph.
        buf = [line]
        print('\n- first line of a paragraph', repr(line))
        for _ in range(5):
            buf.append(fsource.readline())
        # At this point, 6 lines of the paragraph have been read.
        print('-- buf 6 : ', buf)

        line = fsource.readline()
        print('--- line seventh', repr(line))
        if line.rstrip('\r\n'):
            buf.append(line)
            seven.write(''.join(buf) + '\n')
            seven_count += 1
            line = fsource.readline()
        else:
            six.write(''.join(buf) + '\n')
            six_count += 1
        # At this point, line is a blank line after a paragraph, or EOF.
        print('---- line after', repr(line))

        # Skip the separator, however many blank lines it has. Reaching end
        # of file here ends the outer loop instead of starting a bogus
        # paragraph made of empty strings.
        while line and not line.rstrip('\r\n'):
            line = fsource.readline()

    return six_count, seven_count


def main_strict():
    """Split source2.txt into six.txt and seven.txt, then show the results."""
    with open('source2.txt') as fsource, \
            open('six.txt', 'w') as six, open('seven.txt', 'w') as seven:
        split_strict_paragraphs(fsource, six, seven)

    print('\n====================================================================')
    print('file six\n')
    with open('six.txt') as six:
        print(six.read())

    print('====================================================================')
    print('file seven\n')
    with open('seven.txt') as seven:
        print(seven.read())


if __name__ == '__main__':
    main_strict()
Comments
Post a Comment