인문지식 처리와 프로그래밍2020 4.02

soook
Soook (토론 | 기여) 사용자의 2020년 4월 9일 (목) 20:13 판 (parser02.py)

이동: 둘러보기, 검색

parser01.py(대단위)

#!/usr/bin/python
#-*- coding: utf-8 -*-

import sys

def main():
    """Print each tab-separated field of ``<argv[1]>.lst`` on its own line.

    The first command-line argument is the list file name without its
    ``.lst`` extension.  The file is read as UTF-8.  For every line, the
    first field is printed flush-left and every following field is printed
    on its own line prefixed with two tab characters.

    If the argument is missing, a usage line is printed and nothing is read.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        # Tell the user what is expected instead of exiting silently.
        print('\nparser01.py [file name]')
        return

    lst_path = filename + '.lst'

    # The context manager closes the file even if printing raises.
    with open(lst_path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split('\t')
            for position, field in enumerate(fields):
                if position == 0:
                    print(field)
                else:
                    print('\t\t{0}'.format(field))

# Run the parser only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

parser02.py(탭단위 세분화)

#!/usr/bin/python
#-*- coding: utf-8 -*-

import sys

def main():
    """Print a condensed, tab-separated view of each row of ``<argv[1]>.lst``.

    Command-line arguments:
        argv[1]: list file name without its ``.lst`` extension (read as UTF-8).
        argv[2]: task selector; ``'1'`` takes the title from column 7,
            ``'2'`` takes it from column 9, anything else gives an empty title.

    Each output row is
    ``col0 <TAB> col2_col3 <TAB> col4-col5-col6 <TAB> title``.

    Blank lines are skipped.  Rows with fewer columns than are referenced are
    padded with empty strings instead of raising ``IndexError``; this matters
    because ``strip()`` removes trailing tabs, i.e. trailing empty cells.

    If either argument is missing, a usage line is printed and nothing is read.
    """
    try:
        filename = sys.argv[1]
        task = sys.argv[2]
    except IndexError:
        print('\nparser02.py [file name] [task: 1, 2]')
        return

    # Column holding the title for each supported task; None -> empty title.
    title_index = {'1': 7, '2': 9}.get(task)
    # Columns 0..6 are always referenced; the title column may lie beyond them.
    required = 7 if title_index is None else title_index + 1

    # The context manager closes the file even if a row fails to process.
    with open(filename + '.lst', 'r', encoding='utf-8') as f:
        for line in f:
            text = line.strip()
            if not text:
                continue

            parsed = text.split('\t')
            # A non-positive multiplier yields an empty list, so full rows
            # are left untouched.
            parsed.extend([''] * (required - len(parsed)))

            title = '' if title_index is None else parsed[title_index]

            print('{0}\t{1}_{2}\t{3}-{4}-{5}\t{6}'.format(
                parsed[0], parsed[2], parsed[3],
                parsed[4], parsed[5], parsed[6], title))

# Run the parser only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()



parser03.py(엑셀자료->lst파일->국문/영문 파싱)

#!/usr/bin/python
#-*- coding: utf-8 -*-

import sys

def main():
    """Split each row's title into words and print one output row per word.

    Command-line arguments:
        argv[1]: list file name without its ``.lst`` extension (read as UTF-8).
        argv[2]: task selector; ``'1'`` takes the title from column 7,
            ``'2'`` takes it from column 9, anything else gives an empty title.

    Commas and parentheses are removed from the title, which is then split on
    whitespace.  Each output row is
    ``col0 <TAB> col2_col3 <TAB> col4-col5-col6 <TAB> word``.

    Runs of spaces (for example those left behind once punctuation is
    removed) no longer produce empty words.  A row whose title is empty still
    yields a single output row with an empty word column.

    Blank lines are skipped.  Rows with fewer columns than are referenced are
    padded with empty strings instead of raising ``IndexError``; this matters
    because ``strip()`` removes trailing tabs, i.e. trailing empty cells.

    If either argument is missing, a usage line is printed and nothing is read.
    """
    try:
        filename = sys.argv[1]
        task = sys.argv[2]
    except IndexError:
        print('\nparser03 [file name] [task: 1, 2]')
        return

    # Column holding the title for each supported task; None -> empty title.
    title_index = {'1': 7, '2': 9}.get(task)
    # Columns 0..6 are always referenced; the title column may lie beyond them.
    required = 7 if title_index is None else title_index + 1
    # One translation table deletes all three punctuation characters per pass.
    drop_punctuation = str.maketrans('', '', ',()')

    # The context manager closes the file even if a row fails to process.
    with open(filename + '.lst', 'r', encoding='utf-8') as f:
        for line in f:
            text = line.strip()
            if not text:
                continue

            parsed = text.split('\t')
            # A non-positive multiplier yields an empty list, so full rows
            # are left untouched.
            parsed.extend([''] * (required - len(parsed)))

            title = '' if title_index is None else parsed[title_index]

            # split() with no separator collapses whitespace runs; fall back
            # to one empty word so a title-less row is still listed.
            words = title.translate(drop_punctuation).split() or ['']

            for word in words:
                print('{0}\t{1}_{2}\t{3}-{4}-{5}\t{6}'.format(
                    parsed[0], parsed[2], parsed[3],
                    parsed[4], parsed[5], parsed[6], word))

# Run the parser only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()