"인문지식 처리와 프로그래밍2020 4.16"의 두 판 사이의 차이

soook
이동: 둘러보기, 검색
(새 문서: <pre> #!/usr/bin/python import sys import os import urllib.request import shutil address = "http://www.heritage.go.kr/heri/cul/culSelectDetail.do?ccbaCpno=" #문화재청 국가...)
 
1번째 줄: 1번째 줄:
 +
 +
==문화재==
 
<pre>
 
<pre>
  
58번째 줄: 60번째 줄:
 
except:
 
except:
 
print( "{0}: Invalid Cpno!".format( file ) )
 
print( "{0}: Invalid Cpno!".format( file ) )
 +
return
 +
 +
findImage( path )
 +
 +
def main():
 +
 +
try:
 +
filename = sys.argv[1]
 +
except:
 +
return
 +
 +
list = filename.__add__( ".lst" )
 +
folder = filename
 +
 +
try:
 +
os.makedirs( folder )
 +
except OSError:
 +
pass
 +
 +
i = 0
 +
f = open( list )
 +
 +
while 1:
 +
line = f.readline()
 +
if not line: break
 +
file = line.strip()
 +
print( "{0}: Processing....".format(file))
 +
downHtml( folder, file )
 +
 +
f.close()
 +
 +
main()
 +
 +
 +
</pre>
 +
 +
==민족문화대백과사전==
 +
 +
<pre>
 +
#!/usr/bin/python
 +
 +
import sys
 +
import os
 +
import urllib.request
 +
import shutil
 +
 +
address1 = "http://encykorea.aks.ac.kr/Contents/Item/" #민족문화대백과사전 기사 Url
 +
address2 = "http://encykorea.aks.ac.kr/Contents/GetImage?id=" #민족문화대백과사전 이미지 Url
 +
 +
def downImage( document, sequence, urlstring ):
 +
 +
url = address2.__add__(urlstring)
 +
filename=document[:-4]
 +
suffix = "{:03}.jpg".format(sequence)
 +
path = "{0}-{1}".format(filename, suffix)
 +
 +
try:
 +
urllib.request.urlretrieve(url, path)
 +
except:
 +
shutil.copy2('noimage.jpg', path)
 +
 +
 +
def findImage( path ):
 +
 +
#src="/Contents/GetImage?id=ceb01938-4ed4-4bd8-ac91-e884b7fc0460&w=260&h=260&fit=w&clip=1"
 +
#multi_item = {mmid:'2a71cba8-eba7-4e1d-b4b0-30818c766013'
 +
 +
g = open( path, 'rt', encoding='UTF8')
 +
 +
found = 0
 +
 +
while 1:
 +
line = g.readline()
 +
if not line: break
 +
 +
a=line.find("/GetImage?id=")
 +
if( a != -1 ):
 +
b=line.find("&w=")
 +
whatIfound=line[a+13:b]
 +
downImage( path, found, whatIfound )
 +
found += 1
 +
return
 +
 +
if found == 0:
 +
print( "{0}: No Image".format(path))
 +
path = "{0}-000.jpg".format(path[:-4])
 +
shutil.copy2('noimage.jpg', path)
 +
 +
g.close()
 +
 +
def downHtml( folder, file ):
 +
 +
url = address1.__add__(file)
 +
path = folder + '/' + file + '.htm'
 +
 +
try:
 +
urllib.request.urlretrieve(url, path)
 +
except:
 +
print( "{0}: Invalid ID!".format( file ) )
 
return
 
return
 
 

2020년 4월 22일 (수) 22:44 판

문화재


#!/usr/bin/python

import sys
import os
import urllib.request
import shutil

address = "http://www.heritage.go.kr/heri/cul/culSelectDetail.do?ccbaCpno="  # Cultural Heritage Administration national heritage portal: item detail page URL prefix (the item number is appended)
	
def downImage( document, sequence, urlstring ):
	"""Download the JPEG referenced in *urlstring* and store it next to *document*.

	The image URL is taken to run from the first "http:" (or, when there is
	none, the first "https:") up to and including the next ".jpg"; both
	markers are matched case-insensitively.  The image is saved as
	"<document without its last four characters>-NNN.jpg", where NNN is
	*sequence* zero-padded to three digits.

	When no URL can be extracted, or the download fails for any reason,
	'noimage.jpg' from the current working directory is copied to the
	target path instead.

	Args:
		document: Path of the saved HTML page (callers pass a name ending in ".htm").
		sequence: Running number of the image within the page.
		urlstring: A line of HTML that contains the image URL.

	Raises:
		OSError: If the placeholder image cannot be copied.
	"""
	checkline = urlstring.upper()
	begin = checkline.find("HTTP:")
	if begin == -1:
		begin = checkline.find("HTTPS:")
	# Look for the extension only after the scheme so that an earlier ".jpg"
	# on the same line cannot produce an empty or reversed slice.
	end = checkline.find(".JPG", begin) if begin != -1 else -1

	path = "{0}-{1:03}.jpg".format(document[:-4], sequence)

	url = urlstring[begin:end + 4] if end != -1 else None
	downloaded = False
	if url is not None:
		try:
			urllib.request.urlretrieve(url, path)
			downloaded = True
		except Exception:
			# Best-effort batch job: any download problem falls back to the
			# placeholder, but Ctrl-C / SystemExit are no longer swallowed.
			downloaded = False

	if not downloaded:
		shutil.copy2('noimage.jpg', path)


def findImage( path ):
	"""Download the first JPEG referenced in the saved HTML page *path*.

	The page is scanned line by line; the first line containing ".jpg"
	(case-insensitive) is stripped and handed to downImage() with sequence
	number 0, after which scanning stops.  If no line matches, a message is
	printed and 'noimage.jpg' from the current working directory is copied
	to "<path without its last four characters>-000.jpg".

	Args:
		path: Path of the saved HTML page (callers pass a name ending in ".htm").

	Raises:
		OSError: If the page cannot be opened or the placeholder cannot be copied.
	"""
	# The with-block closes the file on the early return as well; undecodable
	# bytes are replaced so one badly encoded page does not abort the batch.
	with open( path, 'rt', encoding='UTF8', errors='replace' ) as g:
		for line in g:
			if ".JPG" in line.upper():
				downImage( path, 0, line.strip() )
				return

	print( "{0}: No Image".format(path))
	shutil.copy2('noimage.jpg', "{0}-000.jpg".format(path[:-4]))
	
def downHtml( folder, file ):
	"""Fetch the detail page for one item and then its first image.

	The page at ``address + file`` is saved as "<folder>/<file>.htm".  If the
	download fails, a message is printed and nothing else happens for this
	item; otherwise findImage() is run on the saved page.

	Args:
		folder: Existing directory that receives the downloaded files.
		file: Item number appended to the portal URL; also the output base name.
	"""
	url = address + file
	path = folder + '/' + file + '.htm'

	try:
		urllib.request.urlretrieve(url, path)
	except Exception:
		# Best-effort batch job: report the item and move on.  Catching
		# Exception (not everything) keeps Ctrl-C / SystemExit working.
		print( "{0}: Invalid Cpno!".format( file ) )
		return

	findImage( path )

def main():
	"""Download every item listed in "<NAME>.lst" into the folder "<NAME>".

	NAME is the first command-line argument.  The list file holds one item
	number per line; surrounding whitespace is stripped and blank lines are
	skipped.  Each item is printed and passed to downHtml().  Without an
	argument a usage hint is written to stderr and nothing is done.

	Raises:
		OSError: If the output folder cannot be created or the list file
			cannot be opened.
	"""
	if len(sys.argv) < 2:
		print( "usage: <script> NAME   (reads NAME.lst, writes into NAME/)", file=sys.stderr )
		return

	filename = sys.argv[1]
	listfile = filename + ".lst"
	folder = filename

	os.makedirs( folder, exist_ok=True )

	with open( listfile ) as f:
		for line in f:
			item = line.strip()
			if not item:
				# An empty id would request the bare portal URL.
				continue
			print( "{0}: Processing....".format(item))
			downHtml( folder, item )

# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
	main()


민족문화대백과사전

#!/usr/bin/python

import sys
import os
import urllib.request
import shutil

address1 = "http://encykorea.aks.ac.kr/Contents/Item/"	# Encyclopedia of Korean Culture: article URL prefix
address2 = "http://encykorea.aks.ac.kr/Contents/GetImage?id=" # Encyclopedia of Korean Culture: image URL prefix

def downImage( document, sequence, urlstring ):
	"""Download the image with id *urlstring* and store it next to *document*.

	The image is fetched from ``address2 + urlstring`` and saved as
	"<document without its last four characters>-NNN.jpg", where NNN is
	*sequence* zero-padded to three digits.  If the download fails for any
	reason, 'noimage.jpg' from the current working directory is copied to
	the target path instead.

	Args:
		document: Path of the saved HTML page (callers pass a name ending in ".htm").
		sequence: Running number of the image within the page.
		urlstring: Image id to append to the image URL prefix.

	Raises:
		OSError: If the placeholder image cannot be copied.
	"""
	url = address2 + urlstring
	path = "{0}-{1:03}.jpg".format(document[:-4], sequence)

	try:
		urllib.request.urlretrieve(url, path)
	except Exception:
		# Best-effort batch job: any download problem falls back to the
		# placeholder, but Ctrl-C / SystemExit are no longer swallowed.
		shutil.copy2('noimage.jpg', path)


def findImage( path ):
	"""Download the first image referenced in the saved HTML page *path*.

	The page is scanned line by line for "/GetImage?id=".  On the first hit
	the id that follows is handed to downImage() with sequence number 0 and
	scanning stops.  If no line matches, a message is printed and
	'noimage.jpg' from the current working directory is copied to
	"<path without its last four characters>-000.jpg".

	Args:
		path: Path of the saved HTML page (callers pass a name ending in ".htm").

	Raises:
		OSError: If the page cannot be opened or the placeholder cannot be copied.
	"""
	# Example of the markup being matched:
	#   src="/Contents/GetImage?id=ceb01938-4ed4-4bd8-ac91-e884b7fc0460&w=260&h=260&fit=w&clip=1"
	# Ids also appear as  multi_item = {mmid:'2a71cba8-eba7-4e1d-b4b0-30818c766013'
	# but that form is not handled by this function.
	marker = "/GetImage?id="

	# The with-block closes the file on the early return as well; undecodable
	# bytes are replaced so one badly encoded page does not abort the batch.
	with open( path, 'rt', encoding='UTF8', errors='replace' ) as g:
		for line in g:
			a = line.find(marker)
			if a == -1:
				continue

			start = a + len(marker)
			# Search after the id so an earlier "&w=" on the line is ignored.
			b = line.find("&w=", start)
			if b == -1:
				# No size parameters follow: end the id at the first character
				# that terminates an attribute value or query field.
				b = start
				while b < len(line) and line[b] not in "&\"' \t\r\n<>":
					b += 1

			downImage( path, 0, line[start:b] )
			return

	print( "{0}: No Image".format(path))
	shutil.copy2('noimage.jpg', "{0}-000.jpg".format(path[:-4]))
	
def downHtml( folder, file ):
	"""Fetch the article page for one item and then its first image.

	The page at ``address1 + file`` is saved as "<folder>/<file>.htm".  If
	the download fails, a message is printed and nothing else happens for
	this item; otherwise findImage() is run on the saved page.

	Args:
		folder: Existing directory that receives the downloaded files.
		file: Article id appended to the article URL; also the output base name.
	"""
	url = address1 + file
	path = folder + '/' + file + '.htm'

	try:
		urllib.request.urlretrieve(url, path)
	except Exception:
		# Best-effort batch job: report the item and move on.  Catching
		# Exception (not everything) keeps Ctrl-C / SystemExit working.
		print( "{0}: Invalid ID!".format( file ) )
		return

	findImage( path )

def main():
	"""Download every article listed in "<NAME>.lst" into the folder "<NAME>".

	NAME is the first command-line argument.  The list file holds one
	article id per line; surrounding whitespace is stripped and blank lines
	are skipped.  Each id is printed and passed to downHtml().  Without an
	argument a usage hint is written to stderr and nothing is done.

	Raises:
		OSError: If the output folder cannot be created or the list file
			cannot be opened.
	"""
	if len(sys.argv) < 2:
		print( "usage: <script> NAME   (reads NAME.lst, writes into NAME/)", file=sys.stderr )
		return

	filename = sys.argv[1]
	listfile = filename + ".lst"
	folder = filename

	os.makedirs( folder, exist_ok=True )

	with open( listfile ) as f:
		for line in f:
			item = line.strip()
			if not item:
				# An empty id would request the bare article URL prefix.
				continue
			print( "{0}: Processing....".format(item))
			downHtml( folder, item )

# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
	main()