#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download master TIFFs listed in ``tiffs.txt`` and build Deep Zoom pyramids.

Each line of ``tiffs.txt`` is expected to be a URL of the form
``http://lcweb2.loc.gov/master/pnp/ppmsca/<n>/<id>/<file>u.tif``.  For every
URL the master TIFF is downloaded to the matching relative path (the URL with
the ``http://lcweb2.loc.gov/master/`` prefix removed) and converted into a
``.dzi`` descriptor next to it.

A URL is skipped when its ``*_files`` tile directory already exists, or when
``MAX_PAGES`` images have already been converted for the same id during this
run.
"""

import contextlib
import os
import re
import shutil
import urllib

import deepzoom

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

# Upper bound on the number of images converted per id in a single run.
MAX_PAGES = 10

# Maps id (second numeric URL component) -> images converted so far this run.
PAGE_COUNT = {}

# Prefix stripped from each URL to obtain the local relative path.
MASTER_PREFIX = 'http://lcweb2.loc.gov/master/'

# Compiled once; group 2 is the id used as the PAGE_COUNT key.
TIFF_URL_PATTERN = re.compile(
    r'http://lcweb2\.loc\.gov/master/pnp/ppmsca/(\d*)/(\d*)/(\d*)u\.tif'
)


def ensure_dir(filename):
    """Create the parent directory of *filename* if it does not exist yet.

    Args:
        filename: Path of a file whose containing directory is required.

    A *filename* without a directory component (``os.path.dirname`` returns
    ``''``) refers to the current directory, so nothing is created.
    """
    directory = os.path.dirname(filename)
    # os.makedirs('') raises, so only act on a non-empty directory name.
    if directory and not os.path.exists(directory):
        os.makedirs(directory)


# Create Deep Zoom Image creator with weird parameters
creator = deepzoom.ImageCreator(
    tile_size=512,
    tile_overlap=2,
    image_quality=1,
    tile_format="tif",
    resize_filter="antialias",
)

# Read the whole listing up front and close the handle right away.
with open('tiffs.txt', 'r') as listing:
    tiff_list = listing.read().split('\n')

for tiff_url in tiff_list:
    # A trailing newline in tiffs.txt yields an empty entry, and CRLF files
    # leave a stray '\r'; strip and skip blanks instead of crashing on them.
    tiff_url = tiff_url.strip()
    if not tiff_url:
        continue

    match = TIFF_URL_PATTERN.match(tiff_url)
    if match is None:
        print('skipping unrecognised url: %s' % tiff_url)
        continue

    item_id = match.group(2)
    PAGE_COUNT.setdefault(item_id, 0)

    path = tiff_url.replace(MASTER_PREFIX, '')
    dzi_files = path.replace('u.tif', '_files')

    # Skip ids that reached the per-run limit and images already tiled.
    if PAGE_COUNT[item_id] >= MAX_PAGES or os.path.exists(dzi_files):
        continue

    print('making directory: %s' % os.path.dirname(path))
    ensure_dir(path)

    print('downloading master tiff: %s' % tiff_url)
    # Stream the response to disk rather than buffering the whole TIFF in
    # memory; both handles are closed even if the transfer fails.
    with contextlib.closing(urlopen(tiff_url)) as response:
        with open(path, 'wb') as tiff_file:
            shutil.copyfileobj(response, tiff_file)

    print('creating dzi: %s' % path)
    # Create Deep Zoom image pyramid from source
    creator.create(path, path.replace('u.tif', '.dzi'))

    PAGE_COUNT[item_id] += 1