Source code for pydriosm.downloader

"""
Downloading `Geofabrik <https://download.geofabrik.de/>`_ and
`BBBike <https://download.bbbike.org/>`_ OpenStreetMap (OSM) data extracts.
"""

import copy
import csv
import io
import time
import urllib.error
import urllib.parse
import urllib.request

import bs4
import pandas as pd
import requests
from pyhelpers.dir import validate_input_data_dir
from pyhelpers.ops import confirmed, download_file_from_url, fake_requests_headers, \
    update_nested_dict
from pyhelpers.store import load_pickle, save_pickle

from .utils import *


[docs]class GeofabrikDownloader: """ A class for downloading OSM data from `Geofabrik <https://download.geofabrik.de/>`_'s free download server. **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> print(geofabrik_downloader.Name) Geofabrik OpenStreetMap data extracts """ def __init__(self): """ Constructor method. """ self.Name = 'Geofabrik OpenStreetMap data extracts' self.URL = geofabrik_homepage() self.DownloadIndexURL = urllib.parse.urljoin(self.URL, 'index-v1.json') self.ValidFileFormats = [".osm.pbf", ".shp.zip", ".osm.bz2"] self.DownloadIndexName = 'Geofabrik index of all downloads' self.ContinentSubregionTableName = 'Geofabrik continent subregions' self.RegionSubregionTier = 'Geofabrik region-subregion tier' self.DownloadsCatalogue = 'Geofabrik downloads catalogue' self.SubregionNameList = 'Geofabrik subregion name list'
[docs] @staticmethod def get_raw_directory_index(url, verbose=False): """ Get a raw directory index. This includes logs of older files and their and download URLs. :param url: a URL to the web resource :type url: str :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a table of raw directory index :rtype: pandas.DataFrame or None **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> ex_url = 'https://download.geofabrik.de/europe/great-britain.html' >>> raw_dir_idx = geofabrik_downloader.get_raw_directory_index(ex_url) >>> print(raw_dir_idx.head()) File ... FileURL 0 great-britain-updates ... https://download.geofabrik.de/... 1 great-britain-latest.osm.pbf.md5 ... https://download.geofabrik.de/... 2 great-britain-200914.osm.pbf.md5 ... https://download.geofabrik.de/... 3 great-britain.kml ... https://download.geofabrik.de/... 4 great-britain-latest.osm.pbf ... https://download.geofabrik.de/... [5 rows x 4 columns] >>> ex_url = 'http://download.geofabrik.de/' >>> raw_dir_idx = geofabrik_downloader.get_raw_directory_index( ... ex_url, verbose=True) The web page does not have a raw directory index. """ try: import humanfriendly raw_directory_index = pd.read_html(url, match='file', header=0, parse_dates=['date']) raw_directory_index = pd.concat(raw_directory_index, ignore_index=True) raw_directory_index.columns = [c.title() for c in raw_directory_index.columns] # Clean the DataFrame raw_directory_index.Size = \ raw_directory_index.Size.apply(humanfriendly.format_size) raw_directory_index.sort_values('Date', ascending=False, inplace=True) raw_directory_index.index = range(len(raw_directory_index)) raw_directory_index['FileURL'] = raw_directory_index.File.map( lambda x: urllib.parse.urljoin(url, x)) except (urllib.error.HTTPError, TypeError, ValueError): if len(urllib.parse.urlparse(url).path) <= 1 and verbose: print("The web page does not have a raw directory index.") raw_directory_index = None return raw_directory_index
[docs] def get_subregion_table(self, url, verbose=False): """ Get download information for all geographic regions on a web page. :param url: URL to the web resource :type url: str :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a table of all available subregions' URLs :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> ex_url = 'https://download.geofabrik.de/europe/great-britain.html' >>> subregion_tbl = geofabrik_downloader.get_subregion_table(ex_url) >>> print(subregion_tbl.head()) Subregion ... .osm.bz2 0 England ... https://download.geofabrik.de/europe/great-bri... 1 Scotland ... https://download.geofabrik.de/europe/great-bri... 2 Wales ... https://download.geofabrik.de/europe/great-bri... [3 rows x 6 columns] """ try: subregion_table = pd.read_html( url, match=re.compile(r'(Special )?Sub[ \-]Regions?'), encoding='UTF-8') subregion_table = pd.concat(subregion_table, axis=0, ignore_index=True) # Specify column names file_formats = self.ValidFileFormats column_names = ['Subregion'] + file_formats column_names.insert(2, '.osm.pbf.Size') # Add column/names if len(subregion_table.columns) == 4: subregion_table.insert(2, '.osm.pbf.Size', np.nan) subregion_table.columns = column_names subregion_table.replace( {'.osm.pbf.Size': {re.compile('[()]'): '', re.compile('\xa0'): ' '}}, inplace=True) # Get the URLs source = requests.get(url, headers=fake_requests_headers()) soup = bs4.BeautifulSoup(source.content, 'lxml') source.close() for file_type in file_formats: text = '[{}]'.format(file_type) urls = [urllib.parse.urljoin(url, link['href']) for link in soup.find_all(name='a', href=True, text=text)] subregion_table.loc[ subregion_table[file_type].notnull(), file_type] = urls try: subregion_urls = [ urllib.parse.urljoin(url, soup.find('a', text=text).get('href')) for text in subregion_table.Subregion] except (AttributeError, TypeError): subregion_urls = [kml['onmouseover'] for kml in soup.find_all('tr', onmouseover=True)] subregion_urls = [ s[s.find('(') + 1:s.find(')')][1:-1].replace('kml', 'html') for s in subregion_urls] subregion_urls = [urllib.parse.urljoin(url, sub_url) for sub_url in subregion_urls] subregion_table['SubregionURL'] = subregion_urls column_names = list(subregion_table.columns) column_names.insert(1, column_names.pop(len(column_names) - 1)) subregion_table = subregion_table[column_names] subregion_table['.osm.pbf.Size'] = \ subregion_table['.osm.pbf.Size'].str.replace('(', '').str.replace(')', '') subregion_table = subregion_table.where(pd.notnull(subregion_table), None) except (ValueError, TypeError, ConnectionRefusedError, ConnectionError): # No more data available for subregions within the region if verbose: print("Checked out \"{}\".".format( url.split('/')[-1].split('.')[0].title())) subregion_table = None return subregion_table
[docs] def get_download_index(self, update=False, confirmation_required=True, verbose=False): """ Get the formal index of all available downloads. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: the formal index of all downloads :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> download_idx = geofabrik_downloader.get_download_index() >>> print(download_idx.head()) id ... updates 0 afghanistan ... https://download.geofabrik.de/asia/afghanistan... 1 africa ... https://download.geofabrik.de/africa-updates 2 albania ... https://download.geofabrik.de/europe/albania-u... 3 alberta ... https://download.geofabrik.de/north-america/ca... 4 algeria ... https://download.geofabrik.de/africa/algeria-u... [5 rows x 12 columns] """ path_to_download_index = cd_dat( self.DownloadIndexName.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_download_index) and not update: download_index = load_pickle(path_to_download_index) else: if confirmed("To get {}?".format(self.DownloadIndexName), confirmation_required=confirmation_required): if verbose == 2: print("Collecting {}".format(self.DownloadIndexName), end=" ... ") try: import geopandas as gpd download_index_ = gpd.read_file(self.DownloadIndexURL) # Note that '<br />' exists in all the names of Poland' subregions download_index_.name = download_index_.name.str.replace('<br />', ' ') urls = download_index_.urls.map( lambda x: pd.DataFrame.from_dict(x, 'index').T) urls_ = pd.concat(urls.values, ignore_index=True) download_index = pd.concat([download_index_, urls_], axis=1) print("Done. ") if verbose == 2 else "" save_pickle(download_index, path_to_download_index, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) download_index = None else: download_index = None if verbose: print("No data of {} is available.".format(self.DownloadIndexName)) return download_index
[docs] def get_continents_subregion_tables(self, update=False, confirmation_required=True, verbose=False): """ Get download information for continents. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: subregion information for each continent :rtype: dict or None **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> subregion_tbls = geofabrik_downloader.get_continents_subregion_tables() >>> print(list(subregion_tbls.keys())) ['Africa', 'Antarctica', 'Asia', 'Australia and Oceania', 'Central America', 'Europe', 'North America', 'South America'] """ path_to_pickle = \ cd_dat(self.ContinentSubregionTableName.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_pickle) and not update: subregion_tables = load_pickle(path_to_pickle) else: if confirmed("To collect information of {}?".format( self.ContinentSubregionTableName), confirmation_required=confirmation_required): if verbose == 2: print("Collecting a table of {}".format( self.ContinentSubregionTableName), end=" ... ") try: # Scan the homepage to collect info of regions for each continent source = requests.get(self.URL, headers=fake_requests_headers()) soup = bs4.BeautifulSoup(source.text, 'lxml').find_all( 'td', {'class': 'subregion'}) source.close() continent_names = [td.a.text for td in soup] continent_links = [urllib.parse.urljoin(self.URL, td.a['href']) for td in soup] subregion_tables = dict( zip(continent_names, [self.get_subregion_table(url, verbose) for url in continent_links])) print("Done. ") if verbose == 2 else "" save_pickle(subregion_tables, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) subregion_tables = None else: subregion_tables = None if verbose: print(f"No data of {self.ContinentSubregionTableName} is available.") return subregion_tables
[docs] def get_region_subregion_tier(self, update=False, confirmation_required=True, verbose=False): """ Get a catalogue of region-subregion tier. This includes all geographic regions to which data of subregions is unavailable. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: region-subregion tier (in ``dict`` type) and all that have no subregions (in ``list`` type) :rtype: tuple .. _`dict`: https://docs.python.org/3/library/stdtypes.html#dict .. _`list`: https://docs.python.org/3/library/stdtypes.html#list **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> rs_tier, ns_list = geofabrik_downloader.get_region_subregion_tier() >>> print(list(rs_tier.keys())) ['Africa', 'Antarctica', 'Asia', 'Australia and Oceania', 'Central America', 'Europe', 'North America', 'South America'] >>> print(ns_list[0:5]) ['Antarctica', 'Algeria', 'Angola', 'Benin', 'Botswana'] """ path_to_file = cd_dat(self.RegionSubregionTier.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_file) and not update: region_subregion_tier, non_subregions = \ load_pickle(path_to_file, verbose=verbose) else: def compile_region_subregion_tier(sub_reg_tbls): """ Find out the all regions and their subregions. :param sub_reg_tbls: obtained from get_continents_subregion_tables() :type sub_reg_tbls: pandas.DataFrame :return: a dictionary of region-subregion, and a list of (sub)regions without subregions :rtype: dict **Test**:: sub_reg_tbls = subregion_tables.copy() """ having_subregions = sub_reg_tbls.copy() region_subregion_tiers = having_subregions.copy() non_subregions_list = [] for k, v in sub_reg_tbls.items(): if v is not None and isinstance(v, pd.DataFrame): region_subregion_tiers = \ update_nested_dict(sub_reg_tbls, {k: set(v.Subregion)}) else: non_subregions_list.append(k) for x in non_subregions_list: having_subregions.pop(x) having_subregions_temp = copy.deepcopy(having_subregions) while having_subregions_temp: for region_name, subregion_table in having_subregions.items(): subregion_names = subregion_table.Subregion subregion_links = subregion_table.SubregionURL sub_subregion_tables = dict( zip(subregion_names, [self.get_subregion_table(link) for link in subregion_links])) subregion_index, without_subregion_ = \ compile_region_subregion_tier(sub_subregion_tables) non_subregions_list += without_subregion_ region_subregion_tiers.update({region_name: subregion_index}) having_subregions_temp.pop(region_name) # Russian Federation in both pages of Asia and Europe, # so there are duplicates in non_subregions_list import more_itertools non_subregions_list = \ list(more_itertools.unique_everseen(non_subregions_list)) return region_subregion_tiers, non_subregions_list if confirmed( "To compile {}? (Note this may take up to a few minutes.)".format( self.RegionSubregionTier), confirmation_required=confirmation_required): if verbose == 2: print("Compiling {} ... ".format(self.RegionSubregionTier), end="") # Scan the download pages to collect a catalogue of region-subregion tier try: subregion_tables = self.get_continents_subregion_tables(update=update) region_subregion_tier, non_subregions = \ compile_region_subregion_tier(subregion_tables) print("Done. ") if verbose == 2 else "" save_pickle((region_subregion_tier, non_subregions), path_to_file, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) region_subregion_tier, non_subregions = None, None else: region_subregion_tier, non_subregions = None, None if verbose: print("No data of {} is available.".format(self.RegionSubregionTier)) return region_subregion_tier, non_subregions
[docs] def get_download_catalogue(self, update=False, confirmation_required=True, verbose=False): """ Get a catalogue of download information. Similar to :py:meth:`.get_download_index() <pydriosm.downloader.GeofabrikDownloader.get_download_index>`. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a catalogues for subregion downloads :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> downloads_catalogue = geofabrik_downloader.get_download_catalogue() >>> print(downloads_catalogue.head()) Subregion ... .osm.bz2 0 Algeria ... http://download.geofabrik.de/africa/algeria-la... 1 Angola ... http://download.geofabrik.de/africa/angola-lat... 2 Benin ... http://download.geofabrik.de/africa/benin-late... 3 Botswana ... http://download.geofabrik.de/africa/botswana-l... 4 Burkina Faso ... http://download.geofabrik.de/africa/burkina-fa... [5 rows x 6 columns] """ path_to_downloads_catalogue = cd_dat( self.DownloadsCatalogue.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_downloads_catalogue) and not update: subregion_downloads_catalogue = load_pickle(path_to_downloads_catalogue) else: if confirmed( "To collect {}? (Note that it may take a few minutes.)".format( self.DownloadsCatalogue), confirmation_required=confirmation_required): if verbose == 2: print("Collecting {}".format(self.DownloadsCatalogue), end=" ... ") try: source = requests.get(self.URL, headers=fake_requests_headers()) soup = bs4.BeautifulSoup(source.text, 'lxml') source.close() # avail_subregions = \ # [td.a.text for td in soup.find_all('td', {'class': 'subregion'})] subregion_href = soup.find_all('td', {'class': 'subregion'}) avail_subregion_urls = (urllib.parse.urljoin(self.URL, td.a['href']) for td in subregion_href) avail_subregion_url_tables_0 = ( self.get_subregion_table(sub_url, verbose) for sub_url in avail_subregion_urls) avail_subregion_url_tables = [ tbl for tbl in avail_subregion_url_tables_0 if tbl is not None] subregion_url_tables = list(avail_subregion_url_tables) while subregion_url_tables: subregion_url_tables_ = [] for subregion_url_table in subregion_url_tables: # subregions = list(subregion_url_table.Subregion) subregion_urls = list(subregion_url_table.SubregionURL) subregion_url_tables_0 = [ self.get_subregion_table(sr_url, verbose) for sr_url in subregion_urls] subregion_url_tables_ += [ tbl for tbl in subregion_url_tables_0 if tbl is not None] # (Note that 'Russian Federation' data is available in both # 'Asia' and 'Europe') # avail_subregions += subregions # avail_subregion_urls += subregion_urls avail_subregion_url_tables += subregion_url_tables_ subregion_url_tables = list(subregion_url_tables_) # All available URLs for downloading home_subregion_url_table = self.get_subregion_table(self.URL) avail_subregion_url_tables.append(home_subregion_url_table) subregion_downloads_catalogue = pd.concat(avail_subregion_url_tables, ignore_index=True) subregion_downloads_catalogue.drop_duplicates(inplace=True) duplicated = subregion_downloads_catalogue[ subregion_downloads_catalogue.Subregion.duplicated(keep=False)] if not duplicated.empty: import humanfriendly for i in range(0, 2, len(duplicated)): temp = duplicated.iloc[i:i + 2] size = temp['.osm.pbf.Size'].map( lambda x: humanfriendly.parse_size( x.strip('(').strip(')').replace('\xa0', ' '))) idx = size[size == size.min()].index subregion_downloads_catalogue.drop(idx, inplace=True) subregion_downloads_catalogue.index = \ range(len(subregion_downloads_catalogue)) # Save subregion_index_downloads to local disk save_pickle(subregion_downloads_catalogue, path_to_downloads_catalogue, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) subregion_downloads_catalogue = None else: subregion_downloads_catalogue = None if verbose: print("No data of {} is available.".format(self.DownloadsCatalogue)) return subregion_downloads_catalogue
[docs] def get_list_of_subregion_names(self, update=False, confirmation_required=True, verbose=False): """ Get a list of names of all available geographic regions. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: names of geographic regions available on the free download server :rtype: list **Example**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name_list = geofabrik_downloader.get_list_of_subregion_names() >>> print(sr_name_list[:5]) ['Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso'] """ path_to_name_list = cd_dat(self.SubregionNameList.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_name_list) and not update: subregion_name_list = load_pickle(path_to_name_list) else: if confirmed("To get {}?".format(self.SubregionNameList), confirmation_required=confirmation_required): downloads_catalogue = self.get_download_catalogue( update=update, confirmation_required=False) subregion_name_list = downloads_catalogue.Subregion.to_list() save_pickle(subregion_name_list, path_to_name_list, verbose=verbose) else: subregion_name_list = [] if verbose: print("No data of {} is available.".format(self.SubregionNameList)) return subregion_name_list
[docs] def validate_input_subregion_name(self, subregion_name): """ Validate an input name of a geographic region. The validation is done by matching the input ``subregion_name`` to a name of a geographic region available on Geofabrik's free download server. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :return: valid subregion name that matches, or is the most similar to, the input ``subregion_name`` :rtype: str **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name = 'london' >>> sr_name_ = geofabrik_downloader.validate_input_subregion_name(sr_name) >>> print(sr_name_) Greater London >>> sr_name = 'https://download.geofabrik.de/europe/great-britain.html' >>> sr_name_ = geofabrik_downloader.validate_input_subregion_name(sr_name) >>> print(sr_name_) Great Britain """ assert isinstance(subregion_name, str) # Get a list of available subregion_names = self.get_list_of_subregion_names() if os.path.isdir(os.path.dirname(subregion_name)) or \ urllib.parse.urlparse(subregion_name).path: subregion_name_ = find_similar_str(os.path.basename(subregion_name), subregion_names) else: subregion_name_ = find_similar_str(subregion_name, subregion_names) if not subregion_name_: raise ValueError( "The input subregion name is not identified.\n" "Check if the required subregion exists in the catalogue and retry.") return subregion_name_
[docs] def validate_input_file_format(self, osm_file_format): """ Validate an input file format of OSM data. The validation is done by matching the input ``osm_file_format`` to a filename extension available on Geofabrik's free download server. :param osm_file_format: filename extension of any OSM data extract :type osm_file_format: str :return: formal file format :rtype: str **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> file_format = ".pbf" >>> file_fmt = geofabrik_downloader.validate_input_file_format(file_format) >>> print(file_fmt) .osm.pbf >>> file_format = ".shp" >>> file_fmt = geofabrik_downloader.validate_input_file_format(file_format) >>> print(file_fmt) .shp.zip """ osm_file_format_ = find_similar_str(osm_file_format, self.ValidFileFormats) assert osm_file_format_ in self.ValidFileFormats, \ "The input file format must be one from {}.".format(self.ValidFileFormats) return osm_file_format_
[docs] def get_subregion_download_url(self, subregion_name, osm_file_format, update=False, verbose=False): """ Get a download URL of a geographic region. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :param osm_file_format: OSM file format available on the free download server; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: name and URL of the subregion :rtype: tuple **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> region_name = 'london' >>> file_format = '.pbf' >>> fml_name, dwnld_link = geofabrik_downloader.get_subregion_download_url( ... region_name, file_format) >>> print(fml_name) Greater London >>> print(dwnld_link) http://download.geofabrik.de/.../greater-london-latest.osm.pbf >>> region_name = 'Great Britain' >>> file_format = '.shp' >>> fml_name, dwnld_link = geofabrik_downloader.get_subregion_download_url( ... region_name, file_format) >>> print(fml_name) Greater London >>> print(dwnld_link) None """ # Get an index of download URLs subregion_downloads_index = self.get_download_catalogue( update=update, verbose=verbose) subregion_downloads_index.set_index('Subregion', inplace=True) subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = self.validate_input_file_format(osm_file_format) # Get the URL download_url = subregion_downloads_index.loc[subregion_name_, osm_file_format_] return subregion_name_, download_url
[docs] def get_default_osm_filename(self, subregion_name, osm_file_format, update=False): """ get a default filename for a geograpic region. The default filename is derived from the relevant download URL for the requested data file. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :return: default OSM filename for the ``subregion_name`` :rtype: str **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name = 'london' >>> file_format = ".pbf" >>> sr_filename = geofabrik_downloader.get_default_osm_filename( ... sr_name, file_format) >>> print(sr_filename) greater-london-latest.osm.pbf >>> sr_name = 'britain' >>> file_format = ".shp" >>> sr_filename = geofabrik_downloader.get_default_osm_filename( ... sr_name, file_format) No .shp.zip data is available to download for Great Britain. >>> print(sr_filename) None """ subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = self.validate_input_file_format(osm_file_format) _, download_url = self.get_subregion_download_url( subregion_name_, osm_file_format_, update=update) if download_url is None: print("No {} data is available to download for {}.".format( osm_file_format_, subregion_name_)) else: subregion_filename = os.path.split(download_url)[-1] return subregion_filename
[docs] def get_default_path_to_osm_file(self, subregion_name, osm_file_format, mkdir=False, update=False, verbose=False): """ Get a default path to a local directory for storing a downloaded data file. The default file path is derived from the relevant download URL for the requested data file. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param mkdir: whether to create a directory, defaults to ``False`` :type mkdir: bool :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: default filename of the subregion and default (absolute) path to the file :rtype: tuple **Example**:: >>> import os >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name = 'london' >>> file_format = ".pbf" >>> filename, file_path = geofabrik_downloader.get_default_path_to_osm_file( ... sr_name, file_format) >>> print(filename) greater-london-latest.osm.pbf >>> print(os.path.relpath(file_path)) dat_GeoFabrik\\Europe\\Great Britain\\England\\greater-london-latest.osm.pbf """ subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = self.validate_input_file_format(osm_file_format) subregion_name_, download_url = self.get_subregion_download_url( subregion_name_, osm_file_format_, update=update) if download_url is None: if verbose: print("{} data is not available for {}".format( osm_file_format_, subregion_name_)) default_filename, default_file_path = None, None else: parsed_path = urllib.parse.urlparse(download_url).path.lstrip('/').split('/') if len(parsed_path) == 1: parsed_path = [subregion_name_] + parsed_path subregion_names = self.get_list_of_subregion_names() directory = cd_dat_geofabrik( *[find_similar_str(x, subregion_names) if x != 'us' else 'United States' for x in parsed_path[0:-1]], mkdir=mkdir) default_filename = parsed_path[-1] default_file_path = os.path.join(directory, default_filename) return default_filename, default_file_path
[docs] def search_for_subregions(self, *subregion_name, deep=False): """ Retrieve names of all subregions (if any) of the given geographic region(s). The is based on the region-subregion tier. See also [`RNS-1 <https://stackoverflow.com/questions/9807634/>`_]. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str or None :param deep: whether to get subregion names of the subregions, defaults to ``False`` :type deep: bool :return: list of subregions (if any); if ``subregion_name=None``, all regions that do have subregions :rtype: list **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_names = geofabrik_downloader.search_for_subregions() >>> print(sr_names[:5]) ['Antarctica', 'Algeria', 'Angola', 'Benin', 'Botswana'] >>> sr_names = geofabrik_downloader.search_for_subregions( ... 'england', 'asia', deep=False) >>> print(sr_names[:5]) ['Bedfordshire', 'Berkshire', 'Bristol', 'Buckinghamshire', 'Cambridgeshire'] >>> print(sr_names[-5:]) ['Thailand', 'Turkmenistan', 'Uzbekistan', 'Vietnam', 'Yemen'] >>> sr_names = geofabrik_downloader.search_for_subregions( ... 'britain', deep=True) >>> print(sr_names[:5]) ['Scotland', 'Wales', 'Bedfordshire', 'Berkshire', 'Bristol'] """ region_subregion_tier, non_subregions_list = self.get_region_subregion_tier() if not subregion_name: subregion_names = non_subregions_list else: def find_subregions(reg_name, reg_sub_idx): """ :param reg_name: name of a geographic region :type reg_name: str :param reg_sub_idx: :type reg_sub_idx: dict :return: :rtype: generator object **Test**:: reg_name = region reg_sub_idx = region_subregion_tier """ for k, v in reg_sub_idx.items(): if reg_name == k: if isinstance(v, dict): yield list(v.keys()) else: yield [reg_name] if isinstance(reg_name, str) else reg_name elif isinstance(v, dict): for sub in find_subregions(reg_name, v): if isinstance(sub, dict): yield list(sub.keys()) else: yield [sub] if isinstance(sub, str) else sub res = [] for region in subregion_name: res += list(find_subregions(self.validate_input_subregion_name(region), region_subregion_tier))[0] if not deep: subregion_names = res else: check_list = [x for x in res if x not in non_subregions_list] if check_list: res_ = list(set(res) - set(check_list)) # for region in check_list: # res_ += self.retrieve_names_of_subregions_of(region) res_ += self.search_for_subregions(*check_list) else: res_ = res del non_subregions_list, region_subregion_tier, check_list subregion_names = list(dict.fromkeys(res_)) return subregion_names
[docs] def make_sub_download_dir(self, subregion_name, osm_file_format, download_dir=None, mkdir=False): """ Make a default directory for downloading data of a geographic region's subregions. This is particularly useful when data of a geographic region and requested file format is unavailable. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param download_dir: directory for saving the downloaded file(s); if ``None`` (default), the default directory :type download_dir: str or None :param mkdir: whether to create a directory, defaults to ``False`` :type mkdir: bool :return: default download directory if the requested data file is not available :rtype: str **Example**:: >>> import os >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name = 'london' >>> file_format = ".pbf" >>> dwnld_dir = geofabrik_downloader.make_sub_download_dir( ... sr_name, file_format) >>> print(os.path.relpath(dwnld_dir)) # dat_GeoFabrik\\Europe\\Great Britain\\England\\greater-london-latest-osm-pbf >>> sr_name = 'britain' >>> file_format = ".shp" >>> dwnld_dir = geofabrik_downloader.make_sub_download_dir( ... sr_name, file_format, download_dir="tests") >>> print(os.path.relpath(dwnld_dir)) tests\\great-britain-shp-zip """ subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = self.validate_input_file_format(osm_file_format) default_filename, default_file_path = self.get_default_path_to_osm_file( subregion_name_, osm_file_format_) if not default_filename: default_sub_dir = re.sub( r"[. ]", "-", subregion_name_.lower() + osm_file_format_) else: default_sub_dir = re.sub(r"[. ]", "-", default_filename).lower() if not download_dir: default_download_dir = cd_dat_geofabrik(os.path.dirname(default_file_path), default_sub_dir, mkdir=mkdir) else: default_download_dir = cd(validate_input_data_dir(download_dir), default_sub_dir, mkdir=mkdir) return default_download_dir
[docs] def download_osm_data(self, subregion_names, osm_file_format, download_dir=None, update=False, confirmation_required=True, deep_retry=False, interval_sec=None, verbose=False, ret_download_path=False): """ Download OSM data (in a specific file format) of one (or multiple) geographic region(s). :param subregion_names: name(s) of one (or multiple) geographic region(s) available on Geofabrik's free download server :type subregion_names: str or list :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param download_dir: directory for saving the downloaded file(s); if None (default), use the default directory :type download_dir: str or None :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param deep_retry: whether to further check availability of sub-subregions data, defaults to ``False`` :type deep_retry: bool :param interval_sec: interval (in sec) between downloading two subregions, defaults to ``None`` :type interval_sec: int or None :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :param ret_download_path: whether to return the path(s) to the downloaded file(s), defaults to ``False`` :type ret_download_path: bool :return: absolute path(s) to downloaded file(s) when ``ret_download_path=True`` :rtype: list or str **Examples**:: >>> import os >>> from pyhelpers.dir import delete_dir >>> from pydriosm.downloader import GeofabrikDownloader, cd_dat_geofabrik >>> geofabrik_downloader = GeofabrikDownloader() >>> # Download PBF data file of Greater London and Rutland >>> sr_names = ['London', 'Rutland'] >>> file_fmt = ".pbf" >>> dwnld_paths = geofabrik_downloader.download_osm_data( ... sr_names, file_fmt, verbose=True, ret_download_path=True) Confirmed to download .osm.pbf data of the following geographic region(s): Greater London Rutland ? [No]|Yes: yes Downloading "greater-london-latest.osm.pbf" to "\\dat_... \\England" ... Done. Downloading "rutland-latest.osm.pbf" to "\\dat_... \\England" ... Done. >>> for dwnld_path in dwnld_paths: ... print(os.path.relpath(dwnld_path)) dat_GeoFabrik\\Europe\\Great Britain\\England\\greater-london-latest.osm.pbf dat_GeoFabrik\\Europe\\Great Britain\\England\\rutland-latest.osm.pbf >>> # Delete the directory generated above >>> delete_dir(cd_dat_geofabrik(), verbose=True) The directory "\\dat_Geofabrik" is not empty. Confirmed to delete it? [No]|Yes: yes Deleting "\\dat_Geofabrik" ... Done. >>> # Download shapefiles of West Midlands >>> sr_name = 'west midlands' >>> file_fmt = ".shp" >>> dwnld_dir = "tests" >>> dwnld_path = geofabrik_downloader.download_osm_data( ... sr_name, file_fmt, dwnld_dir, verbose=True, ret_download_path=True) Confirmed to download .shp.zip data of the following geographic region(s): West Midlands ? [No]|Yes: yes Downloading "west-midlands-latest-free.shp.zip" to "\\tests" ... Done. >>> print(os.path.relpath(dwnld_path)) tests\\west-midlands-latest-free.shp.zip >>> # Delete the downloaded .shp.zip file >>> os.remove(dwnld_path) >>> # Download shapefiles of Great Britain >>> sr_name = 'Great Britain' >>> file_fmt = ".shp" >>> dwnld_path = geofabrik_downloader.download_osm_data( ... sr_name, file_fmt, dwnld_dir, deep_retry=True, verbose=True, ... ret_download_path=True) Confirmed to download .shp.zip data of the following geographic region(s): Great Britain ? [No]|Yes: yes The .shp.zip data is not found for "Great Britain". Try downloading the data of its subregions instead [No]|Yes: no >>> print(dwnld_path) [] """ subregion_names_ = [subregion_names] if isinstance(subregion_names, str) \ else subregion_names.copy() subregion_names_ = [ self.validate_input_subregion_name(x) for x in subregion_names_] osm_file_format_ = self.validate_input_file_format(osm_file_format) if confirmed( "Confirmed to download {} data of the following geographic region(s):" "\n\t{}\n?".format(osm_file_format_, "\n\t".join(subregion_names_)), confirmation_required=confirmation_required): download_paths = [] for sub_reg_name in subregion_names_: # Get download URL subregion_name_, download_url = self.get_subregion_download_url( sub_reg_name, osm_file_format_) if download_url is None: if verbose: print("The {} data is not found for \"{}\".".format( osm_file_format_, subregion_name_)) if confirmed("Try downloading the data of its subregions instead", confirmation_required=confirmation_required): sub_subregions = self.search_for_subregions( subregion_name_, deep=deep_retry) if sub_subregions == [subregion_name_]: print("No {} data is available " "for this geographic region.".format(osm_file_format_)) break else: if not download_dir: _, path_to_file_ = self.get_default_path_to_osm_file( subregion_name_, ".osm.pbf") download_dir = os.path.dirname(path_to_file_) download_dir_ = self.make_sub_download_dir( subregion_name_, osm_file_format_, download_dir) self.download_osm_data( sub_subregions, osm_file_format=osm_file_format_, download_dir=download_dir_, update=update, confirmation_required=False, verbose=verbose, ret_download_path=ret_download_path) else: if not download_dir: # Download the requested OSM file to default directory osm_filename, path_to_file = self.get_default_path_to_osm_file( subregion_name_, osm_file_format_, mkdir=True) else: download_dir_ = validate_input_data_dir(download_dir) osm_filename = self.get_default_osm_filename( subregion_name_, osm_file_format=osm_file_format_) path_to_file = os.path.join(download_dir_, osm_filename) download_paths.append(path_to_file) if os.path.isfile(path_to_file) and not update: if verbose: print("\"{}\" is already available at \"\\{}\".".format( os.path.basename(path_to_file), os.path.relpath(os.path.dirname(path_to_file)))) else: if verbose: print("{} \"{}\" to \"\\{}\" ... ".format( "Updating" if os.path.isfile(path_to_file) else "Downloading", osm_filename, os.path.relpath(os.path.dirname(path_to_file)))) try: download_file_from_url(download_url, path_to_file) print("Done. ") if verbose else "" except Exception as e: print("Failed. {}.".format(e)) if interval_sec: time.sleep(interval_sec) if ret_download_path: if len(download_paths) == 1: download_paths = download_paths[0] return download_paths
[docs] def osm_file_exists(self, subregion_name, osm_file_format, data_dir=None, update=False, verbose=False, ret_file_path=False): """ Check if a requested data file of a geographic region already exists locally. :param subregion_name: name of a geographic region (case-insensitive) available on Geofabrik's free download server :type subregion_name: str :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param data_dir: directory for saving the downloaded file(s); if None (default), use the default directory :type data_dir: str or None :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :param ret_file_path: whether to return the path to the data file (if it exists), defaults to ``False`` :type ret_file_path: bool :return: whether requested data file exists :rtype: bool **Examples**:: >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> sr_name = 'london' >>> file_fmt = ".pbf" >>> path_to_pbf = geofabrik_downloader.osm_file_exists( ... sr_name, file_fmt, verbose=True) >>> print(path_to_pbf) True # (if the PBF data file exists) >>> path_to_pbf = geofabrik_downloader.osm_file_exists( ... sr_name, file_fmt, ret_file_path=True) >>> print(os.path.relpath(path_to_pbf)) # (if the data file exists) dat_GeoFabrik\\Europe\\Great Britain\\England\\greater-london-latest.osm.pbf """ subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = self.validate_input_file_format(osm_file_format) default_filename, path_to_file = self.get_default_path_to_osm_file( subregion_name_, osm_file_format_) if data_dir: path_to_file = cd(validate_input_data_dir(data_dir), default_filename) if os.path.isfile(path_to_file) and not update: if verbose == 2: print("\"{}\" of {} is available at \"{}\".".format( default_filename, subregion_name_, os.path.relpath(os.path.dirname(path_to_file)))) if ret_file_path: return path_to_file else: return True else: return False
[docs] def download_subregion_data(self, subregion_names, osm_file_format, download_dir=None, update=False, verbose=False, ret_download_path=False): """ Download OSM data (in a specific file format) of one (or multiple) geographic region(s) and all its (or their) subregions. :param subregion_names: name(s) of one (or multiple) region(s)/subregion(s) available on Geofabrik's free download server :type subregion_names: str or list :param osm_file_format: OSM file format; valid values include ``".osm.pbf"``, ``".shp.zip"`` and ``".osm.bz2"`` :type osm_file_format: str :param download_dir: directory for saving the downloaded file(s); if None (default), use the default directory :type download_dir: str or None :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :param ret_download_path: whether to return the path(s) to the downloaded file(s), defaults to ``False`` :type ret_download_path: bool :return: the path(s) to the downloaded file(s) when ``ret_download_path=True`` :rtype: list or str **Examples**:: >>> import os >>> from pyhelpers.dir import cd >>> from pydriosm.downloader import GeofabrikDownloader >>> geofabrik_downloader = GeofabrikDownloader() >>> file_fmt = ".pbf" >>> dwnld_dir = "tests" >>> sr_names = ['rutland', 'west yorkshire'] >>> geofabrik_downloader.download_subregion_data(sr_names, file_fmt, ... dwnld_dir, verbose=True) Confirmed to download .osm.pbf data of the following geographic region(s): Rutland West Yorkshire ? [No]|Yes: yes Downloading "rutland-latest.osm.pbf" to "\\tests" ... Done. Downloading "west-yorkshire-latest.osm.pbf" to "\\tests" ... Done. >>> os.remove(cd("tests", "rutland-latest.osm.pbf")) >>> sr_names = ['west midlands', 'west yorkshire'] >>> dwnld_paths = geofabrik_downloader.download_subregion_data( ... sr_names, file_fmt, dwnld_dir, verbose=True, ret_download_path=True) Confirmed to download .osm.pbf data of the following geographic region(s): West Midlands ? [No]|Yes: yes Downloading "west-midlands-latest.osm.pbf" to "\\tests" ... Done. "west-yorkshire-latest.osm.pbf" is already available at "\\tests". >>> for dwnld_path in dwnld_paths: print(os.path.relpath(dwnld_path)) tests\\west-midlands-latest.osm.pbf tests\\west-yorkshire-latest.osm.pbf >>> for dwnld_path in dwnld_paths: os.remove(dwnld_path) """ subregion_names_ = [subregion_names] if isinstance(subregion_names, str) \ else subregion_names.copy() subregion_names_ = [ self.validate_input_subregion_name(x) for x in subregion_names_] subregion_names_ = self.search_for_subregions(*subregion_names_) subregion_name_list = subregion_names_.copy() osm_file_format_ = self.validate_input_file_format(osm_file_format) for subregion_name in subregion_names_: if self.osm_file_exists(subregion_name, osm_file_format_, download_dir, update): subregion_name_list.remove(subregion_name) confirmation_required_ = False if not subregion_name_list else True if confirmed( "Confirmed to download {} data of the following geographic region(s): " "\n\t{}\n?".format(osm_file_format_, "\n\t".join(subregion_name_list)), confirmation_required=confirmation_required_): download_paths = self.download_osm_data( subregion_names_, osm_file_format=osm_file_format_, download_dir=download_dir, update=update, confirmation_required=False, verbose=verbose, ret_download_path=ret_download_path) if ret_download_path: if len(download_paths) == 1: download_paths = download_paths[0] return download_paths
[docs]class BBBikeDownloader: """ A class for downloading OSM data from `BBBike <https://download.bbbike.org/>`_'s free download server. **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> print(bbbike_downloader.Name) BBBike OpenStreetMap data extracts """ def __init__(self): """ Constructor method. """ self.Name = 'BBBike OpenStreetMap data extracts' self.URL = bbbike_homepage() self.URLCities = \ 'https://raw.githubusercontent.com/wosch/bbbike-world/world/etc/cities.txt' self.CitiesNames = 'BBBike cities' self.URLCitiesCoordinates = \ 'https://raw.githubusercontent.com/wosch/bbbike-world/world/etc/cities.csv' self.CitiesCoordinates = 'BBBike cities coordinates' self.SubregionCatalogue = 'BBBike subregion catalogue' self.SubregionNameList = 'BBBike subregion name list' self.DownloadDictName = 'BBBike download dictionary'
[docs] def get_list_of_cities(self, update=False, confirmation_required=True, verbose=False): """ Get a list of names of cities. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: catalogue for subregions of BBBike data :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> names_of_cities = bbbike_downloader.get_list_of_cities() >>> print(names_of_cities[:5]) ['Heilbronn', 'Emden', 'Bremerhaven', 'Paris', 'Ostrava'] """ path_to_pickle = cd_dat(self.CitiesNames.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_pickle) and not update: cities_names = load_pickle(path_to_pickle) else: if confirmed("To collect {}?".format(self.CitiesNames), confirmation_required=confirmation_required): try: cities_names_ = pd.read_csv(self.URLCities, header=None) cities_names = list(cities_names_.values.flatten()) save_pickle(cities_names, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) cities_names = None else: if verbose: print("No data of \"{}\" is available.".format(self.CitiesNames)) cities_names = None return cities_names
[docs] def get_coordinates_of_cities(self, update=False, confirmation_required=True, verbose=False): """ Get location information of cities (geographic regions). :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: location information of BBBike cities :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> coords_of_cities = bbbike_downloader.get_coordinates_of_cities() >>> print(coords_of_cities.tail()) City Real name ... ur_longitude ur_latitude 233 Zagreb de!Agram,en!Zagreb ... 16.291 45.94 234 Zuerich de!Zürich,en!Zurich ... 8.87 47.58 238 bbbike ... 14.249353 52.355108 240 dummy ... 44.5259 33.4238 241 Finowfurt ... 13.8591 52.8787 [5 rows x 13 columns] """ path_to_pickle = cd_dat(self.CitiesCoordinates.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_pickle) and not update: cities_coordinates = load_pickle(path_to_pickle) else: if confirmed("To collect {}?".format(self.CitiesCoordinates), confirmation_required=confirmation_required): try: csv_temp = urllib.request.urlopen(self.URLCitiesCoordinates) csv_file = list( csv.reader(io.StringIO(csv_temp.read().decode('utf-8')), delimiter=':')) csv_data = [[x.strip().strip('\u200e').replace('#', '') for x in row] for row in csv_file[5:-1]] column_names = [x.replace('#', '').strip().capitalize() for x in csv_file[0]] cities_coords = pd.DataFrame(csv_data, columns=column_names) coordinates = cities_coords.Coord.str.split(' ').apply(pd.Series) coords_cols = ['ll_longitude', 'll_latitude1', 'ur_longitude', 'ur_latitude'] coordinates.columns = coords_cols cities_coords.drop(['Coord'], axis=1, inplace=True) cities_coordinates = pd.concat([cities_coords, coordinates], axis=1) cities_coordinates.dropna(subset=coords_cols, inplace=True) save_pickle(cities_coordinates, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) cities_coordinates = None else: if verbose: print("No data of \"{}\" is available.".format( self.CitiesCoordinates)) cities_coordinates = None return cities_coordinates
[docs] def get_subregion_catalogue(self, update=False, confirmation_required=True, verbose=False): """ Get a catalogue for geographic regions. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: catalogue for subregions of BBBike data :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> subregion_catalog = bbbike_downloader.get_subregion_catalogue() >>> print(subregion_catalog.head()) Name ... URL 1 Aachen ... http://download.bbbike.org/osm/bbbike/Aachen/ 2 Aarhus ... http://download.bbbike.org/osm/bbbike/Aarhus/ 3 Adelaide ... http://download.bbbike.org/osm/bbbike/Adelaide/ 4 Albuquerque ... http://download.bbbike.org/osm/bbbike/Albuquer... 5 Alexandria ... http://download.bbbike.org/osm/bbbike/Alexandria/ [5 rows x 3 columns] """ path_to_pickle = cd_dat(self.SubregionCatalogue.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_pickle) and not update: subregion_catalogue = load_pickle(path_to_pickle) else: if confirmed("To collect {}?".format(self.SubregionCatalogue), confirmation_required=confirmation_required): try: bbbike_subregion_catalogue_ = pd.read_html( self.URL, header=0, parse_dates=['Last Modified']) subregion_catalogue = bbbike_subregion_catalogue_[0].drop(0).drop( ['Size', 'Type'], axis=1) subregion_catalogue.Name = subregion_catalogue.Name.map( lambda x: x.strip('/')) source = requests.get(self.URL, headers=fake_requests_headers()) table_soup = bs4.BeautifulSoup(source.text, 'lxml').find('table') urls = [urllib.parse.urljoin(self.URL, x.get('href')) for x in table_soup.find_all('a')[1:]] subregion_catalogue['URL'] = urls save_pickle(subregion_catalogue, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}.".format(e)) subregion_catalogue = None else: if verbose: print("No data of \"{}\" is available.".format( self.SubregionCatalogue)) subregion_catalogue = None return subregion_catalogue
[docs] def get_list_of_subregion_names(self, update=False, confirmation_required=True, verbose=False): """ Get a list of names of all geographic regions. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a list of geographic region names available on BBBike's free download server :rtype: list **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> sr_name_list = bbbike_downloader.get_list_of_subregion_names() >>> print(sr_name_list[:5]) ['Aachen', 'Aarhus', 'Adelaide', 'Albuquerque', 'Alexandria'] """ path_to_name_list = cd_dat(self.SubregionNameList.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_name_list) and not update: subregion_name_list = load_pickle(path_to_name_list) else: if confirmed("To get {}?".format(self.SubregionNameList), confirmation_required=confirmation_required): subregion_catalogue = self.get_subregion_catalogue( update, confirmation_required=False, verbose=verbose) subregion_name_list = subregion_catalogue.Name.to_list() save_pickle(subregion_name_list, path_to_name_list, verbose=verbose) else: subregion_name_list = [] if verbose: print("No data of {} is available.".format(self.SubregionNameList)) return subregion_name_list
[docs] def validate_input_subregion_name(self, subregion_name): """ Validate an input name of a geographic region. The validation is done by matching the input ``subregion_name`` to a name of a geographic region available on BBBike's free download server. :param subregion_name: name of a geographic region (case-insensitive) :type subregion_name: str :return: valid subregion name that matches, or is the most similar to, the input ``subregion_name`` :rtype: str **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> sr_name = 'leeds' >>> sr_name_ = bbbike_downloader.validate_input_subregion_name(sr_name) >>> print(sr_name_) Leeds """ assert isinstance(subregion_name, str) bbbike_subregion_names = self.get_list_of_subregion_names() subregion_name_ = find_similar_str(subregion_name, bbbike_subregion_names) return subregion_name_
[docs] def get_subregion_download_catalogue(self, subregion_name, confirmation_required=True, verbose=False): """ Get a download catalogue of OSM data available for a geographic region. :param subregion_name: name of a geographic region (case-insensitive) available on BBBike's free download server :type subregion_name: str :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a catalogues for subregion downloads :rtype: pandas.DataFrame or None **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> sr_name = 'leeds' >>> leeds_dwnld_cat = bbbike_downloader.get_subregion_download_catalogue( ... subregion_name=sr_name, verbose=True) Confirmed to collect the download catalogue for Leeds? [No]|Yes: yes In progress ... Done. >>> print(leeds_dwnld_cat.head()) Filename ... LastUpdate 0 Leeds.osm.pbf ... 2020-09-25 10:04:25 1 Leeds.osm.gz ... 2020-09-25 15:11:49 2 Leeds.osm.shp.zip ... 2020-09-25 15:33:10 3 Leeds.osm.garmin-onroad-latin1.zip ... 2020-09-25 17:49:15 4 Leeds.osm.garmin-onroad.zip ... 2020-09-25 17:49:04 [5 rows x 5 columns] """ subregion_name_ = self.validate_input_subregion_name(subregion_name) if confirmed("Confirmed to collect the download catalogue for {}?".format( subregion_name_), confirmation_required=confirmation_required): try: if confirmation_required: print("In progress", end=" ... ") if verbose else "" else: print(f" {subregion_name_}", end=" ... ") if verbose else "" url = urllib.parse.urljoin(self.URL, subregion_name_ + '/') source = requests.get(url, headers=fake_requests_headers()) source_soup = bs4.BeautifulSoup(source.text, 'lxml') download_links_class = source_soup.find_all( name='a', attrs={'class': ['download_link', 'small']}) def parse_dlc(dlc): dlc_href = dlc.get('href') # URL filename = os.path.basename(dlc_href) download_url = urllib.parse.urljoin(url, dlc_href) if not dlc.has_attr('title'): file_format, file_size, last_update = 'Poly', None, None else: if len(dlc.contents) < 3: file_format, file_size = 'Txt', None else: file_format, file_size, _ = dlc.contents # File type and size file_format, file_size = file_format.strip(), file_size.text last_update = pd.to_datetime(dlc.get('title')) # Date and time parsed_dat = [ filename, download_url, file_format, file_size, last_update] return parsed_dat subregion_download_catalogue = pd.DataFrame( parse_dlc(x) for x in download_links_class) subregion_download_catalogue.columns = [ 'Filename', 'URL', 'DataType', 'Size', 'LastUpdate'] # file_path = cd_dat_bbbike( # subregion_name_, subregion_name_ + "-download-catalogue.pickle") # save_pickle(subregion_downloads_catalogue, file_path, verbose=verbose) print("Done. ") if verbose else "" except Exception as e: subregion_download_catalogue = None print("Failed. {}".format(subregion_name_, e)) if verbose else "" return subregion_download_catalogue
[docs] def get_download_index(self, update=False, confirmation_required=True, verbose=False): """ Get a dict-type index of available formats, data types and a download catalogue. :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :return: a list of available formats, a list of available data types and a dictionary of download catalogue :rtype: dict **Examples**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> dwnld_dict = bbbike_downloader.get_download_index() >>> print(list(dwnld_dict.keys())) ['FileFormat', 'DataType', 'Catalogue'] >>> print(dwnld_dict['Catalogue']['Leeds'].head()) Filename ... LastUpdate 0 Leeds.osm.pbf ... 2020-08-14 18:10:47 1 Leeds.osm.gz ... 2020-08-14 23:26:15 2 Leeds.osm.shp.zip ... 2020-08-14 23:48:29 3 Leeds.osm.garmin-onroad-latin1.zip ... 2020-08-15 01:59:13 4 Leeds.osm.garmin-onroad.zip ... 2020-08-15 01:59:02 [5 rows x 5 columns] """ path_to_pickle = cd_dat(self.DownloadDictName.replace(" ", "-") + ".pickle") if os.path.isfile(path_to_pickle) and not update: download_dictionary = load_pickle(path_to_pickle) else: if confirmed("To collect {} from BBBike's free download server?".format( self.DownloadDictName), confirmation_required=confirmation_required): try: bbbike_subregion_names = \ self.get_subregion_catalogue(verbose=verbose).Name.to_list() if verbose: print("Collecting {} ... ".format(self.DownloadDictName)) download_catalogue = [ self.get_subregion_download_catalogue(subregion_name, confirmation_required=False, verbose=verbose) for subregion_name in bbbike_subregion_names] sr_name = bbbike_subregion_names[0] sr_download_catalogue = download_catalogue[0] # Available file formats file_fmt = [re.sub('{}|CHECKSUM'.format(sr_name), '', f) for f in sr_download_catalogue.Filename] # Available data types data_typ = sr_download_catalogue.DataType.tolist() download_dictionary = { 'FileFormat': [x.replace(".osm", "", 1) for x in file_fmt[:-2]], 'DataType': data_typ[:-2], 'Catalogue': dict(zip(bbbike_subregion_names, download_catalogue))} print("Finished. ") if verbose else "" save_pickle(download_dictionary, path_to_pickle, verbose=verbose) except Exception as e: print("Failed. {}".format(e)) download_dictionary = None else: if verbose: print("No data of \"{}\" is available.".format(self.DownloadDictName)) download_dictionary = None return download_dictionary
[docs] def get_osm_file_formats(self): """ Get a list of valid OSM data file formats. :return: a list of valid BBBike OSM file formats on BBBike's free download server :rtype: list **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> file_fmts = bbbike_downloader.get_osm_file_formats() >>> for file_fmt in file_fmts: ... print(file_fmt) .pbf .gz .shp.zip .garmin-onroad-latin1.zip .garmin-onroad.zip .garmin-opentopo.zip .garmin-osm.zip .geojson.xz .svg-osm.zip .mapsforge-osm.zip .navit.zip .csv.xz """ osm_file_formats = self.get_download_index()['FileFormat'] return osm_file_formats
[docs] def validate_input_file_format(self, osm_file_format): """ Validate an input file format of OSM data. The validation is done by matching the input ``osm_file_format`` to a filename extension available on BBBike's free download server. :param osm_file_format: file extension of an OSM data extract :type osm_file_format: str :return: valid file format (file extension) :rtype: str **Example**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> file_fmt = 'PBF' >>> file_fmt_ = bbbike_downloader.validate_input_file_format(file_fmt) >>> print(file_fmt_) .pbf """ assert isinstance(osm_file_format, str) bbbike_osm_file_formats = self.get_osm_file_formats() try: osm_file_format_ = find_similar_str(osm_file_format, bbbike_osm_file_formats) if osm_file_format_: return osm_file_format_ else: print("The input file format must be one of the following:" " \n \"{}\".".format("\",\n \"".join(bbbike_osm_file_formats))) except Exception as e: print(e)
[docs] def get_subregion_download_url(self, subregion_name, osm_file_format): """ Get a valid URL for downloading OSM data of a specific file format for a geographic region. :param subregion_name: name of a geographic region (case-insensitive) available on BBBike's free download server :type subregion_name: str :param osm_file_format: format (file extension) of an OSM data :type osm_file_format: str :return: a valid name of ``subregion_name`` and a download URL for the given ``osm_file_format`` :rtype: tuple **Examples**:: >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> sr_name = 'leeds' >>> file_fmt = 'pbf' >>> sr_name_, sr_url = bbbike_downloader.get_subregion_download_url( ... sr_name, file_fmt) >>> print(sr_name_) Leeds >>> print(sr_url) http://download.bbbike.org/osm/bbbike/Leeds/Leeds.osm.pbf >>> file_fmt = 'csv.xz' >>> sr_name_, sr_url = bbbike_downloader.get_subregion_download_url( ... sr_name, file_fmt) >>> print(sr_name_) Leeds >>> print(sr_url) http://download.bbbike.org/osm/bbbike/Leeds/Leeds.osm.csv.xz """ subregion_name_ = self.validate_input_subregion_name(subregion_name) osm_file_format_ = ".osm" + self.validate_input_file_format(osm_file_format) bbbike_download_dictionary = self.get_download_index()['Catalogue'] sub_download_catalogue = bbbike_download_dictionary[subregion_name_] tmp = subregion_name_ + osm_file_format_ url = sub_download_catalogue[sub_download_catalogue.Filename == tmp].URL.iloc[0] return subregion_name_, url
[docs] def get_valid_download_info(self, subregion_name, osm_file_format, download_dir=None): """ Get information of downloading (or downloaded) data file. The information includes a valid subregion name, a default filename, a URL and an absolute path where the data file is (to be) saved locally. :param subregion_name: name of a geographic region (case-insensitive) :type subregion_name: str :param osm_file_format: format (file extension) of an OSM data :type osm_file_format: str :param download_dir: directory where downloaded OSM file is saved; if ``None`` (default), package data directory :type download_dir: str or None :return: valid subregion name, filename, download url and absolute file path :rtype: tuple **Examples**:: >>> import os >>> from pydriosm.downloader import BBBikeDownloader >>> bbbike_downloader = BBBikeDownloader() >>> sr_name = 'leeds' >>> file_fmt = 'pbf' >>> info = bbbike_downloader.get_valid_download_info(sr_name, file_fmt) >>> sr_name_, pbf_filename, dwnld_url, path_to_pbf = info >>> print(sr_name_) Leeds >>> print(pbf_filename) Leeds.osm.pbf >>> print(dwnld_url) http://download.bbbike.org/osm/bbbike/Leeds/Leeds.osm.pbf >>> print(os.path.relpath(path_to_pbf)) dat_BBBike\\Leeds\\Leeds.osm.pbf """ subregion_name_, download_url = self.get_subregion_download_url( subregion_name, osm_file_format) osm_filename = os.path.basename(download_url) if download_dir: path_to_file = cd(validate_input_data_dir(download_dir), osm_filename, mkdir=True) else: # default directory of package data path_to_file = cd_dat_bbbike(subregion_name_, osm_filename, mkdir=True) return subregion_name_, osm_filename, download_url, path_to_file
[docs] def download_osm_data(self, subregion_names, osm_file_format, download_dir=None, update=False, confirmation_required=True, interval_sec=1, verbose=False, ret_download_path=False): """ Download OSM data (in a specific file format) of one (or multiple) geographic region(s). :param subregion_names: name(s) of one (or multiple) geographic region(s) available on BBBike's free download server :type subregion_names: str or list :param osm_file_format: format (file extension) of an OSM data :type osm_file_format: str :param download_dir: directory where downloaded OSM file is saved; if ``None`` (default), package data directory :type download_dir: str or None :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param interval_sec: interval (in sec) between downloading two subregions, defaults to ``1`` :type interval_sec: int :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :param ret_download_path: whether to return the path(s) to the downloaded file(s), defaults to ``False`` :type ret_download_path: bool :return: the path(s) to the downloaded file(s) when ``ret_download_path=True`` :rtype: list or str **Examples**:: >>> import os >>> from pyhelpers.dir import delete_dir >>> from pydriosm.downloader import BBBikeDownloader, cd_dat_bbbike >>> bbbike_downloader = BBBikeDownloader() >>> sr_names = 'London' >>> file_fmt = 'pbf' >>> bbbike_downloader.download_osm_data(sr_names, file_fmt, verbose=True) Confirmed to download .pbf data of the following geographic region(s): London ? [No]|Yes: yes Downloading "London.osm.pbf" to "\\dat_BBBike\\London" ... Done. >>> # Delete the directory generated above >>> delete_dir(cd_dat_bbbike(), verbose=True) The directory "\\dat_BBBike" is not empty. Confirmed to delete it? [No]|Yes: yes Deleting "\\dat_BBBike" ... Done. >>> sr_names = ['leeds', 'birmingham'] >>> dwnld_dir = "tests" >>> dwnld_paths = bbbike_downloader.download_osm_data(sr_names, file_fmt, ... dwnld_dir, verbose=True, ... ret_download_path=True) Confirmed to download .pbf data of the following geographic region(s): Leeds Birmingham ? [No]|Yes: yes Downloading "Leeds.osm.pbf" to "\\tests" ... Done. Downloading "Birmingham.osm.pbf" to "\\tests" ... Done. >>> for dwnld_path in dwnld_paths: ... print(os.path.relpath(dwnld_path)) tests\\Leeds.osm.pbf tests\\Birmingham.osm.pbf >>> # Delete the above downloaded data files >>> for dwnld_path in dwnld_paths: ... os.remove(dwnld_path) """ subregion_names_ = [subregion_names] if isinstance(subregion_names, str) \ else subregion_names.copy() subregion_names_ = [ self.validate_input_subregion_name(x) for x in subregion_names_] osm_file_format_ = self.validate_input_file_format(osm_file_format) download_path = [] if confirmed("Confirmed to download {} data of the following geographic region(s):" "\n\t{}\n?".format(osm_file_format_, "\n\t".join(subregion_names_)), confirmation_required=confirmation_required): for sub_reg_name in subregion_names_: subregion_name_, osm_filename, download_url, path_to_file = \ self.get_valid_download_info(sub_reg_name, osm_file_format_, download_dir) if os.path.isfile(path_to_file) and not update: if verbose: print( "The {} data of {} is already available at \"\\{}\".".format( osm_file_format_, subregion_name_, os.path.relpath(path_to_file))) download_path.append(path_to_file) else: try: if verbose: print("{} \"{}\" to \"\\{}\" ... ".format( "Updating" if os.path.isfile(path_to_file) else "Downloading", osm_filename, os.path.relpath(os.path.dirname(path_to_file)))) download_file_from_url(download_url, path_to_file) print("Done. ") if verbose else "" download_path.append(path_to_file) if os.path.getsize(path_to_file) / (1024 ** 2) <= 5: time.sleep(interval_sec) except Exception as e: print("Failed. {}.".format(e)) if ret_download_path: if len(download_path) == 1: download_path = download_path[0] return download_path
[docs] def download_subregion_data(self, subregion_name, download_dir=None, update=False, confirmation_required=True, verbose=False, ret_download_path=False): """ Download OSM data of all available formats for a geographic region. :param subregion_name: name of a geographic region (case-insensitive) available on BBBike's free download server :type subregion_name: str :param download_dir: directory where the downloaded file is saved, defaults to ``None`` :type download_dir: str or None :param update: whether to check on update and proceed to update the package data, defaults to ``False`` :type update: bool :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True`` :type confirmation_required: bool :param verbose: whether to print relevant information in console, defaults to ``False`` :type verbose: bool or int :param ret_download_path: whether to return the path(s) to the downloaded file(s), defaults to ``False`` :type ret_download_path: bool :return: the path(s) to the downloaded file(s) when ``ret_download_path=True`` :rtype: list or str **Example**:: >>> import os >>> from pyhelpers.dir import delete_dir >>> from pydriosm.downloader import BBBikeDownloader, cd_dat_bbbike >>> bbbike_downloader = BBBikeDownloader() >>> sr_name = 'london' >>> bbbike_downloader.download_subregion_data(sr_name, verbose=True) Confirmed to download all available BBBike OSM data of London? [No]|Yes: yes Downloading in progress ... London.osm.pbf ... London.osm.gz ... London.osm.shp.zip ... London.osm.garmin-onroad-latin1.zip ... London.osm.garmin-onroad.zip ... London.osm.garmin-opentopo.zip ... London.osm.garmin-osm.zip ... London.osm.geojson.xz ... London.osm.svg-osm.zip ... London.osm.mapsforge-osm.zip ... London.osm.navit.zip ... London.osm.csv.xz ... London.poly ... CHECKSUM.txt ... Done. Check out the downloaded OSM data at "\\dat_BBBike\\London". >>> # Delete the download directory generated above >>> delete_dir(cd_dat_bbbike(), verbose=True) The directory "\\dat_BBBike" is not empty. Confirmed to delete it? [No]|Yes: yes Deleting "\\dat_BBBike" ... Done. >>> sr_name = 'leeds' >>> dwnld_dir = "tests" >>> dwnld_paths = bbbike_downloader.download_subregion_data( ... sr_name, dwnld_dir, confirmation_required=False, verbose=True, ... ret_download_path=True) Downloading all available BBBike OSM data of Leeds ... Leeds.osm.pbf ... Leeds.osm.gz ... Leeds.osm.shp.zip ... Leeds.osm.garmin-onroad-latin1.zip ... Leeds.osm.garmin-onroad.zip ... Leeds.osm.garmin-opentopo.zip ... Leeds.osm.garmin-osm.zip ... Leeds.osm.geojson.xz ... Leeds.osm.svg-osm.zip ... Leeds.osm.mapsforge-osm.zip ... Leeds.osm.navit.zip ... Leeds.osm.csv.xz ... Leeds.poly ... CHECKSUM.txt ... Done. Check out the downloaded OSM data at "\\tests\\Leeds". >>> for dwnld_path in dwnld_paths: ... print(os.path.relpath(dwnld_path)) tests\\Leeds\\Leeds.osm.pbf tests\\Leeds\\Leeds.osm.gz tests\\Leeds\\Leeds.osm.shp.zip tests\\Leeds\\Leeds.osm.garmin-onroad-latin1.zip tests\\Leeds\\Leeds.osm.garmin-onroad.zip tests\\Leeds\\Leeds.osm.garmin-opentopo.zip tests\\Leeds\\Leeds.osm.garmin-osm.zip tests\\Leeds\\Leeds.osm.geojson.xz tests\\Leeds\\Leeds.osm.svg-osm.zip tests\\Leeds\\Leeds.osm.mapsforge-osm.zip tests\\Leeds\\Leeds.osm.navit.zip tests\\Leeds\\Leeds.osm.csv.xz tests\\Leeds\\Leeds.poly tests\\Leeds\\CHECKSUM.txt >>> # Delete the download directory generated above >>> delete_dir(os.path.dirname(dwnld_paths[0]), confirmation_required=False) """ subregion_name_ = self.validate_input_subregion_name(subregion_name) bbbike_download_dictionary = self.get_download_index()['Catalogue'] sub_download_catalogue = bbbike_download_dictionary[subregion_name_] data_dir = validate_input_data_dir(download_dir) if download_dir \ else cd_dat_bbbike(subregion_name_, mkdir=True) if confirmed("Confirmed to download all available BBBike OSM data of {}?".format( subregion_name_), confirmation_required=confirmation_required): if verbose: if confirmation_required: print("Downloading in progress ... ") else: print("Downloading all available BBBike OSM data of {} ... ".format( subregion_name_)) download_paths = [] for download_url, osm_filename in zip(sub_download_catalogue.URL, sub_download_catalogue.Filename): try: path_to_file = os.path.join( data_dir, "" if not download_dir else subregion_name_, osm_filename) if os.path.isfile(path_to_file) and not update: if verbose: print("\t\"{}\" is already available.".format( os.path.basename(path_to_file))) else: print("\t{} ... ".format(osm_filename)) if verbose else "" download_file_from_url(download_url, path_to_file) # if os.path.getsize(path_to_file) / (1024 ** 2) <= 5: # time.sleep(5) download_paths.append(path_to_file) except Exception as e: print("Failed. {}.".format(e)) if verbose and download_paths: print("Done. Check out the downloaded OSM data at \"\\{}\".".format( os.path.relpath(os.path.dirname(download_paths[0])))) if ret_download_path: return download_paths