"""
Reading `OSM <https://www.openstreetmap.org/>`_ data extracts.
"""
import collections
import gc
import glob
import itertools
import lzma
import zipfile
import rapidjson
from pyhelpers.ops import split_list
from .downloader import *
from .settings import gdal_configurations
from .utils import *
def get_osm_pbf_layer_names(path_to_osm_pbf):
"""
Get names of all layers in a PBF data file.
:param path_to_osm_pbf: absolute path to a PBF data file
:type path_to_osm_pbf: str
:return: name (and index) of each layer of the PBF data file
:rtype: dict
**Example**::
>>> import os
>>> from pydriosm.reader import GeofabrikDownloader, get_osm_pbf_layer_names
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_name = 'Rutland'
>>> file_fmt = ".pbf"
>>> dwnld_dir = "tests"
>>> path_to_rutland_pbf = geofabrik_downloader.download_osm_data(
... sr_name, file_fmt, dwnld_dir, verbose=True, ret_download_path=True)
Confirmed to download .osm.pbf data of the following geographic region(s):
Rutland
? [No]|Yes: yes
Downloading "rutland-latest.osm.pbf" to "\\tests" ...
Done.
>>> lyr_idx_names = get_osm_pbf_layer_names(path_to_rutland_pbf)
>>> for k, v in lyr_idx_names.items(): print(f'{k}: {v}')
0: points
1: lines
2: multilinestrings
3: multipolygons
4: other_relations
>>> # Delete the downloaded PBF data file
>>> os.remove(path_to_rutland_pbf)
"""
try:
import ogr
# Start parsing the '.osm.pbf' file
osm_pbf = ogr.Open(path_to_osm_pbf)
# Find out the available layers in the file
layer_count, layer_names = osm_pbf.GetLayerCount(), []
# Loop through all available layers
for i in range(layer_count):
lyr = osm_pbf.GetLayerByIndex(i) # Hold the i-th layer
layer_names.append(lyr.GetName()) # Get the name of the i-th layer
layer_idx_names = dict(zip(range(layer_count), layer_names))
return layer_idx_names
except Exception as e:
print("Failed to get layer names of \"{}\". {}.".format(path_to_osm_pbf, e))
def parse_osm_pbf_layer(pbf_layer_data, geo_typ, transform_geom, transform_other_tags):
"""
Parse the data of a single layer of a PBF data file.
:param pbf_layer_data: data of a specific layer of a PBF data file
:type pbf_layer_data: pandas.DataFrame
:param geo_typ: geometric type
:type geo_typ: str
:param transform_geom: whether to transform a single coordinate
(or a collection of coordinates) into a geometric object
:type transform_geom: bool
:param transform_other_tags: whether to transform an ``'other_tags'`` record
into a dictionary
:type transform_other_tags: bool
:return: parsed data of the ``geo_typ`` layer of a given .pbf file
:rtype: pandas.DataFrame
.. _`shapely.geometry`:
https://shapely.readthedocs.io/en/latest/manual.html#geometric-objects
See the examples for the function
:py:func:`parse_osm_pbf()<pydriosm.reader.parse_osm_pbf>`.
"""
def make_point_as_polygon(mp_coords):
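# If the first ring contains only two identical points (i.e. a point recorded
# as a polygon), append the first point again so that a valid Polygon
# (which needs at least three coordinates) can be built from it.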
mp_coords, temp = mp_coords.copy(), mp_coords[0][0].copy()
if len(temp) == 2 and temp[0] == temp[1]:
mp_coords[0][0] += [temp[0]]
return mp_coords
def transform_single_geometry_(geom_data):
"""
Transform a single coordinate into a geometric object
by using `shapely.geometry`_.
"""
geom_types_funcs = get_osm_geom_object_dict()
pbf_layer_feat_types = get_pbf_layer_feat_types_dict()
geom_type = pbf_layer_feat_types[geo_typ]
geom_type_func = geom_types_funcs[geom_type]
if geom_type == 'MultiPolygon':
sub_geom_type_func = geom_types_funcs['Polygon']
geom_coords = geom_data.coordinates.map(
lambda x: geom_type_func(
sub_geom_type_func(y) for ls in make_point_as_polygon(x) for y in ls))
else:
geom_coords = geom_data.coordinates.map(lambda x: geom_type_func(x))
return geom_coords
def transform_multi_geometries_(geom_collection):
"""
Transform a collection of coordinates into a geometric object formatted by
`shapely.geometry
<https://shapely.readthedocs.io/en/latest/manual.html#geometric-objects>`_.
"""
import shapely.geometry
geom_obj_funcs = get_osm_geom_object_dict()
geom_types = [g['type'] for g in geom_collection]
coordinates = [gs['coordinates'] for gs in geom_collection]
geometry_collection = [
geom_obj_funcs[geom_type](coords) if 'Polygon' not in geom_type
else geom_obj_funcs[geom_type](pt for pts in coords for pt in pts)
for geom_type, coords in zip(geom_types, coordinates)]
geom_collection_ = shapely.geometry.GeometryCollection(geometry_collection)
return geom_collection_
def transform_other_tags_(other_tags):
"""
Transform an ``'other_tags'`` record into a dictionary.
:param other_tags: data of a single record in the ``'other_tags'`` feature
:type other_tags: str or None
:return: parsed data of the ``'other_tags'`` record
:rtype: dict or None
"""
if other_tags:
raw_other_tags = (re.sub('^"|"$', '', each_tag)
for each_tag in re.split('(?<="),(?=")', other_tags))
other_tags_ = {
k: v.replace('<br>', ' ') for k, v in
(re.split('"=>"?', each_tag)
for each_tag in filter(None, raw_other_tags))}
else:  # e.g. `other_tags` is None
other_tags_ = other_tags
return other_tags_
if not pbf_layer_data.empty:
# Start parsing 'geometry' column
dat_geometry = pd.DataFrame(
x for x in pbf_layer_data.geometry).rename(columns={'type': 'geom_type'})
if geo_typ != 'other_relations':
# `geo_type` can be 'points', 'lines', 'multilinestrings' or 'multipolygons'
if transform_geom:
dat_geometry.coordinates = transform_single_geometry_(dat_geometry)
else: # geo_typ == 'other_relations'
if transform_geom:
dat_geometry.geometries = \
dat_geometry.geometries.map(transform_multi_geometries_)
dat_geometry.rename(columns={'geometries': 'coordinates'}, inplace=True)
# Start parsing 'properties' column
dat_properties = pd.DataFrame(x for x in pbf_layer_data.properties)
if transform_other_tags:
dat_properties.other_tags = dat_properties.other_tags.map(
transform_other_tags_)
parsed_layer_data = pbf_layer_data[['id']].join(dat_geometry).join(dat_properties)
parsed_layer_data.drop(['geom_type'], axis=1, inplace=True)
del dat_geometry, dat_properties
gc.collect()
else:
parsed_layer_data = pbf_layer_data
if 'id' in parsed_layer_data.columns:
parsed_layer_data.sort_values('id', inplace=True)
parsed_layer_data.index = range(len(parsed_layer_data))
return parsed_layer_data
def parse_osm_pbf(path_to_osm_pbf, number_of_chunks, parse_raw_feat, transform_geom,
transform_other_tags, max_tmpfile_size=None):
"""
Parse a PBF data file.
:param path_to_osm_pbf: absolute path to a PBF data file
:type path_to_osm_pbf: str
:param number_of_chunks: number of chunks
:type number_of_chunks: int or None
:param parse_raw_feat: whether to parse each feature in the raw data
:type parse_raw_feat: bool
:param transform_geom: whether to transform a single coordinate
(or a collection of coordinates) into a geometric object
:type transform_geom: bool
:param transform_other_tags: whether to transform an ``'other_tags'`` record
into a dictionary
:type transform_other_tags: bool
:param max_tmpfile_size: defaults to ``None``,
see also :py:func:`pydriosm.settings.gdal_configurations`
:type max_tmpfile_size: int or None
:return: parsed OSM PBF data
:rtype: dict
.. _pydriosm-reader-parse_osm_pbf:
.. note::
This function can require a fairly large amount of physical memory to read
a large file (e.g. > 200MB).
The driver categorises features into 5 layers:
- **0: 'points'** - "node" features having significant tags attached
- **1: 'lines'** - "way" features being recognized as non-area
- **2: 'multilinestrings'** -
"relation" features forming a multilinestring
(type='multilinestring' / type='route')
- **3: 'multipolygons'** - "relation" features forming a multipolygon
(type='multipolygon' / type='boundary'),
and "way" features being recognized as area
- **4: 'other_relations'** - "relation" features
not belonging to the two "relation" layers above
See also [`POP-1 <https://gdal.org/drivers/vector/osm.html>`_].
**Example**::
>>> import os
>>> from pydriosm.reader import GeofabrikDownloader, parse_osm_pbf
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_name = 'Rutland'
>>> file_fmt = ".pbf"
>>> dwnld_dir = "tests"
>>> path_to_rutland_pbf = geofabrik_downloader.download_osm_data(
... sr_name, file_fmt, dwnld_dir, verbose=True, ret_download_path=True)
Confirmed to download .osm.pbf data of the following geographic region(s):
Rutland
? [No]|Yes: yes
Downloading "rutland-latest.osm.pbf" to "\\tests" ...
Done.
>>> rutland_pbf_raw = parse_osm_pbf(path_to_rutland_pbf, number_of_chunks=50,
... parse_raw_feat=False, transform_geom=False,
... transform_other_tags=False)
>>> print(list(rutland_pbf_raw.keys()))
['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']
>>> rutland_pbf_raw_points = rutland_pbf_raw['points']
>>> print(rutland_pbf_raw_points.head())
points
0 {"type": "Feature", "geometry": {"type": "Poin...
1 {"type": "Feature", "geometry": {"type": "Poin...
2 {"type": "Feature", "geometry": {"type": "Poin...
3 {"type": "Feature", "geometry": {"type": "Poin...
4 {"type": "Feature", "geometry": {"type": "Poin...
>>> rutland_pbf_parsed = parse_osm_pbf(path_to_rutland_pbf, number_of_chunks=50,
... parse_raw_feat=True, transform_geom=False,
... transform_other_tags=False)
>>> rutland_pbf_parsed_points = rutland_pbf_parsed['points']
>>> print(rutland_pbf_parsed_points.head())
id coordinates ... man_made other_tags
0 488432 [-0.5134241, 52.6555853] ... None "odbl"=>"clean"
1 488658 [-0.5313354, 52.6737716] ... None None
2 13883868 [-0.7229332, 52.5889864] ... None None
3 14049101 [-0.7249922, 52.6748223] ... None "traffic_calming"=>"cushion"
4 14558402 [-0.7266686, 52.6695051] ... None "direction"=>"clockwise"
[5 rows x 12 columns]
>>> rutland_pbf_parsed_1 = parse_osm_pbf(path_to_rutland_pbf, number_of_chunks=50,
... parse_raw_feat=True, transform_geom=True,
... transform_other_tags=False)
>>> rutland_pbf_parsed_points_1 = rutland_pbf_parsed_1['points']
>>> print(rutland_pbf_parsed_points_1[['coordinates']].head())
coordinates
0 POINT (-0.5134241 52.6555853)
1 POINT (-0.5313354 52.6737716)
2 POINT (-0.7229332000000001 52.5889864)
3 POINT (-0.7249922 52.6748223)
4 POINT (-0.7266686 52.6695051)
>>> rutland_pbf_parsed_2 = parse_osm_pbf(path_to_rutland_pbf, number_of_chunks=50,
... parse_raw_feat=True, transform_geom=True,
... transform_other_tags=True)
>>> rutland_pbf_parsed_points_2 = rutland_pbf_parsed_2['points']
>>> print(rutland_pbf_parsed_points_2[['coordinates', 'other_tags']].head())
coordinates other_tags
0 POINT (-0.5134241 52.6555853) {'odbl': 'clean'}
1 POINT (-0.5313354 52.6737716) None
2 POINT (-0.7229332000000001 52.5889864) None
3 POINT (-0.7249922 52.6748223) {'traffic_calming': 'cushion'}
4 POINT (-0.7266686 52.6695051) {'direction': 'clockwise'}
>>> # Delete the downloaded PBF data file
>>> os.remove(path_to_rutland_pbf)
.. seealso::
The examples for the method :py:meth:`GeofabrikReader.read_osm_pbf()
<pydriosm.reader.GeofabrikReader.read_osm_pbf>`.
"""
parse_raw_feat_ = True if transform_geom or transform_other_tags \
else copy.copy(parse_raw_feat)
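# Transforming geometry or 'other_tags' only works on parsed features,
# so parsing is forced whenever either transformation is requested.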
import ogr
if max_tmpfile_size:
gdal_configurations(max_tmpfile_size=max_tmpfile_size)
raw_osm_pbf = ogr.Open(path_to_osm_pbf)
layer_names, all_layer_data = [], []
# Parse the data feature by feature
layer_count = raw_osm_pbf.GetLayerCount()
# Loop through all available layers
for i in range(layer_count):
# Get the data and name of the i-th layer
layer_dat = raw_osm_pbf.GetLayerByIndex(i)
layer_name = layer_dat.GetName()
layer_names.append(layer_name)
if number_of_chunks:
features = [feature for _, feature in enumerate(layer_dat)]
# number_of_chunks = file_size_in_mb / chunk_size_limit
# chunk_size = len(features) / number_of_chunks
feats = split_list(lst=features, num_of_sub=number_of_chunks)
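# (`split_list` from pyhelpers.ops divides `features` into `number_of_chunks`
# sub-lists, as its parameter names suggest, so each chunk can be parsed and
# released separately to keep memory usage down.)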
del features
gc.collect()
all_lyr_dat = []
for feat in feats:
if parse_raw_feat_:
lyr_dat_ = pd.DataFrame(f.ExportToJson(as_object=True) for f in feat)
lyr_dat = parse_osm_pbf_layer(
lyr_dat_, geo_typ=layer_name, transform_geom=transform_geom,
transform_other_tags=transform_other_tags)
del lyr_dat_
gc.collect()
else:
lyr_dat = pd.DataFrame(f.ExportToJson() for f in feat)
lyr_dat.columns = [layer_name]
all_lyr_dat.append(lyr_dat)
del feat, lyr_dat
gc.collect()
layer_data = pd.concat(all_lyr_dat, ignore_index=True, sort=False)
else:
if parse_raw_feat_:
layer_data_ = pd.DataFrame(feature.ExportToJson(as_object=True)
for _, feature in enumerate(layer_dat))
layer_data = parse_osm_pbf_layer(
layer_data_, geo_typ=layer_name, transform_geom=transform_geom,
transform_other_tags=transform_other_tags)
del layer_data_
gc.collect()
else:
layer_data = pd.DataFrame(
feature.ExportToJson() for _, feature in enumerate(layer_dat))
layer_data.columns = [layer_name]
all_layer_data.append(layer_data)
del layer_data
gc.collect()
# Make a dictionary of the form {layer name: layer data}
osm_pbf_data = dict(zip(layer_names, all_layer_data))
return osm_pbf_data
def unzip_shp_zip(path_to_shp_zip, path_to_extract_dir=None, layer_names=None,
mode='r', clustered=False, verbose=False, ret_extract_dir=False):
"""
Unzip a .shp.zip data file.
:param path_to_shp_zip: absolute path to a zipped shapefile data (.shp.zip)
:type path_to_shp_zip: str
:param path_to_extract_dir: absolute path to a directory where extracted files will
be saved; if ``None`` (default), use the same directory where the .shp.zip file is
:type path_to_extract_dir: str or None
:param layer_names: name of a .shp layer, e.g. 'railways',
or names of multiple layers;
if ``None`` (default), all available layers
:type layer_names: str or list or None
:param mode: the ``mode`` parameter of `zipfile.ZipFile()`_, defaults to ``'r'``
:type mode: str
:param clustered: whether to put the data files of different layers
into their respective folders, defaults to ``False``
:type clustered: bool
:param verbose: whether to print relevant information in console as the function runs,
defaults to ``False``
:type verbose: bool or int
:param ret_extract_dir: whether to return the path to the directory
where extracted files are saved, defaults to ``False``
:type ret_extract_dir: bool
:return: the path to the directory of extracted files when ``ret_extract_dir=True``
:rtype: str
.. _`zipfile.ZipFile()`:
https://docs.python.org/3/library/zipfile.html#zipfile-objects
**Examples**::
>>> import os
>>> from pyhelpers.dir import cd, delete_dir
>>> from pydriosm.reader import GeofabrikDownloader, unzip_shp_zip
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_name = 'Rutland'
>>> file_fmt = ".shp"
>>> dwnld_dir = "tests"
>>> path_to_rutland_shp_zip = geofabrik_downloader.download_osm_data(
... sr_name, file_fmt, dwnld_dir, ret_download_path=True)
Confirmed to download .shp.zip data of the following geographic region(s):
Rutland
? [No]|Yes: yes
>>> layer_name = 'railways'
>>> unzip_shp_zip(path_to_rutland_shp_zip, layer_names=layer_name, verbose=True)
Extracting from "rutland-latest-free.shp.zip" the following layer(s):
'railways'
to "\\tests\\rutland-latest-free-shp" ...
In progress ... Done.
>>> path_to_rutland_shp_dir = unzip_shp_zip(path_to_rutland_shp_zip, verbose=True,
... ret_extract_dir=True)
Extracting all of "rutland-latest-free.shp.zip" to "\\tests\\rutland-latest-free-shp"
In progress ... Done.
>>> print(os.path.relpath(path_to_rutland_shp_dir))
tests\\rutland-latest-free-shp
>>> lyr_names = ['railways', 'transport', 'traffic']
>>> paths_to_layer_dirs = unzip_shp_zip(path_to_rutland_shp_zip,
... layer_names=lyr_names, clustered=True,
... verbose=2, ret_extract_dir=True)
Extracting from "rutland-latest-free.shp.zip" the following layer(s):
'railways'
'transport'
'traffic'
to "\\tests\\rutland-latest-free-shp" ...
In progress ... Done.
Clustering the layer data ...
railways ...
transport ...
traffic ...
traffic_a ...
transport_a ...
Done.
>>> for path_to_lyr_dir in paths_to_layer_dirs:
... print(os.path.relpath(path_to_lyr_dir))
tests\\rutland-latest-free-shp\\railways
tests\\rutland-latest-free-shp\\transport
tests\\rutland-latest-free-shp\\traffic
>>> # Delete the extracted files
>>> delete_dir(os.path.dirname(path_to_lyr_dir), verbose=True)
The directory "\\tests\\rutland-latest-free-shp" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\rutland-latest-free-shp" ... Done.
>>> # Delete the downloaded .shp.zip data file
>>> os.remove(path_to_rutland_shp_zip)
"""
extract_dir = path_to_extract_dir if path_to_extract_dir \
else os.path.splitext(path_to_shp_zip)[0].replace(".", "-")
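# e.g. "rutland-latest-free.shp.zip" is, by default, extracted to a directory
# named "rutland-latest-free-shp" alongside the .shp.zip file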
if not layer_names:
layer_names_ = layer_names
if verbose:
print("Extracting all of \"{}\" to \"\\{}\" ... ".format(
os.path.basename(path_to_shp_zip), os.path.relpath(extract_dir)))
else:
layer_names_ = [layer_names] if isinstance(layer_names, str) \
else layer_names.copy()
if verbose:
print("Extracting from \"{}\" the following layer(s):".format(
os.path.basename(path_to_shp_zip)))
print("\t{}".format("\n\t".join([f"'{x}'" for x in layer_names_])))
print("to \"\\{}\" ... ".format(os.path.relpath(extract_dir)))
print("In progress", end=" ... ") if verbose else ""
try:
with zipfile.ZipFile(path_to_shp_zip, mode) as shp_zip:
if layer_names_:
extract_files = [f.filename for f in shp_zip.filelist
if any(x in f.filename for x in layer_names_)]
else:
extract_files = None
shp_zip.extractall(extract_dir, members=extract_files)
shp_zip.close()
if isinstance(extract_files, list) and len(extract_files) == 0:
if verbose:
print("The specified layer does not exist.\nNo data has been extracted. ")
else:
print("Done. ") if verbose else ""
if clustered:
print("Clustering the layer data ... ") if verbose else ""
file_list = extract_files if extract_files else os.listdir(extract_dir)
if 'README' in file_list:
file_list.remove('README')
filenames_ = [os.path.splitext(x)[0] for x in file_list]
exts_ = [os.path.splitext(x)[1] for x in file_list]
filenames, exts = list(set(filenames_)), list(set(exts_))
layer_names_ = [find_shp_layer_name(f) for f in filenames]
extract_dirs = []
for lyr, fn in zip(layer_names_, filenames):
extract_dir_ = cd(extract_dir, lyr)
if verbose == 2:
print("\t{} ... ".format(lyr if '_a_' not in fn else lyr + '_a'))
for ext in exts:
filename = fn + ext
orig = cd(extract_dir, filename, mkdir=True)
dest = cd(extract_dir_, filename, mkdir=True)
shutil.copyfile(orig, dest)
os.remove(orig)
extract_dirs.append(extract_dir_)
extract_dir = list(set(extract_dirs))
print("Done. ") if verbose == 2 else ""
except Exception as e:
print("Failed. {}".format(e)) if verbose else ""
if ret_extract_dir:
return extract_dir
def read_shp_file(path_to_shp, method='geopandas', **kwargs):
"""
Parse a shapefile.
:param path_to_shp: absolute path to a .shp data file
:type path_to_shp: str
:param method: the method used to read the .shp file;
if ``'geopandas'`` (default), use the `geopandas.read_file()`_ method,
otherwise use `shapefile.Reader()`_
:type method: str
:param kwargs: optional parameters of `geopandas.read_file()`_
:return: data frame of the .shp data
:rtype: pandas.DataFrame or geopandas.GeoDataFrame
.. _`geopandas.read_file()`: https://geopandas.org/reference/geopandas.read_file.html
.. _`shapefile.Reader()`: https://github.com/GeospatialPython/pyshp#reading-shapefiles
**Examples**::
>>> import os
>>> from pyhelpers.dir import cd, delete_dir
>>> from pydriosm.reader import GeofabrikDownloader, unzip_shp_zip, read_shp_file
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_name = 'Rutland'
>>> file_fmt = ".shp"
>>> dwnld_dir = "tests"
>>> path_to_rutland_shp_zip = geofabrik_downloader.download_osm_data(
... sr_name, file_fmt, dwnld_dir, ret_download_path=True)
Confirmed to download .shp.zip data of the following geographic region(s):
Rutland
? [No]|Yes: yes
>>> path_to_rutland_shp_dir = unzip_shp_zip(path_to_rutland_shp_zip,
... ret_extract_dir=True)
>>> railways_shp_filename = "gis_osm_railways_free_1.shp"
>>> path_to_rutland_railways_shp = cd(
... path_to_rutland_shp_dir, railways_shp_filename)
>>> rutland_railways_shp = read_shp_file(path_to_rutland_railways_shp,
... method='gpd')
>>> print(rutland_railways_shp.head())
osm_id code ... tunnel geometry
0 2162114 6101 ... F LINESTRING (-0.45281 52.69934, -0.45189 52.698...
1 3681043 6101 ... F LINESTRING (-0.65312 52.57308, -0.65318 52.572...
2 3693985 6101 ... F LINESTRING (-0.73234 52.67821, -0.73191 52.678...
3 3693986 6101 ... F LINESTRING (-0.61731 52.61323, -0.62419 52.614...
4 4806329 6101 ... F LINESTRING (-0.45769 52.70352, -0.45654 52.702...
[5 rows x 8 columns]
>>> rutland_railways_shp_ = read_shp_file(path_to_rutland_railways_shp,
... method='pyshp')
>>> print(rutland_railways_shp_.head())
osm_id code ... coords shape_type
0 2162114 6101 ... [(-0.4528083, 52.6993402), (-0.4518933, 52.698... 3
1 3681043 6101 ... [(-0.6531215, 52.5730787), (-0.6531793, 52.572... 3
2 3693985 6101 ... [(-0.7323403, 52.6782102), (-0.7319059, 52.678... 3
3 3693986 6101 ... [(-0.6173072, 52.6132317), (-0.6241869, 52.614... 3
4 4806329 6101 ... [(-0.4576926, 52.7035194), (-0.4565358, 52.702... 3
[5 rows x 9 columns]
>>> delete_dir(path_to_rutland_shp_dir, verbose=True)
The directory "\\tests\\rutland-latest-free-shp" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\rutland-latest-free-shp" ... Done.
>>> # Delete the downloaded shapefile
>>> os.remove(path_to_rutland_shp_zip)
"""
if method in ('geopandas', 'gpd'): # default
import geopandas as gpd
shp_data = gpd.read_file(path_to_shp, **kwargs)
else:
import shapefile
# Read .shp file using shapefile.Reader()
shp_reader = shapefile.Reader(path_to_shp)
# Transform the data to a DataFrame
field_names = [field[0] for field in shp_reader.fields[1:]]
shp_data = pd.DataFrame(shp_reader.records(), columns=field_names)
# Clean data
# shp_data['name'] = shp_data.name.str.encode('utf-8').str.decode('utf-8')
shape_info = pd.DataFrame(
((s.points, s.shapeType) for s in shp_reader.iterShapes()),
index=shp_data.index, columns=['coords', 'shape_type'])
shp_data = shp_data.join(shape_info)
shp_reader.close()
return shp_data
def get_default_shp_crs():
"""
Get default `CRS <https://en.wikipedia.org/wiki/Spatial_reference_system>`_
for saving shapefile format data.
:return: default settings of CRS
:rtype: dict
**Example**::
>>> from pydriosm.reader import get_default_shp_crs
>>> default_shp_crs = get_default_shp_crs()
>>> print(default_shp_crs)
{'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat'}
"""
crs = {'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat'}
return crs
def parse_layer_shp(path_to_layer_shp, feature_names=None, crs=None,
save_fclass_shp=False, driver='ESRI Shapefile',
ret_path_to_fclass_shp=False, **kwargs):
"""
Parse a layer of OSM shapefile data.
:param path_to_layer_shp: absolute path(s) to one (or multiple) shapefile(s)
:type path_to_layer_shp: str or list
:param feature_names: class name(s) of feature(s), defaults to ``None``
:type feature_names: str or list or None
:param crs: specification of coordinate reference system; if ``None`` (default),
use :py:func:`get_default_shp_crs()<pydriosm.reader.get_default_shp_crs>`
:type crs: dict or None
:param save_fclass_shp: (when ``feature_names`` is not ``None``)
whether to save the data of the specified feature class(es) as a shapefile,
defaults to ``False``
:type save_fclass_shp: bool
:param driver: the OGR format driver, defaults to ``'ESRI Shapefile'``;
see also the ``driver`` parameter of `geopandas.GeoDataFrame.to_file()`_
:type driver: str
:param ret_path_to_fclass_shp: (when ``save_fclass_shp=True``)
whether to return the path to the saved feature-class data, defaults to ``False``
:type ret_path_to_fclass_shp: bool
:param kwargs: optional parameters of
:py:func:`read_shp_file()<pydriosm.reader.read_shp_file>`
:return: parsed shapefile data
:rtype: geopandas.GeoDataFrame
.. _`geopandas.GeoDataFrame.to_file()`:
https://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file
**Examples**::
>>> import os
>>> from pyhelpers.dir import cd, delete_dir
>>> from pydriosm.reader import GeofabrikDownloader
>>> from pydriosm.reader import parse_layer_shp, unzip_shp_zip
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_name = 'Rutland'
>>> path_to_rutland_shp_zip = geofabrik_downloader.download_osm_data(
... sr_name, osm_file_format=".shp", download_dir="tests",
... confirmation_required=False, ret_download_path=True)
>>> # Extract the downloaded .shp.zip file
>>> rutland_shp_dir = unzip_shp_zip(path_to_rutland_shp_zip, ret_extract_dir=True)
>>> path_to_railways_shp = cd(rutland_shp_dir, "gis_osm_railways_free_1.shp")
>>> rutland_railways_shp = parse_layer_shp(path_to_railways_shp)
>>> print(rutland_railways_shp.head())
osm_id code ... tunnel geometry
0 2162114 6101 ... F LINESTRING (-0.45281 52.69934, -0.45189 52.698...
1 3681043 6101 ... F LINESTRING (-0.65312 52.57308, -0.65318 52.572...
2 3693985 6101 ... F LINESTRING (-0.73234 52.67821, -0.73191 52.678...
3 3693986 6101 ... F LINESTRING (-0.61731 52.61323, -0.62419 52.614...
4 4806329 6101 ... F LINESTRING (-0.45769 52.70352, -0.45654 52.702...
[5 rows x 8 columns]
>>> rutland_railways_rail, path_to_rutland_railways_rail = parse_layer_shp(
... path_to_railways_shp, feature_names='rail', save_fclass_shp=True,
... ret_path_to_fclass_shp=True)
>>> print(rutland_railways_rail.head())
osm_id code ... tunnel geometry
0 2162114 6101 ... F LINESTRING (-0.45281 52.69934, -0.45189 52.698...
1 3681043 6101 ... F LINESTRING (-0.65312 52.57308, -0.65318 52.572...
2 3693985 6101 ... F LINESTRING (-0.73234 52.67821, -0.73191 52.678...
3 3693986 6101 ... F LINESTRING (-0.61731 52.61323, -0.62419 52.614...
4 4806329 6101 ... F LINESTRING (-0.45769 52.70352, -0.45654 52.702...
[5 rows x 8 columns]
>>> print(os.path.relpath(path_to_rutland_railways_rail))
tests\\rutland-latest-free-shp\\railways\\gis_osm_railways_free_1_rail.shp
>>> # Delete the extracted data files
>>> delete_dir(rutland_shp_dir, verbose=True)
The directory "\\tests\\rutland-latest-free-shp" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\rutland-latest-free-shp" ... Done.
>>> # Delete the downloaded shapefile
>>> os.remove(path_to_rutland_shp_zip)
"""
path_to_lyr_shp = [path_to_layer_shp] if isinstance(path_to_layer_shp, str) \
else copy.copy(path_to_layer_shp)
if len(path_to_lyr_shp) == 0:
shp_data = None
else:
if crs is None:
crs = get_default_shp_crs()
if len(path_to_lyr_shp) == 1:
path_to_lyr_shp_ = path_to_lyr_shp[0]
# gpd.GeoDataFrame(read_shp_file(path_to_shp))
shp_data = read_shp_file(path_to_lyr_shp_, **kwargs)
else:
shp_data = [read_shp_file(path_to_lyr_shp_, **kwargs)
for path_to_lyr_shp_ in path_to_lyr_shp]
shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
shp_data.crs = crs
if feature_names:
feature_names_ = [feature_names] if isinstance(feature_names, str) \
else feature_names.copy()
# valid_features = shp_data.fclass.unique().tolist()
# if any(f for f in feature_names_ if f not in valid_features):
# raise ValueError(f"`feature_names` must belong to {valid_features}")
if ('type' in shp_data.columns) and ('fclass' not in shp_data.columns):
shp_data.rename(columns={'type': 'fclass'}, inplace=True)
shp_data = shp_data.query('fclass in @feature_names_')
if save_fclass_shp:
path_to_lyr_shp_ = path_to_lyr_shp[0].replace("_a_", "_")
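# ("_a_" presumably marks the area-geometry variant of a Geofabrik layer
# shapefile, e.g. "gis_osm_transport_a_free_1.shp"; dropping it makes the
# saved file use the base layer filename.)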
path_to_lyr_feat_shp = append_fclass_to_filename(path_to_lyr_shp_,
feature_names_)
shp_data.to_file(path_to_lyr_feat_shp, driver=driver)
if ret_path_to_fclass_shp:
shp_data = shp_data, path_to_lyr_feat_shp
return shp_data
def merge_shps(paths_to_shp_files, path_to_merged_dir, method='geopandas'):
"""
Merge multiple shapefiles.
:param paths_to_shp_files: list of absolute paths to shapefiles (in .shp format)
:type paths_to_shp_files: list
:param path_to_merged_dir: absolute path to a directory
where the merged files are to be saved
:type path_to_merged_dir: str
:param method: the method used to merge/save .shp files;
if ``'geopandas'`` (default), use the `geopandas.GeoDataFrame.to_file`_ method,
use `shapefile.Writer`_ otherwise
:type method: str
.. _`geopandas.GeoDataFrame.to_file`:
https://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file
.. _`shapefile.Writer`: https://github.com/GeospatialPython/pyshp#writing-shapefiles
See the example for the function :py:func:`merge_layer_shps()
<pydriosm.reader.merge_layer_shps>`.
"""
if method in ('geopandas', 'gpd'):
import geopandas as gpd
shp_data, geom_types = [], []
for shp_file_path in paths_to_shp_files:
shp_dat = gpd.read_file(shp_file_path)
shp_data.append(shp_dat)
geom_types.append(shp_dat['geometry'].type[0])
geom_types_ = list(set(geom_types))
if len(geom_types_) > 1:
shp_data_dict = collections.defaultdict(list)
for geo_typ, shp_dat in zip(geom_types, shp_data):
shp_data_dict[geo_typ].append(shp_dat)
for k, v in shp_data_dict.items():
shp_data_ = pd.concat(v, ignore_index=True)
shp_data_.crs = get_default_shp_crs()
shp_data_.to_file(filename=path_to_merged_dir + f"_{k.lower()}",
driver="ESRI Shapefile")
else:
merged_shp_data = pd.concat(shp_data, ignore_index=True)
merged_shp_data.crs = get_default_shp_crs()
merged_shp_data.to_file(filename=path_to_merged_dir, driver="ESRI Shapefile")
else: # method == 'pyshp'
import shapefile
# Resource: https://github.com/GeospatialPython/pyshp
w = shapefile.Writer(path_to_merged_dir)
for f in paths_to_shp_files:
r = shapefile.Reader(f)
w.fields = r.fields[1:] # skip first deletion field
w.shapeType = r.shapeType
for shaperec in r.iterShapeRecords():
w.record(*shaperec.record)
w.shape(shaperec.shape)
r.close()
w.close()
def merge_layer_shps(paths_to_shp_zip_files, layer_name, method='geopandas',
rm_zip_extracts=True, merged_shp_dir=None, rm_shp_temp=True,
verbose=False, ret_merged_shp_path=False):
"""
Merge shapefiles over a layer for multiple geographic regions.
:param paths_to_shp_zip_files: list of absolute paths to data of shapefiles
(in .shp.zip format)
:type paths_to_shp_zip_files: list
:param layer_name: name of a layer (e.g. 'railways')
:type layer_name: str
:param method: the method used to merge/save .shp files;
if ``'geopandas'`` (default), use the `geopandas.GeoDataFrame.to_file`_ method,
use `shapefile.Writer`_ otherwise
:type method: str
:param rm_zip_extracts: whether to delete the extracted files, defaults to ``True``
:type rm_zip_extracts: bool
:param rm_shp_temp: whether to delete temporary layer files, defaults to ``True``
:type rm_shp_temp: bool
:param merged_shp_dir: if ``None`` (default), use the layer name
as the name of the folder where the merged .shp files will be saved
:type merged_shp_dir: str or None
:param verbose: whether to print relevant information in console as the function runs,
defaults to ``False``
:type verbose: bool or int
:param ret_merged_shp_path: whether to return the path to the merged .shp file,
defaults to ``False``
:type ret_merged_shp_path: bool
:return: the path to the merged file when ``ret_merged_shp_path=True``
:rtype: list or str
.. _`geopandas.GeoDataFrame.to_file`:
https://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file
.. _`shapefile.Writer`: https://github.com/GeospatialPython/pyshp#writing-shapefiles
.. note::
This function does not create a projection file (.prj) for the merged shapefile
(see also [`MMS-1
<http://geospatialpython.com/2011/02/create-prj-projection-file-for.html>`_])
For valid ``layer_name``, check
:py:func:`get_valid_shp_layer_names()<pydriosm.utils.get_valid_shp_layer_names>`.
.. _pydriosm-reader-merge_layer_shps:
**Example**::
>>> import os
>>> from pyhelpers.dir import delete_dir
>>> from pydriosm.downloader import GeofabrikDownloader
>>> from pydriosm.reader import merge_layer_shps
>>> # To merge the 'railways' layers of Greater Manchester and West Yorkshire
>>> geofabrik_downloader = GeofabrikDownloader()
>>> sr_names = ['Greater Manchester', 'West Yorkshire']
>>> dat_dir = "tests"
>>> shp_zip_file_paths = geofabrik_downloader.download_osm_data(
... sr_names, osm_file_format=".shp", download_dir=dat_dir,
... confirmation_required=False, ret_download_path=True)
>>> lyr_name = 'railways'
>>> merged_shp_path = merge_layer_shps(shp_zip_file_paths, layer_name=lyr_name,
... verbose=True, ret_merged_shp_path=True)
Extracting from "greater-manchester-latest-free.shp.zip" the following layer(s):
'railways'
to "\\tests\\greater-manchester-latest-free-shp" ...
In progress ... Done.
Extracting from "west-yorkshire-latest-free.shp.zip" the following layer(s):
'railways'
to "\\tests\\west-yorkshire-latest-free-shp" ...
In progress ... Done.
Merging the following shapefiles:
"greater-manchester_gis_osm_railways_free_1.shp"
"west-yorkshire_gis_osm_railways_free_1.shp"
In progress ... Done.
Find the merged .shp file(s) at "\\tests\\greater-manchester_west-yorkshire_railways".
>>> print(os.path.relpath(merged_shp_path))
tests\\...\\greater-manchester_west-yorkshire_railways.shp
>>> # Delete the merged shapefile
>>> delete_dir(os.path.dirname(merged_shp_path), verbose=True)
The directory "\\tests\\greater-manchester_west-yorkshire_railways" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\greater-manchester_west-yorkshire_railways" ... Done.
>>> # Delete the downloaded shapefiles
>>> for shp_zip_file_path in shp_zip_file_paths: os.remove(shp_zip_file_path)
.. seealso::
The examples for the method :py:meth:`GeofabrikReader.merge_subregion_layer_shp()
<pydriosm.reader.GeofabrikReader.merge_subregion_layer_shp>`.
"""
path_to_extract_dirs = []
for path_to_shp_zip in paths_to_shp_zip_files:
extract_dir = unzip_shp_zip(path_to_shp_zip, layer_names=layer_name,
verbose=verbose, ret_extract_dir=True)
path_to_extract_dirs.append(extract_dir)
region_names = [re.search(r'.*(?=\.shp\.zip)',
os.path.basename(x).replace("-latest-free", "")).group(0)
for x in paths_to_shp_zip_files]
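# e.g. "greater-manchester-latest-free.shp.zip" -> "greater-manchester"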
# Specify a directory that stores files for the specific layer
path_to_data_dir = os.path.commonpath(paths_to_shp_zip_files)
prefix = "_".join([x.lower().replace(' ', '-') for x in region_names]) + "_"
suffix = "_temp"
merged_dirname_temp = f"{prefix}{layer_name}{suffix}"
path_to_merged_dir_temp = cd(path_to_data_dir, merged_dirname_temp, mkdir=True)
# Copy files into a temp directory
paths_to_temp_files = []
for subregion_name, path_to_extract_dir in zip(region_names, path_to_extract_dirs):
orig_filename_list = glob.glob1(path_to_extract_dir, f"*{layer_name}*")
for orig_filename in orig_filename_list:
orig = cd(path_to_extract_dir, orig_filename)
dest = cd(path_to_merged_dir_temp,
f"{subregion_name.lower().replace(' ', '-')}_{orig_filename}")
shutil.copyfile(orig, dest)
paths_to_temp_files.append(dest)
# Get the absolute paths to the target .shp files
paths_to_shp_files = [x for x in paths_to_temp_files if x.endswith(".shp")]
if verbose:
print("Merging the following shapefiles:")
print("\t{}".format("\n\t".join("\"{}\"".format(os.path.basename(f))
for f in paths_to_shp_files)))
print("In progress ... ", end="")
try:
if merged_shp_dir:
path_to_merged_dir = cd(validate_input_data_dir(merged_shp_dir), mkdir=True)
else:
path_to_merged_dir = cd(path_to_data_dir,
merged_dirname_temp.replace(suffix, "", -1),
mkdir=True)
merge_shps(paths_to_shp_files, path_to_merged_dir, method)
if method in ('geopandas', 'gpd'):
# shp_data, geom_types = [], []
# for shp_file_path in paths_to_shp_files:
# shp_dat = gpd.read_file(shp_file_path)
# shp_data.append(shp_dat)
# geom_types.append(shp_dat['geometry'].type[0])
#
# geom_types_ = list(set(geom_types))
# if len(geom_types_) > 1:
# shp_data_dict = collections.defaultdict(list)
# for geo_typ, shp_dat in zip(geom_types, shp_data):
# shp_data_dict[geo_typ].append(shp_dat)
#
# for k, v in shp_data_dict.items():
# shp_data_ = pd.concat(v, ignore_index=True)
# shp_data_.crs = get_default_shp_crs()
# shp_data_.to_file(filename=path_to_merged_dir + f"_{k.lower()}",
# driver="ESRI Shapefile")
if not os.listdir(path_to_merged_dir):
temp_dirs = []
for temp_output_file in glob.glob(
cd(path_to_merged_dir + "*", f"{prefix}*")):
output_file = cd(path_to_merged_dir_temp.replace(suffix, ""))
shutil.move(temp_output_file, output_file)
temp_dirs.append(os.path.dirname(temp_output_file))
for temp_dir in set(temp_dirs):
shutil.rmtree(temp_dir)
# else:
# merged_shp_data = pd.concat(shp_data, ignore_index=True)
# merged_shp_data.crs = get_default_shp_crs()
# merged_shp_data.to_file(filename=path_to_merged_dir,
# driver="ESRI Shapefile")
else: # method == 'pyshp'
# # Resource: https://github.com/GeospatialPython/pyshp
# w = shapefile.Writer(cd(path_to_merged_dir))
# for f in paths_to_shp_files:
# r = shapefile.Reader(f)
# w.fields = r.fields[1:] # skip first deletion field
# w.shapeType = r.shapeType
# for shaperec in r.iterShapeRecords():
# w.record(*shaperec.record)
# w.shape(shaperec.shape)
# r.close()
# w.close()
temp_dir = os.path.dirname(path_to_merged_dir)
paths_to_output_files_temp = [glob.glob(cd(temp_dir, f"{prefix}*.{ext}"))
for ext in ("dbf", "shp", "shx")]
paths_to_output_files_temp = \
list(itertools.chain.from_iterable(paths_to_output_files_temp))
for temp_output_file in paths_to_output_files_temp:
output_file = cd(path_to_merged_dir,
os.path.basename(temp_output_file).replace(suffix, ""))
shutil.move(temp_output_file, output_file)
print("Done.") if verbose else ""
if rm_zip_extracts:
for path_to_extract_dir in path_to_extract_dirs:
shutil.rmtree(path_to_extract_dir)
if rm_shp_temp:
shutil.rmtree(path_to_merged_dir_temp)
if verbose:
print("Find the merged .shp file(s) at \"\\{}\".".format(
os.path.relpath(path_to_merged_dir)))
if ret_merged_shp_path:
path_to_merged_shp = glob.glob(cd(f"{path_to_merged_dir}*", "*.shp"))
if len(path_to_merged_shp) == 1:
path_to_merged_shp = path_to_merged_shp[0]
return path_to_merged_shp
except Exception as e:
print("Failed. {}".format(e)) if verbose else ""
def parse_csv_xz(path_to_csv_xz, col_names=None):
"""
Parse a compressed CSV (.csv.xz) data file.
:param path_to_csv_xz: absolute path to a .csv.xz data file
:type path_to_csv_xz: str
:param col_names: column names of .csv.xz data, defaults to ``None``
:type col_names: list or None
:return: tabular data of the CSV file
:rtype: pandas.DataFrame
See the example for the method
:py:meth:`BBBikeReader.read_csv_xz()<pydriosm.reader.BBBikeReader.read_csv_xz>`.
"""
csv_xz_raw = lzma.open(path_to_csv_xz, mode='rt', encoding='utf-8').readlines()
csv_xz_dat = [x.rstrip('\t\n').split('\t') for x in csv_xz_raw]
if col_names is None:
col_names = ['type', 'id', 'feature']
csv_xz = pd.DataFrame.from_records(csv_xz_dat, columns=col_names)
return csv_xz
def parse_geojson_xz(path_to_geojson_xz, fmt_geom=False):
"""
Parse a compressed Osmium GeoJSON (.geojson.xz) data file.
:param path_to_geojson_xz: absolute path to a .geojson.xz data file
:type path_to_geojson_xz: str
:param fmt_geom: whether to reformat coordinates into a geometric object,
defaults to ``False``
:type fmt_geom: bool
:return: tabular data of the Osmium GeoJSON file
:rtype: pandas.DataFrame
See the example for the method :py:meth:`BBBikeReader.read_geojson_xz()
<pydriosm.reader.BBBikeReader.read_geojson_xz>`.
"""
geojson_xz_raw = rapidjson.load(
lzma.open(path_to_geojson_xz, mode='rt', encoding='utf-8'))
geojson_xz_dat = pd.DataFrame.from_dict(geojson_xz_raw)
feature_types = geojson_xz_dat.features.map(
lambda x: x['type']).to_frame(name='feature_name')
geom_types = geojson_xz_dat.features.map(
lambda x: x['geometry']['type']).to_frame(name='geom_types')
if fmt_geom:
geom_types_funcs = get_osm_geom_object_dict()
def reformat_geom(geo_typ, coords):
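# Rebuild a shapely object from a GeoJSON 'type'/'coordinates' pair;
# MultiPolygon coordinates are nested one level deeper, so each sub-list
# is first converted to a Polygon.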
sub_geom_type_func = geom_types_funcs[geo_typ]
if geo_typ == 'MultiPolygon':
geom_coords = sub_geom_type_func(
geom_types_funcs['Polygon'](y) for x in coords for y in x)
else:
geom_coords = sub_geom_type_func(coords)
return geom_coords
coordinates = geojson_xz_dat.features.map(
lambda x: reformat_geom(
x['geometry']['type'],
x['geometry']['coordinates'])).to_frame(name='coordinates')
else:
coordinates = geojson_xz_dat.features.map(
lambda x: x['geometry']['coordinates']).to_frame(name='coordinates')
properties = geojson_xz_dat.features.map(
lambda x: x['properties']).to_frame(name='properties')
# decode_properties=False
#
# :param decode_properties: whether to transform a 'properties' dictionary into
# tabular form, defaults to ``False``
# :type decode_properties: bool
#
# if decode_properties:
# if confirmed("Confirmed to decode \"properties\"\n"
# "(Note this can be very computationally expensive and costing "
# "fairly large amount of memory)?"):
# properties = \
# pd.concat(properties['properties'].map(pd.json_normalize).to_list())
geojson_xz_data = \
pd.concat([feature_types, geom_types, coordinates, properties], axis=1)
del feature_types, geom_types, coordinates, properties
gc.collect()
return geojson_xz_data
class GeofabrikReader:
"""
A class representation of a tool for reading Geofabrik data extracts.
:param max_tmpfile_size: defaults to ``5000``,
see also :py:func:`pydriosm.settings.gdal_configurations`
:type max_tmpfile_size: int or None
**Example**::
>>> from pydriosm.reader import GeofabrikReader
>>> geofabrik_reader = GeofabrikReader()
>>> print(geofabrik_reader.Name)
Geofabrik OpenStreetMap data extracts
"""
def __init__(self, max_tmpfile_size=5000):
"""
Constructor method.
"""
self.Downloader = GeofabrikDownloader()
self.Name = copy.copy(self.Downloader.Name)
self.URL = copy.copy(self.Downloader.URL)
if max_tmpfile_size:
gdal_configurations(max_tmpfile_size=max_tmpfile_size)
def get_path_to_osm_pbf(self, subregion_name, data_dir=None):
"""
Get the absolute local path to a PBF (.osm.pbf) data file for a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on Geofabrik's free download server
:type subregion_name: str
:param data_dir: directory where the data file of the ``subregion_name`` is
located/saved; if ``None`` (default), the default local directory
:type data_dir: str or None
:return: path to PBF (.osm.pbf) file
:rtype: str or None
**Example**::
>>> import os
>>> from pydriosm.reader import GeofabrikReader
>>> geofabrik_reader = GeofabrikReader()
>>> sr_name = 'Rutland'
>>> path_to_rutland_pbf = geofabrik_reader.get_path_to_osm_pbf(sr_name)
>>> print(path_to_rutland_pbf)
# (if "rutland-latest.osm.pbf" is unavailable at the package data directory)
# None
>>> file_fmt = ".pbf"
>>> dwnld_dir = "tests"
>>> # Download the PBF data file of Rutland to "\\tests"
>>> geofabrik_reader.Downloader.download_osm_data(sr_name, file_fmt,
... dwnld_dir, verbose=True)
Confirmed to download .osm.pbf data of the following geographic region(s):
Rutland
? [No]|Yes: yes
Downloading "rutland-latest.osm.pbf" to "\\tests" ...
Done.
>>> path_to_rutland_pbf = geofabrik_reader.get_path_to_osm_pbf(
... sr_name, dwnld_dir)
>>> print(os.path.relpath(path_to_rutland_pbf))
tests\\rutland-latest.osm.pbf
>>> # Delete the downloaded PBF data file
>>> os.remove(path_to_rutland_pbf)
"""
osm_pbf_filename_, path_to_osm_pbf_ = \
self.Downloader.get_default_path_to_osm_file(
subregion_name, osm_file_format=".osm.pbf", mkdir=False)
if data_dir is None: # Go to default file path
path_to_osm_pbf = path_to_osm_pbf_
else:
osm_pbf_dir = validate_input_data_dir(data_dir)
path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename_)
if not os.path.isfile(path_to_osm_pbf):
path_to_osm_pbf = None
return path_to_osm_pbf
def read_osm_pbf(self, subregion_name, data_dir=None, chunk_size_limit=50,
parse_raw_feat=False, transform_geom=False,
transform_other_tags=False, update=False,
download_confirmation_required=True, pickle_it=False,
ret_pickle_path=False, rm_osm_pbf=False, verbose=False):
"""
Read a PBF (.osm.pbf) data file of a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on Geofabrik's free download server
:type subregion_name: str
:param data_dir: directory where the .osm.pbf data file is located/saved;
if ``None``, the default local directory
:type data_dir: str or None
:param chunk_size_limit: threshold (in MB) that triggers the use of chunk parser,
defaults to ``50``; if the size of the .osm.pbf file (in MB) is greater than
``chunk_size_limit``, it will be parsed in a chunk-wise way
:type chunk_size_limit: int
:param parse_raw_feat: whether to parse each feature in the raw data,
defaults to ``False``
:type parse_raw_feat: bool
:param transform_geom: whether to transform a single coordinate
(or a collection of coordinates) into a geometric object,
defaults to ``False``
:type transform_geom: bool
:param transform_other_tags: whether to transform an ``'other_tags'`` record into
a dictionary, defaults to ``False``
:type transform_other_tags: bool
:param update: whether to update the pickle backup (if one is available),
defaults to ``False``
:type update: bool
:param download_confirmation_required: whether to ask for confirmation before
starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param pickle_it: whether to save the .pbf data as a .pickle file,
defaults to ``False``
:type pickle_it: bool
:param ret_pickle_path: whether to return an absolute path to
the saved pickle file (when ``pickle_it=True``)
:type ret_pickle_path: bool
:param rm_osm_pbf: whether to delete the downloaded .osm.pbf file,
defaults to ``False``
:type rm_osm_pbf: bool
:param verbose: whether to print relevant information in console as
the function runs, defaults to ``False``
:type verbose: bool or int
:return: dictionary of the .osm.pbf data; when ``pickle_it=True``,
return a tuple of the dictionary and an absolute path to the pickle file
:rtype: dict or tuple or None
.. _pydriosm-reader-geofabrik-read_osm_pbf:
**Examples**::
>>> import os
>>> from pydriosm.reader import GeofabrikReader
>>> geofabrik_reader = GeofabrikReader()
>>> sr_name = 'Rutland'
>>> dat_dir = "tests"
>>> rutland_pbf_raw = geofabrik_reader.read_osm_pbf(sr_name, dat_dir,
... verbose=True)
Confirmed to download .osm.pbf data of the following geographic region(s):
Rutland
? [No]|Yes: yes
>>> print(list(rutland_pbf_raw.keys()))
['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']
>>> rutland_pbf_raw_points = rutland_pbf_raw['points']
>>> print(rutland_pbf_raw_points.head())
points
0 {"type": "Feature", "geometry": {"type": "Poin...
1 {"type": "Feature", "geometry": {"type": "Poin...
2 {"type": "Feature", "geometry": {"type": "Poin...
3 {"type": "Feature", "geometry": {"type": "Poin...
4 {"type": "Feature", "geometry": {"type": "Poin...
>>> rutland_pbf_parsed = geofabrik_reader.read_osm_pbf(sr_name, dat_dir,
... parse_raw_feat=True,
... verbose=True)
Parsing "\\tests\\rutland-latest.osm.pbf" ... Done.
>>> rutland_pbf_parsed_points = rutland_pbf_parsed['points']
>>> print(rutland_pbf_parsed_points.head())
id coordinates ... other_tags
0 488432 [-0.5134241, 52.6555853] ... "odbl"=>"clean"
1 488658 [-0.5313354, 52.6737716] ... None
2 13883868 [-0.7229332, 52.5889864] ... None
3 14049101 [-0.7249922, 52.6748223] ... "traffic_calming"=>"cushion"
4 14558402 [-0.7266686, 52.6695051] ... "direction"=>"clockwise"
[5 rows x 12 columns]
>>> rutland_pbf_parsed_1 = geofabrik_reader.read_osm_pbf(sr_name, dat_dir,
... parse_raw_feat=True,
... transform_geom=True,
... verbose=True)
Parsing "\\tests\\rutland-latest.osm.pbf" ... Done.
>>> rutland_pbf_parsed_1_points = rutland_pbf_parsed_1['points']
>>> print(rutland_pbf_parsed_1_points[['coordinates']].head())
coordinates
0 POINT (-0.5134241 52.6555853)
1 POINT (-0.5313354 52.6737716)
2 POINT (-0.7229332000000001 52.5889864)
3 POINT (-0.7249922 52.6748223)
4 POINT (-0.7266686 52.6695051)
>>> rutland_pbf_parsed_2 = geofabrik_reader.read_osm_pbf(
... sr_name, dat_dir, parse_raw_feat=True, transform_geom=True,
... transform_other_tags=True, verbose=True)
>>> rutland_pbf_parsed_2_points = rutland_pbf_parsed_2['points']
>>> print(rutland_pbf_parsed_2_points[['other_tags']].head())
other_tags
0 {'odbl': 'clean'}
1 None
2 None
3 {'traffic_calming': 'cushion'}
4 {'direction': 'clockwise'}
>>> # Delete the downloaded PBF data file
>>> os.remove(f"{dat_dir}\\rutland-latest.osm.pbf")
"""
osm_file_format = ".osm.pbf"
assert isinstance(chunk_size_limit, int) or chunk_size_limit is None
osm_pbf_filename, path_to_osm_pbf = self.Downloader.get_default_path_to_osm_file(
subregion_name, osm_file_format=osm_file_format, mkdir=False)
if osm_pbf_filename and path_to_osm_pbf:
if not data_dir: # Go to default file path
path_to_osm_pbf = path_to_osm_pbf
else:
osm_pbf_dir = validate_input_data_dir(data_dir)
path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)
path_to_pickle = path_to_osm_pbf.replace(
osm_file_format, "-pbf.pickle" if parse_raw_feat else "-raw.pickle")
if os.path.isfile(path_to_pickle) and not update:
osm_pbf_data = load_pickle(path_to_pickle)
if ret_pickle_path:
osm_pbf_data = osm_pbf_data, path_to_pickle
else:
if not os.path.isfile(path_to_osm_pbf) or update:
# If the target file is not available, try downloading it first.
self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir, update=update,
confirmation_required=download_confirmation_required,
verbose=False)
if verbose and parse_raw_feat:
print("Parsing \"\\{}\"".format(os.path.relpath(path_to_osm_pbf)),
end=" ... ")
try:
number_of_chunks = get_number_of_chunks(
path_to_osm_pbf, chunk_size_limit)
osm_pbf_data = parse_osm_pbf(
path_to_osm_pbf, number_of_chunks=number_of_chunks,
parse_raw_feat=parse_raw_feat, transform_geom=transform_geom,
transform_other_tags=transform_other_tags)
print("Done. ") if verbose and parse_raw_feat else ""
if pickle_it:
save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
if ret_pickle_path:
osm_pbf_data = osm_pbf_data, path_to_pickle
if rm_osm_pbf:
remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)
except Exception as e:
print("Failed. {}".format(e))
osm_pbf_data = None
return osm_pbf_data
else:
print("Errors occur. Data might not be available for the \"subregion_name\".")
def get_path_to_osm_shp(self, subregion_name, layer_name=None, feature_name=None,
data_dir=None, file_ext=".shp"):
"""
Get the absolute path(s) to .shp file(s) for a geographic region
(by searching a local data directory).
:param subregion_name: name of a region/subregion (case-insensitive) available
on Geofabrik's free download server
:type subregion_name: str
:param layer_name: name of a .shp layer (e.g. ``'railways'``),
defaults to ``None``
:type layer_name: str or None
:param feature_name: name of a feature (e.g. ``'rail'``);
if ``None`` (default), all available features included
:type feature_name: str or None
:param data_dir: directory where the search is conducted;
if ``None`` (default), the default directory
:type data_dir: str or None
:param file_ext: file extension, defaults to ``".shp"``
:type file_ext: str
:return: path(s) to .shp file(s)
:rtype: list or str
**Examples**::
>>> import os
>>> from pyhelpers.dir import delete_dir
>>> from pydriosm.reader import GeofabrikReader
>>> from pydriosm.reader import unzip_shp_zip, parse_layer_shp
>>> geofabrik_reader = GeofabrikReader()
>>> sr_name = 'Rutland'
>>> file_fmt = ".shp"
>>> path_to_shp_file = geofabrik_reader.get_path_to_osm_shp(sr_name)
>>> print(path_to_shp_file)
# (if "gis.osm_railways_free_1.shp" is unavailable at the package data directory)
[]
>>> dwnld_dir = "tests"
>>> # Download the shapefiles of Rutland
>>> path_to_rutland_shp_zip = geofabrik_reader.Downloader.download_osm_data(
... sr_name, file_fmt, dwnld_dir, confirmation_required=False,
... ret_download_path=True)
>>> unzip_shp_zip(path_to_rutland_shp_zip, verbose=True)
Extracting all ... to "\\tests\\rutland-latest-free-shp" ...
In progress ... Done.
>>> lyr_name = 'railways'
>>> path_to_rutland_railways_shp = geofabrik_reader.get_path_to_osm_shp(
... sr_name, lyr_name, data_dir=dwnld_dir)
>>> print(os.path.relpath(path_to_rutland_railways_shp))
tests\\rutland-latest-free-shp\\gis_osm_railways_free_1.shp
>>> feat_name = 'rail'
>>> _ = parse_layer_shp(path_to_rutland_railways_shp, feature_names=feat_name,
... save_fclass_shp=True)
>>> path_to_rutland_railways_rail_shp = geofabrik_reader.get_path_to_osm_shp(
... sr_name, lyr_name, feat_name, data_dir=dwnld_dir)
>>> print(os.path.relpath(path_to_rutland_railways_rail_shp))
tests\\rutland-latest-free-shp\\railways\\gis_osm_railways_free_1_rail.shp
>>> # Delete the extracted files
>>> delete_dir(os.path.dirname(path_to_rutland_railways_shp), verbose=True)
The directory "\\tests\\rutland-latest-free-shp" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\rutland-latest-free-shp" ... Done.
>>> # Delete the downloaded .shp.zip file
>>> os.remove(path_to_rutland_shp_zip)
"""
if data_dir is None: # Go to default file path
_, path_to_shp_zip = self.Downloader.get_default_path_to_osm_file(
subregion_name, osm_file_format=".shp.zip", mkdir=False)
else:
shp_zip_filename = self.Downloader.get_default_osm_filename(
subregion_name, osm_file_format=".shp.zip")
path_to_shp_zip = cd(validate_input_data_dir(data_dir), shp_zip_filename)
shp_dir = os.path.splitext(path_to_shp_zip)[0].replace(".", "-")
if layer_name is None:
path_to_osm_shp_file = glob.glob(shp_dir + "\\*" + file_ext)
else:
layer_name_ = find_similar_str(layer_name, get_valid_shp_layer_names())
if feature_name is None:
pat = re.compile(r"gis_osm_{}(_a)?(_free)?(_1)?{}".format(
layer_name_, file_ext))
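# e.g. the pattern matches "gis_osm_railways_free_1.shp" when
# `layer_name_='railways'` and `file_ext='.shp'`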
path_to_osm_shp_file = [f for f in glob.glob(cd(shp_dir, f"*{file_ext}"))
if re.search(pat, f)]
else:
pat = re.compile(r"gis_osm_{}(_a)?(_free)?(_1)_{}{}".format(
layer_name_, feature_name, file_ext))
path_to_osm_shp_file = [
f for f in glob.glob(cd(shp_dir, layer_name_, f"*{file_ext}"))
if re.search(pat, f)]
# if not osm_file_paths: print("The required file may not exist.")
if len(path_to_osm_shp_file) == 1:
path_to_osm_shp_file = path_to_osm_shp_file[0]
return path_to_osm_shp_file
def merge_subregion_layer_shp(self, layer_name, subregion_names, data_dir=None,
method='geopandas', update=False,
download_confirmation_required=True,
rm_zip_extracts=True, merged_shp_dir=None,
rm_shp_temp=True, verbose=False,
ret_merged_shp_path=False):
"""
Merge shapefiles for a specific layer of two or multiple geographic regions.
:param subregion_names: a list of region/subregion names (case-insensitive)
that are available on Geofabrik's free download server
:type subregion_names: list
:param layer_name: name of a layer (e.g. 'railways')
:type layer_name: str
:param method: the method used to merge/save .shp files;
if ``'geopandas'`` (default),
use the `geopandas.GeoDataFrame.to_file`_ method,
otherwise, use `shapefile.Writer`_
:type method: str
:param update: whether to update the source .shp.zip files, defaults to ``False``
:type update: bool
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param data_dir: directory where the .shp.zip data files are located/saved;
if ``None``, the default directory
:type data_dir: str or None
:param rm_zip_extracts: whether to delete the extracted files,
defaults to ``True``
:type rm_zip_extracts: bool
:param rm_shp_temp: whether to delete temporary layer files, defaults to ``True``
:type rm_shp_temp: bool
:param merged_shp_dir: if ``None`` (default), use the layer name
as the name of the folder where the merged .shp files will be saved
:type merged_shp_dir: str or None
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:param ret_merged_shp_path: whether to return the path to the merged .shp file,
defaults to ``False``
:type ret_merged_shp_path: bool
:return: the path to the merged file when ``ret_merged_shp_path=True``
:rtype: list or str
.. _`geopandas.GeoDataFrame.to_file`:
https://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file
.. _`shapefile.Writer`:
https://github.com/GeospatialPython/pyshp#writing-shapefiles
.. _pydriosm-GeofabrikReader-merge_subregion_layer_shp:
**Examples**::
>>> import os
>>> from pyhelpers.dir import cd, delete_dir
>>> from pydriosm.reader import GeofabrikReader
>>> geofabrik_reader = GeofabrikReader()
>>> # To merge 'railways' of Greater Manchester and West Yorkshire
>>> lyr_name = 'railways'
>>> sr_names = ['Manchester', 'West Yorkshire']
>>> dat_dir = "tests"
>>> path_to_merged_shp_file = geofabrik_reader.merge_subregion_layer_shp(
... lyr_name, sr_names, dat_dir, verbose=True, ret_merged_shp_path=True)
Confirmed to download .shp.zip data of the following geographic region(s):
Greater Manchester
West Yorkshire
? [No]|Yes: yes
Downloading "greater-manchester-latest-free.shp.zip" to "\\tests" ...
Done.
Downloading "west-yorkshire-latest-free.shp.zip" to "\\tests" ...
Done.
Extracting from "greater-manchester-latest-free.shp.zip" the following layer(s):
'railways'
to "\\tests\\greater-manchester-latest-free-shp" ...
In progress ... Done.
Extracting from "west-yorkshire-latest-free.shp.zip" the following layer(s):
'railways'
to "\\tests\\west-yorkshire-latest-free-shp" ...
In progress ... Done.
Merging the following shapefiles:
"greater-manchester_gis_osm_railways_free_1.shp"
"west-yorkshire_gis_osm_railways_free_1.shp"
In progress ... Done.
Find ... file(s) at "\\tests\\greater-manchester_west-yorkshire_railways".
>>> print(os.path.relpath(path_to_merged_shp_file))
tests\\...\\greater-manchester_west-yorkshire_railways.shp
>>> # Delete the merged files
>>> delete_dir(os.path.dirname(path_to_merged_shp_file), verbose=True)
The directory "\\tests\\greater-manchester_west-yorkshire_railways" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\greater-manchester_west-yorkshire_railways" ... Done.
>>> # Delete the downloaded .shp.zip data files
>>> os.remove(cd(dat_dir, "greater-manchester-latest-free.shp.zip"))
>>> os.remove(cd(dat_dir, "west-yorkshire-latest-free.shp.zip"))
>>> # To merge 'transport' of Greater London, Kent and Surrey
>>> lyr_name = 'transport'
>>> sr_names = ['London', 'Kent', 'Surrey']
>>> path_to_merged_shp_files = geofabrik_reader.merge_subregion_layer_shp(
... lyr_name, sr_names, dat_dir, verbose=True, ret_merged_shp_path=True)
Confirmed to download .shp.zip data of the following geographic region(s):
Greater London
Kent
Surrey
? [No]|Yes: yes
Downloading "greater-london-latest-free.shp.zip" to "\\tests" ...
Done.
Downloading "kent-latest-free.shp.zip" to "\\tests" ...
Done.
Downloading "surrey-latest-free.shp.zip" to "\\tests" ...
Done.
Extracting from "greater-london-latest-free.shp.zip" the following layer(s):
'transport'
to "\\tests\\greater-london-latest-free-shp" ...
In progress ... Done.
Extracting from "kent-latest-free.shp.zip" the following layer(s):
'transport'
to "\\tests\\kent-latest-free-shp" ...
In progress ... Done.
Extracting from "surrey-latest-free.shp.zip" the following layer(s):
'transport'
to "\\tests\\surrey-latest-free-shp" ...
In progress ... Done.
Merging the following shapefiles:
"greater-london_gis_osm_transport_a_free_1.shp"
"greater-london_gis_osm_transport_free_1.shp"
"kent_gis_osm_transport_a_free_1.shp"
"kent_gis_osm_transport_free_1.shp"
"surrey_gis_osm_transport_a_free_1.shp"
"surrey_gis_osm_transport_free_1.shp"
In progress ... Done.
Find the merged .shp file(s) at "\\tests\\greater-london_kent_surrey_transport".
>>> for path_to_merged_shp_file in path_to_merged_shp_files:
... print(os.path.relpath(path_to_merged_shp_file))
tests\\...\\greater-london_kent_surrey_transport_point.shp
tests\\...\\greater-london_kent_surrey_transport_polygon.shp
>>> # Delete the merged files
>>> delete_dir(os.path.commonpath(path_to_merged_shp_files), verbose=True)
The directory "\\tests\\greater-london_kent_surrey_transport" is not empty.
Confirmed to delete it? [No]|Yes: yes
Deleting "\\tests\\greater-london_kent_surrey_transport" ... Done.
>>> # Delete the downloaded .shp.zip data files
>>> os.remove(cd(dat_dir, "greater-london-latest-free.shp.zip"))
>>> os.remove(cd(dat_dir, "kent-latest-free.shp.zip"))
>>> os.remove(cd(dat_dir, "surrey-latest-free.shp.zip"))
"""
# Make sure all the required shape files are ready
layer_name_ = find_similar_str(layer_name, get_valid_shp_layer_names())
subregion_names_ = [
self.Downloader.validate_input_subregion_name(x) for x in subregion_names]
osm_file_format = ".shp.zip"
# Download the files (if not available)
paths_to_shp_zip_files = self.Downloader.download_osm_data(
subregion_names_, osm_file_format=osm_file_format, download_dir=data_dir,
update=update, confirmation_required=download_confirmation_required,
deep_retry=True, interval_sec=0, verbose=verbose, ret_download_path=True)
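# Merge only if every .shp.zip archive is present on disk; merge_layer_shps()
# extracts the requested layer from each archive and merges the extracted
# shapefiles, as illustrated in the example output above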
if all(os.path.isfile(path_to_shp_zip_file)
for path_to_shp_zip_file in paths_to_shp_zip_files):
path_to_merged_shp = merge_layer_shps(
paths_to_shp_zip_files, layer_name_, method=method,
rm_zip_extracts=rm_zip_extracts, merged_shp_dir=merged_shp_dir,
rm_shp_temp=rm_shp_temp, verbose=verbose,
ret_merged_shp_path=ret_merged_shp_path)
if ret_merged_shp_path:
return path_to_merged_shp
def read_shp_zip(self, subregion_name, layer_names=None, feature_names=None,
data_dir=None, update=False, download_confirmation_required=True,
pickle_it=False, ret_pickle_path=False, rm_extracts=False,
rm_shp_zip=False, verbose=False):
"""
Read a .shp.zip data file of a geographic region.
:param subregion_name: name of a region/subregion (case-insensitive) available
on Geofabrik's free download server
:type subregion_name: str
:param layer_names: name of a .shp layer, e.g. 'railways',
or names of multiple layers; if ``None`` (default), all available layers
:type layer_names: str or list or None
:param feature_names: name of a feature, e.g. 'rail',
or names of multiple features;
if ``None`` (default), all available features
:type feature_names: str or list or None
:param data_dir: directory where the .shp.zip data file is located/saved;
if ``None`` (default), the default directory
:type data_dir: str or None
:param update: whether to update the pickle backup (if available),
defaults to ``False``
:type update: bool
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param pickle_it: whether to save the .shp data as a .pickle file,
defaults to ``False``
:type pickle_it: bool
:param ret_pickle_path: whether to return an absolute path to
the saved pickle file (when ``pickle_it=True``)
:type ret_pickle_path: bool
:param rm_extracts: whether to delete extracted files from the .shp.zip file,
defaults to ``False``
:type rm_extracts: bool
:param rm_shp_zip: whether to delete the downloaded .shp.zip file,
defaults to ``False``
:type rm_shp_zip: bool
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:return: dictionary of the shapefile data,
with keys and values being layer names and
tabular data (in the format of `geopandas.GeoDataFrame`_), respectively;
when ``pickle_it=True`` and ``ret_pickle_path=True``, return a tuple of
the dictionary and an absolute path to the pickle file
:rtype: dict or tuple or None
.. _`geopandas.GeoDataFrame`: https://geopandas.org/reference.html#geodataframe
**Examples**::
>>> from pydriosm.reader import GeofabrikReader
>>> geofabrik_reader = GeofabrikReader()
>>> sr_name = 'Rutland'
>>> dat_dir = "tests"
>>> rutland_shp = geofabrik_reader.read_shp_zip(sr_name, data_dir=dat_dir)
Confirmed to download .shp.zip data of the following geographic region(s):
Rutland
? [No]|Yes: yes
>>> print(list(rutland_shp.keys()))
['buildings',
'traffic',
'water',
'roads',
'places',
'pofw',
'waterways',
'pois',
'landuse',
'transport',
'natural',
'railways']
>>> rutland_shp_railways = rutland_shp['railways']
>>> print(rutland_shp_railways.head())
osm_id code ... tunnel geometry
0 2162114 6101 ... F LINESTRING (-0.45281 52.69934, -0.45189 52.698...
1 3681043 6101 ... F LINESTRING (-0.65312 52.57308, -0.65318 52.572...
2 3693985 6101 ... F LINESTRING (-0.73234 52.67821, -0.73191 52.678...
3 3693986 6101 ... F LINESTRING (-0.61731 52.61323, -0.62419 52.614...
4 4806329 6101 ... F LINESTRING (-0.45769 52.70352, -0.45654 52.702...
[5 rows x 8 columns]
>>> sr_layer = 'transport'
>>> rutland_shp_transport = geofabrik_reader.read_shp_zip(
... sr_name, sr_layer, data_dir=dat_dir, verbose=True, rm_extracts=True)
Deleting the extracts "\\tests\\rutland-latest-free-shp" ... Done.
>>> print(list(rutland_shp_transport.keys()))
['transport']
>>> print(rutland_shp_transport['transport'].head())
osm_id code fclass name geometry
0 472398147 5621 bus_stop None POINT (-0.73213 52.66974)
1 502322073 5621 bus_stop Fife Close POINT (-0.50962 52.66052)
2 502322075 5621 bus_stop Fife Close POINT (-0.50973 52.66058)
3 502322076 5621 bus_stop Aberdeen Close POINT (-0.51039 52.65817)
4 502322077 5621 bus_stop Arran Road (South End) POINT (-0.50973 52.65469)
>>> feat_name = 'bus_stop'
>>> rutland_shp_transport_bus_stop = geofabrik_reader.read_shp_zip(
... sr_name, sr_layer, feat_name, dat_dir, verbose=True, rm_extracts=True)
Extracting from "rutland-latest-free.shp.zip" the following layer(s):
'transport'
to "\\tests\\rutland-latest-free-shp" ...
In progress ... Done.
Deleting the extracts "\\tests\\rutland-latest-free-shp" ... Done.
>>> print(list(rutland_shp_transport_bus_stop.keys()))
['transport']
>>> print(rutland_shp_transport_bus_stop['transport'].fclass.unique())
['bus_stop']
>>> sr_layers = ['traffic', 'roads']
>>> feat_names = ['parking', 'trunk']
>>> rutland_shp_tr_pt = geofabrik_reader.read_shp_zip(
... sr_name, sr_layers, feat_names, dat_dir, verbose=True,
... rm_extracts=True, rm_shp_zip=True)
Extracting from "rutland-latest-free.shp.zip" the following layer(s):
'traffic'
'roads'
to "\\tests\\rutland-latest-free-shp" ...
In progress ... Done.
Deleting the extracts "\\tests\\rutland-latest-free-shp" ... Done.
Deleting "tests\\rutland-latest-free.shp.zip" ... Done.
>>> print(list(rutland_shp_tr_pt.keys()))
['traffic', 'roads']
>>> selected_columns = ['fclass', 'name', 'geometry']
>>> rutland_shp_tr_pt_traffic = rutland_shp_tr_pt['traffic']
>>> print(rutland_shp_tr_pt_traffic[selected_columns].head())
fclass name geometry
0 parking None POLYGON ((-0.66704 52.71108, -0.66670 52.71121...
1 parking None POLYGON ((-0.78712 52.71974, -0.78700 52.71991...
2 parking None POLYGON ((-0.70368 52.65567, -0.70362 52.65587...
3 parking None POLYGON ((-0.63381 52.66442, -0.63367 52.66441...
4 parking None POLYGON ((-0.62814 52.64093, -0.62701 52.64169...
>>> rutland_shp_tr_pt_roads = rutland_shp_tr_pt['roads']
>>> print(rutland_shp_tr_pt_roads[selected_columns].head())
fclass name geometry
0 trunk None LINESTRING (-0.72461 52.59642, -0.72452 52.596...
1 trunk Glaston Road LINESTRING (-0.64671 52.59353, -0.64590 52.593...
3 trunk Orange Street LINESTRING (-0.72293 52.58899, -0.72297 52.588...
11 trunk Ayston Road LINESTRING (-0.72483 52.59610, -0.72493 52.596...
12 trunk London Road LINESTRING (-0.72261 52.58759, -0.72264 52.587...
"""
osm_file_format = ".shp.zip"
shp_zip_filename, path_to_shp_zip = self.Downloader.get_default_path_to_osm_file(
subregion_name=subregion_name, osm_file_format=osm_file_format, mkdir=False)
if layer_names:
layer_names_ = [layer_names] if isinstance(layer_names, str) \
else layer_names.copy()
else:
layer_names_ = []  # an empty list implies all available layers
if feature_names:
feature_names_ = [feature_names] if isinstance(feature_names, str) \
else feature_names.copy()
else:
feature_names_ = []
if shp_zip_filename and path_to_shp_zip:
path_to_extract_dir = os.path.splitext(path_to_shp_zip)[0].replace(".", "-")
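# e.g. "rutland-latest-free.shp.zip" -> extract directory "rutland-latest-free-shp"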
if data_dir:
shp_zip_dir = validate_input_data_dir(data_dir)
path_to_shp_zip = cd(shp_zip_dir, shp_zip_filename)
path_to_extract_dir = cd(shp_zip_dir,
os.path.basename(path_to_extract_dir))
if layer_names_:  # layer_names is not None
# Make a local path for saving a pickle file for .shp data
filename_ = shp_zip_filename.replace("-latest-free.shp.zip", "")
sub_fname = "-".join(
x for x in [filename_] + layer_names_ +
(feature_names_ if feature_names_ else []) if x)
path_to_shp_pickle = cd(os.path.dirname(path_to_extract_dir),
sub_fname + "-shp.pickle")
else:
path_to_shp_pickle = path_to_extract_dir + ".pickle"
if os.path.isfile(path_to_shp_pickle) and not update:
shp_data = load_pickle(path_to_shp_pickle)
if ret_pickle_path:
shp_data = shp_data, path_to_shp_pickle
else:
# Download the requested OSM file (if it is not available)
if not os.path.exists(path_to_extract_dir):
if not os.path.exists(path_to_shp_zip):
self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir, update=update,
confirmation_required=download_confirmation_required,
verbose=verbose)
unzip_shp_zip(path_to_shp_zip, path_to_extract_dir,
layer_names=layer_names_, verbose=verbose)
if not layer_names_:
layer_names_ = list(set(
[find_shp_layer_name(x)
for x in os.listdir(cd(path_to_extract_dir))
if x != 'README']))
else:
unavailable_layers = []
layer_names_temp_ = [find_shp_layer_name(x)
for x in os.listdir(cd(path_to_extract_dir))
if x != 'README']
layer_names_temp = list(set(layer_names_ + layer_names_temp_))
for lyr_name in layer_names_temp:
shp_filename = self.get_path_to_osm_shp(subregion_name,
layer_name=lyr_name,
data_dir=data_dir)
if not shp_filename:
unavailable_layers.append(lyr_name)
if unavailable_layers:
if not os.path.exists(path_to_shp_zip):
self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir, update=update,
confirmation_required=download_confirmation_required,
verbose=verbose)
unzip_shp_zip(path_to_shp_zip, path_to_extract_dir,
layer_names=unavailable_layers, verbose=verbose)
if not layer_names_:
layer_names_ = layer_names_temp
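# Geofabrik layer shapefiles follow the naming pattern "gis_osm_<layer>_*.shp";
# a single layer may match several files, e.g. 'transport' matches both
# "gis_osm_transport_free_1.shp" and "gis_osm_transport_a_free_1.shp"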
paths_to_layers_shp = [
glob.glob(
cd(path_to_extract_dir, r"gis_osm_{}_*.shp".format(layer_name)))
for layer_name in layer_names_]
paths_to_layers_shp = [x for x in paths_to_layers_shp if x]
shp_data_ = [parse_layer_shp(p, feature_names=feature_names_)
for p in paths_to_layers_shp]
shp_data = dict(zip(layer_names_, shp_data_))
if pickle_it:
save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)
if ret_pickle_path:
shp_data = shp_data, path_to_shp_pickle
if os.path.exists(path_to_extract_dir) and rm_extracts:
if verbose:
print("Deleting the extracts \"\\{}\" ".format(
os.path.relpath(path_to_extract_dir)), end=" ... ")
try:
shutil.rmtree(path_to_extract_dir)
print("Done. ") if verbose else ""
except Exception as e:
print("Failed. {}".format(e))
if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)
else:
shp_data = None
return shp_data
class BBBikeReader:
"""
A class representation of a tool for reading BBBike data extracts.
:param max_tmpfile_size: maximum size of the temporary file when parsing data,
defaults to ``5000``; see also :py:func:`pydriosm.settings.gdal_configurations`
:type max_tmpfile_size: int or None
**Example**::
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> print(bbbike_reader.Name)
BBBike OpenStreetMap data extracts
"""
def __init__(self, max_tmpfile_size=5000):
"""
Constructor method.
"""
self.Downloader = BBBikeDownloader()
self.Name = copy.copy(self.Downloader.Name)
self.URL = copy.copy(self.Downloader.URL)
if max_tmpfile_size:
gdal_configurations(max_tmpfile_size=max_tmpfile_size)
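# Apply the GDAL configurations (including the maximum size of the
# temporary file for parsing OSM data);
# see pydriosm.settings.gdal_configurations()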
def get_path_to_osm_file(self, subregion_name, osm_file_format, data_dir=None):
"""
Get the absolute path to an OSM data file (if available) of a specific file format
for a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on BBBike's free download server
:type subregion_name: str
:param osm_file_format: format (file extension) of an OSM data
:type osm_file_format: str
:param data_dir: directory where the data file is located/saved;
if ``None`` (default), the default directory
:type data_dir: str or None
:return: path to the data file
:rtype: str or None
**Example**::
>>> import os
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> sr_name = 'Leeds'
>>> file_fmt = ".pbf"
>>> dat_dir = "tests"
>>> path_to_leeds_pbf = bbbike_reader.Downloader.download_osm_data(
... sr_name, file_fmt, dat_dir, verbose=True, ret_download_path=True)
Confirmed to download .pbf data of the following geographic region(s):
Leeds
? [No]|Yes: yes
Downloading "Leeds.osm.pbf" to "\tests" ...
Done.
>>> path_to_leeds_pbf_ = bbbike_reader.get_path_to_osm_file(
... sr_name, file_fmt, dat_dir)
>>> print(os.path.relpath(path_to_leeds_pbf_))
tests\\Leeds.osm.pbf
>>> print(path_to_leeds_pbf == path_to_leeds_pbf_)
True
>>> # Delete the downloaded PBF data file
>>> os.remove(path_to_leeds_pbf_)
"""
_, _, _, path_to_file = self.Downloader.get_valid_download_info(
subregion_name, osm_file_format=osm_file_format, download_dir=data_dir)
return path_to_file
def read_osm_pbf(self, subregion_name, data_dir=None, chunk_size_limit=50,
parse_raw_feat=False, transform_geom=False,
transform_other_tags=False, update=False,
download_confirmation_required=True, pickle_it=False,
ret_pickle_path=False, rm_osm_pbf=False, verbose=False):
"""
Read a PBF data file of a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on BBBike's free download server
:type subregion_name: str
:param data_dir: directory where the PBF data file is saved;
if ``None`` (default), the default directory
:type data_dir: str or None
:param chunk_size_limit: threshold (in MB) that triggers the use of chunk parser,
defaults to ``50``; if the size of the .osm.pbf file (in MB) is greater than
``chunk_size_limit``, it will be parsed in a chunk-wise way
:type chunk_size_limit: int
:param parse_raw_feat: whether to parse each feature in the raw data,
defaults to ``False``
:type parse_raw_feat: bool
:param transform_geom: whether to transform a single coordinate
(or a collection of coordinates) into a geometric object,
defaults to ``False``
:type transform_geom: bool
:param transform_other_tags: whether to transform a ``'other_tags'`` into
a dictionary, defaults to ``False``
:type transform_other_tags: bool
:param update: whether to update the pickle backup (if available),
defaults to ``False``
:type update: bool
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param pickle_it: whether to save the .pbf data as a .pickle file,
defaults to ``False``
:type pickle_it: bool
:param ret_pickle_path: whether to return an absolute path to
the saved pickle file (when ``pickle_it=True``)
:type ret_pickle_path: bool
:param rm_osm_pbf: whether to delete the downloaded .osm.pbf file,
defaults to ``False``
:type rm_osm_pbf: bool
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:return: dictionary of the .osm.pbf data; when ``pickle_it=True`` and
``ret_pickle_path=True``, return a tuple of the dictionary and
an absolute path to the pickle file
:rtype: dict or tuple or None
**Example**::
>>> import os
>>> from pyhelpers.dir import cd
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> sr_name = 'Leeds'
>>> dat_dir = "tests"
>>> # (Note that this process may take a long time.)
>>> leeds_osm_pbf = bbbike_reader.read_osm_pbf(sr_name, dat_dir,
... parse_raw_feat=True,
... transform_geom=True,
... transform_other_tags=True,
... verbose=True)
Parsing "\\tests\\Leeds.osm.pbf" ... Done.
>>> print(list(leeds_osm_pbf.keys()))
['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']
>>> leeds_osm_pbf_multipolygons = leeds_osm_pbf['multipolygons']
>>> print(leeds_osm_pbf_multipolygons.head())
id coordinates ... other_tags
0 10595 (POLYGON ((-1.5030223 53.6725382, -1.5034495 5... ... None
1 10600 (POLYGON ((-1.5116994 53.6764287, -1.5099361 5... ... None
2 10601 (POLYGON ((-1.5142403 53.6710831, -1.5143686 5... ... None
3 10612 (POLYGON ((-1.5129341 53.6704885, -1.5131883 5... ... None
4 10776 (POLYGON ((-1.5523801 53.7029081, -1.5522831 5... ... None
[5 rows x 27 columns]
>>> # Delete the downloaded PBF data file
>>> os.remove(cd(dat_dir, "Leeds.osm.pbf"))
"""
assert isinstance(chunk_size_limit, int) or chunk_size_limit is None
osm_file_format = ".osm.pbf"
path_to_osm_pbf = self.get_path_to_osm_file(subregion_name, osm_file_format,
data_dir)
path_to_pickle = path_to_osm_pbf.replace(
".osm.pbf", "-pbf.pickle" if parse_raw_feat else "-raw.pickle")
if os.path.isfile(path_to_pickle) and not update:
osm_pbf_data = load_pickle(path_to_pickle)
if ret_pickle_path:
osm_pbf_data = osm_pbf_data, path_to_pickle
else:
if not os.path.isfile(path_to_osm_pbf):
path_to_osm_pbf = self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir,
confirmation_required=download_confirmation_required, verbose=verbose,
ret_download_path=True)
if verbose and parse_raw_feat:
print("Parsing \"\\{}\"".format(os.path.relpath(path_to_osm_pbf)),
end=" ... ")
try:
number_of_chunks = get_number_of_chunks(path_to_osm_pbf,
chunk_size_limit=chunk_size_limit)
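# get_number_of_chunks() presumably compares the file size (in MB) with
# chunk_size_limit, so that files larger than the limit are parsed chunk-wise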
osm_pbf_data = parse_osm_pbf(path_to_osm_pbf,
number_of_chunks=number_of_chunks,
parse_raw_feat=parse_raw_feat,
transform_geom=transform_geom,
transform_other_tags=transform_other_tags)
print("Done. ") if verbose and parse_raw_feat else ""
if pickle_it:
save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
if ret_pickle_path:
osm_pbf_data = osm_pbf_data, path_to_pickle
if rm_osm_pbf:
remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)
except Exception as e:
print("Failed. {}".format(e))
osm_pbf_data = None
return osm_pbf_data
def read_shp_zip(self, subregion_name, layer_names=None, feature_names=None,
data_dir=None, update=False, download_confirmation_required=True,
pickle_it=False, ret_pickle_path=False, rm_extracts=False,
rm_shp_zip=False, verbose=False):
"""
Read a shapefile of a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on BBBike's free download server
:type subregion_name: str
:param layer_names: name of a .shp layer, e.g. 'railways',
or names of multiple layers; if ``None`` (default), all available layers
:type layer_names: str or list or None
:param feature_names: name of a feature, e.g. 'rail',
or names of multiple features; if ``None`` (default), all available features
:type feature_names: str or list or None
:param data_dir: directory where the .shp.zip data file is located/saved;
if ``None``, the default directory
:type data_dir: str or None
:param update: whether to update the pickle backup (if available),
defaults to ``False``
:type update: bool
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param pickle_it: whether to save the .shp data as a .pickle file,
defaults to ``False``
:type pickle_it: bool
:param ret_pickle_path: whether to return an absolute path to
the saved pickle file (when ``pickle_it=True``)
:type ret_pickle_path: bool
:param rm_extracts: whether to delete extracted files from the .shp.zip file,
defaults to ``False``
:type rm_extracts: bool
:param rm_shp_zip: whether to delete the downloaded .shp.zip file,
defaults to ``False``
:type rm_shp_zip: bool
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:return: dictionary of the shapefile data, with keys and values being layer names
and tabular data (in the format of `geopandas.GeoDataFrame`_), respectively;
when ``pickle_it=True`` and ``ret_pickle_path=True``, return a tuple of
the dictionary and an absolute path to the pickle file
:rtype: dict or tuple or None
.. _`geopandas.GeoDataFrame`: https://geopandas.org/reference.html#geodataframe
**Examples**::
>>> import os
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> sr_name = 'Birmingham'
>>> dat_dir = "tests"
>>> birmingham_shp = bbbike_reader.read_shp_zip(sr_name, data_dir=dat_dir,
... verbose=True)
Confirmed to download .shp.zip data of the following geographic region(s):
Birmingham
? [No]|Yes: yes
Downloading "Birmingham.osm.shp.zip" to "\\tests" ...
Done.
Extracting all of "Birmingham.osm.shp.zip" to "\\tests" ...
In progress ... Done.
Parsing "\\tests\\Birmingham-shp\\shape" ... Done.
>>> print(list(birmingham_shp.keys()))
['buildings',
'landuse',
'natural',
'places',
'points',
'pofw',
'pois',
'railways']
>>> birmingham_railways_shp = birmingham_shp['railways']
>>> print(birmingham_railways_shp.head())
osm_id ... geometry
0 740 ... LINESTRING (-1.81789 52.57010, -1.81793 52.569...
1 2148 ... LINESTRING (-1.87319 52.50555, -1.87271 52.505...
2 2950000 ... LINESTRING (-1.87941 52.48138, -1.87960 52.481...
3 3491845 ... LINESTRING (-1.74060 52.51858, -1.73942 52.518...
4 3981454 ... LINESTRING (-1.77475 52.52284, -1.77449 52.522...
[5 rows x 4 columns]
>>> layer_name = 'roads'
>>> feat_name = None
>>> birmingham_roads_shp = bbbike_reader.read_shp_zip(sr_name, layer_name,
... feat_name, dat_dir,
... rm_extracts=True,
... verbose=True)
Parsing "\\tests\\Birmingham-shp\\shape\\roads.shp" ... Done.
Deleting the extracts "\\tests\\Birmingham-shp" ... Done.
>>> print(list(birmingham_roads_shp.keys()))
['roads']
>>> print(birmingham_roads_shp['roads'].head())
osm_id ... geometry
0 37 ... LINESTRING (-1.82675 52.55580, -1.82646 52.555...
1 38 ... LINESTRING (-1.81541 52.54785, -1.81475 52.547...
2 41 ... LINESTRING (-1.81931 52.55219, -1.81860 52.552...
3 42 ... LINESTRING (-1.82492 52.55504, -1.82309 52.556...
4 45 ... LINESTRING (-1.82121 52.55389, -1.82056 52.55432)
[5 rows x 8 columns]
>>> lyr_names = ['railways', 'waterways']
>>> feat_names = ['rail', 'canal']
>>> bham_rw_rc_shp = bbbike_reader.read_shp_zip(
... sr_name, lyr_names, feat_names, dat_dir, rm_extracts=True,
... rm_shp_zip=True, verbose=True)
Extracting from "Birmingham.osm.shp.zip" the following layer(s):
'railways'
'waterways'
to "\\tests" ...
In progress ... Done.
Parsing "\\tests\\Birmingham-shp\\shape" ... Done.
Deleting the extracts "\\tests\\Birmingham-shp" ... Done.
Deleting "tests\\Birmingham.osm.shp.zip" ... Done.
>>> print(list(bham_rw_rc_shp.keys()))
['railways', 'waterways']
>>> bham_rw_rc_shp_railways = bham_rw_rc_shp['railways']
>>> print(bham_rw_rc_shp_railways[['fclass', 'name']].head())
fclass name
0 rail Cross-City Line
1 rail Cross-City Line
2 rail None
3 rail Birmingham to Peterborough Line
4 rail Freight Line
>>> bham_rw_rc_shp_waterways = bham_rw_rc_shp['waterways']
>>> print(bham_rw_rc_shp_waterways[['fclass', 'name']].head())
fclass name
2 canal Birmingham and Fazeley Canal
8 canal Birmingham and Fazeley Canal
9 canal Birmingham Old Line Canal Navigations - Rotton P
10 canal Oozells Street Loop
11 canal Worcester & Birmingham Canal
"""
osm_file_format = ".shp.zip"
path_to_shp_zip = self.get_path_to_osm_file(subregion_name, osm_file_format,
data_dir)
path_to_extract_dir, shp_zip_filename = os.path.split(path_to_shp_zip)
path_to_extract_dir_ = os.path.splitext(path_to_shp_zip)[0].replace(".osm.", "-")
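# e.g. "Birmingham.osm.shp.zip" -> extract directory "Birmingham-shp"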
if layer_names:
layer_names_ = [layer_names] if isinstance(layer_names, str) \
else layer_names.copy()
else:
layer_names_ = []  # an empty list implies all available layers
if feature_names:
feature_names_ = [feature_names] if isinstance(feature_names, str) \
else feature_names.copy()
else:
feature_names_ = []
if layer_names_:  # layer_names is not None
# Make a local path for saving a pickle file for .shp data
filename_ = shp_zip_filename.replace(".osm.shp.zip", "").lower()
sub_fname = "-".join(
x for x in [filename_] + layer_names_ +
(feature_names_ if feature_names_ else []) if x)
path_to_shp_pickle = cd(os.path.dirname(path_to_extract_dir_),
sub_fname + "-shp.pickle")
else:
path_to_shp_pickle = path_to_extract_dir_ + ".pickle"
if os.path.isfile(path_to_shp_pickle) and not update:
shp_data = load_pickle(path_to_shp_pickle)
if ret_pickle_path:
shp_data = shp_data, path_to_shp_pickle
else:
try:
# Download the requested OSM file (if it is not available)
if not os.path.exists(path_to_extract_dir_):
if not os.path.exists(path_to_shp_zip):
self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir, update=update,
confirmation_required=download_confirmation_required,
verbose=verbose)
unzip_shp_zip(path_to_shp_zip, path_to_extract_dir,
layer_names=layer_names_, verbose=verbose)
if not layer_names_:
layer_names_ = list(set(
[x.rsplit(".", 1)[0]
for x in os.listdir(cd(path_to_extract_dir_, "shape"))]))
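# Layer names are inferred from the filenames extracted to the "shape"
# subdirectory, e.g. "roads.shp" yields the layer name 'roads'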
else:
unavailable_layers = []
layer_names_temp_ = [
x.rsplit(".", 1)[0]
for x in os.listdir(cd(path_to_extract_dir_, "shape"))]
layer_names_temp = list(set(layer_names_ + layer_names_temp_))
for lyr_name in layer_names_temp:
shp_filename = cd(path_to_extract_dir_, "shape",
f"{lyr_name}.shp")
if not os.path.isfile(shp_filename):
unavailable_layers.append(lyr_name)
if unavailable_layers:
if not os.path.exists(path_to_shp_zip):
self.Downloader.download_osm_data(
subregion_name, osm_file_format=osm_file_format,
download_dir=data_dir, update=update,
confirmation_required=download_confirmation_required,
verbose=verbose)
unzip_shp_zip(path_to_shp_zip, path_to_extract_dir,
layer_names=unavailable_layers, verbose=verbose)
if not layer_names_:
layer_names_ = layer_names_temp
paths_to_layers_shp = [
glob.glob(cd(path_to_extract_dir_, "shape", f"{lyr_name}.shp"))
for lyr_name in layer_names_]
paths_to_layers_shp = [x for x in paths_to_layers_shp if x]
if verbose:
files_dir = os.path.relpath(os.path.commonpath(
itertools.chain.from_iterable(paths_to_layers_shp)))
print("Parsing \"\\{}\"".format(files_dir), end=" ... ")
shp_data_ = [parse_layer_shp(p, feature_names=feature_names_)
for p in paths_to_layers_shp]
shp_data = dict(zip(layer_names_, shp_data_))
print("Done. ") if verbose else ""
if pickle_it:
save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)
if ret_pickle_path:
shp_data = shp_data, path_to_shp_pickle
if rm_extracts and os.path.exists(path_to_extract_dir_):
if verbose:
print("Deleting the extracts \"\\{}\"".format(
os.path.relpath(path_to_extract_dir_)), end=" ... ")
try:
shutil.rmtree(path_to_extract_dir_)
print("Done. ") if verbose else ""
except Exception as e:
print("Failed. {}".format(e))
if rm_shp_zip and os.path.isfile(path_to_shp_zip):
remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)
except Exception as e:
print("Failed. {}".format(e))
shp_data = None
return shp_data
def read_csv_xz(self, subregion_name, data_dir=None,
download_confirmation_required=True, verbose=False):
"""
Read a compressed CSV (.csv.xz) data file of a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on BBBike's free download server
:type subregion_name: str
:param data_dir: directory where the .csv.xz data file is located/saved;
if ``None`` (default), the default directory
:type data_dir: str or None
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:return: tabular data of the .csv.xz file
:rtype: pandas.DataFrame or None
.. _pydriosm-BBBikeReader-read_csv_xz:
**Example**::
>>> import os
>>> from pyhelpers.dir import cd
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> sr_name = 'Leeds'
>>> dat_dir = "tests"
>>> leeds_csv_xz = bbbike_reader.read_csv_xz(sr_name, dat_dir, verbose=True)
Confirmed to download .csv.xz data of the following geographic region(s):
Leeds
? [No]|Yes: yes
Downloading "Leeds.osm.csv.xz" to "\\tests" ...
Done.
Parsing "\\tests\\Leeds.osm.csv.xz" ... Done.
>>> print(leeds_csv_xz.head())
type id feature
0 node 154915 None
1 node 154916 None
2 node 154921 None
3 node 154922 None
4 node 154923 None
>>> # Delete the downloaded .csv.xz data file
>>> os.remove(cd(dat_dir, "Leeds.osm.csv.xz"))
"""
subregion_name_ = self.Downloader.validate_input_subregion_name(subregion_name)
osm_file_format = ".csv.xz"
path_to_csv_xz = self.get_path_to_osm_file(subregion_name_, osm_file_format,
data_dir)
if not os.path.isfile(path_to_csv_xz):
path_to_csv_xz = self.Downloader.download_osm_data(
subregion_name_, osm_file_format=osm_file_format, download_dir=data_dir,
confirmation_required=download_confirmation_required, verbose=verbose,
ret_download_path=True)
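# parse_csv_xz() presumably decompresses the file via the lzma module
# (imported at the top of this module) and loads the rows into a
# pandas.DataFrame with columns such as 'type', 'id' and 'feature'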
if verbose:
print("Parsing \"\\{}\"".format(os.path.relpath(path_to_csv_xz)), end=" ... ")
try:
csv_xz_data = parse_csv_xz(path_to_csv_xz)
print("Done. ") if verbose else ""
except Exception as e:
print("Failed. {}".format(e))
csv_xz_data = None
return csv_xz_data
def read_geojson_xz(self, subregion_name, data_dir=None, fmt_geom=False,
download_confirmation_required=True, verbose=False):
"""
Read a .geojson.xz data file of a geographic region.
:param subregion_name: name of a geographic region (case-insensitive) available
on BBBike's free download server
:type subregion_name: str
:param data_dir: directory where the .geojson.xz data file is located/saved;
if ``None`` (default), the default directory
:type data_dir: str or None
:param fmt_geom: whether to reformat coordinates into a geometric object,
defaults to ``False``
:type fmt_geom: bool
:param download_confirmation_required: whether to ask for confirmation
before starting to download a file, defaults to ``True``
:type download_confirmation_required: bool
:param verbose: whether to print relevant information in console
as the function runs, defaults to ``False``
:type verbose: bool or int
:return: tabular data of the .geojson.xz file
:rtype: pandas.DataFrame or None
.. _pydriosm-BBBikeReader-read_geojson_xz:
**Examples**::
>>> import os
>>> from pyhelpers.dir import cd
>>> from pydriosm.reader import BBBikeReader
>>> bbbike_reader = BBBikeReader()
>>> sr_name = 'Leeds'
>>> dat_dir = "tests"
>>> leeds_geojson_xz = bbbike_reader.read_geojson_xz(sr_name, dat_dir,
... verbose=True)
Confirmed to download .geojson.xz data of the following geographic region(s):
Leeds
? [No]|Yes: yes
Downloading "Leeds.osm.geojson.xz" to "\\tests" ...
Done.
Parsing "\\tests\\Leeds.osm.geojson.xz" ... Done.
>>> print(leeds_geojson_xz.head())
feature_name ... properties
0 Feature ... {'ref': '40', 'name': 'Flushdyke', 'highway': ...
1 Feature ... {'ref': '44', 'name': 'Bramham', 'highway': 'm...
2 Feature ... {'ref': '43', 'name': 'Belle Isle', 'highway':...
3 Feature ... {'ref': '42', 'name': 'Lofthouse', 'highway': ...
4 Feature ... {'ref': '42', 'name': 'Lofthouse', 'highway': ...
[5 rows x 4 columns]
>>> print(leeds_geojson_xz[['coordinates']].head())
coordinates
0 [-1.5558097, 53.6873431]
1 [-1.34293, 53.844618]
2 [-1.517335, 53.7499667]
3 [-1.514124, 53.7416937]
4 [-1.516511, 53.7256632]
>>> leeds_geojson_xz_ = bbbike_reader.read_geojson_xz(sr_name, dat_dir,
... fmt_geom=True)
>>> print(leeds_geojson_xz_[['coordinates']].head())
coordinates
0 POINT (-1.5558097 53.6873431)
1 POINT (-1.34293 53.844618)
2 POINT (-1.517335 53.7499667)
3 POINT (-1.514124 53.7416937)
4 POINT (-1.516511 53.7256632)
>>> # Delete the downloaded .geojson.xz data file
>>> os.remove(cd(dat_dir, "Leeds.osm.geojson.xz"))
"""
subregion_name_ = self.Downloader.validate_input_subregion_name(subregion_name)
osm_file_format = ".geojson.xz"
path_to_geojson_xz = self.get_path_to_osm_file(subregion_name_, osm_file_format,
data_dir)
if not os.path.isfile(path_to_geojson_xz):
path_to_geojson_xz = self.Downloader.download_osm_data(
subregion_name_, osm_file_format=osm_file_format, download_dir=data_dir,
confirmation_required=download_confirmation_required, verbose=verbose,
ret_download_path=True)
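# parse_geojson_xz() presumably decompresses the file via lzma and
# deserialises the GeoJSON features (e.g. with rapidjson, imported above);
# fmt_geom=True converts raw coordinates into geometric objects
# (e.g. POINT), as shown in the example above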
if verbose:
print("Parsing \"\\{}\"".format(os.path.relpath(path_to_geojson_xz)),
end=" ... ")
try:
geojson_xz_data = parse_geojson_xz(path_to_geojson_xz, fmt_geom=fmt_geom)
print("Done. ") if verbose else ""
except Exception as e:
print("Failed. {}".format(e))
geojson_xz_data = None
return geojson_xz_data