SHPReadParse.read_shp

classmethod SHPReadParse.read_shp(shp_pathname, engine='pyshp', emulate_gpd=False, **kwargs)[source]

Read a shapefile.

Parameters:
  • shp_pathname (str) – pathname of a shape format file (.shp)

  • engine (str) – method used to read shapefiles; options include: 'pyshp' (default) and 'geopandas' (or 'gpd'). By default, this function relies on shapefile.Reader(); when engine='geopandas' (or engine='gpd'), it relies on geopandas.read_file().

  • emulate_gpd (bool) – whether to emulate the data format produced by geopandas.read_file() when engine='pyshp'.

  • kwargs – [optional] parameters of the function geopandas.read_file() or shapefile.Reader()

Returns:

data frame of the shapefile data

Return type:

pandas.DataFrame | geopandas.GeoDataFrame

Note

  • If engine is set to 'geopandas' (or 'gpd'), GeoPandas must be installed.

Examples:

>>> from pydriosm.reader import SHPReadParse
>>> from pydriosm.downloader import GeofabrikDownloader
>>> from pyhelpers.dirs import cd, delete_dir
>>> import os
>>> import glob

>>> # Download the shapefile data of London as an example
>>> subrgn_name = 'london'
>>> file_format = ".shp"
>>> dwnld_dir = "tests\osm_data"

>>> gfd = GeofabrikDownloader()

>>> gfd.download_osm_data(subrgn_name, file_format, dwnld_dir, verbose=True)
To download .shp.zip data of the following geographic (sub)region(s):
    Greater London
? [No]|Yes: yes
Downloading "greater-london-latest-free.shp.zip"
    to "tests\osm_data\greater-london\" ... Done.

>>> london_shp_zip = gfd.data_paths[0]
>>> os.path.relpath(london_shp_zip)
'tests\osm_data\greater-london\greater-london-latest-free.shp.zip'

>>> # Extract all
>>> london_shp_dir = SHPReadParse.unzip_shp_zip(london_shp_zip, ret_extract_dir=True)

>>> # Get the pathname of the .shp data of 'railways'
>>> path_to_railways_shp = glob.glob(cd(london_shp_dir, "*railways*.shp"))[0]
>>> os.path.relpath(path_to_railways_shp)  # Check the pathname of the .shp file
'tests\osm_data\greater-london\greater-london-latest-free-shp\gis_osm_railwa...

>>> # Read the data of 'railways'
>>> london_railways = SHPReadParse.read_shp(path_to_railways_shp)
>>> london_railways.head()
   osm_id  code  ...                                        coordinates shape_type
0   30804  6101  ...  [(0.0048644, 51.6279262), (0.0061979, 51.62926...          3
1  101298  6103  ...  [(-0.2249906, 51.493682), (-0.2251678, 51.4945...          3
2  101486  6103  ...  [(-0.2055497, 51.5195429), (-0.2051377, 51.519...          3
3  101511  6101  ...  [(-0.2119027, 51.5241906), (-0.2108059, 51.523...          3
4  282898  6103  ...  [(-0.1862586, 51.6159083), (-0.1868721, 51.613...          3
[5 rows x 9 columns]

>>> # Set `emulate_gpd=True` to return data in a format similar to that produced by GeoPandas
>>> london_railways = SHPReadParse.read_shp(path_to_railways_shp, emulate_gpd=True)
>>> london_railways.head()
   osm_id  code  ... tunnel                                           geometry
0   30804  6101  ...      F  LINESTRING (0.0048644 51.6279262, 0.0061979 51...
1  101298  6103  ...      F  LINESTRING (-0.2249906 51.493682, -0.2251678 5...
2  101486  6103  ...      F  LINESTRING (-0.2055497 51.5195429, -0.2051377 ...
3  101511  6101  ...      F  LINESTRING (-0.2119027 51.5241906, -0.2108059 ...
4  282898  6103  ...      F  LINESTRING (-0.1862586 51.6159083, -0.1868721 ...
[5 rows x 8 columns]

>>> # Alternatively, set `engine` to be 'geopandas' (or 'gpd') to use GeoPandas
>>> london_railways_ = SHPReadParse.read_shp(path_to_railways_shp, engine='geopandas')
>>> london_railways_.head()
   osm_id  code  ... tunnel                                           geometry
0   30804  6101  ...      F    LINESTRING (0.00486 51.62793, 0.00620 51.62927)
1  101298  6103  ...      F  LINESTRING (-0.22499 51.49368, -0.22517 51.494...
2  101486  6103  ...      F  LINESTRING (-0.20555 51.51954, -0.20514 51.519...
3  101511  6101  ...      F  LINESTRING (-0.21190 51.52419, -0.21081 51.523...
4  282898  6103  ...      F  LINESTRING (-0.18626 51.61591, -0.18687 51.61384)
[5 rows x 8 columns]

>>> # Check the data types of `london_railways` and `london_railways_`
>>> railways_data = [london_railways, london_railways_]
>>> list(map(type, railways_data))
[pandas.core.frame.DataFrame, geopandas.geodataframe.GeoDataFrame]
>>> # Check the geometry data of `london_railways` and `london_railways_`
>>> geom1, geom2 = map(lambda x: x['geometry'].map(lambda y: y.wkt), railways_data)
>>> geom1.equals(geom2)
True

>>> # Delete the download/data directory
>>> delete_dir(gfd.download_dir, verbose=True)
To delete the directory "tests\osm_data\" (Not empty)
? [No]|Yes: yes
Deleting "tests\osm_data\" ... Done.