# Source code for dave_data.datapool.osm_request

from collections import namedtuple
from time import sleep
from urllib.parse import urlencode
from urllib.request import urlopen

import pandas as pd
from defusedxml.ElementTree import fromstring
from geopandas import GeoDataFrame
from pandas import DataFrame
from pandas import concat
from pandas import to_datetime
from shapely.geometry import LineString
from shapely.geometry import Point
from six import string_types

from dave_data.core import Data
from dave_data.core import MetaData


def osm_settings():
    """
    Return a dictionary with the DaVe settings for used data and assumptions.

    The dictionary is rebuilt on every call, so callers may modify the
    returned object without affecting later calls.

    Returns
    -------
    dict
        Settings with (among others) the following keys:

        ``osm_time_delay``
            Waiting time in seconds between retried OSM requests.
        ``osm_area``
            Name of the area considered for OSM data.
        ``osm_tags``
            Mapping ``type -> (osm key, osm tags, osm types, parameters)``.
            ``osm tags`` is either a list of accepted tag values or ``True``
            if every value of the key is accepted.
        ``buildings_residential`` / ``buildings_commercial``
            Building tag values assigned to the respective category.

        The remaining keys hold assumptions for power grid generation, the
        pandapower conversion, gas grid generation and the model utils.
    """
    settings = {
        # osm time delay in seconds (OSM does not allow more than 1 request
        # per second)
        "osm_time_delay": 60,  # in seconds
        # osm considered area (data for this area will be downloaded and
        # implemented in database)
        "osm_area": "germany",
        # osm tags: (type: (osm key, osm tags, osm type, parameter))
        "osm_tags": {
            "road": (
                "highway",
                [
                    "secondary",
                    "tertiary",
                    "unclassified",
                    "residential",
                    "living_street",
                    "footway",
                    "track",
                    "path",
                ],
                ["way"],
                ["geometry", "name", "highway", "surface"],
                # NOTE(review): unlike every other entry, "id" is a fifth
                # tuple element here instead of a member of the parameter
                # list above - confirm whether it belongs inside the list.
                "id",
            ),
            "road_plot": (
                "highway",
                ["motorway", "trunk", "primary"],
                ["way"],
                ["geometry", "name", "id", "surface"],
            ),
            "landuse": (
                "landuse",
                True,
                ["way", "relation"],
                ["landuse", "geometry", "name", "id", "surface"],
            ),
            "leisure": (
                "leisure",
                ["golf_course", "garden", "park"],
                ["way", "relation"],
                [
                    "leisure",
                    "landuse",
                    "natural",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "natural": (
                "natural",
                ["scrub", "grassland", "water", "wood"],
                ["way", "relation"],
                [
                    "natural",
                    "landuse",
                    "leisure",
                    "name",
                    "geometry",
                    "id",
                    "surface",
                ],
            ),
            "building": (
                "building",
                True,
                ["way"],
                [
                    "addr:housenumber",
                    "addr:street",
                    "addr:suburb",
                    "amenity",
                    "building",
                    "building:levels",
                    "geometry",
                    "name",
                    "id",
                ],
            ),
            "railway": (
                "railway",
                [
                    "construction",
                    "disused",
                    "light_rail",
                    "monorail",
                    "narrow_gauge",
                    "rail",
                    "subway",
                    "tram",
                ],
                ["way"],
                [
                    "name",
                    "railway",
                    "geometry",
                    "tram",
                    "train",
                    "usage",
                    "voltage",
                    "id",
                ],
            ),
            "waterway": (
                "waterway",
                [
                    "river",
                    "stream",
                    "canal",
                    # no trailing blank: the values are joined into a regular
                    # expression, where a blank would prevent any match
                    "tidal_channel",
                    "pressurised",
                    "drain",
                ],
                ["way"],
                ["name", "waterway", "geometry", "depth", "width", "id"],
            ),
        },
        # osm categories
        "buildings_residential": [
            "apartments",
            "detached",
            "dormitory",
            "dwelling_house",
            "farm",
            "house",
            "houseboat",
            "residential",
            "semidetached_house",
            "static_caravan",
            "terrace",
            "yes",
        ],
        "buildings_commercial": [
            "commercial",
            "hall",
            "industrial",
            "kindergarten",
            "kiosk",
            "office",
            "retail",
            "school",
            "supermarket",
            "warehouse",
        ],
        # --- assumptions at power grid generating:
        # mv level
        "mv_voltage": 20,
        # hours per year
        "h_per_a": 8760,
        # power factors for loads
        "cos_phi_residential": 0.95,  # inductive
        "cos_phi_industrial": 0.75,  # inductive
        "cos_phi_commercial": 0.75,  # inductive
        # average load values for ehv, hv, and mv loads
        "residential_load": 2,  # in MW/km²
        "industrial_load": 10,  # in MW/km²
        "commercial_load": 3,  # in MW/km²
        # --- assumptions at pandapower convert:
        # lines standard types
        # dummy value, must be changed
        "mv_line_std_type": "NA2XS2Y 1x240 RM/25 12/20 kV",
        "lv_line_std_type": "NAYY 4x150 SE",  # dummy value, must be changed
        # trafo parameters for ehv/ehv and ehv/hv. The dummy values are
        # based on the pandapower
        # standard type "160 MVA 380/110 kV" which is the biggest model
        "trafo_vkr_percent": 0.25,  # dummy value
        "trafo_vk_percent": 12.2,  # dummy value
        "trafo_pfe_kw": 60,  # dummy value
        "trafo_i0_percent": 0.06,  # dummy value
        # trafo standard types
        # dummy value, must be changed
        "hvmv_trafo_std_type": "63 MVA 110/20 kV",
        # dummy value, must be changed
        "mvlv_trafo_std_type": "0.63 MVA 20/0.4 kV",
        # --- assumptions at gas grid generating:
        # hp level
        "hp_nodes_height_m": 1,  # dummy value, must be changed
        # value based on shutterwald data, must be changed
        "hp_pipes_k_mm": 0.1,
        "hp_pipes_tfluid_k": 273.15,  # dummy value , must be changed
        # --- assumptions at model utils:
        "min_number_nodes": 4,
    }
    return settings



# [docs]
def osm_request(data_type, area):
    """
    Request OSM data for one configured data type directly from OSM.

    The OSM key, the accepted tag values and the OSM element types to query
    are looked up in ``osm_settings()["osm_tags"][data_type]``. One Overpass
    query is sent per OSM element type and the results of all queries are
    concatenated.

    Parameters
    ----------
    data_type : str
        Key of ``osm_settings()["osm_tags"]``, e.g. ``"road"``.
    area : polygon
        Search area. It is passed on to :func:`query_osm` as ``bbox`` and
        stored as ``polygon`` of the returned object.

    Returns
    -------
    Data
        Object holding the concatenated query results in ``data``.
        NOTE(review): ``name``, ``description`` and ``tags`` are fixed
        road-related texts regardless of ``data_type``.

    Raises
    ------
    KeyError
        If ``data_type`` is not a key of the configured OSM tags.

    Examples
    --------
    >>> from shapely import box
    >>> streets = osm_request("road", box(13.409, 52.519, 13.41, 52.52))
    >>> len(streets.data) > 0
    True

    """
    osm_key, osm_values, osm_types = osm_settings()["osm_tags"][data_type][:3]

    # The tag filter does not depend on the OSM element type, so it is built
    # once. A list of values becomes a regular expression, anything else
    # (``True``) only requires the key to be present.
    if isinstance(osm_values, list):
        joined_values = "|".join(osm_values)
        tags = f'{osm_key}~"{joined_values}"'
    else:
        tags = f"{osm_key}"

    request_data = GeoDataFrame([])
    for osm_type in osm_types:
        # get data from OSM directly via API query
        data, _ = query_osm(osm_type, area, recurse="down", tags=tags)
        request_data = concat([request_data, data], ignore_index=True)

    meta = MetaData(
        source_license="ODBL", source_date=None, organisation="OpenStreetMap"
    )
    return Data(
        name="OSM roads filtered",
        description="Some description",
        # hand over the data of all queried OSM types, not only the result
        # of the last query
        data=request_data,
        meta=meta,
        polygon=area,
        tags=["roads", "osm"],
    )



# --- request directly from OSM via Overpass API and geopandas_osm package

# These functions are based on the geopandas_osm Python package, which was
# published under the following license:

# The MIT License (MIT)

# Copyright (c) 2014 Jacob Wasserman

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# Container for the intermediate DataFrames parsed from one OSM XML document
OSMData = namedtuple(
    "OSMData", "nodes waynodes waytags relmembers reltags"
)
# Coordinate reference system assigned to all rendered geometries
_crs = "epsg:4326"

# Tags to remove so we don't clobber the output. This list comes from
# osmtogeojson's index.js (https://github.com/tyrasd/osmtogeojson)
uninteresting_tags = {
    # data origin
    "source",
    "source_ref",
    "source:ref",
    # editing information
    "history",
    "attribution",
    "created_by",
    # TIGER import leftovers
    "tiger:county",
    "tiger:tlid",
    "tiger:upload_uuid",
}


# http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide
def query_osm(
    typ, bbox=None, recurse=None, tags="", raw=False, meta=False, **kwargs
):
    """
    Query the Overpass API to obtain OpenStreetMap data.

    See also:
    http://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide

    The request is repeated until it succeeds with HTTP status 200. Between
    two attempts the function waits ``osm_settings()["osm_time_delay"]``
    seconds. Unless ``raw`` is set, the returned OSM XML is handed to
    :func:`read_osm`.

    Parameters
    ----------
    typ : {'node', 'way', 'relation'}
        The type of OSM data to query
    bbox : polygon, optional
        Search area to restrict the query. Unless the query is extremely
        restricted, you usually want to specify this. It is forwarded
        unchanged to ``_build_url``, which reads the (lon, lat) pairs of
        ``bbox.exterior.coords`` (e.g. of a shapely Polygon).
    recurse : {'up', 'down', 'uprel', 'downrel'}
        This is used to get more data than the original query. If 'typ' is
        'way', you'll usually want this set to 'down' which grabs all nodes
        of the matching ways
    tags : string or list of query strings
        See also the OverpassQL (referenced above) for more tag options
        Examples:
            tags='highway'
                Matches objects with a 'highway' tag
            tags='highway=motorway'
                Matches objects where the 'highway' tag is 'motorway'
            tags='name~[Mm]agazine'
                Match if the 'name' tag matches the regular expression

            Specify a list of tag requests to match all of them
            tags=['highway', 'name~"^Magazine"']
                Match tags that have 'highway' and where 'name' starts
                with 'Magazine'
    raw : boolean, default False
        Return the raw XML data returned by the request
    meta : boolean, default False
        Indicates whether to query the metadata with each OSM object. This
        includes the changeset, timestamp, uid, user, and version.
    **kwargs
        Forwarded to :func:`read_osm`. With ``render=False`` the parsed
        DataFrames are returned as ``OSMData`` namedtuple instead of being
        built into geometries.

    Returns
    -------
    data : bytes, GeoDataFrame or OSMData
        The raw response if ``raw`` is True, otherwise the result of
        :func:`read_osm`.
    meta_data : pandas.Series
        Placeholder for meta information about the request.

    Raises
    ------
    ValueError
        If ``recurse`` is not recognised or the request URL does not use the
        http or https scheme.

    Notes
    -----
    There's probably a bit more filtering required to get the exact desired
    data. For example if you only want ways, you may want to grab only the
    linestrings like:

    Examples
    --------
    >>> #  df = df[df.type == 'LineString']

    """
    url = _build_url(typ, bbox, recurse, tags, meta)
    # Check the scheme once and outside of the retry loop. Inside the loop
    # the error would be caught by the retry handler and the same invalid
    # URL would be requested forever.
    if not url.startswith(("http:", "https:")):
        raise ValueError("URL must start with 'http:' or 'https:'")

    # time delay between two attempts, so the OSM server is not flooded
    time_delay = osm_settings()["osm_time_delay"]

    # TODO: Raise on non-200 (or 400-599) instead of retrying endlessly
    while True:
        try:
            with urlopen(url) as response:  # noqa: S310
                content = response.read()
                status = response.getcode()
        except Exception as inst:
            # best effort: every failed request is repeated after the delay
            print(f'\n Retry OSM query because of "{inst}"')
            sleep(time_delay)
            continue
        if status == 200:
            break
        # also wait before repeating an unsuccessful response, otherwise the
        # server would be queried again without any pause
        print(f'\n Retry OSM query because of "HTTP status {status}"')
        sleep(time_delay)

    # get meta informations
    meta_data = pd.Series({"meta": "coming soon"})

    if raw:
        return content, meta_data
    return read_osm(content, **kwargs), meta_data


def _build_url(typ, bbox=None, recurse=None, tags="", meta=False):
    recurse_map = {
        "up": "<",
        "uprel": "<<",
        "down": ">",
        "downrel": ">>",
    }
    if recurse is None:
        recursestr = ""
    else:
        try:
            recursestr = recurse_map[recurse]
        except KeyError as k_exception:
            raise ValueError(
                "Unrecognized recurse value '{}'. "
                "Must be one of: {}.".format(
                    recurse, ", ".join(recurse_map.keys())
                )
            ) from k_exception

    # Allow tags to be a single string
    if isinstance(tags, string_types) and tags:
        tags = [tags]
    queries = "".join(f"[{t}]" for t in tags)

    # Overpass QL takes the bounding box as
    # (min latitude, min longitude, max latitude, max longitude)
    if bbox is None:
        bboxstr = ""
    else:
        bboxstr = '(poly:"{}")'.format(
            " ".join(f"{c[1]} {c[0]}" for c in bbox.exterior.coords)
        )

    metastr = "meta" if meta else ""

    query = f"({typ}{bboxstr}{queries};{recursestr};);out {metastr};"

    url = "".join(
        [
            "http://www.overpass-api.de/api/interpreter?",
            urlencode({"data": query}),
        ]
    )

    return url


def read_osm(content, render=True, **kwargs):
    """
    Parse OSM XML data into the intermediate DataFrames.

    Parameters
    ----------
    content : str or bytes
        OSM XML document.
    render : bool, default True
        If True the parsed DataFrames are handed to ``render_to_gdf``,
        otherwise they are returned as ``OSMData`` namedtuple.
    **kwargs
        Forwarded to ``render_to_gdf`` if ``render`` is True.

    Returns
    -------
    OSMData or the result of ``render_to_gdf``
    """
    root = fromstring(content)

    # nodes first, then ways and relations (each as members/tags pair)
    node_frame = read_nodes(root)
    way_frames = read_ways(root)
    relation_frames = read_relations(root)
    parsed = OSMData(node_frame, *way_frames, *relation_frames)

    if not render:
        return parsed
    return render_to_gdf(parsed, **kwargs)


def read_nodes(doc):
    """
    Collect all ``node`` elements of an OSM document in a DataFrame.

    Each node contributes its XML attributes and its tags, e.g.::

        <node id="1705717514" lat="42.3630798" lon="-71.0997601">
            <tag k="crossing" v="zebra"/>
            <tag k="highway" v="crossing"/>
            <tag k="source" v="Bing"/>
        </node>

    If nodes were found, the ``lon`` and ``lat`` columns are converted to
    float.
    """
    records = [_element_to_dict(element) for element in doc.findall("node")]
    frame = _dict_to_dataframe(records)
    if frame.empty:
        return frame

    for coordinate in ("lon", "lat"):
        frame[coordinate] = frame[coordinate].astype(float)
    return frame


def _element_to_dict(element):
    """
    Merge the attributes and the tags of an XML element into one dict.

    The result starts as a copy of the element attributes. Every ``tag``
    child then adds its ``k``/``v`` pair, except for keys listed in
    ``uninteresting_tags``.
    """
    record = element.attrib.copy()
    record.update(
        (tag.attrib["k"], tag.attrib["v"])
        for tag in element.findall("tag")
        if tag.attrib["k"] not in uninteresting_tags
    )
    return record


def _dict_to_dataframe(d):
    df = DataFrame.from_dict(d)
    if "timestamp" in df:
        df["timestamp"] = to_datetime(df["timestamp"])

    return df


def read_ways(doc):
    """
    Collect all ``way`` elements of an OSM document.

    Example of a way::

        <way id="8614593">
            <nd ref="61326730"/>
            <nd ref="61326036"/>
            <nd ref="61321194"/>
            <tag k="highway" v="residential"/>
            <tag k="name" v="Centre Street"/>
        </way>

    Returns
    -------
    waynodes : DataFrame
        One row per ``nd`` child with its attributes, the ``id`` of the way
        and the ``index`` (position) of the node within the way.
    waytags : DataFrame
        One row per way with its attributes and tags.
    """
    node_rows = []
    tag_rows = []
    for way in doc.findall("way"):
        way_id = way.attrib["id"]
        node_rows.extend(
            {**node_ref.attrib, "id": way_id, "index": position}
            for position, node_ref in enumerate(way.findall("nd"))
        )
        tag_rows.append(_element_to_dict(way))

    return _dict_to_dataframe(node_rows), _dict_to_dataframe(tag_rows)


def read_relations(doc):
    """
    Collect all ``relation`` elements of an OSM document.

    Example of a relation::

        <relation id="1933745">
          <member type="way" ref="134055159" role="outer"/>
          <member type="way" ref="260533047" role="outer"/>
          <tag k="boundary" v="administrative"/>
          <tag k="name" v="Cambridge"/>
          <tag k="type" v="boundary"/>
        </relation>

    Returns
    -------
    relmembers : DataFrame
        One row per ``member`` child with its attributes, the ``id`` of the
        relation and the ``index`` (position) of the member.
    reltags : DataFrame
        One row per relation with its attributes and tags.
    """
    member_rows = []
    tag_rows = []
    for relation in doc.findall("relation"):
        relation_id = relation.attrib["id"]
        member_rows.extend(
            {**member.attrib, "id": relation_id, "index": position}
            for position, member in enumerate(relation.findall("member"))
        )
        tag_rows.append(_element_to_dict(relation))

    members = _dict_to_dataframe(member_rows)
    tags = _dict_to_dataframe(tag_rows)
    return members, tags


def render_to_gdf(osmdata, drop_untagged=True):
    """
    Build the geometries of parsed OSM data and combine nodes and ways.

    Parameters
    ----------
    osmdata : OSMData
        Parsed DataFrames (nodes, waynodes, waytags, relmembers, reltags).
    drop_untagged : bool, default True
        Passed to ``render_nodes``: drop nodes which carry no tags.

    Returns
    -------
    The rendered nodes. If ways were rendered as well, they are appended to
    the nodes and the ``geometry`` column is set as geometry.
    """
    nodes = render_nodes(osmdata.nodes, drop_untagged)
    ways = render_ways(osmdata.nodes, osmdata.waynodes, osmdata.waytags)
    if ways is None:
        return nodes

    relmembers = osmdata.relmembers
    reltags = osmdata.reltags

    # set landuse tag from origin relation at relation members who has no
    # landuse tag
    if "landuse" in ways.columns and not relmembers.empty:
        for i, way in ways.iterrows():
            # get and add origin relation id
            parents = relmembers[relmembers.ref == way.id]
            if parents.empty:
                # way is not a member of any parsed relation
                continue
            rel_id = parents.iloc[0].id
            ways.at[i, "relation_id"] = rel_id
            # get and add origin relation landuse if needed
            relation_rows = reltags[reltags.id == rel_id]
            if relation_rows.empty:
                continue
            osm_reltag = relation_rows.iloc[0]
            if "landuse" in osm_reltag.keys() and str(way.landuse) == "nan":
                ways.at[i, "landuse"] = osm_reltag.landuse

    nodes = concat([nodes, ways], ignore_index=True)
    nodes = nodes.set_geometry("geometry", crs=_crs)

    return nodes


def render_nodes(nodes, drop_untagged=True):
    """
    Convert the ``lon``/``lat`` columns of the nodes into point geometries.

    An empty ``nodes`` frame is returned unchanged. If ``drop_untagged`` is
    True, nodes whose columns apart from ``id``, ``lon`` and ``lat`` are all
    missing are dropped before the points are created.
    """
    if nodes.empty:
        return nodes

    if drop_untagged:
        tag_columns = nodes.columns.drop(["id", "lon", "lat"])
        nodes = nodes.dropna(subset=tag_columns, how="all")

    points = [Point(lon, lat) for lon, lat in zip(nodes["lon"], nodes["lat"])]
    without_coordinates = nodes.drop(columns=["lon", "lat"])
    return without_coordinates.set_geometry(points, crs=_crs)


def render_ways(nodes, waynodes, waytags):
    """
    Create one LineString per way and attach it to the way tags.

    Returns None if there are no way nodes. Ways with fewer than two located
    nodes get no geometry.
    """
    if waynodes is None or waynodes.empty:
        return None

    def _to_linestring(way_group):
        # order the nodes by their position within the way
        coordinates = way_group.sort_values(by="index")[["lon", "lat"]]
        if len(coordinates) > 1:
            return LineString(coordinates.values)
        return None

    # attach the coordinates of the referenced node to every way node
    located = waynodes.merge(
        nodes[["id", "lon", "lat"]],
        left_on="ref",
        right_on="id",
        suffixes=("", "_nodes"),
    )
    # Series with the way id as index and the LineString as value
    lines = located.groupby("id", group_keys=False).apply(
        _to_linestring, include_groups=False
    )
    # combine the lines with the way tags, matched by way id
    rendered = waytags.set_index("id").set_geometry(lines, crs=_crs)
    rendered.reset_index(inplace=True)

    return rendered


# Running this module as a script performs no action.
if __name__ == "__main__":
    pass