# Source code for packages.meetup.country_groups

import logging
import os
import requests
import time
import pandas as pd
import geopy.distance                                  
import shapefile            
import numpy as np
from . import meetup_utils


def generate_coords(x0, y0, x1, y1, n):
    r'''Generate a staggered grid of circle centres over a bounding box.

    The grid spacing :math:`r` is the smallest side of the bounding box
    :math:`(x0, y0), (x1, y1)` divided by :math:`n`, inflated by an
    unjustified 10% "fudge" factor. Circles of equal radius placed on the
    returned coordinates are intended to overlap and cover the whole box.
    Longitude and latitude are treated as euclidean variables when laying
    out the grid; only the returned radius is converted to a real
    (geodesic) distance.

    Rows alternate between odd and even multiples of the spacing, so
    the circles (centred on each X) are staggered as below, where a single
    vertical line or four underscores correspond to one spacing. This
    configuration corresponds to :math:`n=4`::

        ____X____ ____X____
        |
        X________X________X
        |
        ____X____ ____X____

    Roughly :math:`(n_x + 1)(n_y + 1) / 2` coordinates are produced, where
    :math:`n_x` and :math:`n_y` are the number of spacings needed to span
    each side, so the total grows with :math:`n^2` and with the ratio of
    the bounding box sides.

    Args:
        x0, y0, x1, y1 (float): Bounding box corners. ``x`` is used as
            longitude and ``y`` as latitude (they are handed to geopy as
            ``(y, x)``, i.e. ``(latitude, longitude)``).
        n (int): The fraction by which to calculate the Meetup API radius
            parameter, with respect to the smallest side of the bounding
            box. Must be positive.
    Returns:
        float, :obj:`list` of :obj:`tuple`: The radius in miles and the
        ``(x, y)`` coordinates for the Meetup API requests.
    Raises:
        ValueError: If ``n`` is not positive, or the bounding box has zero
            width or height (no usable spacing can be derived).
    '''
    if n <= 0:
        raise ValueError("n must be positive, got %r" % (n,))

    fudge = 1.1  # Inflate the spacing so neighbouring circles overlap

    # Work out the grid spacing from the smallest side of the box
    dx = np.fabs(x0 - x1)
    dy = np.fabs(y0 - y1)
    r = fudge * min(dx, dy) / n
    if not r > 0:
        # Also catches NaN; without this guard the divisions below would
        # yield inf/nan and fail with an unhelpful conversion error.
        raise ValueError("Degenerate bounding box (%r, %r), (%r, %r): "
                         "both sides must have non-zero length"
                         % (x0, y0, x1, y1))

    # Number of spacings required to span each side
    nx = int(np.ceil(dx / r))
    ny = int(np.ceil(dy / r))

    # Convert the spacing to miles (unit required for Meetup API): the
    # geodesic length of a step of r degrees in both latitude and longitude
    # starting from the (x0, y0) corner.
    radius = geopy.distance.distance((y0, x0), (y0 + r, x0 + r)).miles

    coords = []
    for row in range(ny + 1):
        # Even rows start one spacing in, odd rows start on the edge
        first_column = 1 if row % 2 == 0 else 0
        for column in range(first_column, nx + 1, 2):
            coords.append((x0 + column * r, y0 + row * r))
    return radius, coords
def get_coordinate_data(n):
    r'''Generate the radius and coordinate data (see
    :code:`generate_coords`) for each shape (country) in the shapefile
    pointed to by the environmental variable WORLD_BORDERS.

    Args:
        n (int): The fraction by which to calculate the Meetup API radius
            parameter, with respect to the smallest side of the country's
            shape bbox. The number of Meetup API radius searches per
            country grows with :math:`n^2` and with the ratio of the bbox
            sides.
    Returns:
        :obj:`pd.DataFrame`: One row per shape, containing the shapefile's
        record fields plus a :code:`radius` and a :code:`coords` column.
    Raises:
        KeyError: If the WORLD_BORDERS environmental variable is not set.
    '''
    sf = shapefile.Reader(os.environ["WORLD_BORDERS"],
                          encodingErrors='ignore')
    try:
        # The first entry of sf.fields is skipped so that the names line up
        # with the record values (in pyshp it is the deletion flag, which
        # has no counterpart in the record).
        field_names = [field_info[0] for field_info in sf.fields[1:]]

        output = []
        for shape_info in sf.shapeRecords():
            # Zip together the field names and record values
            data = dict(zip(field_names, shape_info.record))
            # Get the radius and coordinate data for this country
            radius, coords = generate_coords(*shape_info.shape.bbox, n=n)
            data["radius"] = radius
            data["coords"] = coords
            output.append(data)
    finally:
        # Tidy up, even if reading a shape or generating coordinates failed
        # TODO: Put in a pull request to do a better job of this in shapefile
        for handle in (sf.shp, sf.shx, sf.dbf):
            if handle is not None:
                handle.close()
    return pd.DataFrame(output)
def assert_iso2_key(df, iso2):
    '''Check that exactly one row of :obj:`df` matches an ISO2 code.

    Args:
        df (:obj:`pd.DataFrame`): Data with an :code:`ISO2` column.
        iso2 (str): The ISO2 country code to look for.
    Returns:
        :obj:`pd.Series`: Boolean mask over the rows of :obj:`df`, true
        only for the single matching row.
    Raises:
        KeyError: If zero or more than one row matches :obj:`iso2`.
    '''
    condition = df.ISO2 == iso2
    n_matches = condition.sum()
    if n_matches != 1:
        # Look the path up defensively: indexing os.environ here would
        # replace this message with a bare KeyError('WORLD_BORDERS')
        # whenever the variable is unset.
        source = os.environ.get("WORLD_BORDERS", "<WORLD_BORDERS not set>")
        raise KeyError("%s retrieved %s entries from %s"
                       % (iso2, n_matches, source))
    return condition
class MeetupCountryGroups:
    '''Extract all meetup groups for a given country.

    Attributes:
        country_code (str): ISO2 code
        coords (:obj:`list` of :obj:`tuple`): (lon, lat) centres of the
            Meetup API radius searches.
        params (:obj:`dict`): GET request parameters. The per-request
            values (offset, lat, lon, key) are overwritten on every call
            made by :code:`get_groups`.
        ids (:obj:`set`): Ids of the groups collected so far, used to skip
            duplicates returned by overlapping searches.
        groups (:obj:`list` of :obj:`dict`): Meetup groups found in this
            country, filled in by :code:`get_groups`.
    '''

    def __init__(self, country_code, coords, radius, category, n=10):
        '''Set meetup search parameters.

        Args:
            country_code (str): A country ISO2
            coords (:obj:`list` of :obj:`tuple`): (lon, lat) coordinates
                from which to perform the Meetup API calls, with radius
                :obj:`radius`.
            radius (float): Meetup API radius parameter.
            category (int): A Meetup category
            n (int): Not used by this class; kept so that existing callers
                passing it continue to work.
        '''
        self.ids = set()
        self.country_code = country_code
        self.coords = coords
        # Set up the static Meetup API parameters
        self.params = dict(country=country_code, page=200,
                           category=str(category), radius=radius)
        logging.info("Generated parameters %s", self.params)
        self.groups = []

    def _request_page(self):
        '''Perform one API request with the current :code:`self.params`.

        Returns:
            The response object, or :obj:`None` if the request should be
            retried (HTTP error status, chunked encoding error, connection
            reset, or an empty response body).
        Raises:
            Exception: Any other error raised while making the request.
        '''
        # Built here rather than at class level so that defining the class
        # does not need to touch the requests module.
        retryable = (requests.exceptions.HTTPError,
                     requests.exceptions.ChunkedEncodingError,
                     ConnectionResetError)
        try:
            response = requests.get("https://api.meetup.com/2/groups",
                                    params=self.params)
            response.raise_for_status()
        except retryable:
            return None
        except Exception as err:
            # A connection reset can surface wrapped in another exception
            # type, so fall back to inspecting the message.
            if "reset by peer" not in str(err):
                raise
            logging.info("Reset by peer error")
            return None
        # An empty body is also treated as a bad response
        return response if response.text else None

    def _collect(self, rows):
        '''Append the new rows matching this country and category.'''
        for row in rows:
            if row['id'] in self.ids:
                continue
            # Bonus countries can enter the fold because of the radius
            # parameter
            if row["country"].lower() != self.country_code.lower():
                continue
            if 'category' not in row:
                continue
            if str(row['category']['id']) != self.params['category']:
                continue
            self.ids.add(row['id'])
            self.groups.append(row)

    def get_groups(self, lon, lat, offset=0, max_pages=None):
        '''Get all groups around one coordinate, page by page.

        Pages are requested one after another until the API stops
        advertising a "next" page or :obj:`max_pages` is reached. A bad
        response is retried, without limit, after a 10 second pause. The
        pages are walked in a loop rather than by recursion, so neither a
        long run of pages nor repeated retries can exhaust the call stack.

        Args:
            lon (float): Longitude of the search centre.
            lat (float): Latitude of the search centre.
            offset (int): Page to start from.
            max_pages (int): Stop once :obj:`offset` reaches this value;
                :obj:`None` means no limit.
        '''
        while max_pages is None or offset < max_pages:
            # Set the per-request parameters; the API key is re-fetched on
            # every attempt, including retries.
            self.params["offset"] = offset
            self.params['lat'] = lat
            self.params['lon'] = lon
            self.params['key'] = meetup_utils.get_api_key()

            response = self._request_page()
            if response is None:
                time.sleep(10)
                logging.info("Got a bad response, so retrying page %s",
                             offset)
                continue

            data = response.json()
            self._collect(data["results"])

            # Stop unless a "next" url is specified
            if data["meta"]["next"] == "":
                break
            offset += 1

    def get_groups_recursive(self):
        '''Call :code:`get_groups` for each lon,lat coordinate'''
        total = len(self.coords)
        for i, (lon, lat) in enumerate(self.coords):
            logging.info("--> %s / %s ==> %s", i + 1, total,
                         len(self.groups))
            self.get_groups(lon, lat)
if __name__ == "__main__":
    # Example run: collect a sample of groups for one country and category
    logging.getLogger().setLevel(logging.INFO)

    iso2, category = "MX", 34

    # Build the search grid for every country, then pick out the single
    # row belonging to the requested one
    df = get_coordinate_data(n=10)
    condition = assert_iso2_key(df, iso2)
    name, coords, radius = (df.loc[condition, column].values[0]
                            for column in ("NAME", "coords", "radius"))

    # Only the first 10 coordinates are searched in this example
    mcg = MeetupCountryGroups(country_code=iso2,
                              coords=coords[0:10],
                              radius=radius,
                              category=category)
    mcg.get_groups_recursive()
    logging.info("Got %s groups", len(mcg.groups))

    # Flatten the json data, keeping only these fields
    flatten_keys = [
        ('category', 'name'),
        ('category', 'shortname'),
        ('category', 'id'),
        'description',
        'created',
        'country',
        'city',
        'id',
        'lat',
        'lon',
        'members',
        'name',
        'topics',
        'urlname',
    ]
    output = meetup_utils.flatten_data(mcg.groups,
                                       country_name=name,
                                       country_code=iso2,
                                       keys=flatten_keys)

    # Write the output
    # NOTE(review): the final argument is presumably the sample size --
    # confirm against meetup_utils.save_sample
    meetup_utils.save_sample(output, 'data/country_groups.json', 20)