Source code for packages.format_utils.datetools

'''
datetools
=========

Tools for processing dates in data.
'''

from datetime import datetime as dt
import re

DATE_FORMATS = [
    '%m/%d/%Y',   # 10/15/2013
    '%Y/%m/%d',   # 2013/10/15
    '%Y-%m-%d',   # 2013-10-15
    '%b %d %Y',   # Oct 15 2013
    '%d %B %Y',   # 15 October 2013
    '%d %b, %Y',  # 15 Oct, 2013
    '%B %Y',      # October 2013
    '%b %Y',      # Oct 2013
    '%Y'          # 2013
]


[docs]def extract_year(date): ''' Use search for 4 digits in a row to identify the year and return as YYYY-01-01. Args: date (str): The full date string. Returns: integer ''' try: year = re.search(r'\d{4}', date).group(0) except (TypeError, AttributeError): raise ValueError(f"No year extraction possible for: {date}") return int(year)
[docs]def extract_date(date, date_format='%Y-%m-%d', return_date_object=False): ''' Determine the date format, convert and return in YYYY-MM-DD format. Args: date (str): the full date string. Returns: Formatted date string. ''' date_object = None for df in DATE_FORMATS: try: date_object = dt.strptime(date.strip(), df) break except (ValueError, AttributeError): pass if not date_object: raise ValueError(f"No date conversion possible for: {date}") if return_date_object: return date_object return date_object.strftime(date_format)