# Source code for pymacies_arg.extractor

# !/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of the PymaciesArg Project
#     https://github.com/juniors90/PymaciesArg.
#
# Copyright (c) 2022. Ferreira Juan David
# License: MIT
#   Full Text: https://github.com/juniors90/PymaciesArg/blob/main/LICENSE

# =============================================================================
# DOCS
# =============================================================================

"""
PymaciesArg.

An extension that registers all pharmacies in Argentina.
"""

# =============================================================================
# IMPORTS
# =============================================================================

import logging
from datetime import datetime
from pathlib import Path

import requests

# Module-scoped logger: records carry this module's dotted name, so an
# application can filter or silence them independently of the root logger.
# Records still propagate to whatever handlers the root logger has.
log = logging.getLogger(__name__)


class UrlExtractor(object):
    """Download a remote CSV resource and store it in a dated local file.

    Parameters
    ----------
    name : str
        The name of the data (in this case, pharmacies) to extract. It is
        used as the category folder and as the file-name prefix.
    url : str
        The URL the CSV content is downloaded from.

    Attributes
    ----------
    file_path_crib : str
        Template of the destination path, relative to the base directory
        given to ``extract()``:
        ``data/<name>/<YYYY>-<MM>/<name>-<DD>-<MM>-<YYYY>.csv``.

    Examples
    --------
    >>> import pathlib
    >>> from pymacies_arg import UrlExtractor
    >>> name = "pharmacies"
    >>> url = (
    ...     "http://datos.salud.gob.ar/dataset"
    ...     "/39117f8f-e2bc-4571-a572-15a6ce7ea9e1"
    ...     "/resource/19338ea7-a492-4af3-b212-18f8f4af9184"
    ...     "/download/establecimientos-farmacias-enero-2021.csv"
    ... )
    >>> url_extractor = UrlExtractor(name=name, url=url)
    >>> url_extractor  # doctest: +ELLIPSIS
    <Extractor for Name: pharmacies, URL: http://datos.salud.gob.ar/...>
    >>> base_file_dir = pathlib.Path("/path/to/project")
    >>> path_to_csv = url_extractor.extract(
    ...     date_str="2022-03-28", base_file_dir=base_file_dir
    ... )  # doctest: +SKIP
    >>> path_to_csv  # doctest: +SKIP
    PosixPath('/path/to/project/data/pharmacies/2022-03/pharmacies-28-03-2022.csv')
    """

    file_path_crib = "data/{category}/{year}-{month:02d}/{category}-{day:02d}-{month:02d}-{year}.csv"  # noqa: E501

    def __init__(self, name: str, url: str) -> None:
        self.name = name
        self.url = url

    def __repr__(self) -> str:
        """Return a short description with the extractor's name and URL."""
        extractor = "<Extractor for Name: {name}, URL: {url}>"
        return extractor.format(name=self.name, url=self.url)

    def extract(self, date_str: str, base_file_dir: Path) -> Path:
        """Download the resource and write it to a dated csv file.

        The destination is ``base_file_dir`` joined with ``file_path_crib``
        rendered for ``self.name`` and the given date. Missing parent
        directories are created and an existing file is overwritten.

        Parameters
        ----------
        date_str : str
            The date of the run, with format ``YYYY-mm-dd``.
        base_file_dir : pathlib.Path
            The directory under which the ``data/...`` tree is created.

        Returns
        -------
        pathlib.Path
            The location the csv file was written to.

        Raises
        ------
        ValueError
            If ``date_str`` does not match ``YYYY-mm-dd``.
        requests.HTTPError
            If the server answers with a 4xx/5xx status code.
        """
        log.info("Extracting %s", self.name)
        date = datetime.strptime(date_str, "%Y-%m-%d").date()
        file_path = self.file_path_crib.format(
            category=self.name, year=date.year, month=date.month, day=date.day
        )

        pharm_path = base_file_dir / file_path
        pharm_path.parent.mkdir(parents=True, exist_ok=True)

        r = requests.get(self.url)
        # Fail loudly instead of storing an HTTP error page as the "csv".
        r.raise_for_status()
        # Force UTF-8 decoding regardless of what the response headers say.
        r.encoding = "utf-8"

        log.info("Storing file in %s", pharm_path)

        # Write with the same encoding used for decoding (the platform
        # default may not be UTF-8); newline="" keeps the downloaded line
        # endings untouched instead of translating them on Windows.
        with open(pharm_path, "w", encoding="utf-8", newline="") as f:
            f.write(r.text)

        return pharm_path