Metadata-Version: 2.4
Name: datamarket
Version: 0.10.7
Summary: Utilities that integrate advanced scraping knowledge into just one library.
License: GPL-3.0-or-later
License-File: LICENSE
Author: DataMarket
Author-email: techsupport@datamarket.es
Requires-Python: >=3.12,<4.0
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Provides-Extra: aws
Provides-Extra: azure-storage-blob
Provides-Extra: boto3
Provides-Extra: camoufox
Provides-Extra: chompjs
Provides-Extra: click
Provides-Extra: clickhouse-driver
Provides-Extra: datetime
Provides-Extra: ddgs
Provides-Extra: demjson3
Provides-Extra: dnspython
Provides-Extra: drive
Provides-Extra: fake-useragent
Provides-Extra: geoalchemy2
Provides-Extra: geopandas
Provides-Extra: google-api-python-client
Provides-Extra: google-auth-httplib2
Provides-Extra: google-auth-oauthlib
Provides-Extra: html2text
Provides-Extra: httpx
Provides-Extra: json5
Provides-Extra: llm
Provides-Extra: lxml
Provides-Extra: matplotlib
Provides-Extra: nodriver
Provides-Extra: openai
Provides-Extra: openpyxl
Provides-Extra: pandarallel
Provides-Extra: pandas
Provides-Extra: pandera
Provides-Extra: peerdb
Provides-Extra: pii
Provides-Extra: pillow
Provides-Extra: playwright
Provides-Extra: playwright-stealth
Provides-Extra: plotly
Provides-Extra: pyarrow
Provides-Extra: pydantic
Provides-Extra: pydrive2
Provides-Extra: pymupdf
Provides-Extra: pyproj
Provides-Extra: pyrate-limiter
Provides-Extra: pysocks
Provides-Extra: pyspark
Provides-Extra: pytest
Provides-Extra: retry
Provides-Extra: shapely
Provides-Extra: soda-core-mysql
Provides-Extra: soda-core-postgres
Provides-Extra: sqlparse
Provides-Extra: tqdm
Provides-Extra: undetected-chromedriver
Provides-Extra: xmltodict
Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0)
Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
Requires-Dist: babel (>=2.0.0,<3.0.0)
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
Requires-Dist: botocore (>=1.42.0,<1.43.0) ; extra == "aws"
Requires-Dist: browserforge (>=1.2.0,<2.0.0) ; extra == "camoufox"
Requires-Dist: camoufox[geoip] (>=0.4.11,<0.5.0) ; extra == "camoufox"
Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
Requires-Dist: croniter (>=3.0.0,<4.0.0)
Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "aws"
Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
Requires-Dist: ddgs (>=9.0.0,<10.0.0) ; extra == "ddgs"
Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
Requires-Dist: geopy (>=2.0.0,<3.0.0)
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
Requires-Dist: inflection (>=0.5.0,<0.6.0)
Requires-Dist: jellyfish (>=1.0.0,<2.0.0)
Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
Requires-Dist: matplotlib (>=3.0.0,<4.0.0) ; extra == "matplotlib"
Requires-Dist: nodriver (>=0.44,<0.45) ; extra == "nodriver"
Requires-Dist: numpy (>=2.0.0,<3.0.0)
Requires-Dist: openai (>=2.0.0,<3.0.0) ; extra == "openai" or extra == "llm"
Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
Requires-Dist: pandarallel (>=1.0.0,<2.0.0) ; extra == "pandarallel"
Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
Requires-Dist: plotly (>=6.0.0,<7.0.0) ; extra == "plotly"
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
Requires-Dist: pycountry (>=24.0.0,<25.0.0)
Requires-Dist: pydantic (>=2.0.0,<3.0.0) ; extra == "pydantic" or extra == "llm"
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
Requires-Dist: pyproj (>=3.0.0,<4.0.0) ; extra == "pyproj"
Requires-Dist: pyrate-limiter (>=3.0.0,<4.0.0) ; extra == "pyrate-limiter"
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
Requires-Dist: python-string-utils (>=1.0.0,<2.0.0)
Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0)
Requires-Dist: requests (>=2.0.0,<3.0.0)
Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
Requires-Dist: rnet (>=3.0.0rc10,<4.0.0)
Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
Requires-Dist: spacy (>=3.0.0,<4.0.0) ; extra == "pii"
Requires-Dist: spacy-langdetect (>=0.1.0,<0.2.0) ; extra == "pii"
Requires-Dist: sqlparse (>=0.5.0,<0.6.0) ; extra == "sqlparse"
Requires-Dist: stem (>=1.0.0,<2.0.0)
Requires-Dist: tenacity (>=9.0.0,<10.0.0)
Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
Requires-Dist: typer (>=0.15.0,<0.16.0)
Requires-Dist: unidecode (>=1.0.0,<2.0.0)
Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
Project-URL: Documentation, https://github.com/Data-Market/datamarket
Project-URL: Homepage, https://datamarket.es
Project-URL: Repository, https://github.com/Data-Market/datamarket
Description-Content-Type: text/markdown

# DataMarket scraping core

------------------------------------------------------
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)


Utilities that integrate advanced scraping knowledge into just one library.

## Installation

To install this library in your Python environment:

`pip install datamarket`

## Documentation

This library provides built-in functionality for the following topics:

- **Databases**: uses SQLAlchemy to insert records and perform queries in any database.
- **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
- **Tinybird**: a Python client for this popular API.
- **Drive**: functions to authenticate with Google Drive and to upload or delete files.
- **FTP**: functions to authenticate with an FTP, SFTP or FTPS server and to upload or delete files.
- **Selenium**: wrapper for the main Selenium functions.

