Skip to content

Commit

Permalink
Add linting
Browse files Browse the repository at this point in the history
  • Loading branch information
damklis committed Aug 3, 2020
1 parent d0ecedc commit ab0818e
Show file tree
Hide file tree
Showing 26 changed files with 52 additions and 54 deletions.
6 changes: 6 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[flake8]
exclude:
__pycache__
random_headers_list.py
__init__.py
./airflow/modules/tests/*
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ python:
services:
- docker

before_script: pip install docker-compose
before_script: pip install docker-compose flake8

script:
- docker-compose run airflow sh -c "python -m pytest -v --show-capture=no"
- python -m flake8
4 changes: 2 additions & 2 deletions airflow/dags/dags_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Config:
REDIS_CONFIG = {
"host": "redis",
"port": "6379",
"db": 0
"db": 0
}

REDIS_KEY = "proxies"
Expand All @@ -26,6 +26,6 @@ class Config:
"deadspin": "https://deadspin.com/rss"
}

BOOTSTRAP_SERVERS = ["kafka:9092"]
BOOTSTRAP_SERVERS = ["kafka:9092"]

TOPIC = "rss"
6 changes: 2 additions & 4 deletions airflow/dags/rss_news_dag.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from datetime import datetime
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
from dags_config import Config as config

from rss_news import export_news_to_broker
from proxypool import update_proxypool



def dummy_callable(task_id, action, dag):
def foo(action):
return f"{datetime.now()}: {action} scrapping RSS feeds!"

return PythonOperator(
task_id=task_id,
python_callable=foo,
Expand All @@ -39,7 +37,7 @@ def exporting_events(config, rss_feed, dag):
)

proxypool = PythonOperator(
task_id=f"updating_proxypoool",
task_id="updating_proxypoool",
python_callable=update_proxypool,
op_kwargs={"config": config},
dag=dag
Expand Down
2 changes: 1 addition & 1 deletion airflow/modules/log/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from log.log import log, Logger
from log.log import log, Logger
2 changes: 1 addition & 1 deletion airflow/modules/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from parser.web_parser import WebParser
from parser.web_parser import WebParser
2 changes: 1 addition & 1 deletion airflow/modules/parser/web_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
import random
from contextlib import closing
from requests import get
from requests import get
from log import log
from parser.random_headers_list import headers_list

Expand Down
2 changes: 1 addition & 1 deletion airflow/modules/proxypool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from proxypool.redis_proxypool_client import RedisProxyPoolClient
from proxypool.main import update_proxypool
from proxypool.proxypool_scraper import ProxyPoolScraper, ProxyRecord
from proxypool.proxypool_validator import ProxyPoolValidator
from proxypool.proxypool_validator import ProxyPoolValidator
4 changes: 2 additions & 2 deletions airflow/modules/proxypool/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def update_proxypool(config):
with ThreadPoolExecutor(max_workers=config.MAX_WORKERS) as executor:
results = executor.map(proxy_validator.validate_proxy, proxy_stream)
valid_proxies = filter(
lambda x: x.is_valid == True, results
lambda x: x.is_valid is True, results
)

with RedisProxyPoolClient(config.REDIS_KEY, config.REDIS_CONFIG) as client:
client.override_existing_proxies(
[json.dumps(record.proxy) for record in valid_proxies]
Expand Down
9 changes: 5 additions & 4 deletions airflow/modules/proxypool/proxypool_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@dataclass
class ProxyRecord:
ip_address: str
port: int
port: int
country_code: str
country: str
anonymity: str
Expand All @@ -20,7 +20,7 @@ def __post_init__(self):

def format_proxy(self):
protocol = "https" if self.https == "yes" else "http"
url = f"{protocol}://{self.ip_address}:{self.port}"
url = f"{protocol}://{self.ip_address}:{self.port}"
return {"http": url, "https": url}


Expand All @@ -35,7 +35,8 @@ def get_proxy_stream(self, limit):
map(self._clear_up_record, raw_records)
)
for record in clean_records[:limit]:
if record: yield ProxyRecord(*record)
if record:
yield ProxyRecord(*record)

def extract_table_raw_records(self):
content = self.parser.get_content()
Expand All @@ -48,6 +49,6 @@ def extract_table_raw_records(self):

def _clear_up_record(self, raw_record):
return [
val.text for val
val.text for val
in raw_record.find_all("td")
]
6 changes: 3 additions & 3 deletions airflow/modules/proxypool/proxypool_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

@dataclass(frozen=True)
class ProxyStatus:
proxy: str
proxy: str
is_valid: bool


@log
class ProxyPoolValidator:
def __init__(self, url, timeout=10):
self.timeout = timeout
self.timeout = timeout
self.parser = WebParser(url, rotate_header=True)

def validate_proxy(self, proxy_record):
Expand All @@ -21,7 +21,7 @@ def validate_proxy(self, proxy_record):
proxies=proxy_record.proxy
)
proxy_status = ProxyStatus(
proxy_record.proxy,
proxy_record.proxy,
content is not None
)
self.logger.info(f"Proxy status: {proxy_status}")
Expand Down
2 changes: 1 addition & 1 deletion airflow/modules/proxypool/redis_proxypool_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ def __exit__(self, type, value, traceback):
client_id = self.redis.client_id()
self.redis.client_kill_filter(
_id=client_id
)
)
2 changes: 1 addition & 1 deletion airflow/modules/retry/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from retry.retry_on_exception import RetryOnException
from retry.retry_on_exception import RetryOnException
4 changes: 2 additions & 2 deletions airflow/modules/retry/retry_on_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def wrapper(*args, **kwargs):
return wrapper

def _raise_on_condition(self, retries, exception):
if retries == 0:
if retries == 0:
raise exception
else:
else:
self.logger.info(f"Retries: {retries}")
2 changes: 1 addition & 1 deletion airflow/modules/rss_news/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from rss_news.main import export_news_to_broker
from rss_news.rss_news_producer import NewsProducer, NewsFormatter, News
from rss_news.rss_news_exporter import NewsExporter
from rss_news.rss_news_exporter import NewsExporter
2 changes: 0 additions & 2 deletions airflow/modules/rss_news/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
from proxypool import RedisProxyPoolClient
from log import Logger
from rss_news.rss_news_producer import NewsProducer
Expand All @@ -22,4 +21,3 @@ def export_news_to_broker(config, rss_feed):
config.TOPIC,
news.as_dict()
)

3 changes: 1 addition & 2 deletions airflow/modules/rss_news/rss_news_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self, bootstrap_servers):
bootstrap_servers=bootstrap_servers,
value_serializer=lambda x: self._encode(x)
)

def _encode(self, value):
return json.dumps(value).encode("utf-8")

Expand All @@ -28,4 +28,3 @@ def export_news_to_broker(self, topic, record, sleep_time=0.01):

def __exit__(self, type, value, traceback):
self.producer.close()

6 changes: 2 additions & 4 deletions airflow/modules/rss_news/rss_news_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
import re
from dataclasses import dataclass
import atoma
from dateutil import parser
import langdetect
from parser import WebParser
from rss_news.rss_news_exporter import NewsExporter


@dataclass(frozen=True)
Expand All @@ -17,7 +15,7 @@ class News:
description: str
author: str
language: str

def as_dict(self):
return self.__dict__

Expand All @@ -33,7 +31,7 @@ def _extract_news_feed_items(self, proxies):
return news_feed.items

def get_news_stream(self, proxies):
news_feed_items = self._extract_news_feed_items(proxies)
news_feed_items = self._extract_news_feed_items(proxies)
for entry in news_feed_items:
formatted_entry = self.formatter.format_entry(entry)
yield formatted_entry
Expand Down
20 changes: 10 additions & 10 deletions airflow/modules/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ def proxy_record():
yield ProxyRecord(
"127.0.0.1",
8080,
"PL",
"POLAND",
"gold",
"no",
"no",
"PL",
"POLAND",
"gold",
"no",
"no",
"30 minutes ago"
)

Expand All @@ -65,7 +65,7 @@ def redis_config():
yield {
"host": "redis",
"port": "6379",
"db": 0
"db": 0
}


Expand All @@ -76,7 +76,7 @@ def helper(status_code):
response.status_code = status_code
response.headers['Content-Type'] = "text/html"
return response
yield helper
yield helper


@pytest.fixture()
Expand All @@ -86,15 +86,15 @@ def helper(filename):
"tests",
f"dataresources/{filename}"
)

yield helper


@pytest.fixture()
def add_function():

@retry(5)
def func(a , b):
def func(a, b):
return a + b

yield func
yield func
3 changes: 1 addition & 2 deletions airflow/modules/tests/proxypool/test_proxypool_scraper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pytest
from proxypool import ProxyRecord
from unittest.mock import patch

Expand All @@ -8,7 +7,7 @@
@patch("parser.web_parser.WebParser.get_content")
def test_get_proxy_stream(get_content, raw_content, web_parser, scraper):
get_content.return_value = raw_content("proxy_list_file.txt")

scraper.parser = web_parser
stream = scraper.get_proxy_stream(5)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pytest
from unittest.mock import patch
from proxypool import ProxyPoolValidator

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import json
from unittest.mock import patch
import pytest
import fakeredis
from proxypool import RedisProxyPoolClient

from ..fixtures import redis_config, redis_mock, proxies
Expand Down Expand Up @@ -32,7 +30,7 @@ def test_list_existing_proxies(redis, redis_config, redis_mock, proxies):

redis_client = RedisProxyPoolClient(key, redis_config)
redis_client.redis = redis_mock

result = redis_client.list_existing_proxies()

assert result == proxies
2 changes: 1 addition & 1 deletion airflow/modules/tests/rss_news/test_rss_news_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ def test_export_news_to_broker(export_news_to_broker):
}

export_news_to_broker(topic, news)

export_news_to_broker.assert_called_once_with(topic, news)
2 changes: 1 addition & 1 deletion airflow/modules/tests/rss_news/test_rss_news_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_construct_id(formatter, title, expected_id):

def test_unify_date(formatter):
expected = "2020-05-17 00:00:00"

date = datetime.datetime(2020, 5, 17)
result = formatter.unify_date(date)

Expand Down
3 changes: 2 additions & 1 deletion airflow/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ redis==3.5.3
requests==2.23.0
fakeredis==1.4.1
langdetect==1.0.8
pytest==5.4.3
pytest==5.4.3
flake8=3.8.3
2 changes: 1 addition & 1 deletion api/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def get(self, phrase):
"title": phrase
}
}
}
}
)

news_list = response.get("hits").get("hits")
Expand Down

0 comments on commit ab0818e

Please sign in to comment.