Commit fec4a10

Merge branch 'master' into arsenlosenko/source-facebook-marketing-format-date
arsenlosenko authored Apr 6, 2023
2 parents af03df4 + cc18aba commit fec4a10
Showing 254 changed files with 6,192 additions and 2,816 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.42.1
current_version = 0.43.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)?
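The parse pattern above is what bumpversion uses to split a version string into named parts before bumping one of them. As a minimal illustration of the same regex in Python (the sample version strings are just illustrative):

import re

# The parse pattern from .bumpversion.cfg: three numeric components plus an
# optional lowercase pre-release suffix such as "-alpha".
pattern = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)?")

for version in ("0.42.1", "0.43.0", "0.43.0-alpha"):
    match = pattern.match(version)
    assert match is not None
    print(match.group("major"), match.group("minor"), match.group("patch"))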
2 changes: 1 addition & 1 deletion .env
@@ -10,7 +10,7 @@


### SHARED ###
VERSION=0.42.1
VERSION=0.43.0

# When using the airbyte-db via default docker image
CONFIG_ROOT=/data
55 changes: 55 additions & 0 deletions .github/actions/run-dagger-pipeline/action.yml
@@ -0,0 +1,55 @@
name: "Run Dagger pipeline"
description: "Runs a given dagger pipeline"
inputs:
subcommand:
description: "Subcommand for airbyte-ci"
required: true
options:
description: "Options for the subcommand"
required: false
context:
description: "CI context (e.g., pull_request, manual)"
required: true
runs:
using: "composite"
steps:
- name: Get start timestamp
id: get-start-timestamp
run: echo "::set-output name=start-timestamp::$(date +%s)"
shell: bash
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo }}
ref: ${{ github.event.inputs.gitref }}
- name: Extract branch name
shell: bash
if: github.event_name == 'workflow_dispatch'
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
- name: Install Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install ci-connector-ops package
shell: bash
run: pip install -e ./tools/ci_connector_ops\[pipelines]\
- name: Run airbyte-ci
shell: bash
run: |
export _EXPERIMENTAL_DAGGER_RUNNER_HOST="unix:///var/run/buildkit/buildkitd.sock"
DAGGER_CLI_COMMIT="67c7e7635cf4ea0e446e2fed522a3e314c960f6a"
DAGGER_TMP_BINDIR="/tmp/dagger_${DAGGER_CLI_COMMIT}"
export _EXPERIMENTAL_DAGGER_CLI_BIN="$DAGGER_TMP_BINDIR/dagger"
if [ ! -f "$_EXPERIMENTAL_DAGGER_CLI_BIN" ]; then
mkdir -p "$DAGGER_TMP_BINDIR"
curl "https://dl.dagger.io/dagger/main/${DAGGER_CLI_COMMIT}/dagger_${DAGGER_CLI_COMMIT}_$(uname -s | tr A-Z a-z)_$(uname -m | sed s/x86_64/amd64/).tar.gz" | tar xvz -C "$DAGGER_TMP_BINDIR"
fi
airbyte-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} ${{ inputs.subcommand }} ${{ inputs.options }}
env:
CI_GIT_BRANCH: ${{ github.head_ref }}
CI_GIT_REVISION: ${{ github.event.pull_request.head.sha }}
CI_CONTEXT: "${{ inputs.context }}"
CI_PIPELINE_START_TIMESTAMP: ${{ steps.get-start-timestamp.outputs.start-timestamp }}
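The "Run airbyte-ci" step pins the Dagger CLI to a specific commit and downloads it only when the commit-keyed binary is missing from /tmp, so reruns on a warm runner skip the download. A rough Python sketch of the same download-if-missing pattern (the URL layout and paths mirror the shell above and are assumptions, not a supported API):

import os
import platform
import subprocess

DAGGER_CLI_COMMIT = "67c7e7635cf4ea0e446e2fed522a3e314c960f6a"
BINDIR = f"/tmp/dagger_{DAGGER_CLI_COMMIT}"  # cache keyed by the pinned commit
BINARY = os.path.join(BINDIR, "dagger")

def ensure_dagger_cli() -> str:
    """Download the pinned Dagger CLI only if it is not already cached."""
    if not os.path.isfile(BINARY):
        os.makedirs(BINDIR, exist_ok=True)
        system = platform.system().lower()                       # e.g. "linux"
        machine = platform.machine().replace("x86_64", "amd64")
        url = (
            f"https://dl.dagger.io/dagger/main/{DAGGER_CLI_COMMIT}/"
            f"dagger_{DAGGER_CLI_COMMIT}_{system}_{machine}.tar.gz"
        )
        # curl | tar, exactly as in the workflow step above
        subprocess.run(f"curl -s {url} | tar xz -C {BINDIR}", shell=True, check=True)
    return BINARY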
12 changes: 6 additions & 6 deletions .github/workflows/connector_integration_test_single_dagger.yml
@@ -4,7 +4,7 @@ on:
  workflow_dispatch:
    inputs:
      test-connectors-options:
        description: "Options to pass to the 'connectors-ci test-connectors' command"
        description: "Options to pass to the 'airbyte-ci connectors test' command"
        default: "--modified"
  pull_request:
    paths:
@@ -34,8 +34,8 @@ jobs:
        with:
          python-version: "3.10"
      - name: Install ci-connector-ops package
        run: pip install ./tools/ci_connector_ops\[pipelines\]
      - name: Run connectors-ci test-connectors [WORKFLOW DISPATCH]
        run: pip install -e ./tools/ci_connector_ops\[pipelines\]
      - name: Run airbyte-ci connectors test [WORKFLOW DISPATCH]
        if: github.event_name == 'workflow_dispatch'
        run: |
          export _EXPERIMENTAL_DAGGER_RUNNER_HOST="unix:///var/run/buildkit/buildkitd.sock"
@@ -46,7 +46,7 @@ jobs:
            mkdir -p "$DAGGER_TMP_BINDIR"
            curl "https://dl.dagger.io/dagger/main/${DAGGER_CLI_COMMIT}/dagger_${DAGGER_CLI_COMMIT}_$(uname -s | tr A-Z a-z)_$(uname -m | sed s/x86_64/amd64/).tar.gz" | tar xvz -C "$DAGGER_TMP_BINDIR"
          fi
          connectors-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} test-connectors ${{ github.event.inputs.test-connectors-options }}
          airbyte-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} connectors test ${{ github.event.inputs.test-connectors-options }}
        env:
          _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
@@ -59,7 +59,7 @@ jobs:
          CI_GIT_REVISION: ${{ github.sha }}
          CI_CONTEXT: "manual"
          CI_PIPELINE_START_TIMESTAMP: ${{ steps.get-start-timestamp.outputs.start-timestamp }}
      - name: Run connectors-ci test-connectors [PULL REQUESTS]
      - name: Run airbyte-ci connectors test [PULL REQUESTS]
        if: github.event_name == 'pull_request'
        run: |
          export _EXPERIMENTAL_DAGGER_RUNNER_HOST="unix:///var/run/buildkit/buildkitd.sock"
@@ -70,7 +70,7 @@ jobs:
            mkdir -p "$DAGGER_TMP_BINDIR"
            curl "https://dl.dagger.io/dagger/main/${DAGGER_CLI_COMMIT}/dagger_${DAGGER_CLI_COMMIT}_$(uname -s | tr A-Z a-z)_$(uname -m | sed s/x86_64/amd64/).tar.gz" | tar xvz -C "$DAGGER_TMP_BINDIR"
          fi
          connectors-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} test-connectors --modified
          airbyte-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} connectors test --modified
        env:
          _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
2 changes: 1 addition & 1 deletion .github/workflows/connector_nightly_builds_dagger.yml
@@ -51,7 +51,7 @@ jobs:
            mkdir -p "$DAGGER_TMP_BINDIR"
            curl "https://dl.dagger.io/dagger/main/${DAGGER_CLI_COMMIT}/dagger_${DAGGER_CLI_COMMIT}_$(uname -s | tr A-Z a-z)_$(uname -m | sed s/x86_64/amd64/).tar.gz" | tar xvz -C "$DAGGER_TMP_BINDIR"
          fi
          connectors-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} test-connectors ${{ inputs.test-connectors-options || '--concurrency=5 --release-stage=generally_available --release-stage=beta' }}
          airbyte-ci --is-ci --gha-workflow-run-id=${{ github.run_id }} connectors test ${{ inputs.test-connectors-options || '--concurrency=5 --release-stage=generally_available --release-stage=beta' }}
        env:
          _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
23 changes: 23 additions & 0 deletions .github/workflows/metadata_service_tests_dagger.yml
@@ -0,0 +1,23 @@
name: Metadata Service CI

on:
  workflow_dispatch:
  pull_request:
    paths:
      - "airbyte-ci/connectors/metadata_service/**"
jobs:
  metadata_service_ci:
    name: Metadata Service CI
    timeout-minutes: 240 # 4 hours
    runs-on: medium-runner
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v2
      - name: Run Metadata unit test pipeline
        id: pipelinerun
        uses: ./.github/actions/run-dagger-pipeline
        with:
          subcommand: "metadata test lib"
          context: "pull_request"
        env:
          CI_GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
2 changes: 1 addition & 1 deletion airbyte-cdk/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.33.1
current_version = 0.33.2
commit = False

[bumpversion:file:setup.py]
3 changes: 3 additions & 0 deletions airbyte-cdk/python/CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 0.33.2
low-code: fix duplicate stream slicer update

## 0.33.1
Low-Code CDK: make RecordFilter.filter_records as generator

4 changes: 2 additions & 2 deletions airbyte-cdk/python/Dockerfile
@@ -10,7 +10,7 @@ RUN apk --no-cache upgrade \
    && apk --no-cache add tzdata build-base

# install airbyte-cdk
RUN pip install --prefix=/install airbyte-cdk==0.33.1
RUN pip install --prefix=/install airbyte-cdk==0.33.2

# build a clean environment
FROM base
@@ -32,5 +32,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

# needs to be the same as CDK
LABEL io.airbyte.version=0.33.1
LABEL io.airbyte.version=0.33.2
LABEL io.airbyte.name=airbyte/source-declarative-manifest
@@ -372,12 +372,14 @@ def read_records(
            stream_slice,
            stream_state,
        )
        cursor_updated = False
        for record in records_generator:
            # Only record messages should be parsed to update the cursor which is indicated by the Mapping type
            if isinstance(record, Mapping):
                self.stream_slicer.update_cursor(stream_slice, last_record=record)
                cursor_updated = True
            yield record
        else:
            if not cursor_updated:
                last_record = self._last_records[-1] if self._last_records else None
                if last_record and isinstance(last_record, Mapping):
                    self.stream_slicer.update_cursor(stream_slice, last_record=last_record)
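The fix in this hunk is the new cursor_updated flag. read_records relies on Python's for/else, whose else branch runs whenever the loop finishes without a break; before this change the else branch re-applied the cursor update from self._last_records even when the loop body had already updated the cursor for each record, hence the "duplicate stream slicer update" in the changelog. A minimal self-contained sketch of the guarded pattern (StubSlicer and the dict records are hypothetical stand-ins for the real stream slicer and Airbyte record messages):

class StubSlicer:
    def __init__(self):
        self.update_count = 0

    def update_cursor(self, stream_slice, last_record=None):
        self.update_count += 1

def read(records, last_records, slicer, stream_slice=None):
    cursor_updated = False
    for record in records:
        if isinstance(record, dict):  # stands in for isinstance(record, Mapping)
            slicer.update_cursor(stream_slice, last_record=record)
            cursor_updated = True
        yield record
    else:
        # for/else runs when the loop completes without `break`; without the
        # guard this branch would update the cursor a second time.
        if not cursor_updated:
            last_record = last_records[-1] if last_records else None
            if last_record and isinstance(last_record, dict):
                slicer.update_cursor(stream_slice, last_record=last_record)

slicer = StubSlicer()
list(read([{"id": 1}, {"id": 2}], last_records=[{"id": 2}], slicer=slicer))
assert slicer.update_count == 2  # one update per record, no duplicate from else

empty = StubSlicer()
list(read([], last_records=[{"id": 2}], slicer=empty))
assert empty.update_count == 1  # falls back to the last seen record exactly once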
2 changes: 1 addition & 1 deletion airbyte-cdk/python/setup.py
@@ -17,7 +17,7 @@
    name="airbyte-cdk",
    # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be
    # updated if our semver format changes such as using release candidate versions.
    version="0.33.1",
    version="0.33.2",
    description="A framework for writing Airbyte Connectors.",
    long_description=README,
    long_description_content_type="text/markdown",
@@ -32,7 +32,7 @@
config = {}


@patch.object(HttpStream, "_read_pages", return_value=[])
@patch.object(HttpStream, "_read_pages", return_value=iter([]))
def test_simple_retriever_full(mock_http_stream):
    requester = MagicMock()
    request_params = {"param": "value"}
@@ -117,7 +117,7 @@ def test_simple_retriever_full(mock_http_stream):
    paginator.reset.assert_called()


@patch.object(HttpStream, "_read_pages", return_value=[*request_response_logs, *records])
@patch.object(HttpStream, "_read_pages", return_value=iter([*request_response_logs, *records]))
def test_simple_retriever_with_request_response_logs(mock_http_stream):
    requester = MagicMock()
    paginator = MagicMock()
@@ -153,7 +153,7 @@ def test_simple_retriever_with_request_response_logs(mock_http_stream):
    assert actual_messages[3] == records[1]


@patch.object(HttpStream, "_read_pages", return_value=[])
@patch.object(HttpStream, "_read_pages", return_value=iter([]))
def test_simple_retriever_with_request_response_log_last_records(mock_http_stream):
    requester = MagicMock()
    paginator = MagicMock()
@@ -667,5 +667,37 @@ def test_limit_stream_slices():
    assert truncated_slices == _generate_slices(maximum_number_of_slices)


@pytest.mark.parametrize(
    "test_name, last_records, records, expected_stream_slicer_update_count",
    [
        ("test_two_records", [{"id": -1}], records, 2),
        ("test_no_records", [{"id": -1}], [], 1),
        ("test_no_records_no_previous_records", [], [], 0)
    ]
)
def test_read_records_updates_stream_slicer_once_if_no_records(test_name, last_records, records, expected_stream_slicer_update_count):
    with patch.object(HttpStream, "_read_pages", return_value=iter(records)):
        requester = MagicMock()
        paginator = MagicMock()
        record_selector = MagicMock()
        stream_slicer = MagicMock()

        retriever = SimpleRetriever(
            name="stream_name",
            primary_key=primary_key,
            requester=requester,
            paginator=paginator,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            parameters={},
            config={},
        )
        retriever._last_records = last_records

        list(retriever.read_records(sync_mode=SyncMode.incremental, stream_slice={"repository": "airbyte"}))

        assert stream_slicer.update_cursor.call_count == expected_stream_slicer_update_count


def _generate_slices(number_of_slices):
    return [{"date": f"2022-01-0{day + 1}"} for day in range(number_of_slices)]
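The switch from return_value=[] to return_value=iter([]) in the patches above matters because a patched method hands back the same return-value object on every call: a list can be re-iterated (and supports len() and indexing), while an iterator is one-shot, which matches _read_pages yielding records as a generator. A small standalone illustration (not Airbyte code):

from unittest.mock import MagicMock

mock_list = MagicMock(return_value=[1, 2])
mock_iter = MagicMock(return_value=iter([1, 2]))

assert list(mock_list()) == [1, 2]
assert list(mock_list()) == [1, 2]  # a list return value can be consumed repeatedly

assert list(mock_iter()) == [1, 2]
assert list(mock_iter()) == []      # the shared iterator is exhausted after one pass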
@@ -1,36 +1,45 @@
from dagster import Definitions

from .resources.gcp_resources import gcp_gcs_client, gcs_bucket_manager, gcs_file_manager, gcs_file_blob
from .resources.github_resources import github_client, github_connector_repo, github_connectors_directory
from .resources.local_resources import simple_local_file_manager
from orchestrator.resources.gcp import gcp_gcs_client, gcs_bucket_manager, gcs_file_manager, gcs_file_blob
from orchestrator.resources.github import github_client, github_connector_repo, github_connectors_directory
from orchestrator.resources.local import simple_local_file_manager

from .assets.github_assets import github_connector_folders
from .assets.spec_cache_assets import cached_specs
from .assets.catalog_report_assets import (
from orchestrator.assets.github import github_connector_folders
from orchestrator.assets.spec_cache import cached_specs
from orchestrator.assets.catalog_report import (
    all_sources_dataframe,
    all_destinations_dataframe,
    connector_catalog_location_markdown,
    connector_catalog_location_html,
)
from .assets.catalog_assets import (
from orchestrator.assets.catalog import (
    oss_destinations_dataframe,
    cloud_destinations_dataframe,
    oss_sources_dataframe,
    cloud_sources_dataframe,
    latest_oss_catalog_dict,
    latest_cloud_catalog_dict,
    oss_catalog_from_metadata,
    cloud_catalog_from_metadata,
)
from .assets.metadata_assets import (
from orchestrator.assets.metadata import (
    catalog_derived_metadata_definitions,
    valid_metadata_list,
    valid_metadata_report_dataframe,
)

from .assets.dev_assets import persist_metadata_definitions, overrode_metadata_definitions
from orchestrator.assets.dev import (
    persist_metadata_definitions,
    overrode_metadata_definitions,
    oss_catalog_diff,
    cloud_catalog_diff,
    cloud_catalog_diff_dataframe,
    oss_catalog_diff_dataframe,
)

from .jobs.catalog_jobs import generate_catalog_markdown, generate_local_metadata_files
from .sensors.catalog_sensors import catalog_updated_sensor
from orchestrator.jobs.catalog import generate_catalog_markdown, generate_local_metadata_files
from orchestrator.sensors.catalog import catalog_updated_sensor

from .config import REPORT_FOLDER, CATALOG_FOLDER, CONNECTORS_PATH, CONNECTOR_REPO_NAME
from orchestrator.config import REPORT_FOLDER, CATALOG_FOLDER, CONNECTORS_PATH, CONNECTOR_REPO_NAME

ASSETS = [
    oss_destinations_dataframe,
@@ -45,10 +54,16 @@
    connector_catalog_location_html,
    github_connector_folders,
    catalog_derived_metadata_definitions,
    valid_metadata_list,
    valid_metadata_report_dataframe,
    persist_metadata_definitions,
    overrode_metadata_definitions,
    cached_specs,
    oss_catalog_diff,
    oss_catalog_from_metadata,
    cloud_catalog_diff,
    cloud_catalog_from_metadata,
    cloud_catalog_diff_dataframe,
    oss_catalog_diff_dataframe,
]

RESOURCES = {